Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git (synced 2024-11-29 00:24:16 +01:00)

Commit 758b4ee818 ("525.53")
Parent 7c345b838b

CHANGELOG.md (19 lines changed)
@@ -1,5 +1,22 @@
# Changelog

## Release 525 Entries

### [525.53] 2022-11-10

#### Changed

- GSP firmware is now distributed as multiple firmware files: this release has `gsp_tu10x.bin` and `gsp_ad10x.bin` replacing `gsp.bin` from previous releases.
  - Each file is named after a GPU architecture and supports GPUs from one or more architectures. This allows GSP firmware to better leverage each architecture's capabilities.
  - The .run installer will continue to install firmware to `/lib/firmware/nvidia/<version>` and the `nvidia.ko` kernel module will load the appropriate firmware for each GPU at runtime.

#### Fixed

- Add support for IBT (indirect branch tracking) on supported platforms, [#256](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/256) by @rnd-ash
- Return EINVAL when failing to allocate memory, [#280](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/280) by @YusufKhan-gamedev
- Fix various typos in nvidia/src/kernel, [#16](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/16) by @alexisgeoffrey
- Added support for rotation in X11, Quadro Sync, Stereo, and YUV 4:2:0 on Turing.

## Release 520 Entries

### [520.56.06] 2022-10-12

@@ -29,6 +46,8 @@
- Improved compatibility with new Linux kernel releases
- Fixed possible excessive GPU power draw on an idle X11 or Wayland desktop when driving high resolutions or refresh rates

### [515.65.07] 2022-10-19

### [515.65.01] 2022-08-02

#### Fixed
README.md (21 lines changed)

@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source

This is the source release of the NVIDIA Linux open GPU kernel modules,
version 520.56.06.
version 525.53.

## How to Build

@@ -15,9 +15,9 @@ as root:

    make modules_install -j$(nproc)

Note that the kernel modules built here must be used with gsp.bin
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
520.56.06 driver release. This can be achieved by installing
525.53 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,

@@ -167,7 +167,7 @@ for the target kernel.
## Compatible GPUs

The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 520.56.06 release,
(see the table below). However, in the 525.53 release,
GeForce and Workstation support is still considered alpha-quality.

To enable use of the open kernel modules on GeForce and Workstation GPUs,
@@ -175,7 +175,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:

https://us.download.nvidia.com/XFree86/Linux-x86_64/520.56.06/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/525.53/README/kernel_open.html

In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@@ -652,6 +652,17 @@ Subsystem Device ID.
| NVIDIA PG506-232               | 20B6 10DE 1492 |
| NVIDIA A30                     | 20B7 10DE 1532 |
| NVIDIA A100-PCIE-40GB          | 20F1 10DE 145F |
| NVIDIA A800-SXM4-80GB          | 20F3 10DE 179B |
| NVIDIA A800-SXM4-80GB          | 20F3 10DE 179C |
| NVIDIA A800-SXM4-80GB          | 20F3 10DE 179D |
| NVIDIA A800-SXM4-80GB          | 20F3 10DE 179E |
| NVIDIA A800-SXM4-80GB          | 20F3 10DE 179F |
| NVIDIA A800-SXM4-80GB          | 20F3 10DE 17A0 |
| NVIDIA A800-SXM4-80GB          | 20F3 10DE 17A1 |
| NVIDIA A800-SXM4-80GB          | 20F3 10DE 17A2 |
| NVIDIA A800 80GB PCIe          | 20F5 10DE 1799 |
| NVIDIA A800 80GB PCIe LC       | 20F5 10DE 179A |
| NVIDIA A800 40GB PCIe          | 20F6 10DE 17A3 |
| NVIDIA GeForce GTX 1660 Ti     | 2182           |
| NVIDIA GeForce GTX 1660        | 2184           |
| NVIDIA GeForce GTX 1650 SUPER  | 2187           |
@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
|
||||
EXTRA_CFLAGS += -I$(src)
|
||||
EXTRA_CFLAGS += -Wall -MD $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
|
||||
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"520.56.06\"
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"525.53\"
|
||||
|
||||
EXTRA_CFLAGS += -Wno-unused-function
|
||||
|
||||
@ -229,6 +229,7 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
drm/drm_ioctl.h \
|
||||
drm/drm_device.h \
|
||||
drm/drm_mode_config.h \
|
||||
drm/drm_modeset_lock.h \
|
||||
dt-bindings/interconnect/tegra_icc_id.h \
|
||||
generated/autoconf.h \
|
||||
generated/compile.h \
|
||||
@ -243,6 +244,8 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
linux/log2.h \
|
||||
linux/of.h \
|
||||
linux/bug.h \
|
||||
linux/sched.h \
|
||||
linux/sched/mm.h \
|
||||
linux/sched/signal.h \
|
||||
linux/sched/task.h \
|
||||
linux/sched/task_stack.h \
|
||||
@ -286,7 +289,10 @@ NV_HEADER_PRESENCE_TESTS = \
|
||||
linux/ioasid.h \
|
||||
linux/stdarg.h \
|
||||
linux/iosys-map.h \
|
||||
asm/coco.h
|
||||
asm/coco.h \
|
||||
linux/vfio_pci_core.h \
|
||||
soc/tegra/bpmp-abi.h \
|
||||
soc/tegra/bpmp.h
|
||||
|
||||
# Filename to store the define for the header in $(1); this is only consumed by
|
||||
# the rule below that concatenates all of these together.
|
||||
|
@ -242,7 +242,7 @@
|
||||
#endif
|
||||
|
||||
/* For verification-only features not intended to be included in normal drivers */
|
||||
#if (defined(NV_MODS) || defined(NV_GSP_MODS)) && defined(DEBUG) && !defined(DISABLE_VERIF_FEATURES)
|
||||
#if defined(ENABLE_VERIF_FEATURES)
|
||||
#define NV_VERIF_FEATURES
|
||||
#endif
|
||||
|
||||
@ -276,12 +276,6 @@
|
||||
#define NV_IS_MODS 0
|
||||
#endif
|
||||
|
||||
#if defined(NV_GSP_MODS)
|
||||
#define NV_IS_GSP_MODS 1
|
||||
#else
|
||||
#define NV_IS_GSP_MODS 0
|
||||
#endif
|
||||
|
||||
#if defined(NV_WINDOWS)
|
||||
#define NVOS_IS_WINDOWS 1
|
||||
#else
|
||||
|
kernel-open/common/inc/nv-firmware.h (new file, 132 lines)

@@ -0,0 +1,132 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#ifndef NV_FIRMWARE_H
#define NV_FIRMWARE_H

#include <nvtypes.h>
#include <nvmisc.h>

typedef enum
{
    NV_FIRMWARE_TYPE_GSP,
    NV_FIRMWARE_TYPE_GSP_LOG
} nv_firmware_type_t;

typedef enum
{
    NV_FIRMWARE_CHIP_FAMILY_NULL  = 0,
    NV_FIRMWARE_CHIP_FAMILY_TU10X = 1,
    NV_FIRMWARE_CHIP_FAMILY_TU11X = 2,
    NV_FIRMWARE_CHIP_FAMILY_GA100 = 3,
    NV_FIRMWARE_CHIP_FAMILY_GA10X = 4,
    NV_FIRMWARE_CHIP_FAMILY_AD10X = 5,
    NV_FIRMWARE_CHIP_FAMILY_GH100 = 6,
    NV_FIRMWARE_CHIP_FAMILY_END,
} nv_firmware_chip_family_t;

static inline const char *nv_firmware_chip_family_to_string(
    nv_firmware_chip_family_t fw_chip_family
)
{
    switch (fw_chip_family) {
        case NV_FIRMWARE_CHIP_FAMILY_GH100: return "gh100";
        case NV_FIRMWARE_CHIP_FAMILY_AD10X: return "ad10x";
        case NV_FIRMWARE_CHIP_FAMILY_GA10X: return "ga10x";
        case NV_FIRMWARE_CHIP_FAMILY_GA100: return "ga100";
        case NV_FIRMWARE_CHIP_FAMILY_TU11X: return "tu11x";
        case NV_FIRMWARE_CHIP_FAMILY_TU10X: return "tu10x";

        case NV_FIRMWARE_CHIP_FAMILY_END:  // fall through
        case NV_FIRMWARE_CHIP_FAMILY_NULL:
            return NULL;
    }
    return NULL;
}

// The includer (presumably nv.c) may optionally define
// NV_FIRMWARE_PATH_FOR_FILENAME(filename)
// to return a string "path" given a gsp_*.bin or gsp_log_*.bin filename.
//
// The function nv_firmware_path will then be available.
#if defined(NV_FIRMWARE_PATH_FOR_FILENAME)
static inline const char *nv_firmware_path(
    nv_firmware_type_t fw_type,
    nv_firmware_chip_family_t fw_chip_family
)
{
    if (fw_type == NV_FIRMWARE_TYPE_GSP)
    {
        switch (fw_chip_family)
        {
            case NV_FIRMWARE_CHIP_FAMILY_AD10X:
                return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_ad10x.bin");

            case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
            case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
            case NV_FIRMWARE_CHIP_FAMILY_GA10X: // fall through
            case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
            case NV_FIRMWARE_CHIP_FAMILY_TU10X:
                return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_tu10x.bin");

            case NV_FIRMWARE_CHIP_FAMILY_END:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_NULL:
                return "";
        }
    }
    else if (fw_type == NV_FIRMWARE_TYPE_GSP_LOG)
    {
        switch (fw_chip_family)
        {
            case NV_FIRMWARE_CHIP_FAMILY_AD10X:
                return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_ad10x.bin");

            case NV_FIRMWARE_CHIP_FAMILY_GH100: // fall through
            case NV_FIRMWARE_CHIP_FAMILY_GA100: // fall through
            case NV_FIRMWARE_CHIP_FAMILY_GA10X: // fall through
            case NV_FIRMWARE_CHIP_FAMILY_TU11X: // fall through
            case NV_FIRMWARE_CHIP_FAMILY_TU10X:
                return NV_FIRMWARE_PATH_FOR_FILENAME("gsp_log_tu10x.bin");

            case NV_FIRMWARE_CHIP_FAMILY_END:  // fall through
            case NV_FIRMWARE_CHIP_FAMILY_NULL:
                return "";
        }
    }

    return "";
}
#endif  // defined(NV_FIRMWARE_PATH_FOR_FILENAME)

// The includer (presumably nv.c) may optionally define
// NV_FIRMWARE_DECLARE_GSP_FILENAME(filename)
// which will then be invoked (at the top-level) for each
// gsp_*.bin (but not gsp_log_*.bin)
#if defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_ad10x.bin")
NV_FIRMWARE_DECLARE_GSP_FILENAME("gsp_tu10x.bin")
#endif  // defined(NV_FIRMWARE_DECLARE_GSP_FILENAME)

#endif  // NV_FIRMWARE_H
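For orientation, here is a hedged sketch of how a consumer of this header might wire it up. Only nv_firmware_path(), the enums, and the gsp_tu10x.bin / gsp_ad10x.bin filenames come from the header above; the macro bodies, the "nvidia/525.53/" path prefix, and the MODULE_FIRMWARE wiring are illustrative assumptions, not necessarily what the driver's nv.c does.

/* Illustrative consumer of nv-firmware.h; not the driver's actual code. */
#include <linux/module.h>

#define NV_FIRMWARE_PATH_FOR_FILENAME(filename)  "nvidia/525.53/" filename
#define NV_FIRMWARE_DECLARE_GSP_FILENAME(filename) \
    MODULE_FIRMWARE(NV_FIRMWARE_PATH_FOR_FILENAME(filename));

#include "nv-firmware.h"

/* Resolve the firmware path for a detected chip family, e.g.
 * "nvidia/525.53/gsp_tu10x.bin" for a Turing or Ampere GPU. */
static const char *example_gsp_firmware_path(nv_firmware_chip_family_t family)
{
    return nv_firmware_path(NV_FIRMWARE_TYPE_GSP, family);
}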
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2020-22 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -91,6 +91,6 @@ static inline void _nv_hash_init(struct hlist_head *ht, unsigned int sz)
|
||||
* @key: the key of the objects to iterate over
|
||||
*/
|
||||
#define nv_hash_for_each_possible(name, obj, member, key) \
|
||||
nv_hlist_for_each_entry(obj, &name[NV_HASH_MIN(key, NV_HASH_BITS(name))], member)
|
||||
hlist_for_each_entry(obj, &name[NV_HASH_MIN(key, NV_HASH_BITS(name))], member)
|
||||
|
||||
#endif // __NV_HASH_H__
|
||||
|
@ -27,15 +27,13 @@
|
||||
#include <nv-kernel-interface-api.h>
|
||||
|
||||
// Enums for supported hypervisor types.
|
||||
// New hypervisor type should be added before OS_HYPERVISOR_CUSTOM_FORCED
|
||||
// New hypervisor type should be added before OS_HYPERVISOR_UNKNOWN
|
||||
typedef enum _HYPERVISOR_TYPE
|
||||
{
|
||||
OS_HYPERVISOR_XEN = 0,
|
||||
OS_HYPERVISOR_VMWARE,
|
||||
OS_HYPERVISOR_HYPERV,
|
||||
OS_HYPERVISOR_KVM,
|
||||
OS_HYPERVISOR_PARALLELS,
|
||||
OS_HYPERVISOR_CUSTOM_FORCED,
|
||||
OS_HYPERVISOR_UNKNOWN
|
||||
} HYPERVISOR_TYPE;
|
||||
|
||||
|
@ -115,11 +115,6 @@ struct nv_kthread_q_item
|
||||
void *function_args;
|
||||
};
|
||||
|
||||
#if defined(NV_KTHREAD_CREATE_ON_NODE_PRESENT)
|
||||
#define NV_KTHREAD_Q_SUPPORTS_AFFINITY() 1
|
||||
#else
|
||||
#define NV_KTHREAD_Q_SUPPORTS_AFFINITY() 0
|
||||
#endif
|
||||
|
||||
#ifndef NUMA_NO_NODE
|
||||
#define NUMA_NO_NODE (-1)
|
||||
@ -142,18 +137,12 @@ struct nv_kthread_q_item
|
||||
//
|
||||
// A short prefix of the qname arg will show up in []'s, via the ps(1) utility.
|
||||
//
|
||||
// The kernel thread stack is preferably allocated on the specified NUMA node if
|
||||
// NUMA-affinity (NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1) is supported, but
|
||||
// fallback to another node is possible because kernel allocators do not
|
||||
// The kernel thread stack is preferably allocated on the specified NUMA node,
|
||||
// but fallback to another node is possible because kernel allocators do not
|
||||
// guarantee affinity. Note that NUMA-affinity applies only to
|
||||
// the kthread stack. This API does not do anything about limiting the CPU
|
||||
// affinity of the kthread. That is left to the caller.
|
||||
//
|
||||
// On kernels, which do not support NUMA-aware kthread stack allocations
|
||||
// (NV_KTHTREAD_Q_SUPPORTS_AFFINITY() == 0), the API will return -ENOTSUPP
|
||||
// if the value supplied for 'preferred_node' is anything other than
|
||||
// NV_KTHREAD_NO_NODE.
|
||||
//
|
||||
// Reusing a queue: once a queue is initialized, it must be safely shut down
|
||||
// (see "Stopping the queue(s)", below), before it can be reused. So, for
|
||||
// a simple queue use case, the following will work:
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2001-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2001-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -191,13 +191,6 @@
|
||||
*/
|
||||
#define NV_CURRENT_EUID() (__kuid_val(current->cred->euid))
|
||||
|
||||
#if !defined(NV_KUID_T_PRESENT)
|
||||
static inline uid_t __kuid_val(uid_t uid)
|
||||
{
|
||||
return uid;
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_VGA_ARB)
|
||||
#include <linux/vgaarb.h>
|
||||
#endif
|
||||
@ -234,18 +227,6 @@ static inline uid_t __kuid_val(uid_t uid)
|
||||
#include <asm-generic/pci-dma-compat.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_EFI_ENABLED_PRESENT) && defined(NV_EFI_ENABLED_ARGUMENT_COUNT)
|
||||
#if (NV_EFI_ENABLED_ARGUMENT_COUNT == 1)
|
||||
#define NV_EFI_ENABLED() efi_enabled(EFI_BOOT)
|
||||
#else
|
||||
#error "NV_EFI_ENABLED_ARGUMENT_COUNT value unrecognized!"
|
||||
#endif
|
||||
#elif (defined(NV_EFI_ENABLED_PRESENT) || defined(efi_enabled))
|
||||
#define NV_EFI_ENABLED() efi_enabled
|
||||
#else
|
||||
#define NV_EFI_ENABLED() 0
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_CRAY_XT)
|
||||
#include <cray/cray_nvidia.h>
|
||||
NV_STATUS nvos_forward_error_to_cray(struct pci_dev *, NvU32,
|
||||
@ -521,7 +502,7 @@ static inline void *nv_vmalloc(unsigned long size)
|
||||
return ptr;
|
||||
}
|
||||
|
||||
static inline void nv_vfree(void *ptr, NvU32 size)
|
||||
static inline void nv_vfree(void *ptr, NvU64 size)
|
||||
{
|
||||
NV_MEMDBG_REMOVE(ptr, size);
|
||||
vfree(ptr);
|
||||
@ -592,11 +573,7 @@ static NvBool nv_numa_node_has_memory(int node_id)
|
||||
{
|
||||
if (node_id < 0 || node_id >= MAX_NUMNODES)
|
||||
return NV_FALSE;
|
||||
#if defined(NV_NODE_STATES_N_MEMORY_PRESENT)
|
||||
return node_state(node_id, N_MEMORY) ? NV_TRUE : NV_FALSE;
|
||||
#else
|
||||
return node_state(node_id, N_HIGH_MEMORY) ? NV_TRUE : NV_FALSE;
|
||||
#endif
|
||||
}
|
||||
|
||||
#define NV_KMALLOC(ptr, size) \
|
||||
@ -606,6 +583,13 @@ static NvBool nv_numa_node_has_memory(int node_id)
|
||||
NV_MEMDBG_ADD(ptr, size); \
|
||||
}
|
||||
|
||||
#define NV_KZALLOC(ptr, size) \
|
||||
{ \
|
||||
(ptr) = kzalloc(size, NV_GFP_KERNEL); \
|
||||
if (ptr) \
|
||||
NV_MEMDBG_ADD(ptr, size); \
|
||||
}
|
||||
|
||||
#define NV_KMALLOC_ATOMIC(ptr, size) \
|
||||
{ \
|
||||
(ptr) = kmalloc(size, NV_GFP_ATOMIC); \
|
||||
@ -838,10 +822,8 @@ static inline dma_addr_t nv_phys_to_dma(struct device *dev, NvU64 pa)
|
||||
})
|
||||
#endif
|
||||
|
||||
#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.4.9
|
||||
#if defined(NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT) // introduced in 3.18-rc1 for aarch64
|
||||
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_stop_and_remove_bus_device(pci_dev)
|
||||
#elif defined(NV_PCI_REMOVE_BUS_DEVICE_PRESENT) // introduced in 2.6
|
||||
#define NV_PCI_STOP_AND_REMOVE_BUS_DEVICE(pci_dev) pci_remove_bus_device(pci_dev)
|
||||
#endif
|
||||
|
||||
#define NV_PRINT_AT(nv_debug_level,at) \
|
||||
@ -1139,11 +1121,14 @@ static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
|
||||
{
|
||||
nvidia_stack_t *sp = NULL;
|
||||
#if defined(NVCPU_X86_64)
|
||||
sp = NV_KMEM_CACHE_ALLOC(nvidia_stack_t_cache);
|
||||
if (sp == NULL)
|
||||
return -ENOMEM;
|
||||
sp->size = sizeof(sp->stack);
|
||||
sp->top = sp->stack + sp->size;
|
||||
if (rm_is_altstack_in_use())
|
||||
{
|
||||
sp = NV_KMEM_CACHE_ALLOC(nvidia_stack_t_cache);
|
||||
if (sp == NULL)
|
||||
return -ENOMEM;
|
||||
sp->size = sizeof(sp->stack);
|
||||
sp->top = sp->stack + sp->size;
|
||||
}
|
||||
#endif
|
||||
*stack = sp;
|
||||
return 0;
|
||||
@ -1152,7 +1137,7 @@ static inline int nv_kmem_cache_alloc_stack(nvidia_stack_t **stack)
|
||||
static inline void nv_kmem_cache_free_stack(nvidia_stack_t *stack)
|
||||
{
|
||||
#if defined(NVCPU_X86_64)
|
||||
if (stack != NULL)
|
||||
if (stack != NULL && rm_is_altstack_in_use())
|
||||
{
|
||||
NV_KMEM_CACHE_FREE(stack, nvidia_stack_t_cache);
|
||||
}
|
||||
@ -1386,8 +1371,7 @@ typedef struct nv_dma_map_s {
|
||||
* xen_swiotlb_map_sg_attrs may try to route to the SWIOTLB. We must only use
|
||||
* single-page sg elements on Xen Server.
|
||||
*/
|
||||
#if defined(NV_SG_ALLOC_TABLE_FROM_PAGES_PRESENT) && \
|
||||
!defined(NV_DOM0_KERNEL_PRESENT)
|
||||
#if !defined(NV_DOM0_KERNEL_PRESENT)
|
||||
#define NV_ALLOC_DMA_SUBMAP_SCATTERLIST(dm, sm, i) \
|
||||
((sg_alloc_table_from_pages(&sm->sgt, \
|
||||
&dm->pages[NV_DMA_SUBMAP_IDX_TO_PAGE_IDX(i)], \
|
||||
@ -1667,6 +1651,27 @@ static inline nv_linux_file_private_t *nv_get_nvlfp_from_nvfp(nv_file_private_t
|
||||
|
||||
#define NV_STATE_PTR(nvl) &(((nv_linux_state_t *)(nvl))->nv_state)
|
||||
|
||||
static inline nvidia_stack_t *nv_nvlfp_get_sp(nv_linux_file_private_t *nvlfp, nvidia_entry_point_index_t which)
|
||||
{
|
||||
#if defined(NVCPU_X86_64)
|
||||
if (rm_is_altstack_in_use())
|
||||
{
|
||||
down(&nvlfp->fops_sp_lock[which]);
|
||||
return nvlfp->fops_sp[which];
|
||||
}
|
||||
#endif
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static inline void nv_nvlfp_put_sp(nv_linux_file_private_t *nvlfp, nvidia_entry_point_index_t which)
|
||||
{
|
||||
#if defined(NVCPU_X86_64)
|
||||
if (rm_is_altstack_in_use())
|
||||
{
|
||||
up(&nvlfp->fops_sp_lock[which]);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#define NV_ATOMIC_READ(data) atomic_read(&(data))
|
||||
#define NV_ATOMIC_SET(data,val) atomic_set(&(data), (val))
|
||||
@ -1895,20 +1900,12 @@ static inline NvU32 nv_default_irq_flags(nv_state_t *nv)
|
||||
#define NV_GET_UNUSED_FD_FLAGS(flags) (-1)
|
||||
#endif
|
||||
|
||||
#if defined(NV_SET_CLOSE_ON_EXEC_PRESENT)
|
||||
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) __set_close_on_exec(fd, fdt)
|
||||
#elif defined(NV_LINUX_TIME_H_PRESENT) && defined(FD_SET)
|
||||
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) FD_SET(fd, fdt->close_on_exec)
|
||||
#else
|
||||
#define NV_SET_CLOSE_ON_EXEC(fd, fdt) __set_bit(fd, fdt->close_on_exec)
|
||||
#endif
|
||||
|
||||
#define MODULE_BASE_NAME "nvidia"
|
||||
#define MODULE_INSTANCE_NUMBER 0
|
||||
#define MODULE_INSTANCE_STRING ""
|
||||
#define MODULE_NAME MODULE_BASE_NAME MODULE_INSTANCE_STRING
|
||||
|
||||
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32);
|
||||
NvS32 nv_request_soc_irq(nv_linux_state_t *, NvU32, nv_soc_irq_type_t, NvU32, NvU32, const char*);
|
||||
|
||||
static inline void nv_mutex_destroy(struct mutex *lock)
|
||||
{
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2013-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -73,21 +73,4 @@
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT)
|
||||
#if NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT == 3
|
||||
#define nv_hlist_for_each_entry(pos, head, member) \
|
||||
hlist_for_each_entry(pos, head, member)
|
||||
#else
|
||||
#if !defined(hlist_entry_safe)
|
||||
#define hlist_entry_safe(ptr, type, member) \
|
||||
(ptr) ? hlist_entry(ptr, type, member) : NULL
|
||||
#endif
|
||||
|
||||
#define nv_hlist_for_each_entry(pos, head, member) \
|
||||
for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member); \
|
||||
pos; \
|
||||
pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member))
|
||||
#endif
|
||||
#endif // NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT
|
||||
|
||||
#endif // __NV_LIST_HELPERS_H__
|
||||
|
@ -29,6 +29,25 @@
|
||||
typedef int vm_fault_t;
|
||||
#endif
|
||||
|
||||
/* pin_user_pages
 * Presence of pin_user_pages() also implies the presence of unpin_user_page().
 * Both were added in v5.6-rc1.
 *
 * pin_user_pages() was added by commit eddb1c228f7951d399240
 * ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6-rc1 (2020-01-30)
 */
|
||||
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
#if defined(NV_PIN_USER_PAGES_PRESENT)
|
||||
#define NV_PIN_USER_PAGES pin_user_pages
|
||||
#define NV_UNPIN_USER_PAGE unpin_user_page
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES NV_GET_USER_PAGES
|
||||
#define NV_UNPIN_USER_PAGE put_page
|
||||
#endif // NV_PIN_USER_PAGES_PRESENT
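A minimal usage sketch (hypothetical caller, not from this commit) showing why the wrapper pair matters: the same call site compiles whether the kernel provides pin_user_pages()/unpin_user_page() or only the older get_user_pages()/put_page() pair.

/* Illustrative only: pin a user buffer for write access, then release it. */
static long example_pin_user_buffer(unsigned long uaddr,
                                    unsigned long nr_pages,
                                    struct page **pages)
{
    long pinned = NV_PIN_USER_PAGES(uaddr, nr_pages, FOLL_WRITE, pages, NULL);

    if (pinned <= 0)
        return pinned ? pinned : -EFAULT;

    /* ... access the pinned pages ... */

    while (pinned-- > 0)
        NV_UNPIN_USER_PAGE(pages[pinned]);  /* put_page() on pre-5.6 kernels */

    return 0;
}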
|
||||
|
||||
/* get_user_pages
|
||||
*
|
||||
* The 8-argument version of get_user_pages was deprecated by commit
|
||||
@ -47,51 +66,57 @@ typedef int vm_fault_t;
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE)
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS)
|
||||
#define NV_GET_USER_PAGES get_user_pages
|
||||
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE)
|
||||
#define NV_GET_USER_PAGES(start, nr_pages, write, force, pages, vmas) \
|
||||
get_user_pages(current, current->mm, start, nr_pages, write, force, pages, vmas)
|
||||
#elif defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS)
|
||||
#define NV_GET_USER_PAGES(start, nr_pages, flags, pages, vmas) \
|
||||
get_user_pages(current, current->mm, start, nr_pages, flags, pages, vmas)
|
||||
#else
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
static inline long NV_GET_USER_PAGES(unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
int write,
|
||||
int force,
|
||||
unsigned int flags,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
{
|
||||
unsigned int flags = 0;
|
||||
int write = flags & FOLL_WRITE;
|
||||
int force = flags & FOLL_FORCE;
|
||||
|
||||
if (write)
|
||||
flags |= FOLL_WRITE;
|
||||
if (force)
|
||||
flags |= FOLL_FORCE;
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_FLAGS)
|
||||
return get_user_pages(current, current->mm, start, nr_pages, flags,
|
||||
pages, vmas);
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE)
|
||||
return get_user_pages(start, nr_pages, write, force, pages, vmas);
|
||||
#else
|
||||
// remaining definition: NV_GET_USER_PAGES_HAS_ARGS_FLAGS
|
||||
return get_user_pages(start, nr_pages, flags, pages, vmas);
|
||||
#endif
|
||||
// NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE
|
||||
return get_user_pages(current, current->mm, start, nr_pages, write,
|
||||
force, pages, vmas);
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_WRITE_FORCE
|
||||
}
|
||||
#endif
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_FLAGS
|
||||
|
||||
/* pin_user_pages_remote
|
||||
*
|
||||
* pin_user_pages_remote() was added by commit eddb1c228f7951d399240
|
||||
* ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in v5.6 (2020-01-30)
|
||||
*
|
||||
* pin_user_pages_remote() removed 'tsk' parameter by commit
|
||||
* 64019a2e467a ("mm/gup: remove task_struct pointer for all gup code")
|
||||
* in v5.9-rc1 (2020-08-11).
|
||||
*
|
||||
*/
|
||||
|
||||
#if defined(NV_PIN_USER_PAGES_REMOTE_PRESENT)
|
||||
#if defined (NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK)
|
||||
#define NV_PIN_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
pin_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES_REMOTE pin_user_pages_remote
|
||||
#endif // NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK
|
||||
#else
|
||||
#define NV_PIN_USER_PAGES_REMOTE NV_GET_USER_PAGES_REMOTE
|
||||
#endif // NV_PIN_USER_PAGES_REMOTE_PRESENT
|
||||
|
||||
/*
|
||||
* get_user_pages_remote() was added by commit 1e9877902dc7
|
||||
* ("mm/gup: Introduce get_user_pages_remote()") in v4.6 (2016-02-12).
|
||||
*
|
||||
* The very next commit cde70140fed8 ("mm/gup: Overload get_user_pages()
|
||||
* functions") deprecated the 8-argument version of get_user_pages for the
|
||||
* non-remote case (calling get_user_pages with current and current->mm).
|
||||
*
|
||||
* The guidelines are: call NV_GET_USER_PAGES_REMOTE if you need the 8-argument
|
||||
* version that uses something other than current and current->mm. Use
|
||||
* NV_GET_USER_PAGES if you are referring to current and current->mm.
|
||||
*
|
||||
* Note that get_user_pages_remote() requires the caller to hold a reference on
|
||||
* the task_struct (if non-NULL and if this API has tsk argument) and the mm_struct.
|
||||
* This will always be true when using current and current->mm. If the kernel passes
|
||||
@ -113,66 +138,55 @@ typedef int vm_fault_t;
|
||||
*/
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_PRESENT)
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE)
|
||||
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED)
|
||||
#define NV_GET_USER_PAGES_REMOTE get_user_pages_remote
|
||||
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas, locked)
|
||||
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS)
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages_remote(NULL, mm, start, nr_pages, flags, pages, vmas)
|
||||
|
||||
#else
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct task_struct *tsk,
|
||||
struct mm_struct *mm,
|
||||
// NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_WRITE_FORCE
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
|
||||
unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
int write,
|
||||
int force,
|
||||
unsigned int flags,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
struct vm_area_struct **vmas,
|
||||
int *locked)
|
||||
{
|
||||
unsigned int flags = 0;
|
||||
int write = flags & FOLL_WRITE;
|
||||
int force = flags & FOLL_FORCE;
|
||||
|
||||
if (write)
|
||||
flags |= FOLL_WRITE;
|
||||
if (force)
|
||||
flags |= FOLL_FORCE;
|
||||
|
||||
#if defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS)
|
||||
return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
|
||||
return get_user_pages_remote(NULL, mm, start, nr_pages, write, force,
|
||||
pages, vmas);
|
||||
#elif defined(NV_GET_USER_PAGES_REMOTE_HAS_ARGS_TSK_FLAGS_LOCKED)
|
||||
return get_user_pages_remote(tsk, mm, start, nr_pages, flags,
|
||||
pages, vmas, NULL);
|
||||
#else
|
||||
// remaining definition: NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED
|
||||
return get_user_pages_remote(mm, start, nr_pages, flags,
|
||||
pages, vmas, NULL);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#endif // NV_GET_USER_PAGES_REMOTE_HAS_ARGS_FLAGS_LOCKED
|
||||
#else
|
||||
#if defined(NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE)
|
||||
#define NV_GET_USER_PAGES_REMOTE get_user_pages
|
||||
#else
|
||||
#include <linux/mm.h>
|
||||
#include <linux/sched.h>
|
||||
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct task_struct *tsk,
|
||||
struct mm_struct *mm,
|
||||
static inline long NV_GET_USER_PAGES_REMOTE(struct mm_struct *mm,
|
||||
unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
int write,
|
||||
int force,
|
||||
unsigned int flags,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas)
|
||||
struct vm_area_struct **vmas,
|
||||
int *locked)
|
||||
{
|
||||
unsigned int flags = 0;
|
||||
int write = flags & FOLL_WRITE;
|
||||
int force = flags & FOLL_FORCE;
|
||||
|
||||
if (write)
|
||||
flags |= FOLL_WRITE;
|
||||
if (force)
|
||||
flags |= FOLL_FORCE;
|
||||
|
||||
return get_user_pages(tsk, mm, start, nr_pages, flags, pages, vmas);
|
||||
return get_user_pages(NULL, mm, start, nr_pages, write, force, pages, vmas);
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#else
|
||||
#define NV_GET_USER_PAGES_REMOTE(mm, start, nr_pages, flags, pages, vmas, locked) \
|
||||
get_user_pages(NULL, mm, start, nr_pages, flags, pages, vmas)
|
||||
#endif // NV_GET_USER_PAGES_HAS_ARGS_TSK_WRITE_FORCE
|
||||
#endif // NV_GET_USER_PAGES_REMOTE_PRESENT
|
||||
|
||||
/*
|
||||
* The .virtual_address field was effectively renamed to .address, by these
|
||||
|
@ -27,17 +27,6 @@
|
||||
#include <linux/pci.h>
|
||||
#include "nv-linux.h"
|
||||
|
||||
#if defined(NV_DEV_IS_PCI_PRESENT)
|
||||
#define nv_dev_is_pci(dev) dev_is_pci(dev)
|
||||
#else
|
||||
/*
|
||||
* Non-PCI devices are only supported on kernels which expose the
|
||||
* dev_is_pci() function. For older kernels, we only support PCI
|
||||
* devices, hence returning true to take all the PCI code paths.
|
||||
*/
|
||||
#define nv_dev_is_pci(dev) (true)
|
||||
#endif
|
||||
|
||||
int nv_pci_register_driver(void);
|
||||
void nv_pci_unregister_driver(void);
|
||||
int nv_pci_count_devices(void);
|
||||
|
@ -78,13 +78,8 @@ static inline pgprot_t pgprot_modify_writecombine(pgprot_t old_prot)
|
||||
|
||||
#define NV_PGPROT_UNCACHED_DEVICE(old_prot) pgprot_noncached(old_prot)
|
||||
#if defined(NVCPU_AARCH64)
|
||||
#if defined(NV_MT_DEVICE_GRE_PRESENT)
|
||||
#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
|
||||
PTE_ATTRINDX(MT_DEVICE_GRE))
|
||||
#else
|
||||
#define NV_PROT_WRITE_COMBINED_DEVICE (PROT_DEFAULT | PTE_PXN | PTE_UXN | \
|
||||
PTE_ATTRINDX(MT_DEVICE_nGnRE))
|
||||
#endif
|
||||
#define NV_PGPROT_WRITE_COMBINED_DEVICE(old_prot) \
|
||||
__pgprot_modify(old_prot, PTE_ATTRINDX_MASK, NV_PROT_WRITE_COMBINED_DEVICE)
|
||||
#define NV_PGPROT_WRITE_COMBINED(old_prot) NV_PGPROT_UNCACHED(old_prot)
|
||||
|
@ -74,21 +74,8 @@ typedef struct file_operations nv_proc_ops_t;
|
||||
__entry; \
|
||||
})
|
||||
|
||||
/*
|
||||
* proc_mkdir_mode exists in Linux 2.6.9, but isn't exported until Linux 3.0.
|
||||
* Use the older interface instead unless the newer interface is necessary.
|
||||
*/
|
||||
#if defined(NV_PROC_REMOVE_PRESENT)
|
||||
# define NV_PROC_MKDIR_MODE(name, mode, parent) \
|
||||
proc_mkdir_mode(name, mode, parent)
|
||||
#else
|
||||
# define NV_PROC_MKDIR_MODE(name, mode, parent) \
|
||||
({ \
|
||||
struct proc_dir_entry *__entry; \
|
||||
__entry = create_proc_entry(name, mode, parent); \
|
||||
__entry; \
|
||||
})
|
||||
#endif
|
||||
|
||||
#define NV_CREATE_PROC_DIR(name,parent) \
|
||||
({ \
|
||||
@ -104,16 +91,6 @@ typedef struct file_operations nv_proc_ops_t;
|
||||
#define NV_PDE_DATA(inode) PDE_DATA(inode)
|
||||
#endif
|
||||
|
||||
#if defined(NV_PROC_REMOVE_PRESENT)
|
||||
# define NV_REMOVE_PROC_ENTRY(entry) \
|
||||
proc_remove(entry);
|
||||
#else
|
||||
# define NV_REMOVE_PROC_ENTRY(entry) \
|
||||
remove_proc_entry(entry->name, entry->parent);
|
||||
#endif
|
||||
|
||||
void nv_procfs_unregister_all(struct proc_dir_entry *entry,
|
||||
struct proc_dir_entry *delimiter);
|
||||
#define NV_DEFINE_SINGLE_PROCFS_FILE_HELPER(name, lock) \
|
||||
static int nv_procfs_open_##name( \
|
||||
struct inode *inode, \
|
||||
|
@ -54,8 +54,6 @@ void nv_free_contig_pages (nv_alloc_t *);
|
||||
NV_STATUS nv_alloc_system_pages (nv_state_t *, nv_alloc_t *);
|
||||
void nv_free_system_pages (nv_alloc_t *);
|
||||
|
||||
void nv_address_space_init_once (struct address_space *mapping);
|
||||
|
||||
int nv_uvm_init (void);
|
||||
void nv_uvm_exit (void);
|
||||
NV_STATUS nv_uvm_suspend (void);
|
||||
|
@ -40,6 +40,7 @@
|
||||
#include <nvstatus.h>
|
||||
#include "nv_stdarg.h"
|
||||
#include <nv-caps.h>
|
||||
#include <nv-firmware.h>
|
||||
#include <nv-ioctl.h>
|
||||
#include <nvmisc.h>
|
||||
|
||||
@ -160,8 +161,14 @@ typedef enum _TEGRASOC_WHICH_CLK
|
||||
TEGRASOC_WHICH_CLK_MAUD,
|
||||
TEGRASOC_WHICH_CLK_AZA_2XBIT,
|
||||
TEGRASOC_WHICH_CLK_AZA_BIT,
|
||||
TEGRA234_CLK_MIPI_CAL,
|
||||
TEGRA234_CLK_UART_FST_MIPI_CAL,
|
||||
TEGRASOC_WHICH_CLK_MIPI_CAL,
|
||||
TEGRASOC_WHICH_CLK_UART_FST_MIPI_CAL,
|
||||
TEGRASOC_WHICH_CLK_SOR0_DIV,
|
||||
TEGRASOC_WHICH_CLK_DISP_ROOT,
|
||||
TEGRASOC_WHICH_CLK_HUB_ROOT,
|
||||
TEGRASOC_WHICH_CLK_PLLA_DISP,
|
||||
TEGRASOC_WHICH_CLK_PLLA_DISPHUB,
|
||||
TEGRASOC_WHICH_CLK_PLLA,
|
||||
TEGRASOC_WHICH_CLK_MAX, // TEGRASOC_WHICH_CLK_MAX is defined for boundary checks only.
|
||||
} TEGRASOC_WHICH_CLK;
|
||||
|
||||
@ -304,7 +311,7 @@ typedef struct nv_alloc_mapping_context_s {
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_SOC_IRQ_DISPLAY_TYPE,
|
||||
NV_SOC_IRQ_DISPLAY_TYPE = 0x1,
|
||||
NV_SOC_IRQ_DPAUX_TYPE,
|
||||
NV_SOC_IRQ_GPIO_TYPE,
|
||||
NV_SOC_IRQ_HDACODEC_TYPE,
|
||||
@ -368,6 +375,7 @@ typedef struct nv_state_t
|
||||
nv_aperture_t *mipical_regs;
|
||||
nv_aperture_t *fb, ud;
|
||||
nv_aperture_t *simregs;
|
||||
nv_aperture_t *emc_regs;
|
||||
|
||||
NvU32 num_dpaux_instance;
|
||||
NvU32 interrupt_line;
|
||||
@ -430,9 +438,6 @@ typedef struct nv_state_t
|
||||
/* Variable to force allocation of 32-bit addressable memory */
|
||||
NvBool force_dma32_alloc;
|
||||
|
||||
/* Variable to track if device has entered dynamic power state */
|
||||
NvBool dynamic_power_entered;
|
||||
|
||||
/* PCI power state should be D0 during system suspend */
|
||||
NvBool d0_state_in_suspend;
|
||||
|
||||
@ -465,6 +470,9 @@ typedef struct nv_state_t
|
||||
/* Check if NVPCF DSM function is implemented under NVPCF or GPU device scope */
|
||||
NvBool nvpcf_dsm_in_gpu_scope;
|
||||
|
||||
/* Bool to check if the device received a shutdown notification */
|
||||
NvBool is_shutdown;
|
||||
|
||||
} nv_state_t;
|
||||
|
||||
// These define need to be in sync with defines in system.h
|
||||
@ -473,6 +481,10 @@ typedef struct nv_state_t
|
||||
#define OS_TYPE_SUNOS 0x3
|
||||
#define OS_TYPE_VMWARE 0x4
|
||||
|
||||
#define NVFP_TYPE_NONE 0x0
|
||||
#define NVFP_TYPE_REFCOUNTED 0x1
|
||||
#define NVFP_TYPE_REGISTERED 0x2
|
||||
|
||||
struct nv_file_private_t
|
||||
{
|
||||
NvHandle *handles;
|
||||
@ -482,6 +494,7 @@ struct nv_file_private_t
|
||||
|
||||
nv_file_private_t *ctl_nvfp;
|
||||
void *ctl_nvfp_priv;
|
||||
NvU32 register_or_refcount;
|
||||
};
|
||||
|
||||
// Forward define the gpu ops structures
|
||||
@ -513,8 +526,9 @@ typedef struct UvmGpuChannelResourceBindParams_tag *nvgpuChannelResourceBindPar
|
||||
typedef struct UvmGpuPagingChannelAllocParams_tag nvgpuPagingChannelAllocParams_t;
|
||||
typedef struct UvmGpuPagingChannel_tag *nvgpuPagingChannelHandle_t;
|
||||
typedef struct UvmGpuPagingChannelInfo_tag *nvgpuPagingChannelInfo_t;
|
||||
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU32, NvU64 *, NvU32, NvU64, NvU64);
|
||||
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64);
|
||||
typedef enum UvmPmaGpuMemoryType_tag nvgpuGpuMemoryType_t;
|
||||
typedef NV_STATUS (*nvPmaEvictPagesCallback)(void *, NvU32, NvU64 *, NvU32, NvU64, NvU64, nvgpuGpuMemoryType_t);
|
||||
typedef NV_STATUS (*nvPmaEvictRangeCallback)(void *, NvU64, NvU64, nvgpuGpuMemoryType_t);
|
||||
|
||||
/*
|
||||
* flags
|
||||
@ -566,12 +580,6 @@ typedef enum
|
||||
NV_POWER_STATE_RUNNING
|
||||
} nv_power_state_t;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
NV_FIRMWARE_GSP,
|
||||
NV_FIRMWARE_GSP_LOG
|
||||
} nv_firmware_t;
|
||||
|
||||
#define NV_PRIMARY_VGA(nv) ((nv)->primary_vga)
|
||||
|
||||
#define NV_IS_CTL_DEVICE(nv) ((nv)->flags & NV_FLAG_CONTROL)
|
||||
@ -587,12 +595,6 @@ typedef enum
|
||||
#define NV_SOC_IS_ISO_IOMMU_PRESENT(nv) \
|
||||
((nv)->iso_iommu_present)
|
||||
|
||||
/*
|
||||
* NVIDIA ACPI event ID to be passed into the core NVIDIA driver for
|
||||
* AC/DC event.
|
||||
*/
|
||||
#define NV_SYSTEM_ACPI_BATTERY_POWER_EVENT 0x8002
|
||||
|
||||
/*
|
||||
* GPU add/remove events
|
||||
*/
|
||||
@ -604,8 +606,6 @@ typedef enum
|
||||
* to core NVIDIA driver for ACPI events.
|
||||
*/
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_DISPLAY_SWITCH_DEFAULT 0
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_POWER_EVENT_AC 0
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_POWER_EVENT_BATTERY 1
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_DOCK_EVENT_UNDOCKED 0
|
||||
#define NV_SYSTEM_ACPI_EVENT_VALUE_DOCK_EVENT_DOCKED 1
|
||||
|
||||
@ -616,14 +616,18 @@ typedef enum
|
||||
#define NV_EVAL_ACPI_METHOD_NVIF 0x01
|
||||
#define NV_EVAL_ACPI_METHOD_WMMX 0x02
|
||||
|
||||
#define NV_I2C_CMD_READ 1
|
||||
#define NV_I2C_CMD_WRITE 2
|
||||
#define NV_I2C_CMD_SMBUS_READ 3
|
||||
#define NV_I2C_CMD_SMBUS_WRITE 4
|
||||
#define NV_I2C_CMD_SMBUS_QUICK_WRITE 5
|
||||
#define NV_I2C_CMD_SMBUS_QUICK_READ 6
|
||||
#define NV_I2C_CMD_SMBUS_BLOCK_READ 7
|
||||
#define NV_I2C_CMD_SMBUS_BLOCK_WRITE 8
|
||||
typedef enum {
|
||||
NV_I2C_CMD_READ = 1,
|
||||
NV_I2C_CMD_WRITE,
|
||||
NV_I2C_CMD_SMBUS_READ,
|
||||
NV_I2C_CMD_SMBUS_WRITE,
|
||||
NV_I2C_CMD_SMBUS_QUICK_WRITE,
|
||||
NV_I2C_CMD_SMBUS_QUICK_READ,
|
||||
NV_I2C_CMD_SMBUS_BLOCK_READ,
|
||||
NV_I2C_CMD_SMBUS_BLOCK_WRITE,
|
||||
NV_I2C_CMD_BLOCK_READ,
|
||||
NV_I2C_CMD_BLOCK_WRITE
|
||||
} nv_i2c_cmd_t;
|
||||
|
||||
// Flags needed by OSAllocPagesNode
|
||||
#define NV_ALLOC_PAGES_NODE_NONE 0x0
|
||||
@ -636,27 +640,33 @@ typedef enum
|
||||
#define NV_GET_NV_STATE(pGpu) \
|
||||
(nv_state_t *)((pGpu) ? (pGpu)->pOsGpuInfo : NULL)
|
||||
|
||||
#define IS_REG_OFFSET(nv, offset, length) \
|
||||
(((offset) >= (nv)->regs->cpu_address) && \
|
||||
(((offset) + ((length)-1)) <= \
|
||||
(nv)->regs->cpu_address + ((nv)->regs->size-1)))
|
||||
static inline NvBool IS_REG_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((offset >= nv->regs->cpu_address) &&
|
||||
((offset + (length - 1)) <= (nv->regs->cpu_address + (nv->regs->size - 1))));
|
||||
}
|
||||
|
||||
#define IS_FB_OFFSET(nv, offset, length) \
|
||||
(((nv)->fb) && ((offset) >= (nv)->fb->cpu_address) && \
|
||||
(((offset) + ((length)-1)) <= (nv)->fb->cpu_address + ((nv)->fb->size-1)))
|
||||
static inline NvBool IS_FB_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((nv->fb) && (offset >= nv->fb->cpu_address) &&
|
||||
((offset + (length - 1)) <= (nv->fb->cpu_address + (nv->fb->size - 1))));
|
||||
}
|
||||
|
||||
#define IS_UD_OFFSET(nv, offset, length) \
|
||||
(((nv)->ud.cpu_address != 0) && ((nv)->ud.size != 0) && \
|
||||
((offset) >= (nv)->ud.cpu_address) && \
|
||||
(((offset) + ((length)-1)) <= (nv)->ud.cpu_address + ((nv)->ud.size-1)))
|
||||
static inline NvBool IS_UD_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((nv->ud.cpu_address != 0) && (nv->ud.size != 0) &&
|
||||
(offset >= nv->ud.cpu_address) &&
|
||||
((offset + (length - 1)) <= (nv->ud.cpu_address + (nv->ud.size - 1))));
|
||||
}
|
||||
|
||||
#define IS_IMEM_OFFSET(nv, offset, length) \
|
||||
(((nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) && \
|
||||
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) && \
|
||||
((offset) >= (nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) && \
|
||||
(((offset) + ((length) - 1)) <= \
|
||||
(nv)->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address + \
|
||||
((nv)->bars[NV_GPU_BAR_INDEX_IMEM].size - 1)))
|
||||
static inline NvBool IS_IMEM_OFFSET(nv_state_t *nv, NvU64 offset, NvU64 length)
|
||||
{
|
||||
return ((nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address != 0) &&
|
||||
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size != 0) &&
|
||||
(offset >= nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address) &&
|
||||
((offset + (length - 1)) <= (nv->bars[NV_GPU_BAR_INDEX_IMEM].cpu_address +
|
||||
(nv->bars[NV_GPU_BAR_INDEX_IMEM].size - 1))));
|
||||
}
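A short sketch (assumed caller, not from this commit) of how these inline range checks are typically combined when validating an offset a client has asked to map:

/* Illustrative only: accept an offset that falls inside a known BAR range. */
static NvBool example_offset_is_mappable(nv_state_t *nv, NvU64 offset, NvU64 length)
{
    return IS_REG_OFFSET(nv, offset, length) ||
           IS_FB_OFFSET(nv, offset, length)  ||
           IS_IMEM_OFFSET(nv, offset, length);
}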
|
||||
|
||||
#define NV_RM_MAX_MSIX_LINES 8
|
||||
|
||||
@ -787,7 +797,7 @@ NV_STATUS NV_API_CALL nv_pci_trigger_recovery (nv_state_t *);
|
||||
NvBool NV_API_CALL nv_requires_dma_remap (nv_state_t *);
|
||||
|
||||
NvBool NV_API_CALL nv_is_rm_firmware_active(nv_state_t *);
|
||||
const void*NV_API_CALL nv_get_firmware(nv_state_t *, nv_firmware_t, const void **, NvU32 *);
|
||||
const void*NV_API_CALL nv_get_firmware(nv_state_t *, nv_firmware_type_t, nv_firmware_chip_family_t, const void **, NvU32 *);
|
||||
void NV_API_CALL nv_put_firmware(const void *);
|
||||
|
||||
nv_file_private_t* NV_API_CALL nv_get_file_private(NvS32, NvBool, void **);
|
||||
@ -828,6 +838,7 @@ NV_STATUS NV_API_CALL nv_acquire_fabric_mgmt_cap (int, int*);
|
||||
int NV_API_CALL nv_cap_drv_init(void);
|
||||
void NV_API_CALL nv_cap_drv_exit(void);
|
||||
NvBool NV_API_CALL nv_is_gpu_accessible(nv_state_t *);
|
||||
NvBool NV_API_CALL nv_match_gpu_os_info(nv_state_t *, void *);
|
||||
|
||||
NvU32 NV_API_CALL nv_get_os_type(void);
|
||||
|
||||
@ -916,11 +927,11 @@ NvBool NV_API_CALL rm_is_supported_pci_device(NvU8 pci_class,
|
||||
|
||||
void NV_API_CALL rm_i2c_remove_adapters (nvidia_stack_t *, nv_state_t *);
|
||||
NvBool NV_API_CALL rm_i2c_is_smbus_capable (nvidia_stack_t *, nv_state_t *, void *);
|
||||
NV_STATUS NV_API_CALL rm_i2c_transfer (nvidia_stack_t *, nv_state_t *, void *, NvU8, NvU8, NvU8, NvU32, NvU8 *);
|
||||
NV_STATUS NV_API_CALL rm_i2c_transfer (nvidia_stack_t *, nv_state_t *, void *, nv_i2c_cmd_t, NvU8, NvU8, NvU32, NvU8 *);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_perform_version_check (nvidia_stack_t *, void *, NvU32);
|
||||
|
||||
NV_STATUS NV_API_CALL rm_system_event (nvidia_stack_t *, NvU32, NvU32);
|
||||
void NV_API_CALL rm_power_source_change_event (nvidia_stack_t *, NvU32);
|
||||
|
||||
void NV_API_CALL rm_disable_gpu_state_persistence (nvidia_stack_t *sp, nv_state_t *);
|
||||
NV_STATUS NV_API_CALL rm_p2p_init_mapping (nvidia_stack_t *, NvU64, NvU64 *, NvU64 *, NvU64 *, NvU64 *, NvU64, NvU64, NvU64, NvU64, void (*)(void *), void *);
|
||||
@ -944,6 +955,7 @@ void NV_API_CALL rm_kernel_rmapi_op(nvidia_stack_t *sp, void *ops_cmd);
|
||||
NvBool NV_API_CALL rm_get_device_remove_flag(nvidia_stack_t *sp, NvU32 gpu_id);
|
||||
NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults(nvidia_stack_t *, nv_state_t *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_copy_mmu_faults_unlocked(nvidia_stack_t *, nv_state_t *, NvU32 *);
|
||||
NV_STATUS NV_API_CALL rm_gpu_handle_mmu_faults(nvidia_stack_t *, nv_state_t *, NvU32 *);
|
||||
NvBool NV_API_CALL rm_gpu_need_4k_page_isolation(nv_state_t *);
|
||||
NvBool NV_API_CALL rm_is_chipset_io_coherent(nv_stack_t *);
|
||||
NvBool NV_API_CALL rm_init_event_locks(nvidia_stack_t *, nv_state_t *);
|
||||
@ -969,12 +981,13 @@ const char* NV_API_CALL rm_get_dynamic_power_management_status(nvidia_stack_t *,
|
||||
const char* NV_API_CALL rm_get_gpu_gcx_support(nvidia_stack_t *, nv_state_t *, NvBool);
|
||||
|
||||
void NV_API_CALL rm_acpi_notify(nvidia_stack_t *, nv_state_t *, NvU32);
|
||||
NV_STATUS NV_API_CALL rm_get_clientnvpcf_power_limits(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *);
|
||||
|
||||
NvBool NV_API_CALL rm_is_altstack_in_use(void);
|
||||
|
||||
/* vGPU VFIO specific functions */
|
||||
NV_STATUS NV_API_CALL nv_vgpu_create_request(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU32, NvU16 *, NvU32, NvBool *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_delete(nvidia_stack_t *, const NvU8 *, NvU16);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 **, NvBool);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_ids(nvidia_stack_t *, nv_state_t *, NvU32 *, NvU32 *, NvBool, NvU8, NvBool);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_type_info(nvidia_stack_t *, nv_state_t *, NvU32, char *, int, NvU8);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_get_bar_info(nvidia_stack_t *, nv_state_t *, const NvU8 *, NvU64 *, NvU32, void *);
|
||||
NV_STATUS NV_API_CALL nv_vgpu_start(nvidia_stack_t *, const NvU8 *, void *, NvS32 *, NvU8 *, NvU32);
|
||||
@ -987,6 +1000,10 @@ NV_STATUS NV_API_CALL nv_get_usermap_access_params(nv_state_t*, nv_usermap_acces
|
||||
nv_soc_irq_type_t NV_API_CALL nv_get_current_irq_type(nv_state_t*);
|
||||
void NV_API_CALL nv_flush_coherent_cpu_cache_range(nv_state_t *nv, NvU64 cpu_virtual, NvU64 size);
|
||||
|
||||
#if defined(NV_VMWARE)
|
||||
const void* NV_API_CALL rm_get_firmware(nv_firmware_type_t fw_type, const void **fw_buf, NvU32 *fw_size);
|
||||
#endif
|
||||
|
||||
/* Callbacks should occur roughly every 10ms. */
|
||||
#define NV_SNAPSHOT_TIMER_HZ 100
|
||||
void NV_API_CALL nv_start_snapshot_timer(void (*snapshot_callback)(void *context));
|
||||
@ -998,6 +1015,16 @@ static inline const NvU8 *nv_get_cached_uuid(nv_state_t *nv)
|
||||
return nv->nv_uuid_cache.valid ? nv->nv_uuid_cache.uuid : NULL;
|
||||
}
|
||||
|
||||
/* nano second resolution timer callback structure */
|
||||
typedef struct nv_nano_timer nv_nano_timer_t;
|
||||
|
||||
/* nano timer functions */
|
||||
void NV_API_CALL nv_create_nano_timer(nv_state_t *, void *pTmrEvent, nv_nano_timer_t **);
|
||||
void NV_API_CALL nv_start_nano_timer(nv_state_t *nv, nv_nano_timer_t *, NvU64 timens);
|
||||
NV_STATUS NV_API_CALL rm_run_nano_timer_callback(nvidia_stack_t *, nv_state_t *, void *pTmrEvent);
|
||||
void NV_API_CALL nv_cancel_nano_timer(nv_state_t *, nv_nano_timer_t *);
|
||||
void NV_API_CALL nv_destroy_nano_timer(nv_state_t *nv, nv_nano_timer_t *);
|
||||
|
||||
#if defined(NVCPU_X86_64)
|
||||
|
||||
static inline NvU64 nv_rdtsc(void)
|
||||
|
@ -331,10 +331,14 @@ typedef NV_STATUS (*uvmPmaEvictPagesCallback)(void *callbackData,
|
||||
NvU64 *pPages,
|
||||
NvU32 count,
|
||||
NvU64 physBegin,
|
||||
NvU64 physEnd);
|
||||
NvU64 physEnd,
|
||||
UVM_PMA_GPU_MEMORY_TYPE mem_type);
|
||||
|
||||
// Mirrors pmaEvictRangeCb_t, see its documentation in pma.h.
|
||||
typedef NV_STATUS (*uvmPmaEvictRangeCallback)(void *callbackData, NvU64 physBegin, NvU64 physEnd);
|
||||
typedef NV_STATUS (*uvmPmaEvictRangeCallback)(void *callbackData,
|
||||
NvU64 physBegin,
|
||||
NvU64 physEnd,
|
||||
UVM_PMA_GPU_MEMORY_TYPE mem_type);
|
||||
|
||||
/*******************************************************************************
|
||||
nvUvmInterfacePmaRegisterEvictionCallbacks
|
||||
|
@ -897,6 +897,16 @@ typedef struct UvmGpuAccessCntrConfig_tag
|
||||
NvU32 threshold;
|
||||
} UvmGpuAccessCntrConfig;
|
||||
|
||||
//
|
||||
// When modifying this enum, make sure they are compatible with the mirrored
|
||||
// MEMORY_PROTECTION enum in phys_mem_allocator.h.
|
||||
//
|
||||
typedef enum UvmPmaGpuMemoryType_tag
|
||||
{
|
||||
UVM_PMA_GPU_MEMORY_TYPE_UNPROTECTED = 0,
|
||||
UVM_PMA_GPU_MEMORY_TYPE_PROTECTED = 1
|
||||
} UVM_PMA_GPU_MEMORY_TYPE;
|
||||
|
||||
typedef UvmGpuChannelInfo gpuChannelInfo;
|
||||
typedef UvmGpuChannelAllocParams gpuChannelAllocParams;
|
||||
typedef UvmGpuCaps gpuCaps;
|
||||
|
@ -150,9 +150,7 @@ typedef struct NvSyncPointFenceRec {
|
||||
|* *|
|
||||
\***************************************************************************/
|
||||
|
||||
#if !defined(XAPIGEN) /* NvOffset is XAPIGEN builtin type, so skip typedef */
|
||||
typedef NvU64 NvOffset; /* GPU address */
|
||||
#endif
|
||||
|
||||
#define NvOffset_HI32(n) ((NvU32)(((NvU64)(n)) >> 32))
|
||||
#define NvOffset_LO32(n) ((NvU32)((NvU64)(n)))
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -29,6 +29,7 @@
|
||||
#include <nvlimits.h>
|
||||
|
||||
#define NVKMS_MAX_SUBDEVICES NV_MAX_SUBDEVICES
|
||||
#define NVKMS_MAX_HEADS_PER_DISP NV_MAX_HEADS
|
||||
|
||||
#define NVKMS_LEFT 0
|
||||
#define NVKMS_RIGHT 1
|
||||
@ -530,4 +531,78 @@ typedef struct {
|
||||
NvBool noncoherent;
|
||||
} NvKmsDispIOCoherencyModes;
|
||||
|
||||
enum NvKmsInputColorSpace {
|
||||
/* Unknown colorspace; no de-gamma will be applied */
|
||||
NVKMS_INPUT_COLORSPACE_NONE = 0,
|
||||
|
||||
/* Linear, Rec.709 [-0.5, 7.5) */
|
||||
NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR = 1,
|
||||
|
||||
/* PQ, Rec.2020 unity */
|
||||
NVKMS_INPUT_COLORSPACE_BT2100_PQ = 2,
|
||||
};
|
||||
|
||||
enum NvKmsOutputTf {
|
||||
/*
|
||||
* NVKMS itself won't apply any OETF (clients are still
|
||||
* free to provide a custom OLUT)
|
||||
*/
|
||||
NVKMS_OUTPUT_TF_NONE = 0,
|
||||
NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR = 1,
|
||||
NVKMS_OUTPUT_TF_PQ = 2,
|
||||
};
|
||||
|
||||
/*!
|
||||
* HDR Static Metadata Type1 Descriptor as per CEA-861.3 spec.
|
||||
* This is expected to match exactly with the spec.
|
||||
*/
|
||||
struct NvKmsHDRStaticMetadata {
|
||||
/*!
|
||||
* Color primaries of the data.
|
||||
* These are coded as unsigned 16-bit values in units of 0.00002,
|
||||
* where 0x0000 represents zero and 0xC350 represents 1.0000.
|
||||
*/
|
||||
struct {
|
||||
NvU16 x, y;
|
||||
} displayPrimaries[3];
|
||||
|
||||
/*!
|
||||
* White point of colorspace data.
|
||||
* These are coded as unsigned 16-bit values in units of 0.00002,
|
||||
* where 0x0000 represents zero and 0xC350 represents 1.0000.
|
||||
*/
|
||||
struct {
|
||||
NvU16 x, y;
|
||||
} whitePoint;
|
||||
|
||||
/**
|
||||
* Maximum mastering display luminance.
|
||||
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
|
||||
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
|
||||
*/
|
||||
NvU16 maxDisplayMasteringLuminance;
|
||||
|
||||
/*!
|
||||
* Minimum mastering display luminance.
|
||||
* This value is coded as an unsigned 16-bit value in units of
|
||||
* 0.0001 cd/m2, where 0x0001 represents 0.0001 cd/m2 and 0xFFFF
|
||||
* represents 6.5535 cd/m2.
|
||||
*/
|
||||
NvU16 minDisplayMasteringLuminance;
|
||||
|
||||
/*!
|
||||
* Maximum content light level.
|
||||
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
|
||||
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
|
||||
*/
|
||||
NvU16 maxCLL;
|
||||
|
||||
/*!
|
||||
* Maximum frame-average light level.
|
||||
* This value is coded as an unsigned 16-bit value in units of 1 cd/m2,
|
||||
* where 0x0001 represents 1 cd/m2 and 0xFFFF represents 65535 cd/m2.
|
||||
*/
|
||||
NvU16 maxFALL;
|
||||
};
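To make the fixed-point units above concrete, here is an illustrative initialization (the values are chosen for the example, not taken from the source): chromaticities are encoded in steps of 0.00002, so 0.3127 becomes 15635; maximum luminance is in 1 cd/m2 steps; minimum luminance is in 0.0001 cd/m2 steps, so 0.005 cd/m2 becomes 50.

/* Illustrative encoding only; not values from the driver. */
static const struct NvKmsHDRStaticMetadata exampleMetadata = {
    .displayPrimaries = {
        { .x = 35400, .y = 14600 },   /* red:   0.708, 0.292 */
        { .x =  8500, .y = 39850 },   /* green: 0.170, 0.797 */
        { .x =  6550, .y =  2300 },   /* blue:  0.131, 0.046 */
    },
    .whitePoint                   = { .x = 15635, .y = 16450 },  /* D65 */
    .maxDisplayMasteringLuminance = 1000,   /* 1000 cd/m2  */
    .minDisplayMasteringLuminance = 50,     /* 0.005 cd/m2 */
    .maxCLL                       = 1000,
    .maxFALL                      = 400,
};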
|
||||
|
||||
#endif /* NVKMS_API_TYPES_H */
|
||||
|
@ -86,8 +86,9 @@ enum NvKmsSurfaceMemoryFormat {
|
||||
NvKmsSurfaceMemoryFormatY12___V12U12_N420 = 32,
|
||||
NvKmsSurfaceMemoryFormatY8___U8___V8_N444 = 33,
|
||||
NvKmsSurfaceMemoryFormatY8___U8___V8_N420 = 34,
|
||||
NvKmsSurfaceMemoryFormatRF16GF16BF16XF16 = 35,
|
||||
NvKmsSurfaceMemoryFormatMin = NvKmsSurfaceMemoryFormatI8,
|
||||
NvKmsSurfaceMemoryFormatMax = NvKmsSurfaceMemoryFormatY8___U8___V8_N420,
|
||||
NvKmsSurfaceMemoryFormatMax = NvKmsSurfaceMemoryFormatRF16GF16BF16XF16,
|
||||
};
|
||||
|
||||
typedef struct NvKmsSurfaceMemoryFormatInfo {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2015 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2015-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -149,6 +149,7 @@ struct NvKmsKapiDeviceResourcesInfo {
|
||||
} caps;
|
||||
|
||||
NvU64 supportedSurfaceMemoryFormats[NVKMS_KAPI_LAYER_MAX];
|
||||
NvBool supportsHDR[NVKMS_KAPI_LAYER_MAX];
|
||||
};
|
||||
|
||||
#define NVKMS_KAPI_LAYER_MASK(layerType) (1 << (layerType))
|
||||
@ -218,6 +219,11 @@ struct NvKmsKapiLayerConfig {
|
||||
struct NvKmsRRParams rrParams;
|
||||
struct NvKmsKapiSyncpt syncptParams;
|
||||
|
||||
struct NvKmsHDRStaticMetadata hdrMetadata;
|
||||
NvBool hdrMetadataSpecified;
|
||||
|
||||
enum NvKmsOutputTf tf;
|
||||
|
||||
NvU8 minPresentInterval;
|
||||
NvBool tearing;
|
||||
|
||||
@ -226,6 +232,8 @@ struct NvKmsKapiLayerConfig {
|
||||
|
||||
NvS16 dstX, dstY;
|
||||
NvU16 dstWidth, dstHeight;
|
||||
|
||||
enum NvKmsInputColorSpace inputColorSpace;
|
||||
};
|
||||
|
||||
struct NvKmsKapiLayerRequestedConfig {
|
||||
@ -277,6 +285,8 @@ struct NvKmsKapiHeadModeSetConfig {
|
||||
NvKmsKapiDisplay displays[NVKMS_KAPI_MAX_CLONE_DISPLAYS];
|
||||
|
||||
struct NvKmsKapiDisplayMode mode;
|
||||
|
||||
NvBool vrrEnabled;
|
||||
};
|
||||
|
||||
struct NvKmsKapiHeadRequestedConfig {
|
||||
@ -368,6 +378,9 @@ struct NvKmsKapiDynamicDisplayParams {
|
||||
/* [OUT] Connection status */
|
||||
NvU32 connected;
|
||||
|
||||
/* [OUT] VRR status */
|
||||
NvBool vrrSupported;
|
||||
|
||||
/* [IN/OUT] EDID of connected monitor/ Input to override EDID */
|
||||
struct {
|
||||
NvU16 bufferSize;
|
||||
@ -484,6 +497,38 @@ struct NvKmsKapiFunctionsTable {
|
||||
*/
|
||||
void (*releaseOwnership)(struct NvKmsKapiDevice *device);
|
||||
|
||||
/*!
|
||||
* Grant modeset permissions for a display to fd. Only one (dispIndex, head,
|
||||
* display) is currently supported.
|
||||
*
|
||||
* \param [in] fd fd from opening /dev/nvidia-modeset.
|
||||
*
|
||||
* \param [in] device A device returned by allocateDevice().
|
||||
*
|
||||
* \param [in] head head of display.
|
||||
*
|
||||
* \param [in] display The display to grant.
|
||||
*
|
||||
* \return NV_TRUE on success, NV_FALSE on failure.
|
||||
*/
|
||||
NvBool (*grantPermissions)
|
||||
(
|
||||
NvS32 fd,
|
||||
struct NvKmsKapiDevice *device,
|
||||
NvU32 head,
|
||||
NvKmsKapiDisplay display
|
||||
);
|
||||
|
||||
/*!
|
||||
* Revoke modeset permissions previously granted. This currently applies for all
|
||||
* previous grant requests for this device.
|
||||
*
|
||||
* \param [in] device A device returned by allocateDevice().
|
||||
*
|
||||
* \return NV_TRUE on success, NV_FALSE on failure.
|
||||
*/
|
||||
NvBool (*revokePermissions)(struct NvKmsKapiDevice *device);
|
||||
|
||||
/*!
|
||||
* Registers for notification, via
|
||||
* NvKmsKapiAllocateDeviceParams::eventCallback, of the events specified
|
||||
|
@ -234,12 +234,14 @@ extern "C" {
#define DRF_EXTENT(drf) (drf##_HIGH_FIELD)
#define DRF_SHIFT(drf) ((drf##_LOW_FIELD) % 32U)
#define DRF_SHIFT_RT(drf) ((drf##_HIGH_FIELD) % 32U)
#define DRF_SIZE(drf) ((drf##_HIGH_FIELD)-(drf##_LOW_FIELD)+1U)
#define DRF_MASK(drf) (0xFFFFFFFFU >> (31U - ((drf##_HIGH_FIELD) % 32U) + ((drf##_LOW_FIELD) % 32U)))
#else
#define DRF_BASE(drf) (NV_FALSE?drf) // much better
#define DRF_EXTENT(drf) (NV_TRUE?drf) // much better
#define DRF_SHIFT(drf) (((NvU32)DRF_BASE(drf)) % 32U)
#define DRF_SHIFT_RT(drf) (((NvU32)DRF_EXTENT(drf)) % 32U)
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
#define DRF_MASK(drf) (0xFFFFFFFFU>>(31U - DRF_SHIFT_RT(drf) + DRF_SHIFT(drf)))
#endif
#define DRF_DEF(d,r,f,c) (((NvU32)(NV ## d ## r ## f ## c))<<DRF_SHIFT(NV ## d ## r ## f))
@ -249,12 +251,12 @@ extern "C" {
#define DRF_EXTENT(drf) (1?drf) // much better
#define DRF_SHIFT(drf) ((DRF_ISBIT(0,drf)) % 32)
#define DRF_SHIFT_RT(drf) ((DRF_ISBIT(1,drf)) % 32)
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)
#define DRF_MASK(drf) (0xFFFFFFFFU>>(31-((DRF_ISBIT(1,drf)) % 32)+((DRF_ISBIT(0,drf)) % 32)))
#define DRF_DEF(d,r,f,c) ((NV ## d ## r ## f ## c)<<DRF_SHIFT(NV ## d ## r ## f))
#define DRF_NUM(d,r,f,n) (((n)&DRF_MASK(NV ## d ## r ## f))<<DRF_SHIFT(NV ## d ## r ## f))
#endif
#define DRF_SHIFTMASK(drf) (DRF_MASK(drf)<<(DRF_SHIFT(drf)))
#define DRF_SIZE(drf) (DRF_EXTENT(drf)-DRF_BASE(drf)+1U)

#define DRF_VAL(d,r,f,v) (((v)>>DRF_SHIFT(NV ## d ## r ## f))&DRF_MASK(NV ## d ## r ## f))
#endif
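The DRF macros above pack and extract device register fields whose positions ship as "high:low" bit-range constants. A minimal sketch of the intended usage follows; it is not part of this change, and the NV_PEXAMPLE_* register, field, and value names are made up for illustration.

    // Illustrative sketch only (not part of this change). The register and
    // field names below are hypothetical; real headers define each field as
    // a "high:low" bit-range constant that the DRF macros decompose.
    #define NV_PEXAMPLE_CTRL_ENABLE        0:0   /* hypothetical 1-bit field */
    #define NV_PEXAMPLE_CTRL_MODE          7:4   /* hypothetical 4-bit field */
    #define NV_PEXAMPLE_CTRL_MODE_FAST     0x3   /* hypothetical field value */

    static NvU32 example_pack_and_extract(NvU32 reg)
    {
        /* Clear each field, then insert a numeric and a named value. */
        reg &= ~DRF_SHIFTMASK(NV_PEXAMPLE_CTRL_ENABLE);
        reg |= DRF_NUM(_PEXAMPLE, _CTRL, _ENABLE, 1);

        reg &= ~DRF_SHIFTMASK(NV_PEXAMPLE_CTRL_MODE);
        reg |= DRF_DEF(_PEXAMPLE, _CTRL, _MODE, _FAST);

        /* Read the MODE field back out of the packed register value. */
        return DRF_VAL(_PEXAMPLE, _CTRL, _MODE, reg);
    }

DRF_DEF and DRF_NUM paste the d, r, f (and c) tokens back together, so the sketch relies only on the NV<d><r><f> naming convention that the macros already assume.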
@ -907,6 +909,16 @@ static NV_FORCEINLINE void *NV_NVUPTR_TO_PTR(NvUPtr address)
    return uAddr.p;
}

// Get bit at pos (k) from x
#define NV_BIT_GET(k, x) (((x) >> (k)) & 1)
// Get bit at pos (n) from (hi) if >= 64, otherwise from (lo). This is paired with NV_BIT_SET_128 which sets the bit.
#define NV_BIT_GET_128(n, lo, hi) (((n) < 64) ? NV_BIT_GET((n), (lo)) : NV_BIT_GET((n) - 64, (hi)))
//
// Set the bit at pos (b) for U64 which is < 128. Since the (b) can be >= 64, we need 2 U64 to store this.
// Use (lo) if (b) is less than 64, and (hi) if >= 64.
//
#define NV_BIT_SET_128(b, lo, hi) { nvAssert( (b) < 128 ); if ( (b) < 64 ) (lo) |= NVBIT64(b); else (hi) |= NVBIT64( b & 0x3F ); }

#ifdef __cplusplus
}
#endif //__cplusplus

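NV_BIT_GET_128 and NV_BIT_SET_128 above treat a pair of NvU64 words as a single 128-bit mask. The short sketch below is illustration only, not part of this change; it assumes nvAssert and NVBIT64 from the surrounding headers are in scope.

    // Illustrative sketch only (not part of this change): a 128-bit mask is
    // carried as two NvU64 words, so bit 70 lands in bit 6 of the high word.
    static void example_bit_128(void)
    {
        NvU64 lo = 0;
        NvU64 hi = 0;

        NV_BIT_SET_128(3,  lo, hi);   /* sets bit 3 of lo           */
        NV_BIT_SET_128(70, lo, hi);   /* sets bit 70 - 64 = 6 of hi */

        /* Reads go through the same (lo, hi) pair as the writes. */
        nvAssert(NV_BIT_GET_128(3,  lo, hi) == 1);
        nvAssert(NV_BIT_GET_128(70, lo, hi) == 1);
        nvAssert(NV_BIT_GET_128(64, lo, hi) == 0);
    }
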
@ -24,11 +24,6 @@
|
||||
#ifndef SDK_NVSTATUS_H
|
||||
#define SDK_NVSTATUS_H
|
||||
|
||||
/* XAPIGEN - this file is not suitable for (nor needed by) xapigen. */
|
||||
/* Rather than #ifdef out every such include in every sdk */
|
||||
/* file, punt here. */
|
||||
#if !defined(XAPIGEN) /* rest of file */
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
@ -125,6 +120,4 @@ const char *nvstatusToString(NV_STATUS nvStatusIn);
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // XAPIGEN
|
||||
|
||||
#endif /* SDK_NVSTATUS_H */
|
||||
|
@ -24,11 +24,6 @@
|
||||
#ifndef SDK_NVSTATUSCODES_H
|
||||
#define SDK_NVSTATUSCODES_H
|
||||
|
||||
/* XAPIGEN - this file is not suitable for (nor needed by) xapigen. */
|
||||
/* Rather than #ifdef out every such include in every sdk */
|
||||
/* file, punt here. */
|
||||
#if !defined(XAPIGEN) /* rest of file */
|
||||
|
||||
NV_STATUS_CODE(NV_OK, 0x00000000, "Success")
|
||||
NV_STATUS_CODE(NV_ERR_GENERIC, 0x0000FFFF, "Failure: Generic Error")
|
||||
|
||||
@ -153,6 +148,7 @@ NV_STATUS_CODE(NV_ERR_NVLINK_CLOCK_ERROR, 0x00000076, "Nvlink Clock
|
||||
NV_STATUS_CODE(NV_ERR_NVLINK_TRAINING_ERROR, 0x00000077, "Nvlink Training Error")
|
||||
NV_STATUS_CODE(NV_ERR_NVLINK_CONFIGURATION_ERROR, 0x00000078, "Nvlink Configuration Error")
|
||||
NV_STATUS_CODE(NV_ERR_RISCV_ERROR, 0x00000079, "Generic RISC-V assert or halt")
|
||||
NV_STATUS_CODE(NV_ERR_FABRIC_MANAGER_NOT_PRESENT, 0x0000007A, "Fabric Manager is not loaded")
|
||||
|
||||
// Warnings:
|
||||
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")
|
||||
@ -164,6 +160,4 @@ NV_STATUS_CODE(NV_WARN_NOTHING_TO_DO, 0x00010006, "WARNING Noth
|
||||
NV_STATUS_CODE(NV_WARN_NULL_OBJECT, 0x00010007, "WARNING NULL object found")
|
||||
NV_STATUS_CODE(NV_WARN_OUT_OF_RANGE, 0x00010008, "WARNING value out of range")
|
||||
|
||||
#endif // XAPIGEN
|
||||
|
||||
#endif /* SDK_NVSTATUSCODES_H */
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -143,6 +143,14 @@ void NV_API_CALL os_free_semaphore (void *);
|
||||
NV_STATUS NV_API_CALL os_acquire_semaphore (void *);
|
||||
NV_STATUS NV_API_CALL os_cond_acquire_semaphore (void *);
|
||||
NV_STATUS NV_API_CALL os_release_semaphore (void *);
|
||||
void* NV_API_CALL os_alloc_rwlock (void);
|
||||
void NV_API_CALL os_free_rwlock (void *);
|
||||
NV_STATUS NV_API_CALL os_acquire_rwlock_read (void *);
|
||||
NV_STATUS NV_API_CALL os_acquire_rwlock_write (void *);
|
||||
NV_STATUS NV_API_CALL os_cond_acquire_rwlock_read (void *);
|
||||
NV_STATUS NV_API_CALL os_cond_acquire_rwlock_write(void *);
|
||||
void NV_API_CALL os_release_rwlock_read (void *);
|
||||
void NV_API_CALL os_release_rwlock_write (void *);
|
||||
NvBool NV_API_CALL os_semaphore_may_sleep (void);
|
||||
NV_STATUS NV_API_CALL os_get_version_info (os_version_info*);
|
||||
NvBool NV_API_CALL os_is_isr (void);
|
||||
|
@ -588,7 +588,9 @@ compile_test() {
|
||||
# is present.
|
||||
#
|
||||
# Added by commit 3c299dc22635 ("PCI: add
|
||||
# pci_get_domain_bus_and_slot function") in 2.6.33.
|
||||
# pci_get_domain_bus_and_slot function") in 2.6.33 but aarch64
|
||||
# support was added by commit d1e6dc91b532
|
||||
# ("arm64: Add architectural support for PCI") in 3.18-rc1
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
@ -649,7 +651,7 @@ compile_test() {
|
||||
#include <linux/cpu.h>
|
||||
void conftest_register_cpu_notifier(void) {
|
||||
register_cpu_notifier();
|
||||
}" > conftest$$.c
|
||||
}"
|
||||
compile_check_conftest "$CODE" "NV_REGISTER_CPU_NOTIFIER_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
@ -665,7 +667,7 @@ compile_test() {
|
||||
#include <linux/cpu.h>
|
||||
void conftest_cpuhp_setup_state(void) {
|
||||
cpuhp_setup_state();
|
||||
}" > conftest$$.c
|
||||
}"
|
||||
compile_check_conftest "$CODE" "NV_CPUHP_SETUP_STATE_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
@ -697,66 +699,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_IOREMAP_WC_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
file_operations)
|
||||
# 'ioctl' field removed by commit b19dd42faf41
|
||||
# ("bkl: Remove locked .ioctl file operation") in v2.6.36
|
||||
CODE="
|
||||
#include <linux/fs.h>
|
||||
int conftest_file_operations(void) {
|
||||
return offsetof(struct file_operations, ioctl);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_FILE_OPERATIONS_HAS_IOCTL" "" "types"
|
||||
;;
|
||||
|
||||
sg_alloc_table)
|
||||
#
|
||||
# sg_alloc_table_from_pages added by commit efc42bc98058
|
||||
# ("scatterlist: add sg_alloc_table_from_pages function") in v3.6
|
||||
#
|
||||
CODE="
|
||||
#include <linux/scatterlist.h>
|
||||
void conftest_sg_alloc_table_from_pages(void) {
|
||||
sg_alloc_table_from_pages();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_SG_ALLOC_TABLE_FROM_PAGES_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
efi_enabled)
|
||||
#
|
||||
# Added in 2.6.12 as a variable
|
||||
#
|
||||
# Determine if the efi_enabled symbol is present (as a variable),
|
||||
# or if the efi_enabled() function is present and how many
|
||||
# arguments it takes.
|
||||
#
|
||||
# Converted from a variable to a function by commit 83e68189745a
|
||||
# ("efi: Make 'efi_enabled' a function to query EFI facilities")
|
||||
# in v3.8
|
||||
#
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#if defined(NV_LINUX_EFI_H_PRESENT)
|
||||
#include <linux/efi.h>
|
||||
#endif
|
||||
int conftest_efi_enabled(void) {
|
||||
return efi_enabled(0);
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
echo "#define NV_EFI_ENABLED_PRESENT" | append_conftest "functions"
|
||||
echo "#define NV_EFI_ENABLED_ARGUMENT_COUNT 1" | append_conftest "functions"
|
||||
rm -f conftest$$.o
|
||||
return
|
||||
else
|
||||
echo "#define NV_EFI_ENABLED_PRESENT" | append_conftest "symbols"
|
||||
return
|
||||
fi
|
||||
;;
|
||||
|
||||
dom0_kernel_present)
|
||||
# Add config parameter if running on DOM0.
|
||||
if [ -n "$VGX_BUILD" ]; then
|
||||
@ -967,6 +909,38 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_MDEV_GET_TYPE_GROUP_ID_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
vfio_device_mig_state)
|
||||
#
|
||||
# Determine if vfio_device_mig_state enum is present or not
|
||||
#
|
||||
# Added by commit 115dcec65f61d ("vfio: Define device
|
||||
# migration protocol v2") in v5.18
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
#include <linux/vfio.h>
|
||||
enum vfio_device_mig_state device_state;
|
||||
"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_VFIO_DEVICE_MIG_STATE_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
vfio_migration_ops)
|
||||
#
|
||||
# Determine if vfio_migration_ops struct is present or not
|
||||
#
|
||||
# Added by commit 6e97eba8ad874 ("vfio: Split migration ops
|
||||
# from main device ops") in v6.0
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
#include <linux/vfio.h>
|
||||
struct vfio_migration_ops mig_ops;
|
||||
"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_VFIO_MIGRATION_OPS_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
mdev_parent)
|
||||
#
|
||||
# Determine if the struct mdev_parent type is present.
|
||||
@ -1051,6 +1025,40 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_MDEV_PARENT_OPS_HAS_OPEN_DEVICE" "" "types"
|
||||
;;
|
||||
|
||||
mdev_parent_ops_has_device_driver)
|
||||
#
|
||||
# Determine if 'mdev_parent_ops' structure has 'device_driver' field.
|
||||
#
|
||||
# Added by commit 88a21f265ce5 ("vfio/mdev: Allow the mdev_parent_ops
|
||||
# to specify the device driver to bind") in v5.14 (2021-06-17)
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
#include <linux/mdev.h>
|
||||
int conftest_mdev_parent_ops_has_device_driver(void) {
|
||||
return offsetof(struct mdev_parent_ops, device_driver);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_MDEV_PARENT_OPS_HAS_DEVICE_DRIVER" "" "types"
|
||||
;;
|
||||
|
||||
mdev_driver_has_supported_type_groups)
|
||||
#
|
||||
# Determine if 'mdev_driver' structure has 'supported_type_groups' field.
|
||||
#
|
||||
# Added by commit 6b42f491e17c ("vfio/mdev: Remove mdev_parent_ops")
|
||||
# in v5.19 (2022-04-11)
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
#include <linux/mdev.h>
|
||||
int conftest_mdev_driver_has_supported_type_groups(void) {
|
||||
return offsetof(struct mdev_driver, supported_type_groups);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_MDEV_DRIVER_HAS_SUPPORTED_TYPE_GROUPS" "" "types"
|
||||
;;
|
||||
|
||||
pci_irq_vector_helpers)
|
||||
#
|
||||
# Determine if pci_alloc_irq_vectors(), pci_free_irq_vectors()
|
||||
@ -1105,6 +1113,61 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_VFIO_DEVICE_MIGRATION_HAS_START_PFN" "" "types"
|
||||
;;
|
||||
|
||||
vfio_uninit_group_dev)
|
||||
#
|
||||
# Determine if vfio_uninit_group_dev() function is present or not.
|
||||
#
|
||||
# Added by commit ae03c3771b8c (vfio: Introduce a vfio_uninit_group_dev()
|
||||
# API call) in v5.15
|
||||
#
|
||||
CODE="
|
||||
#include <linux/vfio.h>
|
||||
void conftest_vfio_uninit_group_dev() {
|
||||
vfio_uninit_group_dev();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_VFIO_UNINIT_GROUP_DEV_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
|
||||
vfio_pci_core_available)
|
||||
# Determine if VFIO_PCI_CORE is available
|
||||
#
|
||||
# Added by commit 7fa005caa35e ("vfio/pci: Introduce
|
||||
# vfio_pci_core.ko") in v5.16 (2021-08-26)
|
||||
#
|
||||
|
||||
CODE="
|
||||
#if defined(NV_LINUX_VFIO_PCI_CORE_H_PRESENT)
|
||||
#include <linux/vfio_pci_core.h>
|
||||
#endif
|
||||
|
||||
#if !defined(CONFIG_VFIO_PCI_CORE) && !defined(CONFIG_VFIO_PCI_CORE_MODULE)
|
||||
#error VFIO_PCI_CORE not enabled
|
||||
#endif
|
||||
void conftest_vfio_pci_core_available(void) {
|
||||
struct vfio_pci_core_device dev;
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_VFIO_PCI_CORE_PRESENT" "" "generic"
|
||||
;;
|
||||
|
||||
vfio_register_emulated_iommu_dev)
|
||||
#
|
||||
# Determine if vfio_register_emulated_iommu_dev() function is present or not.
|
||||
#
|
||||
# Added by commit c68ea0d00ad8 (vfio: simplify iommu group allocation
|
||||
# for mediated devices) in v5.16
|
||||
#
|
||||
CODE="
|
||||
#include <linux/vfio.h>
|
||||
void conftest_vfio_register_emulated_iommu_dev() {
|
||||
vfio_register_emulated_iommu_dev();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_VFIO_REGISTER_EMULATED_IOMMU_DEV_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_available)
|
||||
# Determine if the DRM subsystem is usable
|
||||
CODE="
|
||||
@ -1192,22 +1255,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_GET_NUM_PHYSPAGES_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
proc_remove)
|
||||
#
|
||||
# Determine if the proc_remove() function is present.
|
||||
#
|
||||
# Added by commit a8ca16ea7b0a ("proc: Supply a function to
|
||||
# remove a proc entry by PDE") in v3.10
|
||||
#
|
||||
CODE="
|
||||
#include <linux/proc_fs.h>
|
||||
void conftest_proc_remove(void) {
|
||||
proc_remove();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_PROC_REMOVE_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
backing_dev_info)
|
||||
#
|
||||
# Determine if the 'address_space' structure has
|
||||
@ -1225,77 +1272,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_ADDRESS_SPACE_HAS_BACKING_DEV_INFO" "" "types"
|
||||
;;
|
||||
|
||||
address_space)
|
||||
#
|
||||
# Determine if the 'address_space' structure has
|
||||
# a 'tree_lock' field of type rwlock_t.
|
||||
#
|
||||
# 'tree_lock' was changed to spinlock_t by commit 19fd6231279b
|
||||
# ("mm: spinlock tree_lock") in v2.6.27
|
||||
#
|
||||
# It was removed altogether by commit b93b016313b3 ("page cache:
|
||||
# use xa_lock") in v4.17
|
||||
#
|
||||
CODE="
|
||||
#include <linux/fs.h>
|
||||
int conftest_address_space(void) {
|
||||
struct address_space as;
|
||||
rwlock_init(&as.tree_lock);
|
||||
return offsetof(struct address_space, tree_lock);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_ADDRESS_SPACE_HAS_RWLOCK_TREE_LOCK" "" "types"
|
||||
;;
|
||||
|
||||
address_space_init_once)
|
||||
#
|
||||
# Determine if address_space_init_once is present.
|
||||
#
|
||||
# Added by commit 2aa15890f3c1 ("mm: prevent concurrent
|
||||
# unmap_mapping_range() on the same inode") in v2.6.38
|
||||
#
|
||||
# If not present, it will be defined in uvm-linux.h.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/fs.h>
|
||||
void conftest_address_space_init_once(void) {
|
||||
address_space_init_once();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_ADDRESS_SPACE_INIT_ONCE_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
kuid_t)
|
||||
#
|
||||
# Determine if the 'kuid_t' type is present.
|
||||
#
|
||||
# Added by commit 7a4e7408c5ca ("userns: Add kuid_t and kgid_t
|
||||
# and associated infrastructure in uidgid.h") in v3.5
|
||||
#
|
||||
CODE="
|
||||
#include <linux/sched.h>
|
||||
kuid_t conftest_kuid_t;
|
||||
"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_KUID_T_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
pm_vt_switch_required)
|
||||
#
|
||||
# Determine if the pm_vt_switch_required() function is present.
|
||||
#
|
||||
# Added by commit f43f627d2f17 ("PM: make VT switching to the
|
||||
# suspend console optional v3") in v3.10
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pm.h>
|
||||
void conftest_pm_vt_switch_required(void) {
|
||||
pm_vt_switch_required();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_PM_VT_SWITCH_REQUIRED_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
xen_ioemu_inject_msi)
|
||||
# Determine if the xen_ioemu_inject_msi() function is present.
|
||||
CODE="
|
||||
@ -1473,39 +1449,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_NVHOST_DMA_FENCE_UNPACK_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
of_get_property)
|
||||
#
|
||||
# Determine if the of_get_property function is present.
|
||||
#
|
||||
# Support for kernels without CONFIG_OF defined added by commit
|
||||
# 89272b8c0d42 ("dt: add empty of_get_property for non-dt") in v3.1
|
||||
#
|
||||
# Test if linux/of.h header file inclusion is successful or not and
|
||||
# define/undefine NV_LINUX_OF_H_USABLE depending upon status of inclusion
|
||||
#
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#include <linux/of.h>
|
||||
" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
echo "#define NV_LINUX_OF_H_USABLE" | append_conftest "generic"
|
||||
CODE="
|
||||
#include <linux/of.h>
|
||||
void conftest_of_get_property() {
|
||||
of_get_property();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_OF_GET_PROPERTY_PRESENT" "" "functions"
|
||||
else
|
||||
echo "#undef NV_LINUX_OF_H_USABLE" | append_conftest "generic"
|
||||
echo "#undef NV_OF_GET_PROPERTY_PRESENT" | append_conftest "functions"
|
||||
fi
|
||||
;;
|
||||
|
||||
of_find_node_by_phandle)
|
||||
#
|
||||
# Determine if the of_find_node_by_phandle function is present.
|
||||
@ -1594,50 +1537,28 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_PNV_PCI_GET_NPU_DEV_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
kernel_write)
|
||||
kernel_write_has_pointer_pos_arg)
|
||||
#
|
||||
# Determine if the function kernel_write() is present.
|
||||
#
|
||||
# First exported by commit 7bb307e894d5 ("export kernel_write(),
|
||||
# convert open-coded instances") in v3.9
|
||||
# Determine the pos argument type, which was changed by
|
||||
# commit e13ec939e96b1 (fs: fix kernel_write prototype) on
|
||||
# 9/1/2017.
|
||||
#
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#include <linux/fs.h>
|
||||
void conftest_kernel_write(void) {
|
||||
kernel_write();
|
||||
ssize_t kernel_write(struct file *file, const void *buf,
|
||||
size_t count, loff_t *pos)
|
||||
{
|
||||
return 0;
|
||||
}" > conftest$$.c;
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
echo "#undef NV_KERNEL_WRITE_PRESENT" | append_conftest "function"
|
||||
if [ -f conftest$$.o ]; then
|
||||
echo "#define NV_KERNEL_WRITE_HAS_POINTER_POS_ARG" | append_conftest "function"
|
||||
rm -f conftest$$.o
|
||||
else
|
||||
echo "#define NV_KERNEL_WRITE_PRESENT" | append_conftest "function"
|
||||
|
||||
#
|
||||
# Determine the pos argument type, which was changed by
|
||||
# commit e13ec939e96b1 (fs: fix kernel_write prototype) on
|
||||
# 9/1/2017.
|
||||
#
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#include <linux/fs.h>
|
||||
ssize_t kernel_write(struct file *file, const void *buf,
|
||||
size_t count, loff_t *pos)
|
||||
{
|
||||
return 0;
|
||||
}" > conftest$$.c;
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
echo "#define NV_KERNEL_WRITE_HAS_POINTER_POS_ARG" | append_conftest "function"
|
||||
rm -f conftest$$.o
|
||||
else
|
||||
echo "#undef NV_KERNEL_WRITE_HAS_POINTER_POS_ARG" | append_conftest "function"
|
||||
fi
|
||||
echo "#undef NV_KERNEL_WRITE_HAS_POINTER_POS_ARG" | append_conftest "function"
|
||||
fi
|
||||
;;
|
||||
|
||||
@ -2004,6 +1925,7 @@ compile_test() {
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
@ -2013,7 +1935,7 @@ compile_test() {
|
||||
else
|
||||
echo "#undef NV_DRM_UNIVERSAL_PLANE_INIT_HAS_FORMAT_MODIFIERS_ARG" | append_conftest "types"
|
||||
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRMP_H_PRESENT)
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
@ -2033,35 +1955,10 @@ compile_test() {
|
||||
0, /* unsigned int format_count */
|
||||
DRM_PLANE_TYPE_PRIMARY,
|
||||
NULL); /* const char *name */
|
||||
}" > conftest$$.c
|
||||
}"
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
|
||||
echo "#define NV_DRM_UNIVERSAL_PLANE_INIT_HAS_NAME_ARG" | append_conftest "types"
|
||||
else
|
||||
echo "#undef NV_DRM_UNIVERSAL_PLANE_INIT_HAS_NAME_ARG" | append_conftest "types"
|
||||
fi
|
||||
compile_check_conftest "$CODE" "NV_DRM_UNIVERSAL_PLANE_INIT_HAS_NAME_ARG" "" "types"
|
||||
fi
|
||||
|
||||
;;
|
||||
|
||||
vzalloc)
|
||||
#
|
||||
# Determine if the vzalloc function is present
|
||||
#
|
||||
# Added by commit e1ca7788dec6 ("mm: add vzalloc() and
|
||||
# vzalloc_node() helpers") in v2.6.37 (2010-10-26)
|
||||
#
|
||||
CODE="
|
||||
#include <linux/vmalloc.h>
|
||||
void conftest_vzalloc() {
|
||||
vzalloc();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_VZALLOC_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_driver_has_set_busid)
|
||||
@ -2186,29 +2083,14 @@ compile_test() {
|
||||
echo "#error wait_on_bit_lock() conftest failed!" | append_conftest "functions"
|
||||
;;
|
||||
|
||||
bitmap_clear)
|
||||
#
|
||||
# Determine if the bitmap_clear function is present
|
||||
#
|
||||
# Added by commit c1a2a962a2ad ("bitmap: introduce bitmap_set,
|
||||
# bitmap_clear, bitmap_find_next_zero_area") in v2.6.33
|
||||
# (2009-12-15)
|
||||
#
|
||||
CODE="
|
||||
#include <linux/bitmap.h>
|
||||
void conftest_bitmap_clear() {
|
||||
bitmap_clear();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_BITMAP_CLEAR_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
pci_stop_and_remove_bus_device)
|
||||
#
|
||||
# Determine if the pci_stop_and_remove_bus_device() function is present.
|
||||
#
|
||||
# Added by commit 210647af897a ("PCI: Rename pci_remove_bus_device
|
||||
# to pci_stop_and_remove_bus_device") in v3.4 (2012-02-25)
|
||||
# to pci_stop_and_remove_bus_device") in v3.4 (2012-02-25) but
|
||||
# aarch64 support was added by commit d1e6dc91b532
|
||||
# ("arm64: Add architectural support for PCI") in v3.18-rc1.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/types.h>
|
||||
@ -2220,23 +2102,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_PCI_STOP_AND_REMOVE_BUS_DEVICE_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
pci_remove_bus_device)
|
||||
#
|
||||
# Determine if the pci_remove_bus_device() function is present.
|
||||
# Added before Linux-2.6.12-rc2 2005-04-16
|
||||
# Because we support builds on non-PCI platforms, we still need
|
||||
# to check for this function's presence.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/types.h>
|
||||
#include <linux/pci.h>
|
||||
void conftest_pci_remove_bus_device() {
|
||||
pci_remove_bus_device();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_PCI_REMOVE_BUS_DEVICE_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_helper_mode_fill_fb_struct | drm_helper_mode_fill_fb_struct_has_const_mode_cmd_arg)
|
||||
#
|
||||
# Determine if the drm_helper_mode_fill_fb_struct function takes
|
||||
@ -2334,23 +2199,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_PCI_DEV_HAS_ATS_ENABLED" "" "types"
|
||||
;;
|
||||
|
||||
mt_device_gre)
|
||||
#
|
||||
# Determine if MT_DEVICE_GRE flag is present.
|
||||
#
|
||||
# MT_DEVICE_GRE flag is removed by commit 58cc6b72a21274
|
||||
# ("arm64: mm: Remove unused support for Device-GRE memory type") in v5.14-rc1
|
||||
# (2021-06-01).
|
||||
#
|
||||
CODE="
|
||||
#include <asm/memory.h>
|
||||
unsigned int conftest_mt_device_gre(void) {
|
||||
return MT_DEVICE_GRE;
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_MT_DEVICE_GRE_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
get_user_pages)
|
||||
#
|
||||
# Conftest for get_user_pages()
|
||||
@ -2668,20 +2516,146 @@ compile_test() {
|
||||
fi
|
||||
;;
|
||||
|
||||
usleep_range)
|
||||
pin_user_pages)
|
||||
#
|
||||
# Determine if the function usleep_range() is present.
|
||||
# Determine if the function pin_user_pages() is present.
|
||||
# Presence of pin_user_pages() also implies the presence of
|
||||
# unpin_user_page(). Both were added in v5.6-rc1
|
||||
#
|
||||
# Added by commit 5e7f5a178bba ("timer: Added usleep_range timer")
|
||||
# in v2.6.36 (2010-08-04)
|
||||
# pin_user_pages() was added by commit eddb1c228f7951d399240
|
||||
# ("mm/gup: introduce pin_user_pages*() and FOLL_PIN") in
|
||||
# v5.6-rc1 (2020-01-30)
|
||||
|
||||
# conftest #1: check if pin_user_pages() is available
|
||||
# return if not available.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/delay.h>
|
||||
void conftest_usleep_range(void) {
|
||||
usleep_range();
|
||||
#include <linux/mm.h>
|
||||
void conftest_pin_user_pages(void) {
|
||||
pin_user_pages();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_USLEEP_RANGE_PRESENT" "" "functions"
|
||||
compile_check_conftest "$CODE" "NV_PIN_USER_PAGES_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
pin_user_pages_remote)
|
||||
# Determine if the function pin_user_pages_remote() is present
|
||||
#
|
||||
# pin_user_pages_remote() was added by commit eddb1c228f7951d399240
|
||||
# ("mm/gup: introduce pin_user_pages*() and FOLL_PIN")
|
||||
# in v5.6 (2020-01-30)
|
||||
|
||||
# pin_user_pages_remote() removed 'tsk' parameter by
|
||||
# commit 64019a2e467a ("mm/gup: remove task_struct pointer for
|
||||
# all gup code") in v5.9-rc1 (2020-08-11).
|
||||
|
||||
#
|
||||
# This function sets the NV_PIN_USER_PAGES_REMOTE_* macros as per
|
||||
# the below passing conftest's
|
||||
#
|
||||
set_pin_user_pages_remote_defines () {
|
||||
if [ "$1" = "" ]; then
|
||||
echo "#undef NV_PIN_USER_PAGES_REMOTE_PRESENT" | append_conftest "functions"
|
||||
else
|
||||
echo "#define NV_PIN_USER_PAGES_REMOTE_PRESENT" | append_conftest "functions"
|
||||
fi
|
||||
|
||||
if [ "$1" = "NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK" ]; then
|
||||
echo "#define NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK" | append_conftest "functions"
|
||||
else
|
||||
echo "#undef NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK" | append_conftest "functions"
|
||||
fi
|
||||
}
|
||||
|
||||
# conftest #1: check if pin_user_pages_remote() is available
|
||||
# return if not available.
|
||||
# Fall through to conftest #2 if it is present
|
||||
#
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#include <linux/mm.h>
|
||||
void conftest_pin_user_pages_remote(void) {
|
||||
pin_user_pages_remote();
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
set_pin_user_pages_remote_defines ""
|
||||
rm -f conftest$$.o
|
||||
return
|
||||
fi
|
||||
|
||||
# conftest #2: Check if pin_user_pages_remote() has tsk argument
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#include <linux/mm.h>
|
||||
long pin_user_pages_remote(struct task_struct *tsk,
|
||||
struct mm_struct *mm,
|
||||
unsigned long start,
|
||||
unsigned long nr_pages,
|
||||
unsigned int gup_flags,
|
||||
struct page **pages,
|
||||
struct vm_area_struct **vmas,
|
||||
int *locked) {
|
||||
return 0;
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
set_pin_user_pages_remote_defines "NV_PIN_USER_PAGES_REMOTE_HAS_ARGS_TSK"
|
||||
rm -f conftest$$.o
|
||||
else
|
||||
set_pin_user_pages_remote_defines "NV_PIN_USER_PAGES_REMOTE_PRESENT"
|
||||
fi
|
||||
;;
|
||||
|
||||
vfio_pin_pages)
|
||||
#
|
||||
# Determine if vfio_pin_pages() kABI accepts "struct vfio_device *"
|
||||
# argument instead of "struct device *"
|
||||
#
|
||||
# Replaced "struct device *" with "struct vfio_device *" by commit
|
||||
# 8e432bb015b6c ("vfio/mdev: Pass in a struct vfio_device * to
|
||||
# vfio_pin/unpin_pages()") in v5.19
|
||||
#
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#include <linux/pci.h>
|
||||
#include <linux/vfio.h>
|
||||
int vfio_pin_pages(struct vfio_device *device,
|
||||
unsigned long *user_pfn,
|
||||
int npage,
|
||||
int prot,
|
||||
unsigned long *phys_pfn) {
|
||||
return 0;
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
echo "#define NV_VFIO_PIN_PAGES_HAS_VFIO_DEVICE_ARG" | append_conftest "functions"
|
||||
rm -f conftest$$.o
|
||||
else
|
||||
echo "#undef NV_VFIO_PIN_PAGES_HAS_VFIO_DEVICE_ARG" | append_conftest "functions"
|
||||
fi
|
||||
;;
|
||||
|
||||
pci_driver_has_driver_managed_dma)
|
||||
#
|
||||
# Determine if "struct pci_driver" has .driver_managed_dma member.
|
||||
#
|
||||
# Added by commit 512881eacfa7 ("bus: platform,amba,fsl-mc,PCI:
|
||||
# Add device DMA ownership management") in v5.19
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
int conftest_pci_driver_has_driver_managed_dma(void) {
|
||||
return offsetof(struct pci_driver, driver_managed_dma);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_PCI_DRIVER_HAS_DRIVER_MANAGED_DMA" "" "types"
|
||||
;;
|
||||
|
||||
radix_tree_empty)
|
||||
@ -2751,6 +2725,130 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_MASTER_DROP_HAS_FROM_RELEASE_ARG" "" "types"
|
||||
;;
|
||||
|
||||
drm_connector_lookup)
|
||||
#
|
||||
# Determine if function drm_connector_lookup() is present.
|
||||
#
|
||||
# Added by commit b164d31f50b2 ("drm/modes: add connector reference
|
||||
# counting. (v2)") in v4.7 (2016-05-04), when it replaced
|
||||
# drm_connector_find().
|
||||
#
|
||||
# It was originally added in drm_crtc.h, then moved to
|
||||
# drm_connector.h by commit 522171951761
|
||||
# ("drm: Extract drm_connector.[hc]") in v4.9 (2016-08-12)
|
||||
#
|
||||
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_CRTC_H_PRESENT)
|
||||
#include <drm/drm_crtc.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_CONNECTOR_H_PRESENT)
|
||||
#include <drm/drm_connector.h>
|
||||
#endif
|
||||
void conftest_drm_connector_lookup(void) {
|
||||
drm_connector_lookup();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_LOOKUP_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_connector_put)
|
||||
#
|
||||
# Determine if function drm_connector_put() is present.
|
||||
#
|
||||
# Added by commit ad09360750af ("drm: Introduce
|
||||
# drm_connector_{get,put}()") in v4.12 (2017-02-28),
|
||||
# when it replaced drm_connector_unreference() that
|
||||
# was added with NV_DRM_CONNECTOR_LOOKUP_PRESENT.
|
||||
#
|
||||
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_CONNECTOR_H_PRESENT)
|
||||
#include <drm/drm_connector.h>
|
||||
#endif
|
||||
void conftest_drm_connector_put(void) {
|
||||
drm_connector_put();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_PUT_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
drm_modeset_lock_all_end)
|
||||
#
|
||||
# Determine the number of arguments of the
|
||||
# DRM_MODESET_LOCK_ALL_END() macro.
|
||||
#
|
||||
# DRM_MODESET_LOCK_ALL_END() is added with two arguments by commit
|
||||
# b7ea04d299c7 (drm: drm: Add DRM_MODESET_LOCK_BEGIN/END helpers)
|
||||
# in v5.0 (2018-11-29). The definition and prototype is changed to
|
||||
# also take the third argument drm_device, by commit 77ef38574beb
|
||||
# (drm/modeset-lock: Take the modeset BKL for legacy drivers)
|
||||
# in v5.9 (2020-08-17).
|
||||
#
|
||||
DRM_MODESET_3_COMPILED=0
|
||||
DRM_MODESET_2_COMPILED=0
|
||||
DRM_MODESET_INCLUDES="
|
||||
#if defined(NV_DRM_DRM_DEVICE_H_PRESENT)
|
||||
#include <drm/drm_device.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_DRV_H_PRESENT)
|
||||
#include <drm/drm_drv.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_MODESET_LOCK_H_PRESENT)
|
||||
#include <drm/drm_modeset_lock.h>
|
||||
#endif"
|
||||
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
$DRM_MODESET_INCLUDES
|
||||
|
||||
void conftest_drm_modeset_lock_all_end(
|
||||
struct drm_device *dev,
|
||||
struct drm_modeset_acquire_ctx ctx,
|
||||
int ret) {
|
||||
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
|
||||
DRM_MODESET_LOCK_ALL_END(dev, ctx, ret);
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
DRM_MODESET_3_COMPILED=1
|
||||
rm -f conftest$$.o
|
||||
fi
|
||||
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
$DRM_MODESET_INCLUDES
|
||||
|
||||
void conftest_drm_modeset_lock_all_end(
|
||||
struct drm_device *dev,
|
||||
struct drm_modeset_acquire_ctx ctx,
|
||||
int ret) {
|
||||
DRM_MODESET_LOCK_ALL_BEGIN(dev, ctx, 0, ret);
|
||||
DRM_MODESET_LOCK_ALL_END(ctx, ret);
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
DRM_MODESET_2_COMPILED=1
|
||||
rm -f conftest$$.o
|
||||
fi
|
||||
|
||||
# If the macro is undefined, both code snippets will still compile,
|
||||
# so we need to check both and make sure only one compiles successfully.
|
||||
if [ "$DRM_MODESET_3_COMPILED" = "1" ] &&
|
||||
[ "$DRM_MODESET_2_COMPILED" = "0" ]; then
|
||||
echo "#define NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT 3" | append_conftest "functions"
|
||||
elif [ "$DRM_MODESET_3_COMPILED" = "0" ] &&
|
||||
[ "$DRM_MODESET_2_COMPILED" = "1" ]; then
|
||||
echo "#define NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT 2" | append_conftest "functions"
|
||||
else
|
||||
echo "#define NV_DRM_MODESET_LOCK_ALL_END_ARGUMENT_COUNT 0" | append_conftest "functions"
|
||||
fi
|
||||
;;
|
||||
|
||||
drm_atomic_state_ref_counting)
|
||||
#
|
||||
# Determine if functions drm_atomic_state_get/put() are
|
||||
@ -3038,23 +3136,6 @@ compile_test() {
|
||||
fi
|
||||
;;
|
||||
|
||||
kthread_create_on_node)
|
||||
#
|
||||
# Determine if kthread_create_on_node is available
|
||||
#
|
||||
# kthread_create_on_node was added in by commit 207205a2ba26
|
||||
# ("kthread: NUMA aware kthread_create_on_node()") in v2.6.39
|
||||
# (2011-03-22).
|
||||
#
|
||||
CODE="
|
||||
#include <linux/kthread.h>
|
||||
void kthread_create_on_node_conftest(void) {
|
||||
(void)kthread_create_on_node();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_KTHREAD_CREATE_ON_NODE_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
cpumask_of_node)
|
||||
#
|
||||
# Determine whether cpumask_of_node is available.
|
||||
@ -3397,8 +3478,16 @@ compile_test() {
|
||||
# Note that drm_connector.h was introduced by commit 522171951761
|
||||
# ("drm: Extract drm_connector.[hc]") in v4.9 (2016-08-12)
|
||||
#
|
||||
# Note: up to 4.9 function was provided by drm_crtc.h by commit
|
||||
# f453ba046074 in 2.6.29 (2008-12-29)
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_CONNECTOR_H_PRESENT)
|
||||
#include <drm/drm_connector.h>
|
||||
#endif
|
||||
#if defined(NV_DRM_DRM_CRTC_H_PRESENT)
|
||||
#include <drm/drm_crtc.h>
|
||||
#endif
|
||||
void conftest_drm_connector_funcs_have_mode_in_name(void) {
|
||||
drm_mode_connector_attach_encoder();
|
||||
}"
|
||||
@ -3406,21 +3495,25 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_FUNCS_HAVE_MODE_IN_NAME" "" "functions"
|
||||
;;
|
||||
|
||||
|
||||
node_states_n_memory)
|
||||
drm_connector_has_vrr_capable_property)
|
||||
#
|
||||
# Determine if the N_MEMORY constant exists.
|
||||
# Determine if drm_connector_attach_vrr_capable_property and
|
||||
# drm_connector_set_vrr_capable_property is present
|
||||
#
|
||||
# Added by commit 8219fc48adb3 ("mm: node_states: introduce
|
||||
# N_MEMORY") in v3.8 (2012-12-12).
|
||||
# Added by commit ba1b0f6c73d4ea1390f0d5381f715ffa20c75f09 ("drm:
|
||||
# Add vrr_capable property to the drm connector") in v5.0-rc1
|
||||
# (2018-11-28)
|
||||
#
|
||||
CODE="
|
||||
#include <linux/nodemask.h>
|
||||
int conftest_node_states_n_memory(void) {
|
||||
return N_MEMORY;
|
||||
#if defined(NV_DRM_DRM_CONNECTOR_H_PRESENT)
|
||||
#include <drm/drm_connector.h>
|
||||
#endif
|
||||
|
||||
void conftest_drm_connector_has_vrr_capable_property(void) {
|
||||
drm_connector_attach_vrr_capable_property();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_NODE_STATES_N_MEMORY_PRESENT" "" "types"
|
||||
compile_check_conftest "$CODE" "NV_DRM_CONNECTOR_HAS_VRR_CAPABLE_PROPERTY" "" "functions"
|
||||
;;
|
||||
|
||||
vm_fault_t)
|
||||
@ -3615,92 +3708,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_PM_RUNTIME_AVAILABLE" "" "generic"
|
||||
;;
|
||||
|
||||
device_driver_of_match_table)
|
||||
#
|
||||
# Determine if the device_driver struct has an of_match_table member.
|
||||
#
|
||||
# of_match_table was added by commit 597b9d1e44e9 ("drivercore:
|
||||
# Add of_match_table to the common device drivers") in v2.6.35
|
||||
# (2010-04-13).
|
||||
#
|
||||
CODE="
|
||||
#include <linux/device.h>
|
||||
int conftest_device_driver_of_match_table(void) {
|
||||
return offsetof(struct device_driver, of_match_table);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DEVICE_DRIVER_OF_MATCH_TABLE_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
device_of_node)
|
||||
#
|
||||
# Determine if the device struct has an of_node member.
|
||||
#
|
||||
# of_node member was added by commit d706c1b05027 ("driver-core:
|
||||
# Add device node pointer to struct device") in v2.6.35
|
||||
# (2010-04-13).
|
||||
#
|
||||
CODE="
|
||||
#include <linux/device.h>
|
||||
int conftest_device_of_node(void) {
|
||||
return offsetof(struct device, of_node);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DEVICE_OF_NODE_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
dev_is_pci)
|
||||
#
|
||||
# Determine if the dev_is_pci() macro is present.
|
||||
#
|
||||
# dev_is_pci() macro was added by commit fb8a0d9d1bfd ("pci: Add
|
||||
# SR-IOV convenience functions and macros") in v2.6.34
|
||||
# (2010-02-10).
|
||||
#
|
||||
CODE="
|
||||
#include <linux/pci.h>
|
||||
void conftest_dev_is_pci(void) {
|
||||
if(dev_is_pci()) {}
|
||||
}
|
||||
"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DEV_IS_PCI_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
of_find_matching_node)
|
||||
#
|
||||
# Determine if the of_find_matching_node() function is present.
|
||||
#
|
||||
# Test if linux/of.h header file inclusion is successful or not and
|
||||
# define/undefine NV_LINUX_OF_H_USABLE depending upon status of inclusion.
|
||||
#
|
||||
# of_find_matching_node was added by commit 283029d16a88
|
||||
# ("[POWERPC] Add of_find_matching_node() helper function") in
|
||||
# v2.6.25 (2008-01-09).
|
||||
#
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#include <linux/of.h>
|
||||
" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
echo "#define NV_LINUX_OF_H_USABLE" | append_conftest "generic"
|
||||
CODE="
|
||||
#include <linux/of.h>
|
||||
void conftest_of_find_matching_node() {
|
||||
of_find_matching_node();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_OF_FIND_MATCHING_NODE_PRESENT" "" "functions"
|
||||
else
|
||||
echo "#undef NV_LINUX_OF_H_USABLE" | append_conftest "generic"
|
||||
echo "#undef NV_OF_FIND_MATCHING_NODE_PRESENT" | append_conftest "functions"
|
||||
fi
|
||||
;;
|
||||
|
||||
dma_direct_map_resource)
|
||||
#
|
||||
# Determine whether dma_is_direct() exists.
|
||||
@ -3857,7 +3864,7 @@ compile_test() {
|
||||
#include <drm/drmP.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_CONNECTOR_H_PRESENT)
|
||||
#if defined(NV_DRM_DRM_CONNECTOR_H_PRESENT)
|
||||
#include <drm/drm_connector.h>
|
||||
#endif
|
||||
|
||||
@ -4009,6 +4016,26 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_CRTC_STATE_HAS_PAGEFLIP_FLAGS" "" "types"
|
||||
;;
|
||||
|
||||
drm_crtc_state_has_vrr_enabled)
|
||||
#
|
||||
# Determine if 'drm_crtc_state' structure has a
|
||||
# 'vrr_enabled' field.
|
||||
#
|
||||
# Added by commit 1398958cfd8d331342d657d37151791dd7256b40 ("drm:
|
||||
# Add vrr_enabled property to drm CRTC") in v5.0-rc1 (2018-11-28)
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_DRM_DRM_CRTC_H_PRESENT)
|
||||
#include <drm/drm_crtc.h>
|
||||
#endif
|
||||
|
||||
int conftest_drm_crtc_state_has_vrr_enabled(void) {
|
||||
return offsetof(struct drm_crtc_state, vrr_enabled);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_DRM_CRTC_STATE_HAS_VRR_ENABLED" "" "types"
|
||||
;;
|
||||
|
||||
ktime_get_raw_ts64)
|
||||
#
|
||||
# Determine if ktime_get_raw_ts64() is present
|
||||
@ -4146,36 +4173,6 @@ compile_test() {
|
||||
fi
|
||||
;;
|
||||
|
||||
hlist_for_each_entry)
|
||||
#
|
||||
# Determine how many arguments hlist_for_each_entry takes.
|
||||
#
|
||||
# Changed by commit b67bfe0d42c ("hlist: drop the node parameter
|
||||
# from iterators") in v3.9 (2013-02-28)
|
||||
#
|
||||
echo "$CONFTEST_PREAMBLE
|
||||
#include <linux/list.h>
|
||||
void conftest_hlist_for_each_entry(void) {
|
||||
struct hlist_head *head;
|
||||
struct dummy
|
||||
{
|
||||
struct hlist_node hlist;
|
||||
};
|
||||
struct dummy *pos;
|
||||
hlist_for_each_entry(pos, head, hlist) {}
|
||||
}" > conftest$$.c
|
||||
|
||||
$CC $CFLAGS -c conftest$$.c > /dev/null 2>&1
|
||||
rm -f conftest$$.c
|
||||
|
||||
if [ -f conftest$$.o ]; then
|
||||
rm -f conftest$$.o
|
||||
echo "#define NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT 3" | append_conftest "functions"
|
||||
else
|
||||
echo "#define NV_HLIST_FOR_EACH_ENTRY_ARGUMENT_COUNT 4" | append_conftest "functions"
|
||||
fi
|
||||
;;
|
||||
|
||||
drm_vma_offset_exact_lookup_locked)
|
||||
#
|
||||
# Determine if the drm_vma_offset_exact_lookup_locked() function
|
||||
@ -4533,38 +4530,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_DRM_GEM_OBJECT_VMAP_HAS_MAP_ARG" "" "types"
|
||||
;;
|
||||
|
||||
set_close_on_exec)
|
||||
#
|
||||
# __set_close_on_exec(() was added by
|
||||
# commit 1dce27c5aa67 ("Wrap accesses to the fd_sets")
|
||||
# in v3.4-rc1 (2012-02-19)
|
||||
#
|
||||
CODE="
|
||||
#include <linux/types.h>
|
||||
#include <linux/fdtable.h>
|
||||
void conftest_set_close_on_exec(void) {
|
||||
__set_close_on_exec();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_SET_CLOSE_ON_EXEC_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
iterate_fd)
|
||||
#
|
||||
# iterate_fd() was added by
|
||||
# commit c3c073f808b2 ("new helper: iterate_fd()")
|
||||
# in v3.7-rc1 (2012-09-26)
|
||||
#
|
||||
CODE="
|
||||
#include <linux/types.h>
|
||||
#include <linux/fdtable.h>
|
||||
void conftest_iterate_fd(void) {
|
||||
iterate_fd();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_ITERATE_FD_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
seq_read_iter)
|
||||
#
|
||||
# Determine if seq_read_iter() is present
|
||||
@ -4599,23 +4564,6 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_PCI_CLASS_MULTIMEDIA_HD_AUDIO_PRESENT" "" "generic"
|
||||
;;
|
||||
|
||||
sg_page_iter_page)
|
||||
#
|
||||
# Determine if sg_page_iter_page() is present
|
||||
#
|
||||
# sg_page_iter_page() was added by commit 2db76d7c3c6db
|
||||
# ("lib/scatterlist: sg_page_iter: support sg lists w/o backing
|
||||
# pages") in v3.10-rc1 (2013-05-11).
|
||||
#
|
||||
CODE="
|
||||
#include <linux/scatterlist.h>
|
||||
void conftest_sg_page_iter_page(void) {
|
||||
sg_page_iter_page();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_SG_PAGE_ITER_PAGE_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
unsafe_follow_pfn)
|
||||
#
|
||||
# Determine if unsafe_follow_pfn() is present.
|
||||
@ -5294,6 +5242,48 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_PLATFORM_IRQ_COUNT_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
devm_clk_bulk_get_all)
|
||||
#
|
||||
# Determine if devm_clk_bulk_get_all() function is present
|
||||
#
|
||||
# Added by commit f08c2e286 ("clk: add managed version of clk_bulk_get_all")
|
||||
#
|
||||
CODE="
|
||||
#if defined(NV_LINUX_CLK_H_PRESENT)
|
||||
#include <linux/clk.h>
|
||||
#endif
|
||||
void conftest_devm_clk_bulk_get_all(void)
|
||||
{
|
||||
devm_clk_bulk_get_all();
|
||||
}
|
||||
"
|
||||
compile_check_conftest "$CODE" "NV_DEVM_CLK_BULK_GET_ALL_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
mmget_not_zero)
|
||||
#
|
||||
# Determine if mmget_not_zero() function is present
|
||||
#
|
||||
# mmget_not_zero() function was added by commit
|
||||
# d2005e3f41d4f9299e2df6a967c8beb5086967a9 ("userfaultfd: don't pin
|
||||
# the user memory in userfaultfd_file_create()") in v4.7
|
||||
# (2016-05-20) in linux/sched.h but then moved to linux/sched/mm.h
|
||||
# by commit 68e21be2916b359fd8afb536c1911dc014cfd03e
|
||||
# ("sched/headers: Move task->mm handling methods to
|
||||
# <linux/sched/mm.h>") in v4.11 (2017-02-01).
|
||||
CODE="
|
||||
#if defined(NV_LINUX_SCHED_MM_H_PRESENT)
|
||||
#include <linux/sched/mm.h>
|
||||
#elif defined(NV_LINUX_SCHED_H_PRESENT)
|
||||
#include <linux/sched.h>
|
||||
#endif
|
||||
void conftest_mmget_not_zero(void) {
|
||||
mmget_not_zero();
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_MMGET_NOT_ZERO_PRESENT" "" "functions"
|
||||
;;
|
||||
|
||||
dma_resv_add_fence)
|
||||
#
|
||||
# Determine if the dma_resv_add_fence() function is present.
|
||||
@ -5377,7 +5367,7 @@ compile_test() {
|
||||
# Determine if 'num_registered_fb' variable is present.
|
||||
#
|
||||
# 'num_registered_fb' was removed by commit 5727dcfd8486
|
||||
# ("fbdev: Make registered_fb[] private to fbmem.c) for
|
||||
# ("fbdev: Make registered_fb[] private to fbmem.c") for
|
||||
# v5.20 linux-next (2022-07-27).
|
||||
#
|
||||
CODE="
|
||||
@ -5389,6 +5379,31 @@ compile_test() {
|
||||
compile_check_conftest "$CODE" "NV_NUM_REGISTERED_FB_PRESENT" "" "types"
|
||||
;;
|
||||
|
||||
acpi_video_backlight_use_native)
|
||||
#
|
||||
# Determine if acpi_video_backlight_use_native() function is present
|
||||
#
|
||||
# acpi_video_backlight_use_native was added by commit 2600bfa3df99
|
||||
# (ACPI: video: Add acpi_video_backlight_use_native() helper) for
|
||||
# v6.0 (2022-08-17). Note: the include directive for <linux/types.h>
|
||||
# in this conftest is necessary in order to support kernels between
|
||||
# commit 0b9f7d93ca61 ("ACPI / i915: ignore firmware requests for
|
||||
# backlight change") for v3.16 (2014-07-07) and commit 3bd6bce369f5
|
||||
# ("ACPI / video: Port to new backlight interface selection API")
|
||||
# for v4.2 (2015-07-16). Kernels within this range use the 'bool'
|
||||
# type and the related 'false' value in <acpi/video.h> without first
|
||||
# including the definitions of that type and value.
|
||||
#
|
||||
CODE="
|
||||
#include <linux/types.h>
|
||||
#include <acpi/video.h>
|
||||
void conftest_acpi_video_backlight_use_native(void) {
|
||||
acpi_video_backlight_use_native(0);
|
||||
}"
|
||||
|
||||
compile_check_conftest "$CODE" "NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE" "" "functions"
|
||||
;;
|
||||
|
||||
# When adding a new conftest entry, please use the correct format for
|
||||
# specifying the relevant upstream Linux kernel commit.
|
||||
#
|
||||
|
@ -118,6 +118,11 @@ __nv_drm_detect_encoder(struct NvKmsKapiDynamicDisplayParams *pDetectParams,
|
||||
return false;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_CONNECTOR_HAS_VRR_CAPABLE_PROPERTY)
|
||||
drm_connector_attach_vrr_capable_property(&nv_connector->base);
|
||||
drm_connector_set_vrr_capable_property(&nv_connector->base, pDetectParams->vrrSupported ? true : false);
|
||||
#endif
|
||||
|
||||
if (pDetectParams->connected) {
|
||||
if (!pDetectParams->overrideEdid && pDetectParams->edid.bufferSize) {
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@ -46,6 +46,35 @@
|
||||
#include <linux/nvhost.h>
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
static int
|
||||
nv_drm_atomic_replace_property_blob_from_id(struct drm_device *dev,
|
||||
struct drm_property_blob **blob,
|
||||
uint64_t blob_id,
|
||||
ssize_t expected_size)
|
||||
{
|
||||
struct drm_property_blob *new_blob = NULL;
|
||||
|
||||
if (blob_id != 0) {
|
||||
new_blob = drm_property_lookup_blob(dev, blob_id);
|
||||
if (new_blob == NULL) {
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
if ((expected_size > 0) &&
|
||||
(new_blob->length != expected_size)) {
|
||||
drm_property_blob_put(new_blob);
|
||||
return -EINVAL;
|
||||
}
|
||||
}
|
||||
|
||||
drm_property_replace_blob(blob, new_blob);
|
||||
drm_property_blob_put(new_blob);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
static void nv_drm_plane_destroy(struct drm_plane *plane)
|
||||
{
|
||||
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
|
||||
@ -84,9 +113,6 @@ cursor_plane_req_config_update(struct drm_plane *plane,
|
||||
{
|
||||
struct nv_drm_plane *nv_plane = to_nv_plane(plane);
|
||||
struct NvKmsKapiCursorRequestedConfig old_config = *req_config;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state(plane_state);
|
||||
|
||||
if (plane_state->fb == NULL) {
|
||||
cursor_req_config_disable(req_config);
|
||||
@ -186,7 +212,6 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state(plane_state);
|
||||
int ret = 0;
|
||||
|
||||
if (plane_state->fb == NULL) {
|
||||
plane_req_config_disable(req_config);
|
||||
@ -309,6 +334,9 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
nv_plane->defaultCompositionMode;
|
||||
#endif
|
||||
|
||||
req_config->config.inputColorSpace =
|
||||
nv_drm_plane_state->input_colorspace;
|
||||
|
||||
req_config->config.syncptParams.preSyncptSpecified = false;
|
||||
req_config->config.syncptParams.postSyncptRequested = false;
|
||||
|
||||
@ -320,10 +348,10 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
#if defined(NV_NVHOST_DMA_FENCE_UNPACK_PRESENT)
|
||||
if (plane_state->fence != NULL) {
|
||||
ret = nvhost_dma_fence_unpack(
|
||||
plane_state->fence,
|
||||
&req_config->config.syncptParams.preSyncptId,
|
||||
&req_config->config.syncptParams.preSyncptValue);
|
||||
int ret = nvhost_dma_fence_unpack(
|
||||
plane_state->fence,
|
||||
&req_config->config.syncptParams.preSyncptId,
|
||||
&req_config->config.syncptParams.preSyncptValue);
|
||||
if (ret != 0) {
|
||||
return ret;
|
||||
}
|
||||
@ -339,6 +367,60 @@ plane_req_config_update(struct drm_plane *plane,
|
||||
#endif
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
if (nv_drm_plane_state->hdr_output_metadata != NULL) {
|
||||
struct hdr_output_metadata *hdr_metadata =
|
||||
nv_drm_plane_state->hdr_output_metadata->data;
|
||||
struct hdr_metadata_infoframe *info_frame =
|
||||
&hdr_metadata->hdmi_metadata_type1;
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
uint32_t i;
|
||||
|
||||
if (hdr_metadata->metadata_type != HDMI_STATIC_METADATA_TYPE1) {
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Unsupported Metadata Type");
|
||||
return -1;
|
||||
}
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(info_frame->display_primaries); i ++) {
|
||||
req_config->config.hdrMetadata.displayPrimaries[i].x =
|
||||
info_frame->display_primaries[i].x;
|
||||
req_config->config.hdrMetadata.displayPrimaries[i].y =
|
||||
info_frame->display_primaries[i].y;
|
||||
}
|
||||
|
||||
req_config->config.hdrMetadata.whitePoint.x =
|
||||
info_frame->white_point.x;
|
||||
req_config->config.hdrMetadata.whitePoint.y =
|
||||
info_frame->white_point.y;
|
||||
req_config->config.hdrMetadata.maxDisplayMasteringLuminance =
|
||||
info_frame->max_display_mastering_luminance;
|
||||
req_config->config.hdrMetadata.minDisplayMasteringLuminance =
|
||||
info_frame->min_display_mastering_luminance;
|
||||
req_config->config.hdrMetadata.maxCLL =
|
||||
info_frame->max_cll;
|
||||
req_config->config.hdrMetadata.maxFALL =
|
||||
info_frame->max_fall;
|
||||
|
||||
req_config->config.hdrMetadataSpecified = true;
|
||||
|
||||
switch (info_frame->eotf) {
|
||||
case HDMI_EOTF_SMPTE_ST2084:
|
||||
req_config->config.tf = NVKMS_OUTPUT_TF_PQ;
|
||||
break;
|
||||
case HDMI_EOTF_TRADITIONAL_GAMMA_SDR:
|
||||
req_config->config.tf =
|
||||
NVKMS_OUTPUT_TF_TRADITIONAL_GAMMA_SDR;
|
||||
break;
|
||||
default:
|
||||
NV_DRM_DEV_LOG_ERR(nv_dev, "Unsupported EOTF");
|
||||
return -1;
|
||||
}
|
||||
} else {
|
||||
req_config->config.hdrMetadataSpecified = false;
|
||||
req_config->config.tf = NVKMS_OUTPUT_TF_NONE;
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Unconditionally mark the surface as changed, even if nothing changed,
|
||||
* so that we always get a flip event: a DRM client may flip with
|
||||
@ -509,9 +591,21 @@ static int nv_drm_plane_atomic_set_property(
|
||||
nv_drm_plane_state->fd_user_ptr = u64_to_user_ptr(val);
|
||||
#endif
|
||||
return 0;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
} else if (property == nv_dev->nv_input_colorspace_property) {
|
||||
nv_drm_plane_state->input_colorspace = val;
|
||||
return 0;
|
||||
}
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
else if (property == nv_dev->nv_hdr_output_metadata_property) {
|
||||
return nv_drm_atomic_replace_property_blob_from_id(
|
||||
nv_dev->dev,
|
||||
&nv_drm_plane_state->hdr_output_metadata,
|
||||
val,
|
||||
sizeof(struct hdr_output_metadata));
|
||||
}
|
||||
#endif
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static int nv_drm_plane_atomic_get_property(
|
||||
@ -521,12 +615,26 @@ static int nv_drm_plane_atomic_get_property(
|
||||
uint64_t *val)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
const struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state_const(state);
|
||||
|
||||
if (property == nv_dev->nv_out_fence_property) {
|
||||
return 0;
|
||||
} else {
|
||||
return -EINVAL;
|
||||
} else if (property == nv_dev->nv_input_colorspace_property) {
|
||||
*val = nv_drm_plane_state->input_colorspace;
|
||||
return 0;
|
||||
}
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
else if (property == nv_dev->nv_hdr_output_metadata_property) {
|
||||
const struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state_const(state);
|
||||
*val = nv_drm_plane_state->hdr_output_metadata ?
|
||||
nv_drm_plane_state->hdr_output_metadata->base.id : 0;
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
|
||||
return -EINVAL;
|
||||
}
|
||||
|
||||
static struct drm_plane_state *
|
||||
@ -544,6 +652,14 @@ nv_drm_plane_atomic_duplicate_state(struct drm_plane *plane)
|
||||
__drm_atomic_helper_plane_duplicate_state(plane, &nv_plane_state->base);
|
||||
|
||||
nv_plane_state->fd_user_ptr = nv_old_plane_state->fd_user_ptr;
|
||||
nv_plane_state->input_colorspace = nv_old_plane_state->input_colorspace;
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
nv_plane_state->hdr_output_metadata = nv_old_plane_state->hdr_output_metadata;
|
||||
if (nv_plane_state->hdr_output_metadata) {
|
||||
drm_property_blob_get(nv_plane_state->hdr_output_metadata);
|
||||
}
|
||||
#endif
|
||||
|
||||
return &nv_plane_state->base;
|
||||
}
|
||||
@ -557,6 +673,12 @@ static inline void __nv_drm_plane_atomic_destroy_state(
|
||||
#else
|
||||
__drm_atomic_helper_plane_destroy_state(state);
|
||||
#endif
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct nv_drm_plane_state *nv_drm_plane_state =
|
||||
to_nv_drm_plane_state(state);
|
||||
drm_property_blob_put(nv_drm_plane_state->hdr_output_metadata);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void nv_drm_plane_atomic_destroy_state(
|
||||
@ -803,7 +925,8 @@ static const struct drm_crtc_helper_funcs nv_crtc_helper_funcs = {
|
||||
};
|
||||
|
||||
static void nv_drm_plane_install_properties(
|
||||
struct drm_plane *plane)
|
||||
struct drm_plane *plane,
|
||||
NvBool supportsHDR)
|
||||
{
|
||||
struct nv_drm_device *nv_dev = to_nv_device(plane->dev);
|
||||
|
||||
@ -811,6 +934,19 @@ static void nv_drm_plane_install_properties(
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_out_fence_property, 0);
|
||||
}
|
||||
|
||||
if (nv_dev->nv_input_colorspace_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_input_colorspace_property,
|
||||
NVKMS_INPUT_COLORSPACE_NONE);
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
if (supportsHDR && nv_dev->nv_hdr_output_metadata_property) {
|
||||
drm_object_attach_property(
|
||||
&plane->base, nv_dev->nv_hdr_output_metadata_property, 0);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
@ -990,7 +1126,9 @@ nv_drm_plane_create(struct drm_device *dev,
|
||||
drm_plane_helper_add(plane, &nv_plane_helper_funcs);
|
||||
|
||||
if (plane_type != DRM_PLANE_TYPE_CURSOR) {
|
||||
nv_drm_plane_install_properties(plane);
|
||||
nv_drm_plane_install_properties(
|
||||
plane,
|
||||
pResInfo->supportsHDR[layer_idx]);
|
||||
}
|
||||
|
||||
__nv_drm_plane_create_alpha_blending_properties(
|
||||
@ -1141,11 +1279,13 @@ void nv_drm_enumerate_crtcs_and_planes(
|
||||
}
|
||||
|
||||
for (layer = 0; layer < pResInfo->numLayers[i]; layer++) {
|
||||
struct drm_plane *overlay_plane = NULL;
|
||||
|
||||
if (layer == NVKMS_KAPI_LAYER_PRIMARY_IDX) {
|
||||
continue;
|
||||
}
|
||||
|
||||
struct drm_plane *overlay_plane =
|
||||
overlay_plane =
|
||||
nv_drm_plane_create(nv_dev->dev,
|
||||
DRM_PLANE_TYPE_OVERLAY,
|
||||
layer,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2016, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2016-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@ -205,6 +205,10 @@ static inline struct nv_drm_plane *to_nv_plane(struct drm_plane *plane)
|
||||
struct nv_drm_plane_state {
|
||||
struct drm_plane_state base;
|
||||
s32 __user *fd_user_ptr;
|
||||
enum NvKmsInputColorSpace input_colorspace;
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct drm_property_blob *hdr_output_metadata;
|
||||
#endif
|
||||
};
|
||||
|
||||
static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_state *state)
|
||||
@ -212,6 +216,11 @@ static inline struct nv_drm_plane_state *to_nv_drm_plane_state(struct drm_plane_
|
||||
return container_of(state, struct nv_drm_plane_state, base);
|
||||
}
|
||||
|
||||
static inline const struct nv_drm_plane_state *to_nv_drm_plane_state_const(const struct drm_plane_state *state)
|
||||
{
|
||||
return container_of(state, const struct nv_drm_plane_state, base);
|
||||
}
|
||||
|
||||
static inline struct nv_drm_crtc *to_nv_crtc(struct drm_crtc *crtc)
|
||||
{
|
||||
if (crtc == NULL) {
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015-2016, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@ -86,6 +86,23 @@
|
||||
|
||||
static struct nv_drm_device *dev_list = NULL;
|
||||
|
||||
static const char* nv_get_input_colorspace_name(
    enum NvKmsInputColorSpace colorSpace)
{
    switch (colorSpace) {
        case NVKMS_INPUT_COLORSPACE_NONE:
            return "None";
        case NVKMS_INPUT_COLORSPACE_SCRGB_LINEAR:
            return "IEC 61966-2-2 linear FP";
        case NVKMS_INPUT_COLORSPACE_BT2100_PQ:
            return "ITU-R BT.2100-PQ YCbCr";
        default:
            /* We shouldn't hit this */
            WARN_ON("Unsupported input colorspace");
            return "None";
    }
}
|
||||
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
|
||||
static void nv_drm_output_poll_changed(struct drm_device *dev)
|
||||
@ -332,6 +349,15 @@ static void nv_drm_enumerate_encoders_and_connectors
|
||||
*/
|
||||
static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
{
|
||||
struct drm_prop_enum_list enum_list[3] = { };
|
||||
int i, len = 0;
|
||||
|
||||
for (i = 0; i < 3; i++) {
|
||||
enum_list[len].type = i;
|
||||
enum_list[len].name = nv_get_input_colorspace_name(i);
|
||||
len++;
|
||||
}
|
||||
|
||||
#if defined(NV_LINUX_NVHOST_H_PRESENT) && defined(CONFIG_TEGRA_GRHOST)
|
||||
if (!nv_dev->supportsSyncpts) {
|
||||
return 0;
|
||||
@ -345,6 +371,23 @@ static int nv_drm_create_properties(struct nv_drm_device *nv_dev)
|
||||
}
|
||||
#endif
|
||||
|
||||
nv_dev->nv_input_colorspace_property =
|
||||
drm_property_create_enum(nv_dev->dev, 0, "NV_INPUT_COLORSPACE",
|
||||
enum_list, len);
|
||||
if (nv_dev->nv_input_colorspace_property == NULL) {
|
||||
NV_DRM_LOG_ERR("Failed to create NV_INPUT_COLORSPACE property");
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
nv_dev->nv_hdr_output_metadata_property =
|
||||
drm_property_create(nv_dev->dev, DRM_MODE_PROP_BLOB,
|
||||
"NV_HDR_STATIC_METADATA", 0);
|
||||
if (nv_dev->nv_hdr_output_metadata_property == NULL) {
|
||||
return -ENOMEM;
|
||||
}
|
||||
#endif
|
||||
|
||||
return 0;
|
||||
}
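For orientation, a hedged userspace sketch of setting the NV_INPUT_COLORSPACE enum property created above via standard libdrm calls. The helper below and its plane-ID handling are illustrative only, and the numeric enum values assume the NvKmsInputColorSpace ordering implied by the creation loop (0 = None, 1 = scRGB linear, 2 = BT.2100 PQ).

#include <stdint.h>
#include <string.h>
#include <xf86drmMode.h>

/* Look up a named property on a plane and set it; returns 0 on success. */
static int set_plane_enum_property(int fd, uint32_t plane_id,
                                   const char *name, uint64_t value)
{
    drmModeObjectPropertiesPtr props =
        drmModeObjectGetProperties(fd, plane_id, DRM_MODE_OBJECT_PLANE);
    int ret = -1;

    for (uint32_t i = 0; props && i < props->count_props; i++) {
        drmModePropertyPtr prop = drmModeGetProperty(fd, props->props[i]);

        if (prop && strcmp(prop->name, name) == 0) {
            /* e.g. value 2 would select "ITU-R BT.2100-PQ YCbCr" */
            ret = drmModeObjectSetProperty(fd, plane_id, DRM_MODE_OBJECT_PLANE,
                                           prop->prop_id, value);
        }
        drmModeFreeProperty(prop);
    }
    drmModeFreeObjectProperties(props);
    return ret;
}

Usage would be, e.g., set_plane_enum_property(fd, plane_id, "NV_INPUT_COLORSPACE", 2) before committing a PQ-encoded surface to that plane.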
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2019, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2019-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@ -40,9 +40,16 @@ static const u32 nvkms_to_drm_format[] = {
|
||||
[NvKmsSurfaceMemoryFormatR5G6B5] = DRM_FORMAT_RGB565,
|
||||
[NvKmsSurfaceMemoryFormatA8R8G8B8] = DRM_FORMAT_ARGB8888,
|
||||
[NvKmsSurfaceMemoryFormatX8R8G8B8] = DRM_FORMAT_XRGB8888,
|
||||
[NvKmsSurfaceMemoryFormatX8B8G8R8] = DRM_FORMAT_XBGR8888,
|
||||
[NvKmsSurfaceMemoryFormatA2B10G10R10] = DRM_FORMAT_ABGR2101010,
|
||||
[NvKmsSurfaceMemoryFormatX2B10G10R10] = DRM_FORMAT_XBGR2101010,
|
||||
[NvKmsSurfaceMemoryFormatA8B8G8R8] = DRM_FORMAT_ABGR8888,
|
||||
#if defined(DRM_FORMAT_ABGR16161616F)
|
||||
[NvKmsSurfaceMemoryFormatRF16GF16BF16AF16] = DRM_FORMAT_ABGR16161616F,
|
||||
#endif
|
||||
#if defined(DRM_FORMAT_XBGR16161616F)
|
||||
[NvKmsSurfaceMemoryFormatRF16GF16BF16XF16] = DRM_FORMAT_XBGR16161616F,
|
||||
#endif
|
||||
|
||||
[NvKmsSurfaceMemoryFormatY8_U8__Y8_V8_N422] = DRM_FORMAT_YUYV,
|
||||
[NvKmsSurfaceMemoryFormatU8_Y8__V8_Y8_N422] = DRM_FORMAT_UYVY,
|
||||
|
@ -113,7 +113,6 @@ static vm_fault_t __nv_drm_gem_user_memory_handle_vma_fault(
|
||||
page_offset = vmf->pgoff - drm_vma_node_start(&gem->vma_node);
|
||||
|
||||
BUG_ON(page_offset > nv_user_memory->pages_count);
|
||||
|
||||
ret = vm_insert_page(vma, address, nv_user_memory->pages[page_offset]);
|
||||
switch (ret) {
|
||||
case 0:
|
||||
|
@ -93,8 +93,6 @@ int nv_drm_lock_user_pages(unsigned long address,
|
||||
{
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct page **user_pages;
|
||||
const int write = 1;
|
||||
const int force = 0;
|
||||
int pages_pinned;
|
||||
|
||||
user_pages = nv_drm_calloc(pages_count, sizeof(*user_pages));
|
||||
@ -105,7 +103,7 @@ int nv_drm_lock_user_pages(unsigned long address,
|
||||
|
||||
nv_mmap_read_lock(mm);
|
||||
|
||||
pages_pinned = NV_GET_USER_PAGES(address, pages_count, write, force,
|
||||
pages_pinned = NV_PIN_USER_PAGES(address, pages_count, FOLL_WRITE,
|
||||
user_pages, NULL);
|
||||
nv_mmap_read_unlock(mm);
|
||||
|
||||
@ -123,7 +121,7 @@ failed:
|
||||
int i;
|
||||
|
||||
for (i = 0; i < pages_pinned; i++) {
|
||||
put_page(user_pages[i]);
|
||||
NV_UNPIN_USER_PAGE(user_pages[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -138,8 +136,7 @@ void nv_drm_unlock_user_pages(unsigned long pages_count, struct page **pages)
|
||||
|
||||
for (i = 0; i < pages_count; i++) {
|
||||
set_page_dirty_lock(pages[i]);
|
||||
|
||||
put_page(pages[i]);
|
||||
NV_UNPIN_USER_PAGE(pages[i]);
|
||||
}
|
||||
|
||||
nv_drm_free(pages);
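The two hunks above move nvidia-drm from get_user_pages()/put_page() to the pin_user_pages()/unpin_user_page() family. A minimal sketch of that kernel pattern outside this driver, assuming a kernel new enough for mmap_read_lock() but old enough that pin_user_pages() still takes the trailing vmas argument (matching the NULL passed above); error handling is trimmed.

#include <linux/mm.h>
#include <linux/sched.h>

/* Pin 'count' writable user pages at 'addr', touch them, then release them. */
static int example_pin_and_release(unsigned long addr, unsigned long count,
                                   struct page **pages)
{
    long pinned;

    mmap_read_lock(current->mm);
    pinned = pin_user_pages(addr, count, FOLL_WRITE, pages, NULL);
    mmap_read_unlock(current->mm);

    if (pinned < 0)
        return (int)pinned;

    /* ... DMA to/from the pinned pages ... */

    unpin_user_pages(pages, pinned);   /* counterpart of NV_UNPIN_USER_PAGE() */
    return 0;
}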
@ -174,12 +171,7 @@ static void __exit nv_linux_drm_exit(void)
|
||||
module_init(nv_linux_drm_init);
|
||||
module_exit(nv_linux_drm_exit);
|
||||
|
||||
#if defined(MODULE_LICENSE)
|
||||
MODULE_LICENSE("Dual MIT/GPL");
|
||||
#endif
|
||||
#if defined(MODULE_INFO)
|
||||
MODULE_INFO(supported, "external");
|
||||
#endif
|
||||
#if defined(MODULE_VERSION)
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
#endif
|
||||
|
||||
MODULE_INFO(supported, "external");
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
|
@ -93,9 +93,6 @@ static bool __will_generate_flip_event(struct drm_crtc *crtc,
|
||||
to_nv_crtc_state(new_crtc_state);
|
||||
struct drm_plane_state *old_plane_state = NULL;
|
||||
struct drm_plane *plane = NULL;
|
||||
struct drm_plane *primary_plane = crtc->primary;
|
||||
bool primary_event = false;
|
||||
bool overlay_event = false;
|
||||
int i;
|
||||
|
||||
if (!old_crtc_state->active && !new_crtc_state->active) {
|
||||
@ -274,6 +271,9 @@ nv_drm_atomic_apply_modeset_config(struct drm_device *dev,
|
||||
|
||||
nv_new_crtc_state->nv_flip = NULL;
|
||||
}
|
||||
#if defined(NV_DRM_CRTC_STATE_HAS_VRR_ENABLED)
|
||||
requested_config->headRequestedConfig[nv_crtc->head].modeSetConfig.vrrEnabled = new_crtc_state->vrr_enabled;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright (c) 2015, NVIDIA CORPORATION. All rights reserved.
|
||||
* Copyright (c) 2015-2022, NVIDIA CORPORATION. All rights reserved.
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
@ -122,6 +122,11 @@ struct nv_drm_device {
|
||||
NvBool supportsSyncpts;
|
||||
|
||||
struct drm_property *nv_out_fence_property;
|
||||
struct drm_property *nv_input_colorspace_property;
|
||||
|
||||
#if defined(NV_DRM_HAS_HDR_OUTPUT_METADATA)
|
||||
struct drm_property *nv_hdr_output_metadata_property;
|
||||
#endif
|
||||
|
||||
struct nv_drm_device *next;
|
||||
};
|
||||
|
@ -59,11 +59,14 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_dev_unref
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_reinit_primary_mode_group
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages_remote
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_user_pages
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages_remote
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pin_user_pages
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_lookup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_state_ref_counting
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_driver_has_gem_prime_res_obj
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_atomic_helper_connector_dpms
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_funcs_have_mode_in_name
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_connector_has_vrr_capable_property
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_framebuffer_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
|
||||
@ -100,6 +103,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_gem_object_has_resv
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_async_flip
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_pageflip_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_vrr_enabled
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_format_modifiers_present
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_vma_node_is_allowed_has_tag_arg
|
||||
@ -115,6 +119,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_plane_atomic_check_has_atomic_state_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_device_has_pdev
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_crtc_state_has_no_vblank
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_mode_config_has_allow_fb_modifiers
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_has_hdr_output_metadata
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_add_fence
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += dma_resv_reserve_fences
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += reservation_object_reserve_shared_has_num_fences_arg
|
||||
|
@ -169,7 +169,6 @@ void nv_kthread_q_stop(nv_kthread_q_t *q)
|
||||
//
|
||||
// This function is never invoked when there is no NUMA preference (preferred
|
||||
// node is NUMA_NO_NODE).
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
nv_kthread_q_t *q,
|
||||
int preferred_node,
|
||||
@ -217,7 +216,6 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
|
||||
return thread[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferred_node)
|
||||
{
|
||||
@ -231,11 +229,7 @@ int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferr
|
||||
q->q_kthread = kthread_create(_main_loop, q, q_name);
|
||||
}
|
||||
else {
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
q->q_kthread = thread_create_on_node(_main_loop, q, preferred_node, q_name);
|
||||
#else
|
||||
return -ENOTSUPP;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (IS_ERR(q->q_kthread)) {
|
||||
|
@ -35,6 +35,8 @@
|
||||
#include <linux/list.h>
|
||||
#include <linux/rwsem.h>
|
||||
|
||||
#include <acpi/video.h>
|
||||
|
||||
#include "nvstatus.h"
|
||||
|
||||
#include "nv-register-module.h"
|
||||
@ -956,6 +958,12 @@ nvkms_register_backlight(NvU32 gpu_id, NvU32 display_id, void *drv_priv,
|
||||
struct nvkms_backlight_device *nvkms_bd = NULL;
|
||||
int i;
|
||||
|
||||
#if defined(NV_ACPI_VIDEO_BACKLIGHT_USE_NATIVE)
|
||||
if (!acpi_video_backlight_use_native()) {
|
||||
return NULL;
|
||||
}
|
||||
#endif
|
||||
|
||||
gpu_info = nvkms_alloc(NV_MAX_GPUS * sizeof(*gpu_info), NV_TRUE);
|
||||
if (gpu_info == NULL) {
|
||||
return NULL;
|
||||
@ -1346,29 +1354,7 @@ static void nvkms_proc_exit(void)
|
||||
return;
|
||||
}
|
||||
|
||||
#if defined(NV_PROC_REMOVE_PRESENT)
|
||||
proc_remove(nvkms_proc_dir);
|
||||
#else
|
||||
/*
|
||||
* On kernel versions without proc_remove(), we need to explicitly
|
||||
* remove each proc file beneath nvkms_proc_dir.
|
||||
* nvkms_proc_init() only creates files directly under
|
||||
* nvkms_proc_dir, so those are the only files we need to remove
|
||||
* here: warn if there is any deeper directory nesting.
|
||||
*/
|
||||
{
|
||||
struct proc_dir_entry *entry = nvkms_proc_dir->subdir;
|
||||
|
||||
while (entry != NULL) {
|
||||
struct proc_dir_entry *next = entry->next;
|
||||
WARN_ON(entry->subdir != NULL);
|
||||
remove_proc_entry(entry->name, entry->parent);
|
||||
entry = next;
|
||||
}
|
||||
}
|
||||
|
||||
remove_proc_entry(nvkms_proc_dir->name, nvkms_proc_dir->parent);
|
||||
#endif /* NV_PROC_REMOVE_PRESENT */
|
||||
#endif /* CONFIG_PROC_FS */
|
||||
}
|
||||
|
||||
@ -1630,12 +1616,7 @@ restart:
|
||||
module_init(nvkms_init);
|
||||
module_exit(nvkms_exit);
|
||||
|
||||
#if defined(MODULE_LICENSE)
|
||||
MODULE_LICENSE("Dual MIT/GPL");
|
||||
#endif
|
||||
#if defined(MODULE_INFO)
|
||||
MODULE_INFO(supported, "external");
|
||||
#endif
|
||||
#if defined(MODULE_VERSION)
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
#endif
|
||||
|
||||
MODULE_INFO(supported, "external");
|
||||
MODULE_VERSION(NV_VERSION_STRING);
|
||||
|
@ -85,15 +85,11 @@ $(obj)/$(NVIDIA_MODESET_INTERFACE): $(addprefix $(obj)/,$(NVIDIA_MODESET_OBJECTS
|
||||
|
||||
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_MODESET_OBJECTS)
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += node_states_n_memory
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += proc_remove
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += timer_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_real_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_kthread_create_on_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += acpi_video_backlight_use_native
|
||||
|
@ -30,8 +30,18 @@ NVIDIA_PEERMEM_CFLAGS += -UDEBUG -U_DEBUG -DNDEBUG -DNV_BUILD_MODULE_INSTANCES=0
|
||||
# MOFED's Module.symvers is needed for the build
|
||||
# to find the additional ib_* symbols.
|
||||
#
|
||||
# Also, MOFED doesn't use kbuild ARCH names.
|
||||
# So adapt OFA_ARCH to match MOFED's conventions.
|
||||
#
|
||||
ifeq ($(ARCH), arm64)
|
||||
OFA_ARCH := aarch64
|
||||
else ifeq ($(ARCH), powerpc)
|
||||
OFA_ARCH := ppc64le
|
||||
else
|
||||
OFA_ARCH := $(ARCH)
|
||||
endif
|
||||
OFA_DIR := /usr/src/ofa_kernel
|
||||
OFA_CANDIDATES = $(OFA_DIR)/$(ARCH)/$(KERNELRELEASE) $(OFA_DIR)/$(KERNELRELEASE) $(OFA_DIR)/default /var/lib/dkms/mlnx-ofed-kernel
|
||||
OFA_CANDIDATES = $(OFA_DIR)/$(OFA_ARCH)/$(KERNELRELEASE) $(OFA_DIR)/$(KERNELRELEASE) $(OFA_DIR)/default /var/lib/dkms/mlnx-ofed-kernel
|
||||
MLNX_OFED_KERNEL := $(shell for d in $(OFA_CANDIDATES); do \
|
||||
if [ -d "$$d" ]; then \
|
||||
echo "$$d"; \
|
||||
|
@ -481,16 +481,6 @@ static int _check_cpu_affinity_test(void)
|
||||
int result, node;
|
||||
nv_kthread_q_t local_q;
|
||||
|
||||
// If the API does not support CPU affinity, check whether the correct
|
||||
// error code is returned.
|
||||
// Non-affinitized queue allocation has been verified by previous test
|
||||
// so just ensure that the affinitized version also works.
|
||||
if (!NV_KTHREAD_Q_SUPPORTS_AFFINITY()) {
|
||||
result = nv_kthread_q_init_on_node(&local_q, "should_fail", 0);
|
||||
TEST_CHECK_RET(result == -ENOTSUPP);
|
||||
return 0;
|
||||
}
|
||||
|
||||
for_each_online_node(node) {
|
||||
unsigned i;
|
||||
const unsigned max_i = 100;
|
||||
|
@ -169,7 +169,6 @@ void nv_kthread_q_stop(nv_kthread_q_t *q)
|
||||
//
|
||||
// This function is never invoked when there is no NUMA preference (preferred
|
||||
// node is NUMA_NO_NODE).
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
nv_kthread_q_t *q,
|
||||
int preferred_node,
|
||||
@ -217,7 +216,6 @@ static struct task_struct *thread_create_on_node(int (*threadfn)(void *data),
|
||||
|
||||
return thread[i];
|
||||
}
|
||||
#endif
|
||||
|
||||
int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferred_node)
|
||||
{
|
||||
@ -231,11 +229,7 @@ int nv_kthread_q_init_on_node(nv_kthread_q_t *q, const char *q_name, int preferr
|
||||
q->q_kthread = kthread_create(_main_loop, q, q_name);
|
||||
}
|
||||
else {
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1
|
||||
q->q_kthread = thread_create_on_node(_main_loop, q, preferred_node, q_name);
|
||||
#else
|
||||
return -ENOTSUPP;
|
||||
#endif
|
||||
}
|
||||
|
||||
if (IS_ERR(q->q_kthread)) {
|
||||
|
@ -67,17 +67,11 @@ endif
|
||||
|
||||
NV_OBJECTS_DEPEND_ON_CONFTEST += $(NVIDIA_UVM_OBJECTS)
|
||||
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += address_space_init_once
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vzalloc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += wait_on_bit_lock_argument_count
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pde_data
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += proc_remove
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += bitmap_clear
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += usleep_range
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_empty
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += radix_tree_replace_slot
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += pnv_npu2_init_context
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += kthread_create_on_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += vmf_insert_pfn
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += cpumask_of_node
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += list_is_first
|
||||
@ -88,17 +82,16 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
|
||||
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += file_operations
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += kuid_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += address_space
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += backing_dev_info
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_context_t
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages_remote
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += get_user_pages
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages_remote
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += pin_user_pages
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_has_address
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_ops_fault_removed_vma_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += node_states_n_memory
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += kmem_cache_has_kobj_remove_work
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -41,73 +41,6 @@
|
||||
static dev_t g_uvm_base_dev;
|
||||
static struct cdev g_uvm_cdev;
|
||||
|
||||
// List of fault service contexts for CPU faults
|
||||
static LIST_HEAD(g_cpu_service_block_context_list);
|
||||
|
||||
static uvm_spinlock_t g_cpu_service_block_context_list_lock;
|
||||
|
||||
NV_STATUS uvm_service_block_context_init(void)
|
||||
{
|
||||
unsigned num_preallocated_contexts = 4;
|
||||
|
||||
uvm_spin_lock_init(&g_cpu_service_block_context_list_lock, UVM_LOCK_ORDER_LEAF);
|
||||
|
||||
// Pre-allocate some fault service contexts for the CPU and add them to the global list
|
||||
while (num_preallocated_contexts-- > 0) {
|
||||
uvm_service_block_context_t *service_context = uvm_kvmalloc(sizeof(*service_context));
|
||||
if (!service_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_service_block_context_exit(void)
|
||||
{
|
||||
uvm_service_block_context_t *service_context, *service_context_tmp;
|
||||
|
||||
// Free the CPU fault service contexts and clear the global list
|
||||
list_for_each_entry_safe(service_context, service_context_tmp, &g_cpu_service_block_context_list,
|
||||
cpu_fault.service_context_list) {
|
||||
uvm_kvfree(service_context);
|
||||
}
|
||||
INIT_LIST_HEAD(&g_cpu_service_block_context_list);
|
||||
}
|
||||
|
||||
// Get a fault service context from the global list or allocate a new one if there are no
|
||||
// available entries
|
||||
static uvm_service_block_context_t *uvm_service_block_context_cpu_alloc(void)
|
||||
{
|
||||
uvm_service_block_context_t *service_context;
|
||||
|
||||
uvm_spin_lock(&g_cpu_service_block_context_list_lock);
|
||||
|
||||
service_context = list_first_entry_or_null(&g_cpu_service_block_context_list, uvm_service_block_context_t,
|
||||
cpu_fault.service_context_list);
|
||||
|
||||
if (service_context)
|
||||
list_del(&service_context->cpu_fault.service_context_list);
|
||||
|
||||
uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
|
||||
|
||||
if (!service_context)
|
||||
service_context = uvm_kvmalloc(sizeof(*service_context));
|
||||
|
||||
return service_context;
|
||||
}
|
||||
|
||||
// Put a fault service context in the global list
|
||||
static void uvm_service_block_context_cpu_free(uvm_service_block_context_t *service_context)
|
||||
{
|
||||
uvm_spin_lock(&g_cpu_service_block_context_list_lock);
|
||||
|
||||
list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
|
||||
|
||||
uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
|
||||
}
|
||||
|
||||
static int uvm_open(struct inode *inode, struct file *filp)
|
||||
{
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
@ -489,139 +422,10 @@ static void uvm_vm_close_managed_entry(struct vm_area_struct *vma)
|
||||
static vm_fault_t uvm_vm_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
|
||||
uvm_va_block_t *va_block;
|
||||
NvU64 fault_addr = nv_page_fault_va(vmf);
|
||||
bool is_write = vmf->flags & FAULT_FLAG_WRITE;
|
||||
NV_STATUS status = uvm_global_get_status();
|
||||
bool tools_enabled;
|
||||
bool major_fault = false;
|
||||
uvm_service_block_context_t *service_context;
|
||||
uvm_global_processor_mask_t gpus_to_check_for_ecc;
|
||||
|
||||
if (status != NV_OK)
|
||||
goto convert_error;
|
||||
|
||||
// TODO: Bug 2583279: Lock tracking is disabled for the power management
|
||||
// lock in order to suppress reporting of a lock policy violation.
|
||||
// The violation consists in acquiring the power management lock multiple
|
||||
// times, and it is manifested as an error during release. The
|
||||
// re-acquisition of the power management locks happens upon re-entry in the
|
||||
// UVM module, and it is benign by itself, but when combined with certain
|
||||
// power management scenarios, it is indicative of a potential deadlock.
|
||||
// Tracking will be re-enabled once the power management locking strategy is
|
||||
// modified to avoid deadlocks.
|
||||
if (!uvm_down_read_trylock_no_tracking(&g_uvm_global.pm.lock)) {
|
||||
status = NV_ERR_BUSY_RETRY;
|
||||
goto convert_error;
|
||||
}
|
||||
|
||||
service_context = uvm_service_block_context_cpu_alloc();
|
||||
if (!service_context) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto unlock;
|
||||
}
|
||||
|
||||
service_context->cpu_fault.wakeup_time_stamp = 0;
|
||||
|
||||
// The mmap_lock might be held in write mode, but the mode doesn't matter
|
||||
// for the purpose of lock ordering and we don't rely on it being in write
|
||||
// anywhere so just record it as read mode in all cases.
|
||||
uvm_record_lock_mmap_lock_read(vma->vm_mm);
|
||||
|
||||
do {
|
||||
bool do_sleep = false;
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
|
||||
NvU64 now = NV_GETTIME();
|
||||
if (now < service_context->cpu_fault.wakeup_time_stamp)
|
||||
do_sleep = true;
|
||||
|
||||
if (do_sleep)
|
||||
uvm_tools_record_throttling_start(va_space, fault_addr, UVM_ID_CPU);
|
||||
|
||||
// Drop the VA space lock while we sleep
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
// usleep_range is preferred because msleep has a 20ms granularity
|
||||
// and udelay uses a busy-wait loop. usleep_range uses high-resolution
|
||||
// timers and, by adding a range, the Linux scheduler may coalesce
|
||||
// our wakeup with others, thus saving some interrupts.
|
||||
if (do_sleep) {
|
||||
unsigned long nap_us = (service_context->cpu_fault.wakeup_time_stamp - now) / 1000;
|
||||
|
||||
usleep_range(nap_us, nap_us + nap_us / 2);
|
||||
}
|
||||
}
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
if (do_sleep)
|
||||
uvm_tools_record_throttling_end(va_space, fault_addr, UVM_ID_CPU);
|
||||
|
||||
status = uvm_va_block_find_create_managed(va_space, fault_addr, &va_block);
|
||||
if (status != NV_OK) {
|
||||
UVM_ASSERT_MSG(status == NV_ERR_NO_MEMORY, "status: %s\n", nvstatusToString(status));
|
||||
break;
|
||||
}
|
||||
|
||||
// Watch out, current->mm might not be vma->vm_mm
|
||||
UVM_ASSERT(vma == uvm_va_range_vma(va_block->va_range));
|
||||
|
||||
// Loop until thrashing goes away.
|
||||
status = uvm_va_block_cpu_fault(va_block, fault_addr, is_write, service_context);
|
||||
} while (status == NV_WARN_MORE_PROCESSING_REQUIRED);
|
||||
|
||||
if (status != NV_OK) {
|
||||
UvmEventFatalReason reason;
|
||||
|
||||
reason = uvm_tools_status_to_fatal_fault_reason(status);
|
||||
UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
|
||||
|
||||
uvm_tools_record_cpu_fatal_fault(va_space, fault_addr, is_write, reason);
|
||||
}
|
||||
|
||||
tools_enabled = va_space->tools.enabled;
|
||||
|
||||
if (status == NV_OK) {
|
||||
uvm_va_space_global_gpus_in_mask(va_space,
|
||||
&gpus_to_check_for_ecc,
|
||||
&service_context->cpu_fault.gpus_to_check_for_ecc);
|
||||
uvm_global_mask_retain(&gpus_to_check_for_ecc);
|
||||
}
|
||||
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_record_unlock_mmap_lock_read(vma->vm_mm);
|
||||
|
||||
if (status == NV_OK) {
|
||||
status = uvm_global_mask_check_ecc_error(&gpus_to_check_for_ecc);
|
||||
uvm_global_mask_release(&gpus_to_check_for_ecc);
|
||||
}
|
||||
|
||||
if (tools_enabled)
|
||||
uvm_tools_flush_events();
|
||||
|
||||
// Major faults involve I/O in order to resolve the fault.
|
||||
// If any pages were DMA'ed between the GPU and host memory, that makes it a major fault.
|
||||
// A process can also get statistics for major and minor faults by calling readproc().
|
||||
major_fault = service_context->cpu_fault.did_migrate;
|
||||
uvm_service_block_context_cpu_free(service_context);
|
||||
|
||||
unlock:
|
||||
// TODO: Bug 2583279: See the comment above the matching lock acquisition
|
||||
uvm_up_read_no_tracking(&g_uvm_global.pm.lock);
|
||||
|
||||
convert_error:
|
||||
switch (status) {
|
||||
case NV_OK:
|
||||
case NV_ERR_BUSY_RETRY:
|
||||
return VM_FAULT_NOPAGE | (major_fault ? VM_FAULT_MAJOR : 0);
|
||||
case NV_ERR_NO_MEMORY:
|
||||
return VM_FAULT_OOM;
|
||||
default:
|
||||
return VM_FAULT_SIGBUS;
|
||||
}
|
||||
return uvm_va_space_cpu_fault_managed(va_space, vma, vmf);
|
||||
}
|
||||
|
||||
|
||||
static vm_fault_t uvm_vm_fault_entry(struct vm_area_struct *vma, struct vm_fault *vmf)
|
||||
{
|
||||
UVM_ENTRY_RET(uvm_vm_fault(vma, vmf));
|
||||
@ -986,8 +790,6 @@ bool uvm_file_is_nvidia_uvm(struct file *filp)
|
||||
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params, struct file *filp)
|
||||
{
|
||||
long ret;
|
||||
int write = 1;
|
||||
int force = 0;
|
||||
struct page *page;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
@ -998,7 +800,7 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
|
||||
// are not used because unload_state_buf may be a managed memory pointer and
|
||||
// therefore a locking assertion from the CPU fault handler could be fired.
|
||||
nv_mmap_read_lock(current->mm);
|
||||
ret = NV_GET_USER_PAGES(params->unload_state_buf, 1, write, force, &page, NULL);
|
||||
ret = NV_PIN_USER_PAGES(params->unload_state_buf, 1, FOLL_WRITE, &page, NULL);
|
||||
nv_mmap_read_unlock(current->mm);
|
||||
|
||||
if (ret < 0)
|
||||
@ -1008,7 +810,7 @@ NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_B
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
|
||||
if (g_uvm_global.unload_state.ptr) {
|
||||
put_page(page);
|
||||
NV_UNPIN_USER_PAGE(page);
|
||||
status = NV_ERR_IN_USE;
|
||||
goto error;
|
||||
}
|
||||
@ -1027,7 +829,7 @@ static void uvm_test_unload_state_exit(void)
|
||||
{
|
||||
if (g_uvm_global.unload_state.ptr) {
|
||||
kunmap(g_uvm_global.unload_state.page);
|
||||
put_page(g_uvm_global.unload_state.page);
|
||||
NV_UNPIN_USER_PAGE(g_uvm_global.unload_state.page);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -25,9 +25,62 @@
|
||||
#include "uvm_ats_faults.h"
|
||||
#include "uvm_migrate_pageable.h"
|
||||
|
||||
// TODO: Bug 2103669: Implement a real prefetching policy and remove or adapt
|
||||
// these experimental parameters. These are intended to help guide that policy.
|
||||
static unsigned int uvm_exp_perf_prefetch_ats_order_replayable = 0;
|
||||
module_param(uvm_exp_perf_prefetch_ats_order_replayable, uint, 0644);
|
||||
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_replayable,
|
||||
"Max order of pages (2^N) to prefetch on replayable ATS faults");
|
||||
|
||||
static unsigned int uvm_exp_perf_prefetch_ats_order_non_replayable = 0;
|
||||
module_param(uvm_exp_perf_prefetch_ats_order_non_replayable, uint, 0644);
|
||||
MODULE_PARM_DESC(uvm_exp_perf_prefetch_ats_order_non_replayable,
|
||||
"Max order of pages (2^N) to prefetch on non-replayable ATS faults");
|
||||
|
||||
// Expand the fault region to the naturally-aligned region with order given by
|
||||
// the module parameters, clamped to the vma containing fault_addr (if any).
|
||||
// Note that this means the region contains fault_addr but may not begin at
|
||||
// fault_addr.
|
||||
static void expand_fault_region(struct mm_struct *mm,
|
||||
NvU64 fault_addr,
|
||||
uvm_fault_client_type_t client_type,
|
||||
unsigned long *start,
|
||||
unsigned long *size)
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
unsigned int order;
|
||||
unsigned long outer, aligned_start, aligned_size;
|
||||
|
||||
*start = fault_addr;
|
||||
*size = PAGE_SIZE;
|
||||
|
||||
if (client_type == UVM_FAULT_CLIENT_TYPE_HUB)
|
||||
order = uvm_exp_perf_prefetch_ats_order_non_replayable;
|
||||
else
|
||||
order = uvm_exp_perf_prefetch_ats_order_replayable;
|
||||
|
||||
if (order == 0)
|
||||
return;
|
||||
|
||||
vma = find_vma_intersection(mm, fault_addr, fault_addr + 1);
|
||||
if (!vma)
|
||||
return;
|
||||
|
||||
UVM_ASSERT(order < BITS_PER_LONG - PAGE_SHIFT);
|
||||
|
||||
aligned_size = (1UL << order) * PAGE_SIZE;
|
||||
|
||||
aligned_start = fault_addr & ~(aligned_size - 1);
|
||||
|
||||
*start = max(vma->vm_start, aligned_start);
|
||||
outer = min(vma->vm_end, aligned_start + aligned_size);
|
||||
*size = outer - *start;
|
||||
}
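A small worked example of the rounding above, with illustrative values only: with the prefetch order module parameter set to 4 and 4 KiB pages, the region becomes the naturally aligned 64 KiB block containing the fault address, clamped afterwards to the VMA bounds.

#include <stdio.h>

/* Standalone illustration of the expand_fault_region() arithmetic; the fault
 * address, order, and page size are assumptions, not values from the driver. */
int main(void)
{
    unsigned long page_size     = 4096;
    unsigned long order         = 4;
    unsigned long aligned_size  = (1UL << order) * page_size;        /* 64 KiB */
    unsigned long fault_addr    = 0x7f12345678UL;
    unsigned long aligned_start = fault_addr & ~(aligned_size - 1);

    /* Prints [0x7f12340000, 0x7f12350000): the aligned block around the fault,
     * before the min()/max() clamping against vma->vm_start and vma->vm_end. */
    printf("[0x%lx, 0x%lx)\n", aligned_start, aligned_start + aligned_size);
    return 0;
}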
|
||||
static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NvU64 fault_addr,
|
||||
uvm_fault_access_type_t access_type)
|
||||
uvm_fault_access_type_t access_type,
|
||||
uvm_fault_client_type_t client_type)
|
||||
{
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
@ -66,8 +119,6 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
{
|
||||
.va_space = va_space,
|
||||
.mm = mm,
|
||||
.start = fault_addr,
|
||||
.length = PAGE_SIZE,
|
||||
.dst_id = gpu_va_space->gpu->parent->id,
|
||||
.dst_node_id = -1,
|
||||
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
|
||||
@ -79,6 +130,8 @@ static NV_STATUS uvm_ats_service_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
UVM_ASSERT(uvm_ats_can_service_faults(gpu_va_space, mm));
|
||||
|
||||
expand_fault_region(mm, fault_addr, client_type, &uvm_migrate_args.start, &uvm_migrate_args.length);
|
||||
|
||||
// TODO: Bug 2103669: Service more than a single fault at a time
|
||||
//
|
||||
// We are trying to use migrate_vma API in the kernel (if it exists) to
|
||||
@ -131,7 +184,10 @@ NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
|
||||
}
|
||||
else {
|
||||
// TODO: Bug 2103669: Service more than a single fault at a time
|
||||
status = uvm_ats_service_fault(gpu_va_space, current_entry->fault_address, service_access_type);
|
||||
status = uvm_ats_service_fault(gpu_va_space,
|
||||
current_entry->fault_address,
|
||||
service_access_type,
|
||||
current_entry->fault_source.client_type);
|
||||
}
|
||||
|
||||
// Do not flag prefetch faults as fatal unless something fatal happened
|
||||
@ -155,7 +211,8 @@ NV_STATUS uvm_ats_service_fault_entry(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ)) {
|
||||
status = uvm_ats_service_fault(gpu_va_space,
|
||||
current_entry->fault_address,
|
||||
UVM_FAULT_ACCESS_TYPE_READ);
|
||||
UVM_FAULT_ACCESS_TYPE_READ,
|
||||
current_entry->fault_source.client_type);
|
||||
|
||||
// If read accesses are also invalid, cancel the fault. If a
|
||||
// different error code is returned, exit
|
||||
|
@ -24,6 +24,7 @@
|
||||
#include "uvm_channel.h"
|
||||
|
||||
#include "uvm_api.h"
|
||||
#include "uvm_common.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_procfs.h"
|
||||
@ -68,6 +69,38 @@ typedef enum
|
||||
UVM_CHANNEL_UPDATE_MODE_FORCE_ALL
|
||||
} uvm_channel_update_mode_t;
|
||||
|
||||
static void channel_pool_lock_init(uvm_channel_pool_t *pool)
{
    if (uvm_channel_pool_is_proxy(pool))
        uvm_mutex_init(&pool->mutex, UVM_LOCK_ORDER_CHANNEL);
    else
        uvm_spin_lock_init(&pool->spinlock, UVM_LOCK_ORDER_CHANNEL);
}

void uvm_channel_pool_lock(uvm_channel_pool_t *pool)
{
    if (uvm_channel_pool_is_proxy(pool))
        uvm_mutex_lock(&pool->mutex);
    else
        uvm_spin_lock(&pool->spinlock);
}

void uvm_channel_pool_unlock(uvm_channel_pool_t *pool)
{
    if (uvm_channel_pool_is_proxy(pool))
        uvm_mutex_unlock(&pool->mutex);
    else
        uvm_spin_unlock(&pool->spinlock);
}

void uvm_channel_pool_assert_locked(uvm_channel_pool_t *pool)
{
    if (uvm_channel_pool_is_proxy(pool))
        uvm_assert_mutex_locked(&pool->mutex);
    else
        uvm_assert_spinlock_locked(&pool->spinlock);
}
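These wrappers exist because a proxy channel pool submits work through an RM paging-channel call that can sleep, so it takes a mutex, while every other pool keeps the original spinlock. A hedged usage sketch of the pattern follows; the function and field names come from this change, but the caller and its body are placeholders, not driver code.

/* Sketch: bracket pool-protected bookkeeping with the new lock wrappers. */
static void example_touch_pool_state(uvm_channel_t *channel)
{
    uvm_channel_pool_lock(channel->pool);

    /* cpu_put/gpu_get style state may only be touched here; sleeping is only
     * safe when the pool is a proxy pool (the mutex case). */
    uvm_channel_pool_assert_locked(channel->pool);

    uvm_channel_pool_unlock(channel->pool);
}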
|
||||
|
||||
// Update channel progress, completing up to max_to_complete entries
|
||||
static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
NvU32 max_to_complete,
|
||||
@ -80,7 +113,7 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
|
||||
NvU64 completed_value = uvm_channel_update_completed_value(channel);
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
|
||||
// Completed value should never exceed the queued value
|
||||
UVM_ASSERT_MSG_RELEASE(completed_value <= channel->tracking_sem.queued_value,
|
||||
@ -108,7 +141,7 @@ static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
|
||||
|
||||
channel->gpu_get = gpu_get;
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
|
||||
if (cpu_put >= gpu_get)
|
||||
pending_gpfifos = cpu_put - gpu_get;
|
||||
@ -157,7 +190,7 @@ static bool channel_is_available(uvm_channel_t *channel, NvU32 num_gpfifo_entrie
|
||||
{
|
||||
NvU32 pending_entries;
|
||||
|
||||
uvm_assert_spinlock_locked(&channel->pool->lock);
|
||||
uvm_channel_pool_assert_locked(channel->pool);
|
||||
|
||||
if (channel->cpu_put >= channel->gpu_get)
|
||||
pending_entries = channel->cpu_put - channel->gpu_get;
|
||||
@ -174,14 +207,14 @@ static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
|
||||
UVM_ASSERT(num_gpfifo_entries > 0);
|
||||
UVM_ASSERT(num_gpfifo_entries < channel->num_gpfifo_entries);
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
|
||||
if (channel_is_available(channel, num_gpfifo_entries)) {
|
||||
channel->current_gpfifo_count += num_gpfifo_entries;
|
||||
claimed = true;
|
||||
}
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
|
||||
return claimed;
|
||||
}
|
||||
@ -248,7 +281,8 @@ static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t
|
||||
|
||||
NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_type_t type, uvm_channel_t **channel_out)
|
||||
{
|
||||
UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);
|
||||
UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);
|
||||
|
||||
return channel_reserve_in_pool(manager->pool_to_use.default_for_type[type], channel_out);
|
||||
}
|
||||
|
||||
@ -289,14 +323,14 @@ static NvU32 channel_get_available_push_info_index(uvm_channel_t *channel)
|
||||
{
|
||||
uvm_push_info_t *push_info;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
|
||||
push_info = list_first_entry_or_null(&channel->available_push_infos, uvm_push_info_t, available_list_node);
|
||||
UVM_ASSERT(push_info != NULL);
|
||||
UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
|
||||
list_del(&push_info->available_list_node);
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
|
||||
return push_info - channel->push_infos;
|
||||
}
|
||||
@ -355,10 +389,6 @@ static void proxy_channel_submit_work(uvm_push_t *push, NvU32 push_size)
|
||||
|
||||
UVM_ASSERT(uvm_channel_is_proxy(channel));
|
||||
|
||||
// nvUvmInterfacePagingChannelPushStream should not sleep, because a
|
||||
// spinlock is currently held.
|
||||
uvm_assert_spinlock_locked(&channel->pool->lock);
|
||||
|
||||
status = nvUvmInterfacePagingChannelPushStream(channel->proxy.handle, (char *) push->begin, push_size);
|
||||
|
||||
if (status != NV_OK) {
|
||||
@ -409,7 +439,7 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
NvU32 cpu_put;
|
||||
NvU32 new_cpu_put;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
|
||||
new_tracking_value = ++channel->tracking_sem.queued_value;
|
||||
new_payload = (NvU32)new_tracking_value;
|
||||
@ -446,7 +476,7 @@ void uvm_channel_end_push(uvm_push_t *push)
|
||||
// may notice the GPU work to be completed and hence all state tracking the
|
||||
// push must be updated before that. Notably uvm_pushbuffer_end_push() has
|
||||
// to be called first.
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
unlock_push(channel);
|
||||
|
||||
// This memory barrier is borrowed from CUDA, as it supposedly fixes perf
|
||||
@ -470,7 +500,7 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
|
||||
NvU32 new_cpu_put;
|
||||
uvm_gpu_t *gpu = channel->pool->manager->gpu;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
|
||||
cpu_put = channel->cpu_put;
|
||||
new_cpu_put = (cpu_put + 1) % channel->num_gpfifo_entries;
|
||||
@ -505,7 +535,7 @@ static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_valu
|
||||
// The moment the channel is unlocked uvm_channel_update_progress_with_max()
|
||||
// may notice the GPU work to be completed and hence all state tracking the
|
||||
// push must be updated before that.
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
unlock_push(channel);
|
||||
|
||||
// This memory barrier is borrowed from CUDA, as it supposedly fixes perf
|
||||
@ -591,12 +621,12 @@ static uvm_gpfifo_entry_t *uvm_channel_get_first_pending_entry(uvm_channel_t *ch
|
||||
if (pending_count == 0)
|
||||
return NULL;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
|
||||
if (channel->gpu_get != channel->cpu_put)
|
||||
entry = &channel->gpfifo_entries[channel->gpu_get];
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
|
||||
return entry;
|
||||
}
|
||||
@ -720,9 +750,9 @@ static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
|
||||
channel_update_progress_all(channel, UVM_CHANNEL_UPDATE_MODE_FORCE_ALL);
|
||||
}
|
||||
|
||||
uvm_procfs_destroy_entry(channel->procfs.pushes);
|
||||
uvm_procfs_destroy_entry(channel->procfs.info);
|
||||
uvm_procfs_destroy_entry(channel->procfs.dir);
|
||||
proc_remove(channel->procfs.pushes);
|
||||
proc_remove(channel->procfs.info);
|
||||
proc_remove(channel->procfs.dir);
|
||||
|
||||
uvm_kvfree(channel->push_acquire_infos);
|
||||
uvm_kvfree(channel->push_infos);
|
||||
@ -977,7 +1007,7 @@ static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
|
||||
pool->engine_index = engine_index;
|
||||
pool->pool_type = pool_type;
|
||||
|
||||
uvm_spin_lock_init(&pool->lock, UVM_LOCK_ORDER_CHANNEL);
|
||||
channel_pool_lock_init(pool);
|
||||
|
||||
num_channels = channel_pool_type_num_channels(pool_type);
|
||||
|
||||
@ -1482,11 +1512,11 @@ void uvm_channel_manager_destroy(uvm_channel_manager_t *channel_manager)
|
||||
if (channel_manager == NULL)
|
||||
return;
|
||||
|
||||
uvm_procfs_destroy_entry(channel_manager->procfs.pending_pushes);
|
||||
proc_remove(channel_manager->procfs.pending_pushes);
|
||||
|
||||
channel_manager_destroy_pools(channel_manager);
|
||||
|
||||
uvm_procfs_destroy_entry(channel_manager->procfs.channels_dir);
|
||||
proc_remove(channel_manager->procfs.channels_dir);
|
||||
|
||||
uvm_pushbuffer_destroy(channel_manager->pushbuffer);
|
||||
|
||||
@ -1583,7 +1613,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
|
||||
uvm_channel_manager_t *manager = channel->pool->manager;
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Channel %s\n", channel->name);
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "completed %llu\n", uvm_channel_update_completed_value(channel));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "queued %llu\n", channel->tracking_sem.queued_value);
|
||||
@ -1595,7 +1625,7 @@ static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore GPU VA 0x%llx\n", uvm_channel_tracking_semaphore_get_gpu_va(channel));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "Semaphore CPU VA 0x%llx\n", (NvU64)(uintptr_t)channel->tracking_sem.semaphore.payload);
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
}
|
||||
|
||||
static void channel_print_push_acquires(uvm_push_acquire_info_t *push_acquire_info, struct seq_file *seq)
|
||||
@ -1639,7 +1669,7 @@ static void channel_print_pushes(uvm_channel_t *channel, NvU32 finished_pushes_c
|
||||
|
||||
NvU64 completed_value = uvm_channel_update_completed_value(channel);
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
|
||||
cpu_put = channel->cpu_put;
|
||||
|
||||
@ -1687,7 +1717,7 @@ static void channel_print_pushes(uvm_channel_t *channel, NvU32 finished_pushes_c
|
||||
channel_print_push_acquires(push_acquire_info, seq);
|
||||
}
|
||||
}
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
}
|
||||
|
||||
void uvm_channel_print_pending_pushes(uvm_channel_t *channel)
|
||||
|
@ -163,7 +163,11 @@ typedef struct
|
||||
uvm_channel_pool_type_t pool_type;
|
||||
|
||||
// Lock protecting the state of channels in the pool
|
||||
uvm_spinlock_t lock;
|
||||
union {
|
||||
uvm_spinlock_t spinlock;
|
||||
uvm_mutex_t mutex;
|
||||
};
|
||||
|
||||
} uvm_channel_pool_t;
|
||||
|
||||
struct uvm_channel_struct
|
||||
@ -309,10 +313,20 @@ struct uvm_channel_manager_struct
|
||||
// Create a channel manager for the GPU
|
||||
NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **manager_out);
|
||||
|
||||
void uvm_channel_pool_lock(uvm_channel_pool_t *pool);
|
||||
void uvm_channel_pool_unlock(uvm_channel_pool_t *pool);
|
||||
void uvm_channel_pool_assert_locked(uvm_channel_pool_t *pool);
|
||||
|
||||
static bool uvm_channel_pool_is_proxy(uvm_channel_pool_t *pool)
|
||||
{
|
||||
UVM_ASSERT(pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
|
||||
return pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
|
||||
}
|
||||
|
||||
static bool uvm_channel_is_proxy(uvm_channel_t *channel)
|
||||
{
|
||||
UVM_ASSERT(channel->pool->pool_type < UVM_CHANNEL_POOL_TYPE_MASK);
|
||||
return channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY;
|
||||
return uvm_channel_pool_is_proxy(channel->pool);
|
||||
}
|
||||
|
||||
static bool uvm_channel_is_ce(uvm_channel_t *channel)
|
||||
|
@ -747,14 +747,14 @@ static NvU32 get_available_gpfifo_entries(uvm_channel_t *channel)
|
||||
{
|
||||
NvU32 pending_entries;
|
||||
|
||||
uvm_spin_lock(&channel->pool->lock);
|
||||
uvm_channel_pool_lock(channel->pool);
|
||||
|
||||
if (channel->cpu_put >= channel->gpu_get)
|
||||
pending_entries = channel->cpu_put - channel->gpu_get;
|
||||
else
|
||||
pending_entries = channel->cpu_put + channel->num_gpfifo_entries - channel->gpu_get;
|
||||
|
||||
uvm_spin_unlock(&channel->pool->lock);
|
||||
uvm_channel_pool_unlock(channel->pool);
|
||||
|
||||
return channel->num_gpfifo_entries - pending_entries - 1;
|
||||
}
|
||||
|
@ -186,8 +186,7 @@ static void uvm_global_remove_parent_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
uvm_assert_spinlock_locked(&g_uvm_global.gpu_table_lock);
|
||||
|
||||
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index]);
|
||||
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index] == parent_gpu);
|
||||
UVM_ASSERT(g_uvm_global.parent_gpus[gpu_index] == NULL || g_uvm_global.parent_gpus[gpu_index] == parent_gpu);
|
||||
|
||||
g_uvm_global.parent_gpus[gpu_index] = NULL;
|
||||
}
|
||||
|
@ -694,7 +694,7 @@ static NV_STATUS init_parent_procfs_dir(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
static void deinit_parent_procfs_dir(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_procfs_destroy_entry(parent_gpu->procfs.dir);
|
||||
proc_remove(parent_gpu->procfs.dir);
|
||||
}
|
||||
|
||||
static NV_STATUS init_parent_procfs_files(uvm_parent_gpu_t *parent_gpu)
|
||||
@ -722,8 +722,8 @@ static NV_STATUS init_parent_procfs_files(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
static void deinit_parent_procfs_files(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_procfs_destroy_entry(parent_gpu->procfs.access_counters_file);
|
||||
uvm_procfs_destroy_entry(parent_gpu->procfs.fault_stats_file);
|
||||
proc_remove(parent_gpu->procfs.access_counters_file);
|
||||
proc_remove(parent_gpu->procfs.fault_stats_file);
|
||||
}
|
||||
|
||||
static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
@ -774,9 +774,9 @@ static NV_STATUS init_procfs_dirs(uvm_gpu_t *gpu)
|
||||
// The kernel waits on readers to finish before returning from those calls
|
||||
static void deinit_procfs_dirs(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_procfs_destroy_entry(gpu->procfs.dir_peers);
|
||||
uvm_procfs_destroy_entry(gpu->procfs.dir_symlink);
|
||||
uvm_procfs_destroy_entry(gpu->procfs.dir);
|
||||
proc_remove(gpu->procfs.dir_peers);
|
||||
proc_remove(gpu->procfs.dir_symlink);
|
||||
proc_remove(gpu->procfs.dir);
|
||||
}
|
||||
|
||||
static NV_STATUS init_procfs_files(uvm_gpu_t *gpu)
|
||||
@ -790,15 +790,15 @@ static NV_STATUS init_procfs_files(uvm_gpu_t *gpu)
|
||||
|
||||
static void deinit_procfs_files(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_procfs_destroy_entry(gpu->procfs.info_file);
|
||||
proc_remove(gpu->procfs.info_file);
|
||||
}
|
||||
|
||||
static void deinit_procfs_peer_cap_files(uvm_gpu_peer_t *peer_caps)
|
||||
{
|
||||
uvm_procfs_destroy_entry(peer_caps->procfs.peer_symlink_file[0]);
|
||||
uvm_procfs_destroy_entry(peer_caps->procfs.peer_symlink_file[1]);
|
||||
uvm_procfs_destroy_entry(peer_caps->procfs.peer_file[0]);
|
||||
uvm_procfs_destroy_entry(peer_caps->procfs.peer_file[1]);
|
||||
proc_remove(peer_caps->procfs.peer_symlink_file[0]);
|
||||
proc_remove(peer_caps->procfs.peer_symlink_file[1]);
|
||||
proc_remove(peer_caps->procfs.peer_file[0]);
|
||||
proc_remove(peer_caps->procfs.peer_file[1]);
|
||||
}
|
||||
|
||||
static NV_STATUS init_semaphore_pool(uvm_gpu_t *gpu)
|
||||
@ -3080,41 +3080,41 @@ void uvm_gpu_dma_free_page(uvm_parent_gpu_t *parent_gpu, void *va, NvU64 dma_add
|
||||
atomic64_sub(PAGE_SIZE, &parent_gpu->mapped_cpu_pages_size);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out)
|
||||
NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out)
|
||||
{
|
||||
NvU64 dma_addr;
|
||||
|
||||
UVM_ASSERT(PAGE_ALIGNED(size));
|
||||
|
||||
dma_addr = dma_map_page(&gpu->parent->pci_dev->dev, page, 0, size, DMA_BIDIRECTIONAL);
|
||||
if (dma_mapping_error(&gpu->parent->pci_dev->dev, dma_addr))
|
||||
dma_addr = dma_map_page(&parent_gpu->pci_dev->dev, page, 0, size, DMA_BIDIRECTIONAL);
|
||||
if (dma_mapping_error(&parent_gpu->pci_dev->dev, dma_addr))
|
||||
return NV_ERR_OPERATING_SYSTEM;
|
||||
|
||||
if (dma_addr < gpu->parent->dma_addressable_start ||
|
||||
dma_addr + size - 1 > gpu->parent->dma_addressable_limit) {
|
||||
dma_unmap_page(&gpu->parent->pci_dev->dev, dma_addr, size, DMA_BIDIRECTIONAL);
|
||||
if (dma_addr < parent_gpu->dma_addressable_start ||
|
||||
dma_addr + size - 1 > parent_gpu->dma_addressable_limit) {
|
||||
dma_unmap_page(&parent_gpu->pci_dev->dev, dma_addr, size, DMA_BIDIRECTIONAL);
|
||||
UVM_ERR_PRINT_RL("PCI mapped range [0x%llx, 0x%llx) not in the addressable range [0x%llx, 0x%llx), GPU %s\n",
|
||||
dma_addr,
|
||||
dma_addr + (NvU64)size,
|
||||
gpu->parent->dma_addressable_start,
|
||||
gpu->parent->dma_addressable_limit + 1,
|
||||
uvm_gpu_name(gpu));
|
||||
parent_gpu->dma_addressable_start,
|
||||
parent_gpu->dma_addressable_limit + 1,
|
||||
parent_gpu->name);
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
atomic64_add(size, &gpu->parent->mapped_cpu_pages_size);
|
||||
*dma_address_out = dma_addr_to_gpu_addr(gpu->parent, dma_addr);
|
||||
atomic64_add(size, &parent_gpu->mapped_cpu_pages_size);
|
||||
*dma_address_out = dma_addr_to_gpu_addr(parent_gpu, dma_addr);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_gpu_unmap_cpu_pages(uvm_gpu_t *gpu, NvU64 dma_address, size_t size)
|
||||
void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size)
|
||||
{
|
||||
UVM_ASSERT(PAGE_ALIGNED(size));
|
||||
|
||||
dma_address = gpu_addr_to_dma_addr(gpu->parent, dma_address);
|
||||
dma_unmap_page(&gpu->parent->pci_dev->dev, dma_address, size, DMA_BIDIRECTIONAL);
|
||||
atomic64_sub(size, &gpu->parent->mapped_cpu_pages_size);
|
||||
dma_address = gpu_addr_to_dma_addr(parent_gpu, dma_address);
|
||||
dma_unmap_page(&parent_gpu->pci_dev->dev, dma_address, size, DMA_BIDIRECTIONAL);
|
||||
atomic64_sub(size, &parent_gpu->mapped_cpu_pages_size);
|
||||
}
|
||||
|
||||
// This function implements the UvmRegisterGpu API call, as described in uvm.h.
|
||||
|
@ -44,6 +44,7 @@
|
||||
#include "uvm_va_block_types.h"
|
||||
#include "uvm_perf_module.h"
|
||||
#include "uvm_rb_tree.h"
|
||||
#include "uvm_perf_prefetch.h"
|
||||
#include "nv-kthread-q.h"
|
||||
|
||||
// Buffer length to store uvm gpu id, RM device name and gpu uuid.
|
||||
@ -159,6 +160,12 @@ struct uvm_service_block_context_struct
|
||||
|
||||
// State used by the VA block routines called by the servicing routine
|
||||
uvm_va_block_context_t block_context;
|
||||
|
||||
// Prefetch state hint
|
||||
uvm_perf_prefetch_hint_t prefetch_hint;
|
||||
|
||||
// Prefetch temporary state.
|
||||
uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;
|
||||
};
|
||||
|
||||
struct uvm_fault_service_batch_context_struct
|
||||
@ -374,6 +381,16 @@ struct uvm_access_counter_service_batch_context_struct
|
||||
// determine at fetch time that all the access counter notifications in the
|
||||
// batch report the same instance_ptr
|
||||
bool is_single_instance_ptr;
|
||||
|
||||
// Scratch space, used to generate artificial physically addressed notifications.
|
||||
// Virtual address notifications are always aligned to 64 KiB, so with 4 KiB base
// pages up to 16 different physical locations (64 KiB / 4 KiB) could have been
// accessed to trigger one notification.
|
||||
// The sub-granularity mask can correspond to any of them.
|
||||
struct {
|
||||
uvm_processor_id_t resident_processors[16];
|
||||
uvm_gpu_phys_address_t phys_addresses[16];
|
||||
uvm_access_counter_buffer_entry_t phys_entry;
|
||||
} scratch;
|
||||
} virt;
|
||||
|
||||
struct
|
||||
@@ -1309,19 +1326,19 @@ NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
//
// Returns the physical address of the pages that can be used to access them on
// the GPU.
NV_STATUS uvm_gpu_map_cpu_pages(uvm_gpu_t *gpu, struct page *page, size_t size, NvU64 *dma_address_out);
NV_STATUS uvm_gpu_map_cpu_pages(uvm_parent_gpu_t *parent_gpu, struct page *page, size_t size, NvU64 *dma_address_out);

// Unmap num_pages pages previously mapped with uvm_gpu_map_cpu_pages().
void uvm_gpu_unmap_cpu_pages(uvm_gpu_t *gpu, NvU64 dma_address, size_t size);
void uvm_gpu_unmap_cpu_pages(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address, size_t size);

static NV_STATUS uvm_gpu_map_cpu_page(uvm_gpu_t *gpu, struct page *page, NvU64 *dma_address_out)
static NV_STATUS uvm_gpu_map_cpu_page(uvm_parent_gpu_t *parent_gpu, struct page *page, NvU64 *dma_address_out)
{
return uvm_gpu_map_cpu_pages(gpu, page, PAGE_SIZE, dma_address_out);
return uvm_gpu_map_cpu_pages(parent_gpu, page, PAGE_SIZE, dma_address_out);
}

static void uvm_gpu_unmap_cpu_page(uvm_gpu_t *gpu, NvU64 dma_address)
static void uvm_gpu_unmap_cpu_page(uvm_parent_gpu_t *parent_gpu, NvU64 dma_address)
{
uvm_gpu_unmap_cpu_pages(gpu, dma_address, PAGE_SIZE);
uvm_gpu_unmap_cpu_pages(parent_gpu, dma_address, PAGE_SIZE);
}

// Allocate and map a page of system DMA memory on the GPU for physical access

@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2021 NVIDIA Corporation
Copyright (c) 2017-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@@ -41,6 +41,10 @@
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX ((1 << 16) - 1)
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT 256

#define UVM_ACCESS_COUNTER_ACTION_NOTIFY 0x1
#define UVM_ACCESS_COUNTER_ACTION_CLEAR 0x2
#define UVM_ACCESS_COUNTER_ON_MANAGED 0x4

// Each page in a tracked physical range may belong to a different VA Block. We
// preallocate an array of reverse map translations. However, access counter
// granularity can be set to up to 16G, which would require an array too large
@@ -934,25 +938,6 @@ static void preprocess_virt_notifications(uvm_gpu_t *gpu,
translate_virt_notifications_instance_ptrs(gpu, batch_context);
}

static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
uvm_access_counter_service_batch_context_t *batch_context)
{
// TODO: Bug 1990466: Service virtual notifications. Entries with NULL
// va_space are simply dropped.
if (uvm_enable_builtin_tests) {
NvU32 i;

preprocess_virt_notifications(gpu, batch_context);

for (i = 0; i < batch_context->virt.num_notifications; ++i) {
const bool on_managed = false;
uvm_tools_broadcast_access_counter(gpu, batch_context->virt.notifications[i], on_managed);
}
}

return NV_OK;
}

// GPA notifications provide a physical address and an aperture. Sort
// accesses by aperture to try to coalesce operations on the same target
// processor.
@@ -1046,9 +1031,19 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
uvm_page_mask_set(&service_context->thrashing_pin_mask, page_index);
}

// If the underlying VMA is gone, skip HMM migrations.
if (uvm_va_block_is_hmm(va_block)) {
status = uvm_hmm_find_vma(&service_context->block_context, address);
if (status == NV_ERR_INVALID_ADDRESS)
continue;

UVM_ASSERT(status == NV_OK);
}

service_context->block_context.policy = uvm_va_policy_get(va_block, address);

new_residency = uvm_va_block_select_residency(va_block,
&service_context->block_context,
page_index,
processor,
uvm_fault_access_type_mask_bit(UVM_FAULT_ACCESS_TYPE_PREFETCH),
@@ -1158,7 +1153,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *current_entry,
const uvm_reverse_map_t *reverse_mappings,
size_t num_reverse_mappings,
bool *clear_counter)
unsigned *out_flags)
{
size_t index;
uvm_va_block_t *va_block = reverse_mappings[0].va_block;
@@ -1168,7 +1163,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
const uvm_processor_id_t processor = current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC?
gpu->id: UVM_ID_CPU;

*clear_counter = false;
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;

UVM_ASSERT(num_reverse_mappings > 0);

@@ -1217,7 +1212,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
uvm_mutex_unlock(&va_block->lock);

if (status == NV_OK)
*clear_counter = true;
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
}

done:
@@ -1238,25 +1233,26 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *current_entry,
const uvm_reverse_map_t *reverse_mappings,
size_t num_reverse_mappings,
bool *clear_counter)
unsigned *out_flags)
{
NV_STATUS status = NV_OK;
size_t index;

*clear_counter = false;
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;

for (index = 0; index < num_reverse_mappings; ++index) {
bool clear_counter_local = false;
unsigned out_flags_local = 0;
status = service_phys_single_va_block(gpu,
batch_context,
current_entry,
reverse_mappings + index,
1,
&clear_counter_local);
&out_flags_local);
if (status != NV_OK)
break;

*clear_counter = *clear_counter || clear_counter_local;
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
*out_flags |= out_flags_local;
}

// In the case of failure, drop the refcounts for the remaining reverse mappings
@@ -1267,18 +1263,13 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
}

// Iterate over all regions set in the given sub_granularity mask
#define for_each_sub_granularity_region(region_start, region_end, sub_granularity, config) \
for ((region_start) = find_first_bit(&(sub_granularity), (config)->sub_granularity_regions_per_translation), \
(region_end) = find_next_zero_bit(&(sub_granularity), \
(config)->sub_granularity_regions_per_translation, \
(region_start) + 1); \
(region_start) < config->sub_granularity_regions_per_translation; \
(region_start) = find_next_bit(&(sub_granularity), \
(config)->sub_granularity_regions_per_translation, \
(region_end) + 1), \
(region_end) = find_next_zero_bit(&(sub_granularity), \
(config)->sub_granularity_regions_per_translation, \
(region_start) + 1))
#define for_each_sub_granularity_region(region_start, region_end, sub_granularity, num_regions) \
for ((region_start) = find_first_bit(&(sub_granularity), (num_regions)), \
(region_end) = find_next_zero_bit(&(sub_granularity), (num_regions), (region_start) + 1); \
(region_start) < (num_regions); \
(region_start) = find_next_bit(&(sub_granularity), (num_regions), (region_end) + 1), \
(region_end) = find_next_zero_bit(&(sub_granularity), (num_regions), (region_start) + 1))


static bool are_reverse_mappings_on_single_block(const uvm_reverse_map_t *reverse_mappings, size_t num_reverse_mappings)
{
@@ -1309,7 +1300,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
NvU64 address,
unsigned long sub_granularity,
size_t *num_reverse_mappings,
bool *clear_counter)
unsigned *out_flags)
{
NV_STATUS status;
NvU32 region_start, region_end;
@@ -1318,7 +1309,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,

// Get the reverse_map translations for all the regions set in the
// sub_granularity field of the counter.
for_each_sub_granularity_region(region_start, region_end, sub_granularity, config) {
for_each_sub_granularity_region(region_start, region_end, sub_granularity, config->sub_granularity_regions_per_translation) {
NvU64 local_address = address + region_start * config->sub_granularity_region_size;
NvU32 local_translation_size = (region_end - region_start) * config->sub_granularity_region_size;
uvm_reverse_map_t *local_reverse_mappings = batch_context->phys.translations + *num_reverse_mappings;
@@ -1350,7 +1341,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
current_entry,
batch_context->phys.translations,
*num_reverse_mappings,
clear_counter);
out_flags);
}
else {
status = service_phys_va_blocks(gpu,
@@ -1358,7 +1349,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
current_entry,
batch_context->phys.translations,
*num_reverse_mappings,
clear_counter);
out_flags);
}

return status;
@@ -1366,7 +1357,8 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,

static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
uvm_access_counter_service_batch_context_t *batch_context,
const uvm_access_counter_buffer_entry_t *current_entry)
const uvm_access_counter_buffer_entry_t *current_entry,
unsigned *out_flags)
{
NvU64 address;
NvU64 translation_index;
@@ -1377,7 +1369,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
size_t total_reverse_mappings = 0;
uvm_gpu_t *resident_gpu = NULL;
NV_STATUS status = NV_OK;
bool clear_counter = false;
unsigned flags = 0;

address = current_entry->address.address;
UVM_ASSERT(address % config->translation_size == 0);
@@ -1405,7 +1397,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,

for (translation_index = 0; translation_index < config->translations_per_counter; ++translation_index) {
size_t num_reverse_mappings;
bool clear_counter_local = false;
unsigned out_flags_local = 0;
status = service_phys_notification_translation(gpu,
resident_gpu,
batch_context,
@@ -1414,9 +1406,11 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
address,
sub_granularity,
&num_reverse_mappings,
&clear_counter_local);
&out_flags_local);
total_reverse_mappings += num_reverse_mappings;
clear_counter = clear_counter || clear_counter_local;

UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_CLEAR) == 0);
flags |= out_flags_local;

if (status != NV_OK)
break;
@@ -1425,17 +1419,14 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
sub_granularity = sub_granularity >> config->sub_granularity_regions_per_translation;
}

// TODO: Bug 1990466: Here we already have virtual addresses and
// address spaces. Merge virtual and physical notification handling

// Currently we only report events for our tests, not for tools
if (uvm_enable_builtin_tests) {
const bool on_managed = total_reverse_mappings != 0;
uvm_tools_broadcast_access_counter(gpu, current_entry, on_managed);
*out_flags |= UVM_ACCESS_COUNTER_ACTION_NOTIFY;
*out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_ON_MANAGED : 0);
}

if (status == NV_OK && clear_counter)
status = access_counter_clear_targeted(gpu, current_entry);
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;

return status;
}
@@ -1450,11 +1441,18 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
for (i = 0; i < batch_context->phys.num_notifications; ++i) {
NV_STATUS status;
uvm_access_counter_buffer_entry_t *current_entry = batch_context->phys.notifications[i];
unsigned flags = 0;

if (!UVM_ID_IS_VALID(current_entry->physical_info.resident_id))
continue;

status = service_phys_notification(gpu, batch_context, current_entry);
status = service_phys_notification(gpu, batch_context, current_entry, &flags);
if (flags & UVM_ACCESS_COUNTER_ACTION_NOTIFY)
uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);

if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
status = access_counter_clear_targeted(gpu, current_entry);

if (status != NV_OK)
return status;
}
@@ -1462,6 +1460,191 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
return NV_OK;
}

static int cmp_sort_gpu_phys_addr(const void *_a, const void *_b)
{
return uvm_gpu_phys_addr_cmp(*(uvm_gpu_phys_address_t*)_a,
*(uvm_gpu_phys_address_t*)_b);
}

static bool gpu_phys_same_region(uvm_gpu_phys_address_t a, uvm_gpu_phys_address_t b, NvU64 granularity)
{
if (a.aperture != b.aperture)
return false;

UVM_ASSERT(is_power_of_2(granularity));

return UVM_ALIGN_DOWN(a.address, granularity) == UVM_ALIGN_DOWN(b.address, granularity);
}

static bool phys_address_in_accessed_sub_region(uvm_gpu_phys_address_t address,
NvU64 region_size,
NvU64 sub_region_size,
NvU32 accessed_mask)
{
const unsigned accessed_index = (address.address % region_size) / sub_region_size;

// accessed_mask is only filled for tracking granularities larger than 64K
if (region_size == UVM_PAGE_SIZE_64K)
return true;

UVM_ASSERT(accessed_index < 32);
return ((1 << accessed_index) & accessed_mask) != 0;
}

static NV_STATUS service_virt_notification(uvm_gpu_t *gpu,
uvm_access_counter_service_batch_context_t *batch_context,
const uvm_access_counter_buffer_entry_t *current_entry,
unsigned *out_flags)
{
NV_STATUS status = NV_OK;
NvU64 notification_size;
NvU64 address;
uvm_processor_id_t *resident_processors = batch_context->virt.scratch.resident_processors;
uvm_gpu_phys_address_t *phys_addresses = batch_context->virt.scratch.phys_addresses;
int num_addresses = 0;
int i;

// Virtual address notifications are always 64K aligned
NvU64 region_start = current_entry->address.address;
NvU64 region_end = current_entry->address.address + UVM_PAGE_SIZE_64K;


uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
uvm_access_counter_type_t counter_type = current_entry->counter_type;

const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters, counter_type);

uvm_va_space_t *va_space = current_entry->virtual_info.va_space;

UVM_ASSERT(counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC);

// Entries with NULL va_space are simply dropped.
if (!va_space)
return NV_OK;

status = config_granularity_to_bytes(config->rm.granularity, &notification_size);
if (status != NV_OK)
return status;

// Collect physical locations that could have been touched
// in the reported 64K VA region. The notification mask can
// correspond to any of them.
uvm_va_space_down_read(va_space);
for (address = region_start; address < region_end;) {
uvm_va_block_t *va_block;

NV_STATUS local_status = uvm_va_block_find(va_space, address, &va_block);
if (local_status == NV_ERR_INVALID_ADDRESS || local_status == NV_ERR_OBJECT_NOT_FOUND) {
address += PAGE_SIZE;
continue;
}

uvm_mutex_lock(&va_block->lock);
while (address < va_block->end && address < region_end) {
const unsigned page_index = uvm_va_block_cpu_page_index(va_block, address);

// UVM va_block always maps the closest resident location to processor
const uvm_processor_id_t res_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);

// Add physical location if it's valid and not local vidmem
if (UVM_ID_IS_VALID(res_id) && !uvm_id_equal(res_id, gpu->id)) {
uvm_gpu_phys_address_t phys_address = uvm_va_block_res_phys_page_address(va_block, page_index, res_id, gpu);
if (phys_address_in_accessed_sub_region(phys_address,
notification_size,
config->sub_granularity_region_size,
current_entry->sub_granularity)) {
resident_processors[num_addresses] = res_id;
phys_addresses[num_addresses] = phys_address;
++num_addresses;
}
else {
UVM_DBG_PRINT_RL("Skipping phys address %llx:%s, because it couldn't have been accessed in mask %x",
phys_address.address,
uvm_aperture_string(phys_address.aperture),
current_entry->sub_granularity);
}
}

address += PAGE_SIZE;
}
uvm_mutex_unlock(&va_block->lock);
}
uvm_va_space_up_read(va_space);

// The addresses need to be sorted to aid coalescing.
sort(phys_addresses,
num_addresses,
sizeof(*phys_addresses),
cmp_sort_gpu_phys_addr,
NULL);

for (i = 0; i < num_addresses; ++i) {
uvm_access_counter_buffer_entry_t *fake_entry = &batch_context->virt.scratch.phys_entry;

// Skip the current pointer if the physical region was already handled
if (i > 0 && gpu_phys_same_region(phys_addresses[i - 1], phys_addresses[i], notification_size)) {
UVM_ASSERT(uvm_id_equal(resident_processors[i - 1], resident_processors[i]));
continue;
}
UVM_DBG_PRINT_RL("Faking MIMC address[%i/%i]: %llx (granularity mask: %llx) in aperture %s on device %s\n",
i,
num_addresses,
phys_addresses[i].address,
notification_size - 1,
uvm_aperture_string(phys_addresses[i].aperture),
uvm_gpu_name(gpu));

// Construct a fake phys addr AC entry
fake_entry->counter_type = current_entry->counter_type;
fake_entry->address.address = UVM_ALIGN_DOWN(phys_addresses[i].address, notification_size);
fake_entry->address.aperture = phys_addresses[i].aperture;
fake_entry->address.is_virtual = false;
fake_entry->physical_info.resident_id = resident_processors[i];
fake_entry->counter_value = current_entry->counter_value;
fake_entry->sub_granularity = current_entry->sub_granularity;

status = service_phys_notification(gpu, batch_context, fake_entry, out_flags);
if (status != NV_OK)
break;
}

return status;
}

static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
uvm_access_counter_service_batch_context_t *batch_context)
{
NvU32 i;
NV_STATUS status = NV_OK;
preprocess_virt_notifications(gpu, batch_context);

for (i = 0; i < batch_context->virt.num_notifications; ++i) {
unsigned flags = 0;
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];

status = service_virt_notification(gpu, batch_context, current_entry, &flags);

UVM_DBG_PRINT_RL("Processed virt access counter (%d/%d): %sMANAGED (status: %d) clear: %s\n",
i + 1,
batch_context->virt.num_notifications,
(flags & UVM_ACCESS_COUNTER_ON_MANAGED) ? "" : "NOT ",
status,
(flags & UVM_ACCESS_COUNTER_ACTION_CLEAR) ? "YES" : "NO");

if (uvm_enable_builtin_tests)
uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);

if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
status = access_counter_clear_targeted(gpu, current_entry);

if (status != NV_OK)
break;
}

return status;
}


void uvm_gpu_service_access_counters(uvm_gpu_t *gpu)
{
NV_STATUS status = NV_OK;

@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2021 NVIDIA Corporation
|
||||
Copyright (c) 2017-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -338,7 +338,6 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
uvm_processor_id_t new_residency;
|
||||
bool read_duplicate;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_va_range_t *va_range = va_block->va_range;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||||
|
||||
UVM_ASSERT(!fault_entry->is_fatal);
|
||||
@ -365,8 +364,11 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
// Check logical permissions
|
||||
status = uvm_va_range_check_logical_permissions(va_range,
|
||||
status = uvm_va_block_check_logical_permissions(va_block,
|
||||
&service_context->block_context,
|
||||
gpu->id,
|
||||
uvm_va_block_cpu_page_index(va_block,
|
||||
fault_entry->fault_address),
|
||||
fault_entry->fault_access_type,
|
||||
uvm_range_group_address_migratable(va_space,
|
||||
fault_entry->fault_address));
|
||||
@ -386,6 +388,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
|
||||
// Compute new residency and update the masks
|
||||
new_residency = uvm_va_block_select_residency(va_block,
|
||||
&service_context->block_context,
|
||||
page_index,
|
||||
gpu->id,
|
||||
fault_entry->access_type_mask,
|
||||
@ -422,7 +425,6 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
|
||||
struct mm_struct *mm,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_fault_buffer_entry_t *fault_entry)
|
||||
{
|
||||
@ -432,7 +434,6 @@ static NV_STATUS service_managed_fault_in_block(uvm_gpu_t *gpu,
|
||||
|
||||
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
|
||||
service_context->num_retries = 0;
|
||||
service_context->block_context.mm = mm;
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
@ -598,6 +599,7 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
|
||||
// to remain valid until we release. If no mm is registered, we
|
||||
// can only service managed faults, not ATS/HMM faults.
|
||||
mm = uvm_va_space_mm_retain_lock(va_space);
|
||||
va_block_context->mm = mm;
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
@ -622,12 +624,11 @@ static NV_STATUS service_fault(uvm_gpu_t *gpu, uvm_fault_buffer_entry_t *fault_e
|
||||
|
||||
if (!fault_entry->is_fatal) {
|
||||
status = uvm_va_block_find_create(fault_entry->va_space,
|
||||
mm,
|
||||
fault_entry->fault_address,
|
||||
va_block_context,
|
||||
&va_block);
|
||||
if (status == NV_OK)
|
||||
status = service_managed_fault_in_block(gpu_va_space->gpu, mm, va_block, fault_entry);
|
||||
status = service_managed_fault_in_block(gpu_va_space->gpu, va_block, fault_entry);
|
||||
else
|
||||
status = service_non_managed_fault(gpu_va_space, mm, fault_entry, status);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2021 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -1055,13 +1055,17 @@ static NV_STATUS preprocess_fault_batch(uvm_gpu_t *gpu, uvm_fault_service_batch_
|
||||
// - service_access_type: highest access type that can be serviced.
|
||||
static uvm_fault_access_type_t check_fault_access_permissions(uvm_gpu_t *gpu,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_fault_buffer_entry_t *fault_entry,
|
||||
bool allow_migration)
|
||||
{
|
||||
NV_STATUS perm_status;
|
||||
|
||||
perm_status = uvm_va_range_check_logical_permissions(va_block->va_range,
|
||||
perm_status = uvm_va_block_check_logical_permissions(va_block,
|
||||
va_block_context,
|
||||
gpu->id,
|
||||
uvm_va_block_cpu_page_index(va_block,
|
||||
fault_entry->fault_address),
|
||||
fault_entry->fault_access_type,
|
||||
allow_migration);
|
||||
if (perm_status == NV_OK)
|
||||
@ -1083,8 +1087,11 @@ static uvm_fault_access_type_t check_fault_access_permissions(uvm_gpu_t *gpu,
|
||||
// service them before we can cancel the write/atomic faults. So we
|
||||
// retry with read fault access type.
|
||||
if (uvm_fault_access_type_mask_test(fault_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ)) {
|
||||
perm_status = uvm_va_range_check_logical_permissions(va_block->va_range,
|
||||
perm_status = uvm_va_block_check_logical_permissions(va_block,
|
||||
va_block_context,
|
||||
gpu->id,
|
||||
uvm_va_block_cpu_page_index(va_block,
|
||||
fault_entry->fault_address),
|
||||
UVM_FAULT_ACCESS_TYPE_READ,
|
||||
allow_migration);
|
||||
if (perm_status == NV_OK)
|
||||
@ -1156,14 +1163,16 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
UVM_ASSERT(ordered_fault_cache[first_fault_index]->fault_address >= va_block->start);
|
||||
UVM_ASSERT(ordered_fault_cache[first_fault_index]->fault_address <= va_block->end);
|
||||
|
||||
end = va_block->end;
|
||||
if (uvm_va_block_is_hmm(va_block))
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
uvm_hmm_find_policy_end(va_block,
|
||||
&block_context->block_context,
|
||||
ordered_fault_cache[first_fault_index]->fault_address,
|
||||
&end);
|
||||
else
|
||||
}
|
||||
else {
|
||||
block_context->block_context.policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
end = va_block->end;
|
||||
}
|
||||
|
||||
// Scan the sorted array and notify the fault event for all fault entries
|
||||
// in the block
|
||||
@ -1226,7 +1235,11 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
|
||||
UVM_ASSERT(iter.start <= current_entry->fault_address && iter.end >= current_entry->fault_address);
|
||||
|
||||
service_access_type = check_fault_access_permissions(gpu, va_block, current_entry, iter.migratable);
|
||||
service_access_type = check_fault_access_permissions(gpu,
|
||||
va_block,
|
||||
&block_context->block_context,
|
||||
current_entry,
|
||||
iter.migratable);
|
||||
|
||||
// Do not exit early due to logical errors such as access permission
|
||||
// violation.
|
||||
@ -1269,6 +1282,7 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
|
||||
// Compute new residency and update the masks
|
||||
new_residency = uvm_va_block_select_residency(va_block,
|
||||
&block_context->block_context,
|
||||
page_index,
|
||||
gpu->id,
|
||||
service_access_type_mask,
|
||||
@ -1348,7 +1362,6 @@ static NV_STATUS service_batch_managed_faults_in_block_locked(uvm_gpu_t *gpu,
|
||||
// See the comments for function service_fault_batch_block_locked for
|
||||
// implementation details and error codes.
|
||||
static NV_STATUS service_batch_managed_faults_in_block(uvm_gpu_t *gpu,
|
||||
struct mm_struct *mm,
|
||||
uvm_va_block_t *va_block,
|
||||
NvU32 first_fault_index,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
@ -1361,7 +1374,6 @@ static NV_STATUS service_batch_managed_faults_in_block(uvm_gpu_t *gpu,
|
||||
|
||||
fault_block_context->operation = UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS;
|
||||
fault_block_context->num_retries = 0;
|
||||
fault_block_context->block_context.mm = mm;
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
@ -1531,6 +1543,7 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
// to remain valid until we release. If no mm is registered, we
|
||||
// can only service managed faults, not ATS/HMM faults.
|
||||
mm = uvm_va_space_mm_retain_lock(va_space);
|
||||
va_block_context->mm = mm;
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
@ -1576,13 +1589,11 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
// TODO: Bug 2103669: Service more than one ATS fault at a time so we
|
||||
// don't do an unconditional VA range lookup for every ATS fault.
|
||||
status = uvm_va_block_find_create(va_space,
|
||||
mm,
|
||||
current_entry->fault_address,
|
||||
va_block_context,
|
||||
&va_block);
|
||||
if (status == NV_OK) {
|
||||
status = service_batch_managed_faults_in_block(gpu_va_space->gpu,
|
||||
mm,
|
||||
va_block,
|
||||
i,
|
||||
batch_context,
|
||||
|
@ -118,6 +118,13 @@ static bool is_canary(NvU32 val)
|
||||
return (val & ~UVM_SEMAPHORE_CANARY_MASK) == UVM_SEMAPHORE_CANARY_BASE;
|
||||
}
|
||||
|
||||
// Can the GPU access the semaphore, i.e., can Host/Esched address the semaphore
|
||||
// pool?
|
||||
static bool gpu_can_access_semaphore_pool(uvm_gpu_t *gpu, uvm_rm_mem_t *rm_mem)
|
||||
{
|
||||
return ((uvm_rm_mem_get_gpu_uvm_va(rm_mem, gpu) + rm_mem->size - 1) < gpu->parent->max_host_va);
|
||||
}
|
||||
|
||||
static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
|
||||
{
|
||||
NV_STATUS status;
|
||||
@ -142,6 +149,9 @@ static NV_STATUS pool_alloc_page(uvm_gpu_semaphore_pool_t *pool)
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
// Verify the GPU can access the semaphore pool.
|
||||
UVM_ASSERT(gpu_can_access_semaphore_pool(pool->gpu, pool_page->memory));
|
||||
|
||||
// All semaphores are initially free
|
||||
bitmap_fill(pool_page->free_semaphores, UVM_SEMAPHORE_COUNT_PER_PAGE);
|
||||
|
||||
|
@ -46,6 +46,7 @@ MODULE_PARM_DESC(uvm_disable_hmm,
|
||||
#include "uvm_lock.h"
|
||||
#include "uvm_api.h"
|
||||
#include "uvm_va_policy.h"
|
||||
#include "uvm_tools.h"
|
||||
|
||||
bool uvm_hmm_is_enabled_system_wide(void)
|
||||
{
|
||||
@ -96,6 +97,9 @@ NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space)
|
||||
if (!uvm_hmm_is_enabled_system_wide() || !mm)
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
|
||||
if (va_space->initialization_flags & UVM_INIT_FLAGS_DISABLE_HMM)
|
||||
return NV_ERR_INVALID_STATE;
|
||||
|
||||
uvm_assert_mmap_lock_locked_write(mm);
|
||||
uvm_assert_rwsem_locked_write(&va_space->lock);
|
||||
|
||||
@ -179,12 +183,19 @@ static bool hmm_invalidate(uvm_va_block_t *va_block,
|
||||
mmu_interval_set_seq(mni, cur_seq);
|
||||
|
||||
// Note: unmap_vmas() does MMU_NOTIFY_UNMAP [0, 0xffffffffffffffff]
|
||||
// Also note that hmm_invalidate() can be called when a new va_block is not
|
||||
// yet inserted into the va_space->hmm.blocks table while the original
|
||||
// va_block is being split. The original va_block may have its end address
|
||||
// updated before the mmu interval notifier is updated so this invalidate
|
||||
// may be for a range past the va_block end address.
|
||||
start = range->start;
|
||||
end = (range->end == ULONG_MAX) ? range->end : range->end - 1;
|
||||
if (start < va_block->start)
|
||||
start = va_block->start;
|
||||
if (end > va_block->end)
|
||||
end = va_block->end;
|
||||
if (start > end)
|
||||
goto unlock;
|
||||
|
||||
if (range->event == MMU_NOTIFY_UNMAP)
|
||||
uvm_va_policy_clear(va_block, start, end);
|
||||
@ -266,6 +277,7 @@ static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
|
||||
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
|
||||
UVM_ASSERT(mm);
|
||||
UVM_ASSERT(!va_block_context || va_block_context->mm == mm);
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
UVM_ASSERT(PAGE_ALIGNED(addr));
|
||||
@ -294,11 +306,13 @@ static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
// a maximum interval that doesn't overlap any existing UVM va_ranges.
|
||||
// We know that 'addr' is not within a va_range or
|
||||
// hmm_va_block_find_create() wouldn't be called.
|
||||
uvm_range_tree_adjust_interval(&va_space->va_range_tree, addr, &start, &end);
|
||||
status = uvm_range_tree_find_hole_in(&va_space->va_range_tree, addr, &start, &end);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
// Search for existing HMM va_blocks in the start/end interval and create
|
||||
// a maximum interval that doesn't overlap any existing HMM va_blocks.
|
||||
uvm_range_tree_adjust_interval(&va_space->hmm.blocks, addr, &start, &end);
|
||||
status = uvm_range_tree_find_hole_in(&va_space->hmm.blocks, addr, &start, &end);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
// Create a HMM va_block with a NULL va_range pointer.
|
||||
status = uvm_va_block_create(NULL, start, end, &va_block);
|
||||
@ -321,10 +335,7 @@ static NV_STATUS hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
}
|
||||
|
||||
status = uvm_range_tree_add(&va_space->hmm.blocks, &va_block->hmm.node);
|
||||
if (status != NV_OK) {
|
||||
UVM_ASSERT(status != NV_ERR_UVM_ADDRESS_IN_USE);
|
||||
goto err_unreg;
|
||||
}
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
done:
|
||||
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
|
||||
@ -333,9 +344,6 @@ done:
|
||||
*va_block_ptr = va_block;
|
||||
return NV_OK;
|
||||
|
||||
err_unreg:
|
||||
mmu_interval_notifier_remove(&va_block->hmm.notifier);
|
||||
|
||||
err_release:
|
||||
uvm_va_block_release(va_block);
|
||||
|
||||
@ -352,10 +360,67 @@ NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
return hmm_va_block_find_create(va_space, addr, false, va_block_context, va_block_ptr);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr)
|
||||
{
|
||||
struct mm_struct *mm = va_block_context->mm;
|
||||
struct vm_area_struct *vma;
|
||||
|
||||
if (!mm)
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
|
||||
vma = find_vma(mm, addr);
|
||||
if (!uvm_hmm_vma_is_valid(vma, addr, false))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
va_block_context->hmm.vma = vma;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
bool uvm_hmm_va_block_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
struct vm_area_struct *vma = va_block_context->hmm.vma;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(va_block_context->mm == vma->vm_mm);
|
||||
uvm_assert_mmap_lock_locked(va_block_context->mm);
|
||||
UVM_ASSERT(vma->vm_start <= uvm_va_block_region_start(va_block, region));
|
||||
UVM_ASSERT(vma->vm_end > uvm_va_block_region_end(va_block, region));
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_hmm_test_va_block_inject_split_error(uvm_va_space_t *va_space, NvU64 addr)
|
||||
{
|
||||
uvm_va_block_test_t *block_test;
|
||||
uvm_va_block_t *va_block;
|
||||
NV_STATUS status;
|
||||
|
||||
if (!uvm_hmm_is_enabled(va_space))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
status = hmm_va_block_find_create(va_space, addr, false, NULL, &va_block);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
block_test = uvm_va_block_get_test(va_block);
|
||||
if (block_test)
|
||||
block_test->inject_split_error = true;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
typedef struct {
|
||||
struct mmu_interval_notifier notifier;
|
||||
uvm_va_block_t *existing_block;
|
||||
uvm_va_block_t *new_block;
|
||||
} hmm_split_invalidate_data_t;
|
||||
|
||||
static bool hmm_split_invalidate(struct mmu_interval_notifier *mni,
|
||||
@ -363,14 +428,9 @@ static bool hmm_split_invalidate(struct mmu_interval_notifier *mni,
|
||||
unsigned long cur_seq)
|
||||
{
|
||||
hmm_split_invalidate_data_t *split_data = container_of(mni, hmm_split_invalidate_data_t, notifier);
|
||||
uvm_va_block_t *existing_block = split_data->existing_block;
|
||||
uvm_va_block_t *new_block = split_data->new_block;
|
||||
|
||||
if (uvm_ranges_overlap(existing_block->start, existing_block->end, range->start, range->end - 1))
|
||||
hmm_invalidate(existing_block, range, cur_seq);
|
||||
|
||||
if (uvm_ranges_overlap(new_block->start, new_block->end, range->start, range->end - 1))
|
||||
hmm_invalidate(new_block, range, cur_seq);
|
||||
uvm_tools_test_hmm_split_invalidate(split_data->existing_block->hmm.va_space);
|
||||
hmm_invalidate(split_data->existing_block, range, cur_seq);
|
||||
|
||||
return true;
|
||||
}
|
||||
@ -404,6 +464,7 @@ static NV_STATUS hmm_split_block(uvm_va_block_t *va_block,
|
||||
uvm_va_space_t *va_space = va_block->hmm.va_space;
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
hmm_split_invalidate_data_t split_data;
|
||||
NvU64 delay_us;
|
||||
uvm_va_block_t *new_va_block;
|
||||
NV_STATUS status;
|
||||
int ret;
|
||||
@ -419,22 +480,23 @@ static NV_STATUS hmm_split_block(uvm_va_block_t *va_block,
|
||||
return status;
|
||||
|
||||
// Initialize the newly created HMM va_block.
|
||||
new_va_block->hmm.node.start = new_va_block->start;
|
||||
new_va_block->hmm.node.end = new_va_block->end;
|
||||
new_va_block->hmm.va_space = va_space;
|
||||
uvm_range_tree_init(&new_va_block->hmm.va_policy_tree);
|
||||
|
||||
// The MMU interval notifier has to be removed in order to resize it.
|
||||
// That means there would be a window of time where invalidation callbacks
|
||||
// could be missed. To handle this case, we register a temporary notifier
|
||||
// to cover the same address range while resizing the old notifier (it is
|
||||
// OK to have multiple notifiers for the same range, we may simply try to
|
||||
// invalidate twice).
|
||||
split_data.existing_block = va_block;
|
||||
split_data.new_block = new_va_block;
|
||||
ret = mmu_interval_notifier_insert(&split_data.notifier,
|
||||
ret = mmu_interval_notifier_insert(&new_va_block->hmm.notifier,
|
||||
mm,
|
||||
va_block->start,
|
||||
new_va_block->end - va_block->start + 1,
|
||||
&hmm_notifier_split_ops);
|
||||
new_va_block->start,
|
||||
uvm_va_block_size(new_va_block),
|
||||
&uvm_hmm_notifier_ops);
|
||||
|
||||
// Since __mmu_notifier_register() was called when the va_space was
|
||||
// initially created, we know that mm->notifier_subscriptions is valid
|
||||
// and mmu_interval_notifier_insert() can't return ENOMEM.
|
||||
// The only error return is for start + length overflowing but we already
|
||||
// registered the same address range before so there should be no error.
|
||||
UVM_ASSERT(!ret);
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
@ -444,40 +506,38 @@ static NV_STATUS hmm_split_block(uvm_va_block_t *va_block,
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
// Since __mmu_notifier_register() was called when the va_space was
|
||||
// initially created, we know that mm->notifier_subscriptions is valid
|
||||
// and mmu_interval_notifier_insert() can't return ENOMEM.
|
||||
// The only error return is for start + length overflowing but we already
|
||||
// registered the same address range before so there should be no error.
|
||||
// The MMU interval notifier has to be removed in order to resize it.
|
||||
// That means there would be a window of time when invalidation callbacks
|
||||
// could be missed. To handle this case, we register a temporary notifier
|
||||
// to cover the address range while resizing the old notifier (it is
|
||||
// OK to have multiple notifiers for the same range, we may simply try to
|
||||
// invalidate twice).
|
||||
split_data.existing_block = va_block;
|
||||
ret = mmu_interval_notifier_insert(&split_data.notifier,
|
||||
mm,
|
||||
va_block->start,
|
||||
new_end - va_block->start + 1,
|
||||
&hmm_notifier_split_ops);
|
||||
UVM_ASSERT(!ret);
|
||||
|
||||
mmu_interval_notifier_remove(&va_block->hmm.notifier);
|
||||
// Delay to allow hmm_sanity test to trigger an mmu_notifier during the
|
||||
// critical window where the split invalidate callback is active.
|
||||
delay_us = atomic64_read(&va_space->test.split_invalidate_delay_us);
|
||||
if (delay_us)
|
||||
udelay(delay_us);
|
||||
|
||||
uvm_range_tree_shrink_node(&va_space->hmm.blocks, &va_block->hmm.node, va_block->start, va_block->end);
|
||||
mmu_interval_notifier_remove(&va_block->hmm.notifier);
|
||||
|
||||
// Enable notifications on the old block with the smaller size.
|
||||
ret = mmu_interval_notifier_insert(&va_block->hmm.notifier,
|
||||
mm,
|
||||
va_block->start,
|
||||
va_block->end - va_block->start + 1,
|
||||
&uvm_hmm_notifier_ops);
|
||||
UVM_ASSERT(!ret);
|
||||
|
||||
new_va_block->hmm.node.start = new_va_block->start;
|
||||
new_va_block->hmm.node.end = new_va_block->end;
|
||||
|
||||
ret = mmu_interval_notifier_insert(&new_va_block->hmm.notifier,
|
||||
mm,
|
||||
new_va_block->start,
|
||||
new_va_block->end - new_va_block->start + 1,
|
||||
uvm_va_block_size(va_block),
|
||||
&uvm_hmm_notifier_ops);
|
||||
UVM_ASSERT(!ret);
|
||||
|
||||
mmu_interval_notifier_remove(&split_data.notifier);
|
||||
|
||||
status = uvm_range_tree_add(&va_space->hmm.blocks, &new_va_block->hmm.node);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
if (new_block_ptr)
|
||||
*new_block_ptr = new_va_block;
|
||||
|
||||
@ -485,7 +545,7 @@ static NV_STATUS hmm_split_block(uvm_va_block_t *va_block,
|
||||
|
||||
err:
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
mmu_interval_notifier_remove(&split_data.notifier);
|
||||
mmu_interval_notifier_remove(&new_va_block->hmm.notifier);
|
||||
uvm_va_block_release(new_va_block);
|
||||
return status;
|
||||
}
|
||||
@ -536,9 +596,9 @@ static NV_STATUS split_block_if_needed(uvm_va_block_t *va_block,
|
||||
// page tables. However, it doesn't destroy the va_block because that would
|
||||
// require calling mmu_interval_notifier_remove() which can't be called from
|
||||
// the invalidate callback due to Linux locking constraints. If a process
|
||||
// calls mmap()/munmap() for SAM and then creates a UVM managed allocation,
|
||||
// calls mmap()/munmap() for SAM and then creates a managed allocation,
|
||||
// the same VMA range can be picked and there would be a UVM/HMM va_block
|
||||
// conflict. Creating a UVM managed allocation (or other va_range) calls this
|
||||
// conflict. Creating a managed allocation (or other va_range) calls this
|
||||
// function to remove stale HMM va_blocks or split the HMM va_block so there
|
||||
// is no overlap.
|
||||
NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
|
||||
@ -585,6 +645,18 @@ NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_hmm_va_block_split_tree(uvm_va_block_t *existing_va_block, uvm_va_block_t *new_block)
|
||||
{
|
||||
uvm_va_space_t *va_space = existing_va_block->hmm.va_space;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_is_hmm(existing_va_block));
|
||||
uvm_assert_rwsem_locked_write(&va_space->lock);
|
||||
|
||||
uvm_range_tree_split(&existing_va_block->hmm.va_space->hmm.blocks,
|
||||
&existing_va_block->hmm.node,
|
||||
&new_block->hmm.node);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_hmm_split_as_needed(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
uvm_va_policy_is_split_needed_t split_needed_cb,
|
||||
@ -733,7 +805,7 @@ void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
|
||||
{
|
||||
struct vm_area_struct *vma = va_block_context->hmm.vma;
|
||||
uvm_va_policy_node_t *node;
|
||||
NvU64 end = *endp;
|
||||
NvU64 end = va_block->end;
|
||||
|
||||
uvm_assert_mmap_lock_locked(vma->vm_mm);
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
@ -747,8 +819,9 @@ void uvm_hmm_find_policy_end(uvm_va_block_t *va_block,
|
||||
if (end > node->node.end)
|
||||
end = node->node.end;
|
||||
}
|
||||
else
|
||||
else {
|
||||
va_block_context->policy = &uvm_va_policy_default;
|
||||
}
|
||||
|
||||
*endp = end;
|
||||
}
|
||||
@ -760,7 +833,7 @@ NV_STATUS uvm_hmm_find_policy_vma_and_outer(uvm_va_block_t *va_block,
|
||||
{
|
||||
struct vm_area_struct *vma;
|
||||
unsigned long addr;
|
||||
NvU64 end = va_block->end;
|
||||
NvU64 end;
|
||||
uvm_page_index_t outer;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
|
||||
@ -801,9 +874,9 @@ static NV_STATUS hmm_clear_thrashing_policy(uvm_va_block_t *va_block,
|
||||
// before the pinned pages information is destroyed.
|
||||
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
|
||||
NULL,
|
||||
unmap_remote_pinned_pages_from_all_processors(va_block,
|
||||
block_context,
|
||||
region));
|
||||
uvm_perf_thrashing_unmap_remote_pinned_pages_all(va_block,
|
||||
block_context,
|
||||
region));
|
||||
|
||||
uvm_perf_thrashing_info_destroy(va_block);
|
||||
|
||||
@ -839,5 +912,186 @@ NV_STATUS uvm_hmm_clear_thrashing_policy(uvm_va_space_t *va_space)
|
||||
return status;
|
||||
}
|
||||
|
||||
uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 address)
|
||||
{
|
||||
struct vm_area_struct *vma = va_block_context->hmm.vma;
|
||||
uvm_va_policy_t *policy = va_block_context->policy;
|
||||
NvU64 start, end;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
|
||||
|
||||
// We need to limit the prefetch region to the VMA.
|
||||
start = max(va_block->start, (NvU64)vma->vm_start);
|
||||
end = min(va_block->end, (NvU64)vma->vm_end - 1);
|
||||
|
||||
// Also, we need to limit the prefetch region to the policy range.
|
||||
if (policy == &uvm_va_policy_default) {
|
||||
NV_STATUS status = uvm_range_tree_find_hole_in(&va_block->hmm.va_policy_tree,
|
||||
address,
|
||||
&start,
|
||||
&end);
|
||||
// We already know the hole exists and covers the fault region.
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
}
|
||||
else {
|
||||
uvm_va_policy_node_t *node = uvm_va_policy_node_from_policy(policy);
|
||||
|
||||
start = max(start, node->node.start);
|
||||
end = min(end, node->node.end);
|
||||
}
|
||||
|
||||
return uvm_va_block_region_from_start_end(va_block, start, end);
|
||||
}
|
||||
|
||||
uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 addr)
|
||||
{
|
||||
struct vm_area_struct *vma = va_block_context->hmm.vma;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_is_hmm(va_block));
|
||||
uvm_assert_mmap_lock_locked(va_block_context->mm);
|
||||
UVM_ASSERT(vma && addr >= vma->vm_start && addr < vma->vm_end);
|
||||
|
||||
if (!(vma->vm_flags & VM_READ))
|
||||
return UVM_PROT_NONE;
|
||||
else if (!(vma->vm_flags & VM_WRITE))
|
||||
return UVM_PROT_READ_ONLY;
|
||||
else
|
||||
return UVM_PROT_READ_WRITE_ATOMIC;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
|
||||
atomic64_set(&va_space->test.split_invalidate_delay_us, params->delay_us);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_hmm_init(UVM_TEST_HMM_INIT_PARAMS *params, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
struct mm_struct *mm;
|
||||
NV_STATUS status;
|
||||
|
||||
mm = uvm_va_space_mm_or_current_retain(va_space);
|
||||
if (!mm)
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
|
||||
uvm_down_write_mmap_lock(mm);
|
||||
uvm_va_space_down_write(va_space);
|
||||
if (va_space->hmm.disable)
|
||||
status = uvm_hmm_va_space_initialize_test(va_space);
|
||||
else
|
||||
status = NV_OK;
|
||||
uvm_va_space_up_write(va_space);
|
||||
uvm_up_write_mmap_lock(mm);
|
||||
uvm_va_space_mm_or_current_release(va_space, mm);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
UVM_TEST_VA_RANGE_INFO_PARAMS *params)
|
||||
{
|
||||
uvm_range_tree_node_t *tree_node;
|
||||
uvm_va_policy_node_t *node;
|
||||
struct vm_area_struct *vma;
|
||||
uvm_va_block_t *va_block;
|
||||
|
||||
if (!mm || !uvm_hmm_is_enabled(va_space))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
params->type = UVM_TEST_VA_RANGE_TYPE_MANAGED;
|
||||
params->managed.subtype = UVM_TEST_RANGE_SUBTYPE_HMM;
|
||||
params->va_range_start = 0;
|
||||
params->va_range_end = ULONG_MAX;
|
||||
params->read_duplication = UVM_TEST_READ_DUPLICATION_UNSET;
|
||||
memset(&params->preferred_location, 0, sizeof(params->preferred_location));
|
||||
params->accessed_by_count = 0;
|
||||
params->managed.vma_start = 0;
|
||||
params->managed.vma_end = 0;
|
||||
params->managed.is_zombie = NV_FALSE;
|
||||
params->managed.owned_by_calling_process = (mm == current->mm ? NV_TRUE : NV_FALSE);
|
||||
|
||||
vma = find_vma(mm, params->lookup_address);
|
||||
if (!uvm_hmm_vma_is_valid(vma, params->lookup_address, false))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
params->va_range_start = vma->vm_start;
|
||||
params->va_range_end = vma->vm_end - 1;
|
||||
params->managed.vma_start = vma->vm_start;
|
||||
params->managed.vma_end = vma->vm_end - 1;
|
||||
|
||||
uvm_mutex_lock(&va_space->hmm.blocks_lock);
|
||||
tree_node = uvm_range_tree_find(&va_space->hmm.blocks, params->lookup_address);
|
||||
if (!tree_node) {
|
||||
UVM_ASSERT(uvm_range_tree_find_hole_in(&va_space->hmm.blocks, params->lookup_address,
|
||||
&params->va_range_start, &params->va_range_end) == NV_OK);
|
||||
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
uvm_mutex_unlock(&va_space->hmm.blocks_lock);
|
||||
va_block = hmm_va_block_from_node(tree_node);
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
params->va_range_start = va_block->start;
|
||||
params->va_range_end = va_block->end;
|
||||
|
||||
node = uvm_va_policy_node_find(va_block, params->lookup_address);
|
||||
if (node) {
|
||||
uvm_processor_id_t processor_id;
|
||||
|
||||
if (params->va_range_start < node->node.start)
|
||||
params->va_range_start = node->node.start;
|
||||
if (params->va_range_end > node->node.end)
|
||||
params->va_range_end = node->node.end;
|
||||
|
||||
params->read_duplication = node->policy.read_duplication;
|
||||
|
||||
if (!UVM_ID_IS_INVALID(node->policy.preferred_location))
|
||||
uvm_va_space_processor_uuid(va_space, ¶ms->preferred_location, node->policy.preferred_location);
|
||||
|
||||
for_each_id_in_mask(processor_id, &node->policy.accessed_by)
|
||||
uvm_va_space_processor_uuid(va_space, ¶ms->accessed_by[params->accessed_by_count++], processor_id);
|
||||
}
|
||||
else {
|
||||
uvm_range_tree_find_hole_in(&va_block->hmm.va_policy_tree, params->lookup_address,
|
||||
&params->va_range_start, &params->va_range_end);
|
||||
}
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// TODO: Bug 3660968: Remove this hack as soon as HMM migration is implemented
|
||||
// for VMAs other than anonymous private memory.
|
||||
bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context)
|
||||
{
|
||||
struct vm_area_struct *vma = va_block_context->hmm.vma;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
if (!uvm_va_block_is_hmm(va_block))
|
||||
return false;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(va_block_context->mm == vma->vm_mm);
|
||||
uvm_assert_mmap_lock_locked(va_block_context->mm);
|
||||
|
||||
return !vma_is_anonymous(vma);
|
||||
}
|
||||
|
||||
#endif // UVM_IS_CONFIG_HMM()
|
||||
|
||||
|
@ -65,6 +65,8 @@ typedef struct
|
||||
// Initialize HMM for the given the va_space for testing.
|
||||
// Bug 1750144: UVM: Add HMM (Heterogeneous Memory Management) support to
|
||||
// the UVM driver. Remove this when enough HMM functionality is implemented.
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
|
||||
// and the va_space lock must be held in write mode.
|
||||
NV_STATUS uvm_hmm_va_space_initialize_test(uvm_va_space_t *va_space);
|
||||
|
||||
// Destroy any HMM state for the given the va_space.
|
||||
@ -87,6 +89,10 @@ typedef struct
|
||||
//
|
||||
// Return NV_ERR_INVALID_ADDRESS if there is no VMA associated with the
|
||||
// address 'addr' or the VMA does not have at least PROT_READ permission.
|
||||
// The caller is also responsible for checking that there is no UVM
|
||||
// va_range covering the given address before calling this function.
|
||||
// If va_block_context is not NULL, the VMA is cached in
|
||||
// va_block_context->hmm.vma.
|
||||
// Locking: This function must be called with mm retained and locked for
|
||||
// at least read and the va_space lock at least for read.
|
||||
NV_STATUS uvm_hmm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
@ -94,23 +100,53 @@ typedef struct
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_t **va_block_ptr);
|
||||
|
||||
// Find the VMA for the given address and set va_block_context->hmm.vma.
|
||||
// Return NV_ERR_INVALID_ADDRESS if va_block_context->mm is NULL or there
|
||||
// is no VMA associated with the address 'addr' or the VMA does not have at
|
||||
// least PROT_READ permission.
|
||||
// Locking: This function must be called with mm retained and locked for
|
||||
// at least read or mm equal to NULL.
|
||||
NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr);
|
||||
|
||||
// If va_block is a HMM va_block, check that va_block_context->hmm.vma is
|
||||
// not NULL and covers the given region. This always returns true and is
|
||||
// intended to only be used with UVM_ASSERT().
|
||||
// Locking: This function must be called with the va_block lock held and if
|
||||
// va_block is a HMM block, va_block_context->mm must be retained and
|
||||
// locked for at least read.
|
||||
bool uvm_hmm_va_block_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
// Find or create a HMM va_block and mark it so the next va_block split
|
||||
// will fail for testing purposes.
|
||||
// Locking: This function must be called with mm retained and locked for
|
||||
// at least read and the va_space lock at least for read.
|
||||
NV_STATUS uvm_hmm_test_va_block_inject_split_error(uvm_va_space_t *va_space, NvU64 addr);
|
||||
|
||||
// Reclaim any HMM va_blocks that overlap the given range.
|
||||
// Note that 'end' is inclusive.
|
||||
// A HMM va_block can be reclaimed if it doesn't contain any "valid" VMAs.
|
||||
// See uvm_hmm_vma_is_valid() for details.
|
||||
// Note that 'end' is inclusive. If mm is NULL, any HMM va_block in the
|
||||
// range will be reclaimed which assumes that the mm is being torn down
|
||||
// and was not retained.
|
||||
// Return values:
|
||||
// NV_ERR_NO_MEMORY: Reclaim required a block split, which failed.
|
||||
// NV_OK: There were no HMM blocks in the range, or all HMM
|
||||
// blocks in the range were successfully reclaimed.
|
||||
// Locking: If mm is not NULL, it must equal va_space_mm.mm, the caller
|
||||
// must hold a reference on it, and it must be locked for at least read
|
||||
// mode. Also, the va_space lock must be held in write mode.
|
||||
// must retain it with uvm_va_space_mm_or_current_retain() or be sure that
|
||||
// mm->mm_users is not zero, and it must be locked for at least read mode.
|
||||
// Also, the va_space lock must be held in write mode.
|
||||
// TODO: Bug 3372166: add asynchronous va_block reclaim.
|
||||
NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
NvU64 start,
|
||||
NvU64 end);
|
||||
|
||||
// This is called to update the va_space tree of HMM va_blocks after an
|
||||
// existing va_block is split.
|
||||
// Locking: the va_space lock must be held in write mode.
|
||||
void uvm_hmm_va_block_split_tree(uvm_va_block_t *existing_va_block, uvm_va_block_t *new_block);
|
||||
|
||||
// Find a HMM policy range that needs to be split. The callback function
|
||||
// 'split_needed_cb' returns true if the policy range needs to be split.
|
||||
// If a policy range is split, the existing range is truncated to
|
||||
@ -148,7 +184,7 @@ typedef struct
|
||||
// Note that 'last_address' is inclusive.
|
||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be write locked
|
||||
// and the va_space lock must be held in write mode.
|
||||
// TODO: Bug 2046423: need to implement read duplication support in Linux.
|
||||
// TODO: Bug 3660922: need to implement HMM read duplication support.
|
||||
static NV_STATUS uvm_hmm_set_read_duplication(uvm_va_space_t *va_space,
|
||||
uvm_read_duplication_policy_t new_policy,
|
||||
NvU64 base,
|
||||
@ -159,10 +195,11 @@ typedef struct
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// Set va_block_context->policy to the policy covering the given address
// 'addr' and update the ending address '*endp' to the minimum of *endp,
// va_block_context->hmm.vma->vm_end - 1, and the ending address of the
// policy range.
// This function assigns va_block_context->policy to the policy covering
// the given address 'addr' and assigns the ending address '*endp' to the
// minimum of va_block->end, va_block_context->hmm.vma->vm_end - 1, and the
// ending address of the policy range. Note that va_block_context->hmm.vma
// is expected to be initialized before calling this function.
// Locking: This function must be called with
// va_block_context->hmm.vma->vm_mm retained and locked for at least read and
// the va_block lock held.
@ -171,11 +208,11 @@ typedef struct
unsigned long addr,
|
||||
NvU64 *endp);
|
||||
|
||||
// Find the VMA for the page index 'page_index',
// set va_block_context->policy to the policy covering the given address,
// and update the ending page range '*outerp' to the minimum of *outerp,
// va_block_context->hmm.vma->vm_end - 1, and the ending address of the
// policy range.
// This function finds the VMA for the page index 'page_index' and assigns
// it to va_block_context->vma, sets va_block_context->policy to the policy
// covering the given address, and sets the ending page range '*outerp'
// to the minimum of *outerp, va_block_context->hmm.vma->vm_end - 1, the
// ending address of the policy range, and va_block->end.
// Return NV_ERR_INVALID_ADDRESS if no VMA is found; otherwise, NV_OK.
// Locking: This function must be called with
// va_block_context->hmm.vma->vm_mm retained and locked for at least read and
@ -189,6 +226,48 @@ typedef struct
// Locking: va_space lock must be held in write mode.
|
||||
NV_STATUS uvm_hmm_clear_thrashing_policy(uvm_va_space_t *va_space);
|
||||
|
||||
// Return the expanded region around 'address' limited to the intersection
// of va_block start/end, vma start/end, and policy start/end.
// va_block_context must not be NULL, va_block_context->hmm.vma must be
// valid (this is usually set by uvm_hmm_va_block_find_create()), and
// va_block_context->policy must be valid.
// Locking: the caller must hold mm->mmap_lock in at least read mode, the
// va_space lock must be held in at least read mode, and the va_block lock
// held.
uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
                                                  uvm_va_block_context_t *va_block_context,
                                                  NvU64 address);

// Return the logical protection allowed for the page at the given address
// in a HMM va_block.
// va_block_context must not be NULL and va_block_context->hmm.vma must be
// valid (this is usually set by uvm_hmm_va_block_find_create()).
// Locking: the caller must hold va_block_context->mm mmap_lock in at least
// read mode.
uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
                                        uvm_va_block_context_t *va_block_context,
                                        NvU64 addr);

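Illustrative only, not part of this commit: how a fault-servicing path might consult the logical protection. The early-out shown is a simplification; real servicing also compares the result against the fault's access type.

    uvm_prot_t logical_prot = uvm_hmm_compute_logical_prot(va_block, va_block_context, addr);

    // A VMA that grants no access at 'addr' cannot be serviced.
    if (logical_prot == UVM_PROT_NONE)
        return NV_ERR_INVALID_ADDRESS;
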
NV_STATUS uvm_test_hmm_init(UVM_TEST_HMM_INIT_PARAMS *params, struct file *filp);
|
||||
|
||||
NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params,
|
||||
struct file *filp);
|
||||
|
||||
NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
UVM_TEST_VA_RANGE_INFO_PARAMS *params);
|
||||
|
||||
// Return true if the new residency location for a GPU fault should be
// system memory.
// va_block_context must not be NULL and va_block_context->hmm.vma must be
// valid (this is usually set by uvm_hmm_va_block_find_create()).
// TODO: Bug 3660968: Remove this hack as soon as HMM migration is
// implemented for VMAs other than anonymous memory.
// Locking: the va_block lock must be held. If the va_block is a HMM
// va_block, the va_block_context->mm must be retained and locked for at
// least read.
bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
                             uvm_va_block_context_t *va_block_context);

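Illustrative only, not part of this commit: one way a fault handler could honor the hint above when picking a destination (UVM_ID_CPU is assumed from the UVM processor-id helpers; 'new_residency' is a hypothetical local):

    uvm_processor_id_t residency = new_residency;

    // Until HMM migration covers more than anonymous memory, force system
    // memory as the destination when the block asks for it.
    if (uvm_va_block_is_hmm(va_block) && uvm_hmm_must_use_sysmem(va_block, va_block_context))
        residency = UVM_ID_CPU;
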
#else // UVM_IS_CONFIG_HMM()
|
||||
|
||||
static bool uvm_hmm_is_enabled(uvm_va_space_t *va_space)
|
||||
@ -230,6 +309,23 @@ typedef struct
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_find_vma(uvm_va_block_context_t *va_block_context, NvU64 addr)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static bool uvm_hmm_va_block_context_vma_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_test_va_block_inject_split_error(uvm_va_space_t *va_space, NvU64 addr)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_block_reclaim(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
NvU64 start,
|
||||
@ -238,6 +334,10 @@ typedef struct
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static void uvm_hmm_va_block_split_tree(uvm_va_block_t *existing_va_block, uvm_va_block_t *new_block)
|
||||
{
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_split_as_needed(uvm_va_space_t *va_space,
|
||||
NvU64 addr,
|
||||
uvm_va_policy_is_split_needed_t split_needed_cb,
|
||||
@ -291,6 +391,44 @@ typedef struct
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static uvm_va_block_region_t uvm_hmm_get_prefetch_region(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 address)
|
||||
{
|
||||
return (uvm_va_block_region_t){};
|
||||
}
|
||||
|
||||
static uvm_prot_t uvm_hmm_compute_logical_prot(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
NvU64 addr)
|
||||
{
|
||||
return UVM_PROT_NONE;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_test_hmm_init(UVM_TEST_HMM_INIT_PARAMS *params, struct file *filp)
|
||||
{
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_test_split_invalidate_delay(UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS *params,
|
||||
struct file *filp)
|
||||
{
|
||||
return NV_ERR_INVALID_STATE;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_hmm_va_range_info(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
UVM_TEST_VA_RANGE_INFO_PARAMS *params)
|
||||
{
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
}
|
||||
|
||||
static bool uvm_hmm_must_use_sysmem(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
#endif // UVM_IS_CONFIG_HMM()
|
||||
|
||||
#endif // _UVM_HMM_H_
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2021-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -35,7 +35,7 @@ NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *f
|
||||
uvm_va_block_t *hmm_block = NULL;
|
||||
NV_STATUS status;
|
||||
|
||||
mm = uvm_va_space_mm_retain(va_space);
|
||||
mm = uvm_va_space_mm_or_current_retain(va_space);
|
||||
if (!mm)
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
|
||||
@ -61,7 +61,7 @@ NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *f
|
||||
status = uvm_hmm_va_block_find_create(va_space, 0UL, NULL, &hmm_block);
|
||||
TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);
|
||||
|
||||
// Try to create an HMM va_block which overlaps a UVM managed block.
|
||||
// Try to create an HMM va_block which overlaps a managed block.
|
||||
// It should fail.
|
||||
status = uvm_hmm_va_block_find_create(va_space, params->uvm_address, NULL, &hmm_block);
|
||||
TEST_CHECK_GOTO(status == NV_ERR_INVALID_ADDRESS, done);
|
||||
@ -77,14 +77,14 @@ NV_STATUS uvm_test_hmm_sanity(UVM_TEST_HMM_SANITY_PARAMS *params, struct file *f
|
||||
done:
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_up_read_mmap_lock(mm);
|
||||
uvm_va_space_mm_release(va_space);
|
||||
uvm_va_space_mm_or_current_release(va_space, mm);
|
||||
|
||||
return status;
|
||||
|
||||
out:
|
||||
uvm_va_space_up_write(va_space);
|
||||
uvm_up_write_mmap_lock(mm);
|
||||
uvm_va_space_mm_release(va_space);
|
||||
uvm_va_space_mm_or_current_release(va_space, mm);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
@ -34,31 +34,6 @@
|
||||
// the (out-of-tree) UVM driver from changes to the upstream Linux kernel.
|
||||
//
|
||||
|
||||
#if !defined(NV_ADDRESS_SPACE_INIT_ONCE_PRESENT)
|
||||
void address_space_init_once(struct address_space *mapping)
|
||||
{
|
||||
memset(mapping, 0, sizeof(*mapping));
|
||||
INIT_RADIX_TREE(&mapping->page_tree, GFP_ATOMIC);
|
||||
|
||||
#if defined(NV_ADDRESS_SPACE_HAS_RWLOCK_TREE_LOCK)
|
||||
//
|
||||
// The .tree_lock member variable was changed from type rwlock_t, to
|
||||
// spinlock_t, on 25 July 2008, by mainline commit
|
||||
// 19fd6231279be3c3bdd02ed99f9b0eb195978064.
|
||||
//
|
||||
rwlock_init(&mapping->tree_lock);
|
||||
#else
|
||||
spin_lock_init(&mapping->tree_lock);
|
||||
#endif
|
||||
|
||||
spin_lock_init(&mapping->i_mmap_lock);
|
||||
INIT_LIST_HEAD(&mapping->private_list);
|
||||
spin_lock_init(&mapping->private_lock);
|
||||
INIT_RAW_PRIO_TREE_ROOT(&mapping->i_mmap);
|
||||
INIT_LIST_HEAD(&mapping->i_mmap_nonlinear);
|
||||
}
|
||||
#endif
|
||||
|
||||
#if UVM_CGROUP_ACCOUNTING_SUPPORTED()
|
||||
void uvm_memcg_context_start(uvm_memcg_context_t *context, struct mm_struct *mm)
|
||||
{
|
||||
|
@ -88,7 +88,7 @@
|
||||
|
||||
#include "nv-kthread-q.h"
|
||||
|
||||
#if NV_KTHREAD_Q_SUPPORTS_AFFINITY() == 1 && defined(NV_CPUMASK_OF_NODE_PRESENT)
|
||||
#if defined(NV_CPUMASK_OF_NODE_PRESENT)
|
||||
#define UVM_THREAD_AFFINITY_SUPPORTED() 1
|
||||
#else
|
||||
#define UVM_THREAD_AFFINITY_SUPPORTED() 0
|
||||
@ -136,8 +136,8 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
|
||||
#endif
|
||||
|
||||
// See bug 1707453 for further details about setting the minimum kernel version.
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 32)
|
||||
# error This driver does not support kernels older than 2.6.32!
|
||||
#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0)
|
||||
# error This driver does not support kernels older than 3.10!
|
||||
#endif
|
||||
|
||||
#if !defined(VM_RESERVED)
|
||||
@ -217,10 +217,6 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
|
||||
|
||||
#define NV_UVM_GFP_FLAGS (GFP_KERNEL)
|
||||
|
||||
#if !defined(NV_ADDRESS_SPACE_INIT_ONCE_PRESENT)
|
||||
void address_space_init_once(struct address_space *mapping);
|
||||
#endif
|
||||
|
||||
// Develop builds define DEBUG but enable optimization
|
||||
#if defined(DEBUG) && !defined(NVIDIA_UVM_DEVELOP)
|
||||
// Wrappers for functions not building correctly without optimizations on,
|
||||
@ -352,23 +348,6 @@ static inline NvU64 NV_GETTIME(void)
|
||||
(bit) = find_next_zero_bit((addr), (size), (bit) + 1))
|
||||
#endif
|
||||
|
||||
// bitmap_clear was added in 2.6.33 via commit c1a2a962a2ad103846e7950b4591471fabecece7
|
||||
#if !defined(NV_BITMAP_CLEAR_PRESENT)
|
||||
static inline void bitmap_clear(unsigned long *map, unsigned int start, int len)
|
||||
{
|
||||
unsigned int index = start;
|
||||
for_each_set_bit_from(index, map, start + len)
|
||||
__clear_bit(index, map);
|
||||
}
|
||||
|
||||
static inline void bitmap_set(unsigned long *map, unsigned int start, int len)
|
||||
{
|
||||
unsigned int index = start;
|
||||
for_each_clear_bit_from(index, map, start + len)
|
||||
__set_bit(index, map);
|
||||
}
|
||||
#endif
|
||||
|
||||
// Added in 2.6.24
|
||||
#ifndef ACCESS_ONCE
|
||||
#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x))
|
||||
@ -439,17 +418,6 @@ static inline NvU64 NV_GETTIME(void)
|
||||
#define PAGE_ALIGNED(addr) (((addr) & (PAGE_SIZE - 1)) == 0)
|
||||
#endif
|
||||
|
||||
// Added in 2.6.37 via commit e1ca7788dec6773b1a2bce51b7141948f2b8bccf
|
||||
#if !defined(NV_VZALLOC_PRESENT)
|
||||
static inline void *vzalloc(unsigned long size)
|
||||
{
|
||||
void *p = vmalloc(size);
|
||||
if (p)
|
||||
memset(p, 0, size);
|
||||
return p;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Changed in 3.17 via commit 743162013d40ca612b4cb53d3a200dff2d9ab26e
|
||||
#if (NV_WAIT_ON_BIT_LOCK_ARGUMENT_COUNT == 3)
|
||||
#define UVM_WAIT_ON_BIT_LOCK(word, bit, mode) \
|
||||
@ -505,21 +473,6 @@ static bool radix_tree_empty(struct radix_tree_root *tree)
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined(NV_USLEEP_RANGE_PRESENT)
|
||||
static void __sched usleep_range(unsigned long min, unsigned long max)
|
||||
{
|
||||
unsigned min_msec = min / 1000;
|
||||
unsigned max_msec = max / 1000;
|
||||
|
||||
if (min_msec != 0)
|
||||
msleep(min_msec);
|
||||
else if (max_msec != 0)
|
||||
msleep(max_msec);
|
||||
else
|
||||
msleep(1);
|
||||
}
|
||||
#endif
|
||||
|
||||
typedef struct
|
||||
{
|
||||
struct mem_cgroup *new_memcg;
|
||||
|
@ -337,7 +337,9 @@
|
||||
//
|
||||
// - Channel lock
|
||||
// Order: UVM_LOCK_ORDER_CHANNEL
|
||||
// Spinlock (uvm_spinlock_t)
|
||||
// Spinlock (uvm_spinlock_t) or exclusive lock (mutex)
|
||||
//
|
||||
// Lock protecting the state of all the channels in a channel pool.
|
||||
//
|
||||
// - Tools global VA space list lock (g_tools_va_space_list_lock)
|
||||
// Order: UVM_LOCK_ORDER_TOOLS_VA_SPACE_LIST
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2021 NVIDIA Corporation
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -605,7 +605,7 @@ static NV_STATUS uvm_create_external_range(uvm_va_space_t *va_space, UVM_CREATE_
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
// The mm needs to be locked in order to remove stale HMM va_blocks.
|
||||
mm = uvm_va_space_mm_retain_lock(va_space);
|
||||
mm = uvm_va_space_mm_or_current_retain_lock(va_space);
|
||||
uvm_va_space_down_write(va_space);
|
||||
|
||||
// Create the new external VA range.
|
||||
@ -619,7 +619,7 @@ static NV_STATUS uvm_create_external_range(uvm_va_space_t *va_space, UVM_CREATE_
|
||||
}
|
||||
|
||||
uvm_va_space_up_write(va_space);
|
||||
uvm_va_space_mm_release_unlock(va_space, mm);
|
||||
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -636,6 +636,11 @@ static NV_STATUS set_ext_gpu_map_location(uvm_ext_gpu_map_t *ext_gpu_map,
|
||||
{
|
||||
uvm_gpu_t *owning_gpu;
|
||||
|
||||
if (!mem_info->deviceDescendant && !mem_info->sysmem) {
|
||||
ext_gpu_map->owning_gpu = NULL;
|
||||
ext_gpu_map->is_sysmem = false;
|
||||
return NV_OK;
|
||||
}
|
||||
// This is a local or peer allocation, so the owning GPU must have been
|
||||
// registered.
|
||||
owning_gpu = uvm_va_space_get_gpu_by_uuid(va_space, &mem_info->uuid);
|
||||
|
@ -523,7 +523,7 @@ static NV_STATUS mem_alloc_sysmem_chunks(uvm_mem_t *mem, struct mm_struct *mm, g
|
||||
|
||||
// In case of failure, the caller is required to handle cleanup by calling
|
||||
// uvm_mem_free
|
||||
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero)
|
||||
static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero, bool is_protected)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
@ -559,7 +559,7 @@ static NV_STATUS mem_alloc_vidmem_chunks(uvm_mem_t *mem, bool zero)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero)
|
||||
static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zero, bool is_protected)
|
||||
{
|
||||
if (uvm_mem_is_sysmem(mem)) {
|
||||
gfp_t gfp_flags;
|
||||
@ -581,7 +581,7 @@ static NV_STATUS mem_alloc_chunks(uvm_mem_t *mem, struct mm_struct *mm, bool zer
|
||||
return status;
|
||||
}
|
||||
|
||||
return mem_alloc_vidmem_chunks(mem, zero);
|
||||
return mem_alloc_vidmem_chunks(mem, zero, is_protected);
|
||||
}
|
||||
|
||||
static const char *mem_physical_source(uvm_mem_t *mem)
|
||||
@ -618,6 +618,7 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_mem_t *mem = NULL;
|
||||
bool is_protected = false;
|
||||
|
||||
UVM_ASSERT(params->size > 0);
|
||||
|
||||
@ -639,7 +640,7 @@ NV_STATUS uvm_mem_alloc(const uvm_mem_alloc_params_t *params, uvm_mem_t **mem_ou
|
||||
mem->physical_allocation_size = UVM_ALIGN_UP(mem->size, mem->chunk_size);
|
||||
mem->chunks_count = mem->physical_allocation_size / mem->chunk_size;
|
||||
|
||||
status = mem_alloc_chunks(mem, params->mm, params->zero);
|
||||
status = mem_alloc_chunks(mem, params->mm, params->zero, is_protected);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
@ -893,7 +894,7 @@ static void sysmem_unmap_gpu_phys(uvm_mem_t *mem, uvm_gpu_t *gpu)
|
||||
// partial map_gpu_sysmem_iommu() operation.
|
||||
break;
|
||||
}
|
||||
uvm_gpu_unmap_cpu_pages(gpu, dma_addrs[i], mem->chunk_size);
|
||||
uvm_gpu_unmap_cpu_pages(gpu->parent, dma_addrs[i], mem->chunk_size);
|
||||
dma_addrs[i] = 0;
|
||||
}
|
||||
|
||||
@ -914,7 +915,7 @@ static NV_STATUS sysmem_map_gpu_phys(uvm_mem_t *mem, uvm_gpu_t *gpu)
|
||||
return status;
|
||||
|
||||
for (i = 0; i < mem->chunks_count; ++i) {
|
||||
status = uvm_gpu_map_cpu_pages(gpu,
|
||||
status = uvm_gpu_map_cpu_pages(gpu->parent,
|
||||
mem->sysmem.pages[i],
|
||||
mem->chunk_size,
|
||||
&mem->sysmem.dma_addrs[uvm_global_id_gpu_index(gpu->global_id)][i]);
|
||||
|
@ -179,6 +179,8 @@ struct uvm_mem_struct
|
||||
//
|
||||
// There is no equivalent mask for vidmem, because only the backing
// GPU can physically access the memory
//
|
||||
// TODO: Bug 3723779: Share DMA mappings within a single parent GPU
|
||||
uvm_global_processor_mask_t mapped_on_phys;
|
||||
|
||||
struct page **pages;
|
||||
|
@ -207,6 +207,8 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
va_block_context->policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
|
||||
if (uvm_va_policy_is_read_duplicate(va_block_context->policy, va_space)) {
|
||||
status = uvm_va_block_make_resident_read_duplicate(va_block,
|
||||
va_block_retry,
|
||||
@ -466,6 +468,8 @@ static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
|
||||
{
|
||||
NvU64 preunmap_range_start = start;
|
||||
|
||||
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_range));
|
||||
|
||||
should_do_cpu_preunmap = should_do_cpu_preunmap && va_range_should_do_cpu_preunmap(va_block_context->policy,
|
||||
va_range->va_space);
|
||||
|
||||
@ -942,10 +946,8 @@ done:
|
||||
// benchmarks to see if a two-pass approach would be faster (first
|
||||
// pass pushes all GPU work asynchronously, second pass updates CPU
|
||||
// mappings synchronously).
|
||||
if (mm) {
|
||||
if (mm)
|
||||
uvm_up_read_mmap_lock_out_of_order(mm);
|
||||
uvm_va_space_mm_or_current_release(va_space, mm);
|
||||
}
|
||||
|
||||
if (tracker_ptr) {
|
||||
// If requested, release semaphore
|
||||
@ -973,6 +975,7 @@ done:
|
||||
}
|
||||
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_or_current_release(va_space, mm);
|
||||
|
||||
// If the migration is known to be complete, eagerly dispatch the migration
|
||||
// events, instead of processing them on a later event flush. Note that an
|
||||
@ -1043,13 +1046,12 @@ done:
|
||||
// benchmarks to see if a two-pass approach would be faster (first
|
||||
// pass pushes all GPU work asynchronously, second pass updates CPU
|
||||
// mappings synchronously).
|
||||
if (mm) {
|
||||
if (mm)
|
||||
uvm_up_read_mmap_lock_out_of_order(mm);
|
||||
uvm_va_space_mm_or_current_release(va_space, mm);
|
||||
}
|
||||
|
||||
tracker_status = uvm_tracker_wait_deinit(&local_tracker);
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_or_current_release(va_space, mm);
|
||||
|
||||
// This API is synchronous, so wait for migrations to finish
|
||||
uvm_tools_flush_events();
|
||||
|
@ -74,7 +74,7 @@ static NV_STATUS migrate_vma_page_copy_address(struct page *page,
|
||||
}
|
||||
else {
|
||||
// Sysmem/Indirect Peer
|
||||
NV_STATUS status = uvm_gpu_map_cpu_page(copying_gpu, page, &state->dma.addrs[page_index]);
|
||||
NV_STATUS status = uvm_gpu_map_cpu_page(copying_gpu->parent, page, &state->dma.addrs[page_index]);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@ -628,7 +628,7 @@ void uvm_migrate_vma_finalize_and_map(struct migrate_vma *args, migrate_vma_stat
|
||||
if (state->dma.num_pages > 0) {
|
||||
|
||||
for_each_set_bit(i, state->dma.page_mask, state->num_pages)
|
||||
uvm_gpu_unmap_cpu_page(state->dma.addrs_gpus[i], state->dma.addrs[i]);
|
||||
uvm_gpu_unmap_cpu_page(state->dma.addrs_gpus[i]->parent, state->dma.addrs[i]);
|
||||
}
|
||||
|
||||
UVM_ASSERT(!bitmap_intersects(state->populate_pages_mask, state->allocation_failed_mask, state->num_pages));
|
||||
|
@ -34,8 +34,8 @@ typedef struct
|
||||
{
|
||||
uvm_va_space_t *va_space;
|
||||
struct mm_struct *mm;
|
||||
const unsigned long start;
|
||||
const unsigned long length;
|
||||
unsigned long start;
|
||||
unsigned long length;
|
||||
uvm_processor_id_t dst_id;
|
||||
|
||||
// dst_node_id may be clobbered by uvm_migrate_pageable().
|
||||
|
@ -132,7 +132,7 @@ static NV_STATUS phys_mem_allocate_sysmem(uvm_page_tree_t *tree, NvLength size,
|
||||
|
||||
// Check for fake GPUs from the unit test
|
||||
if (tree->gpu->parent->pci_dev)
|
||||
status = uvm_gpu_map_cpu_pages(tree->gpu, out->handle.page, UVM_PAGE_ALIGN_UP(size), &dma_addr);
|
||||
status = uvm_gpu_map_cpu_pages(tree->gpu->parent, out->handle.page, UVM_PAGE_ALIGN_UP(size), &dma_addr);
|
||||
else
|
||||
dma_addr = page_to_phys(out->handle.page);
|
||||
|
||||
@ -217,7 +217,7 @@ static void phys_mem_deallocate_sysmem(uvm_page_tree_t *tree, uvm_mmu_page_table
|
||||
|
||||
UVM_ASSERT(ptr->addr.aperture == UVM_APERTURE_SYS);
|
||||
if (tree->gpu->parent->pci_dev)
|
||||
uvm_gpu_unmap_cpu_pages(tree->gpu, ptr->addr.address, UVM_PAGE_ALIGN_UP(ptr->size));
|
||||
uvm_gpu_unmap_cpu_pages(tree->gpu->parent, ptr->addr.address, UVM_PAGE_ALIGN_UP(ptr->size));
|
||||
__free_pages(ptr->handle.page, get_order(ptr->size));
|
||||
}
|
||||
|
||||
|
@ -50,7 +50,6 @@ NV_STATUS uvm_perf_heuristics_init()
|
||||
void uvm_perf_heuristics_exit()
|
||||
{
|
||||
uvm_perf_access_counters_exit();
|
||||
uvm_perf_prefetch_exit();
|
||||
uvm_perf_thrashing_exit();
|
||||
}
|
||||
|
||||
@ -73,9 +72,6 @@ NV_STATUS uvm_perf_heuristics_load(uvm_va_space_t *va_space)
|
||||
NV_STATUS status;
|
||||
|
||||
status = uvm_perf_thrashing_load(va_space);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
status = uvm_perf_prefetch_load(va_space);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
status = uvm_perf_access_counters_load(va_space);
|
||||
@ -105,6 +101,5 @@ void uvm_perf_heuristics_unload(uvm_va_space_t *va_space)
|
||||
uvm_assert_rwsem_locked_write(&va_space->lock);
|
||||
|
||||
uvm_perf_access_counters_unload(va_space);
|
||||
uvm_perf_prefetch_unload(va_space);
|
||||
uvm_perf_thrashing_unload(va_space);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016 NVIDIA Corporation
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -45,7 +45,6 @@
|
||||
//
|
||||
// - UVM_PERF_MODULE_TYPE_THRASHING: detects memory thrashing scenarios and
|
||||
// provides thrashing prevention mechanisms
|
||||
// - UVM_PERF_MODULE_TYPE_PREFETCH: detects memory prefetching opportunities
|
||||
// - UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS: migrates memory using access counter
|
||||
// notifications
|
||||
typedef enum
|
||||
@ -54,7 +53,6 @@ typedef enum
|
||||
|
||||
UVM_PERF_MODULE_TYPE_TEST = UVM_PERF_MODULE_FIRST_TYPE,
|
||||
UVM_PERF_MODULE_TYPE_THRASHING,
|
||||
UVM_PERF_MODULE_TYPE_PREFETCH,
|
||||
UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS,
|
||||
|
||||
UVM_PERF_MODULE_TYPE_COUNT,
|
||||
|
@ -30,31 +30,6 @@
|
||||
#include "uvm_va_range.h"
|
||||
#include "uvm_test.h"
|
||||
|
||||
// Global cache to allocate the per-VA block prefetch detection structures
|
||||
static struct kmem_cache *g_prefetch_info_cache __read_mostly;
|
||||
|
||||
// Per-VA block prefetch detection structure
|
||||
typedef struct
|
||||
{
|
||||
uvm_page_mask_t prefetch_pages;
|
||||
|
||||
uvm_page_mask_t migrate_pages;
|
||||
|
||||
uvm_va_block_bitmap_tree_t bitmap_tree;
|
||||
|
||||
uvm_processor_id_t last_migration_proc_id;
|
||||
|
||||
uvm_va_block_region_t region;
|
||||
|
||||
size_t big_page_size;
|
||||
|
||||
uvm_va_block_region_t big_pages_region;
|
||||
|
||||
NvU16 pending_prefetch_pages;
|
||||
|
||||
NvU16 fault_migrations_to_last_proc;
|
||||
} block_prefetch_info_t;
|
||||
|
||||
//
|
||||
// Tunables for prefetch detection/prevention (configurable via module parameters)
|
||||
//
|
||||
@ -88,19 +63,54 @@ static bool g_uvm_perf_prefetch_enable;
|
||||
static unsigned g_uvm_perf_prefetch_threshold;
|
||||
static unsigned g_uvm_perf_prefetch_min_faults;
|
||||
|
||||
// Callback declaration for the performance heuristics events
|
||||
static void prefetch_block_destroy_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data);
|
||||
void uvm_perf_prefetch_bitmap_tree_iter_init(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_perf_prefetch_bitmap_tree_iter_t *iter)
|
||||
{
|
||||
UVM_ASSERT(bitmap_tree->level_count > 0);
|
||||
UVM_ASSERT_MSG(page_index < bitmap_tree->leaf_count,
|
||||
"%zd vs %zd",
|
||||
(size_t)page_index,
|
||||
(size_t)bitmap_tree->leaf_count);
|
||||
|
||||
static uvm_va_block_region_t compute_prefetch_region(uvm_page_index_t page_index, block_prefetch_info_t *prefetch_info)
|
||||
iter->level_idx = bitmap_tree->level_count - 1;
|
||||
iter->node_idx = page_index;
|
||||
}
|
||||
|
||||
uvm_va_block_region_t uvm_perf_prefetch_bitmap_tree_iter_get_range(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
const uvm_perf_prefetch_bitmap_tree_iter_t *iter)
|
||||
{
|
||||
NvU16 range_leaves = uvm_perf_tree_iter_leaf_range(bitmap_tree, iter);
|
||||
NvU16 range_start = uvm_perf_tree_iter_leaf_range_start(bitmap_tree, iter);
|
||||
uvm_va_block_region_t subregion = uvm_va_block_region(range_start, range_start + range_leaves);
|
||||
|
||||
UVM_ASSERT(iter->level_idx >= 0);
|
||||
UVM_ASSERT(iter->level_idx < bitmap_tree->level_count);
|
||||
|
||||
return subregion;
|
||||
}
|
||||
|
||||
NvU16 uvm_perf_prefetch_bitmap_tree_iter_get_count(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
const uvm_perf_prefetch_bitmap_tree_iter_t *iter)
|
||||
{
|
||||
uvm_va_block_region_t subregion = uvm_perf_prefetch_bitmap_tree_iter_get_range(bitmap_tree, iter);
|
||||
|
||||
return uvm_page_mask_region_weight(&bitmap_tree->pages, subregion);
|
||||
}
|
||||
|
||||
static uvm_va_block_region_t compute_prefetch_region(uvm_page_index_t page_index,
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
uvm_va_block_region_t max_prefetch_region)
|
||||
{
|
||||
NvU16 counter;
|
||||
uvm_va_block_bitmap_tree_iter_t iter;
|
||||
uvm_va_block_bitmap_tree_t *bitmap_tree = &prefetch_info->bitmap_tree;
|
||||
uvm_va_block_region_t prefetch_region = uvm_va_block_region(bitmap_tree->leaf_count,
|
||||
bitmap_tree->leaf_count + 1);
|
||||
uvm_perf_prefetch_bitmap_tree_iter_t iter;
|
||||
uvm_va_block_region_t prefetch_region = uvm_va_block_region(0, 0);
|
||||
|
||||
uvm_va_block_bitmap_tree_traverse_counters(counter, bitmap_tree, page_index, &iter) {
|
||||
uvm_va_block_region_t subregion = uvm_va_block_bitmap_tree_iter_get_range(bitmap_tree, &iter);
|
||||
uvm_perf_prefetch_bitmap_tree_traverse_counters(counter,
|
||||
bitmap_tree,
|
||||
page_index - max_prefetch_region.first + bitmap_tree->offset,
|
||||
&iter) {
|
||||
uvm_va_block_region_t subregion = uvm_perf_prefetch_bitmap_tree_iter_get_range(bitmap_tree, &iter);
|
||||
NvU16 subregion_pages = uvm_va_block_region_num_pages(subregion);
|
||||
|
||||
UVM_ASSERT(counter <= subregion_pages);
|
||||
@ -109,289 +119,287 @@ static uvm_va_block_region_t compute_prefetch_region(uvm_page_index_t page_index
|
||||
}
|
||||
|
||||
// Clamp prefetch region to actual pages
|
||||
if (prefetch_region.first < bitmap_tree->leaf_count) {
|
||||
if (prefetch_region.first < prefetch_info->region.first)
|
||||
prefetch_region.first = prefetch_info->region.first;
|
||||
if (prefetch_region.outer) {
|
||||
prefetch_region.first += max_prefetch_region.first;
|
||||
if (prefetch_region.first < bitmap_tree->offset) {
|
||||
prefetch_region.first = bitmap_tree->offset;
|
||||
}
|
||||
else {
|
||||
prefetch_region.first -= bitmap_tree->offset;
|
||||
if (prefetch_region.first < max_prefetch_region.first)
|
||||
prefetch_region.first = max_prefetch_region.first;
|
||||
}
|
||||
|
||||
if (prefetch_region.outer > prefetch_info->region.outer)
|
||||
prefetch_region.outer = prefetch_info->region.outer;
|
||||
prefetch_region.outer += max_prefetch_region.first;
|
||||
if (prefetch_region.outer < bitmap_tree->offset) {
|
||||
prefetch_region.outer = bitmap_tree->offset;
|
||||
}
|
||||
else {
|
||||
prefetch_region.outer -= bitmap_tree->offset;
|
||||
if (prefetch_region.outer > max_prefetch_region.outer)
|
||||
prefetch_region.outer = max_prefetch_region.outer;
|
||||
}
|
||||
}
|
||||
|
||||
return prefetch_region;
|
||||
}
|
||||
|
||||
// Performance heuristics module for prefetch
|
||||
static uvm_perf_module_t g_module_prefetch;
|
||||
|
||||
static uvm_perf_module_event_callback_desc_t g_callbacks_prefetch[] = {
|
||||
{ UVM_PERF_EVENT_BLOCK_DESTROY, prefetch_block_destroy_cb },
|
||||
{ UVM_PERF_EVENT_MODULE_UNLOAD, prefetch_block_destroy_cb },
|
||||
{ UVM_PERF_EVENT_BLOCK_SHRINK, prefetch_block_destroy_cb }
|
||||
};
|
||||
|
||||
// Get the prefetch detection struct for the given block
|
||||
static block_prefetch_info_t *prefetch_info_get(uvm_va_block_t *va_block)
|
||||
{
|
||||
return uvm_perf_module_type_data(va_block->perf_modules_data, UVM_PERF_MODULE_TYPE_PREFETCH);
|
||||
}
|
||||
|
||||
static void prefetch_info_destroy(uvm_va_block_t *va_block)
|
||||
{
|
||||
block_prefetch_info_t *prefetch_info = prefetch_info_get(va_block);
|
||||
if (prefetch_info) {
|
||||
kmem_cache_free(g_prefetch_info_cache, prefetch_info);
|
||||
uvm_perf_module_type_unset_data(va_block->perf_modules_data, UVM_PERF_MODULE_TYPE_PREFETCH);
|
||||
}
|
||||
}
|
||||
|
||||
// Get the prefetch detection struct for the given block or create it if it
|
||||
// does not exist
|
||||
static block_prefetch_info_t *prefetch_info_get_create(uvm_va_block_t *va_block)
|
||||
{
|
||||
block_prefetch_info_t *prefetch_info = prefetch_info_get(va_block);
|
||||
if (!prefetch_info) {
|
||||
// Create some ghost leaves so we can align the tree to big page boundary. We use the
|
||||
// largest page size to handle the worst-case scenario
|
||||
size_t big_page_size = UVM_PAGE_SIZE_128K;
|
||||
uvm_va_block_region_t big_pages_region = uvm_va_block_big_page_region_all(va_block, big_page_size);
|
||||
size_t num_leaves = uvm_va_block_num_cpu_pages(va_block);
|
||||
|
||||
// If the va block is not big enough to fit 128KB pages, maybe it still can fit 64KB pages
|
||||
if (big_pages_region.outer == 0) {
|
||||
big_page_size = UVM_PAGE_SIZE_64K;
|
||||
big_pages_region = uvm_va_block_big_page_region_all(va_block, big_page_size);
|
||||
}
|
||||
|
||||
if (big_pages_region.first > 0)
|
||||
num_leaves += (big_page_size / PAGE_SIZE - big_pages_region.first);
|
||||
|
||||
UVM_ASSERT(num_leaves <= PAGES_PER_UVM_VA_BLOCK);
|
||||
|
||||
prefetch_info = nv_kmem_cache_zalloc(g_prefetch_info_cache, NV_UVM_GFP_FLAGS);
|
||||
if (!prefetch_info)
|
||||
goto fail;
|
||||
|
||||
prefetch_info->last_migration_proc_id = UVM_ID_INVALID;
|
||||
|
||||
uvm_va_block_bitmap_tree_init_from_page_count(&prefetch_info->bitmap_tree, num_leaves);
|
||||
|
||||
uvm_perf_module_type_set_data(va_block->perf_modules_data, prefetch_info, UVM_PERF_MODULE_TYPE_PREFETCH);
|
||||
}
|
||||
|
||||
return prefetch_info;
|
||||
|
||||
fail:
|
||||
prefetch_info_destroy(va_block);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void grow_fault_granularity_if_no_thrashing(block_prefetch_info_t *prefetch_info,
|
||||
static void grow_fault_granularity_if_no_thrashing(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
uvm_va_block_region_t region,
|
||||
uvm_page_index_t first,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
const uvm_page_mask_t *thrashing_pages)
|
||||
{
|
||||
if (!uvm_page_mask_region_empty(faulted_pages, region) &&
|
||||
(!thrashing_pages || uvm_page_mask_region_empty(thrashing_pages, region))) {
|
||||
region.first += prefetch_info->region.first;
|
||||
region.outer += prefetch_info->region.first;
|
||||
uvm_page_mask_region_fill(&prefetch_info->bitmap_tree.pages, region);
|
||||
UVM_ASSERT(region.first >= first);
|
||||
region.first = region.first - first + bitmap_tree->offset;
|
||||
region.outer = region.outer - first + bitmap_tree->offset;
|
||||
UVM_ASSERT(region.outer <= bitmap_tree->leaf_count);
|
||||
uvm_page_mask_region_fill(&bitmap_tree->pages, region);
|
||||
}
|
||||
}
|
||||
|
||||
static void grow_fault_granularity(uvm_va_block_t *va_block,
|
||||
block_prefetch_info_t *prefetch_info,
|
||||
static void grow_fault_granularity(uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
NvU32 big_page_size,
|
||||
uvm_va_block_region_t big_pages_region,
|
||||
uvm_va_block_region_t max_prefetch_region,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
const uvm_page_mask_t *thrashing_pages)
|
||||
{
|
||||
size_t num_big_pages;
|
||||
size_t big_page_index;
|
||||
uvm_va_block_region_t block_region = uvm_va_block_region_from_block(va_block);
|
||||
uvm_page_index_t pages_per_big_page = big_page_size / PAGE_SIZE;
|
||||
uvm_page_index_t page_index;
|
||||
|
||||
// Migrate whole block if no big pages and no page in it is thrashing
|
||||
if (!big_pages_region.outer) {
|
||||
grow_fault_granularity_if_no_thrashing(bitmap_tree,
|
||||
max_prefetch_region,
|
||||
max_prefetch_region.first,
|
||||
faulted_pages,
|
||||
thrashing_pages);
|
||||
return;
|
||||
}
|
||||
|
||||
// Migrate whole "prefix" if no page in it is thrashing
|
||||
if (prefetch_info->big_pages_region.first > 0) {
|
||||
uvm_va_block_region_t prefix_region = uvm_va_block_region(0, prefetch_info->big_pages_region.first);
|
||||
if (big_pages_region.first > max_prefetch_region.first) {
|
||||
uvm_va_block_region_t prefix_region = uvm_va_block_region(max_prefetch_region.first, big_pages_region.first);
|
||||
|
||||
grow_fault_granularity_if_no_thrashing(prefetch_info, prefix_region, faulted_pages, thrashing_pages);
|
||||
grow_fault_granularity_if_no_thrashing(bitmap_tree,
|
||||
prefix_region,
|
||||
max_prefetch_region.first,
|
||||
faulted_pages,
|
||||
thrashing_pages);
|
||||
}
|
||||
|
||||
// Migrate whole big pages if they are not thrashing
|
||||
num_big_pages = uvm_va_block_num_big_pages(va_block, prefetch_info->big_page_size);
|
||||
for (big_page_index = 0; big_page_index < num_big_pages; ++big_page_index) {
|
||||
uvm_va_block_region_t big_region = uvm_va_block_big_page_region(va_block,
|
||||
big_page_index,
|
||||
prefetch_info->big_page_size);
|
||||
for (page_index = big_pages_region.first;
|
||||
page_index < big_pages_region.outer;
|
||||
page_index += pages_per_big_page) {
|
||||
uvm_va_block_region_t big_region = uvm_va_block_region(page_index,
|
||||
page_index + pages_per_big_page);
|
||||
|
||||
grow_fault_granularity_if_no_thrashing(prefetch_info, big_region, faulted_pages, thrashing_pages);
|
||||
grow_fault_granularity_if_no_thrashing(bitmap_tree,
|
||||
big_region,
|
||||
max_prefetch_region.first,
|
||||
faulted_pages,
|
||||
thrashing_pages);
|
||||
}
|
||||
|
||||
// Migrate whole "suffix" if no page in it is thrashing
|
||||
if (prefetch_info->big_pages_region.outer < block_region.outer) {
|
||||
uvm_va_block_region_t suffix_region = uvm_va_block_region(prefetch_info->big_pages_region.outer,
|
||||
block_region.outer);
|
||||
if (big_pages_region.outer < max_prefetch_region.outer) {
|
||||
uvm_va_block_region_t suffix_region = uvm_va_block_region(big_pages_region.outer,
|
||||
max_prefetch_region.outer);
|
||||
|
||||
grow_fault_granularity_if_no_thrashing(prefetch_info, suffix_region, faulted_pages, thrashing_pages);
|
||||
grow_fault_granularity_if_no_thrashing(bitmap_tree,
|
||||
suffix_region,
|
||||
max_prefetch_region.first,
|
||||
faulted_pages,
|
||||
thrashing_pages);
|
||||
}
|
||||
}
|
||||
|
||||
// Within a block we only allow prefetching to a single processor. Therefore, if two processors
|
||||
// are accessing non-overlapping regions within the same block they won't benefit from
|
||||
// prefetching.
|
||||
// Within a block we only allow prefetching to a single processor. Therefore,
|
||||
// if two processors are accessing non-overlapping regions within the same
|
||||
// block they won't benefit from prefetching.
|
||||
//
|
||||
// TODO: Bug 1778034: [uvm] Explore prefetching to different processors within a VA block
|
||||
void uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t new_residency,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
uvm_va_block_region_t region)
|
||||
// TODO: Bug 1778034: [uvm] Explore prefetching to different processors within
|
||||
// a VA block.
|
||||
static NvU32 uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t new_residency,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
uvm_va_block_region_t faulted_region,
|
||||
uvm_page_mask_t *prefetch_pages,
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree)
|
||||
{
|
||||
uvm_page_index_t page_index;
|
||||
block_prefetch_info_t *prefetch_info;
|
||||
const uvm_page_mask_t *resident_mask = NULL;
|
||||
const uvm_page_mask_t *thrashing_pages = NULL;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_va_policy_t *policy = va_block_context->policy;
|
||||
uvm_va_block_region_t max_prefetch_region;
|
||||
NvU32 big_page_size;
|
||||
uvm_va_block_region_t big_pages_region;
|
||||
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
if (!g_uvm_perf_prefetch_enable)
|
||||
return;
|
||||
|
||||
prefetch_info = prefetch_info_get_create(va_block);
|
||||
if (!prefetch_info)
|
||||
return;
|
||||
|
||||
if (!uvm_id_equal(prefetch_info->last_migration_proc_id, new_residency)) {
|
||||
prefetch_info->last_migration_proc_id = new_residency;
|
||||
prefetch_info->fault_migrations_to_last_proc = 0;
|
||||
if (!uvm_id_equal(va_block->prefetch_info.last_migration_proc_id, new_residency)) {
|
||||
va_block->prefetch_info.last_migration_proc_id = new_residency;
|
||||
va_block->prefetch_info.fault_migrations_to_last_proc = 0;
|
||||
}
|
||||
|
||||
prefetch_info->pending_prefetch_pages = 0;
|
||||
// Compute the expanded region that prefetching is allowed from.
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
max_prefetch_region = uvm_hmm_get_prefetch_region(va_block,
|
||||
va_block_context,
|
||||
uvm_va_block_region_start(va_block, faulted_region));
|
||||
}
|
||||
else {
|
||||
max_prefetch_region = uvm_va_block_region_from_block(va_block);
|
||||
}
|
||||
|
||||
uvm_page_mask_zero(prefetch_pages);
|
||||
|
||||
if (UVM_ID_IS_CPU(new_residency) || va_block->gpus[uvm_id_gpu_index(new_residency)] != NULL)
|
||||
resident_mask = uvm_va_block_resident_mask_get(va_block, new_residency);
|
||||
|
||||
// If this is a first-touch fault and the destination processor is the
|
||||
// preferred location, populate the whole VA block
|
||||
// preferred location, populate the whole max_prefetch_region.
|
||||
if (uvm_processor_mask_empty(&va_block->resident) &&
|
||||
uvm_id_equal(new_residency, policy->preferred_location)) {
|
||||
uvm_page_mask_region_fill(&prefetch_info->prefetch_pages, uvm_va_block_region_from_block(va_block));
|
||||
uvm_page_mask_region_fill(prefetch_pages, max_prefetch_region);
|
||||
goto done;
|
||||
}
|
||||
|
||||
if (resident_mask)
|
||||
uvm_page_mask_or(&prefetch_info->bitmap_tree.pages, resident_mask, faulted_pages);
|
||||
uvm_page_mask_or(&bitmap_tree->pages, resident_mask, faulted_pages);
|
||||
else
|
||||
uvm_page_mask_copy(&prefetch_info->bitmap_tree.pages, faulted_pages);
|
||||
uvm_page_mask_copy(&bitmap_tree->pages, faulted_pages);
|
||||
|
||||
// Get the big page size for the new residency
|
||||
// If we are using a subregion of the va_block, align bitmap_tree
|
||||
uvm_page_mask_shift_right(&bitmap_tree->pages, &bitmap_tree->pages, max_prefetch_region.first);
|
||||
|
||||
// Get the big page size for the new residency.
|
||||
// Assume 64K size if the new residency is the CPU or no GPU va space is
|
||||
// registered in the current process for this GPU.
|
||||
if (UVM_ID_IS_GPU(new_residency) &&
|
||||
uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, new_residency)) {
|
||||
uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, new_residency);
|
||||
prefetch_info->big_page_size = uvm_va_block_gpu_big_page_size(va_block, gpu);
|
||||
|
||||
big_page_size = uvm_va_block_gpu_big_page_size(va_block, gpu);
|
||||
}
|
||||
else {
|
||||
prefetch_info->big_page_size = UVM_PAGE_SIZE_64K;
|
||||
big_page_size = UVM_PAGE_SIZE_64K;
|
||||
}
|
||||
|
||||
big_pages_region = uvm_va_block_big_page_region_subset(va_block, max_prefetch_region, big_page_size);
|
||||
|
||||
// Adjust the prefetch tree to big page granularity to make sure that we
|
||||
// get big page-friendly prefetching hints
|
||||
prefetch_info->big_pages_region = uvm_va_block_big_page_region_all(va_block, prefetch_info->big_page_size);
|
||||
if (prefetch_info->big_pages_region.first > 0) {
|
||||
prefetch_info->region.first = prefetch_info->big_page_size / PAGE_SIZE - prefetch_info->big_pages_region.first;
|
||||
if (big_pages_region.first - max_prefetch_region.first > 0) {
|
||||
bitmap_tree->offset = big_page_size / PAGE_SIZE - (big_pages_region.first - max_prefetch_region.first);
|
||||
bitmap_tree->leaf_count = uvm_va_block_region_num_pages(max_prefetch_region) + bitmap_tree->offset;
|
||||
|
||||
uvm_page_mask_shift_left(&prefetch_info->bitmap_tree.pages,
|
||||
&prefetch_info->bitmap_tree.pages,
|
||||
prefetch_info->region.first);
|
||||
UVM_ASSERT(bitmap_tree->offset < big_page_size / PAGE_SIZE);
|
||||
UVM_ASSERT(bitmap_tree->leaf_count <= PAGES_PER_UVM_VA_BLOCK);
|
||||
|
||||
uvm_page_mask_shift_left(&bitmap_tree->pages, &bitmap_tree->pages, bitmap_tree->offset);
|
||||
}
|
||||
else {
|
||||
prefetch_info->region.first = 0;
|
||||
bitmap_tree->offset = 0;
|
||||
bitmap_tree->leaf_count = uvm_va_block_region_num_pages(max_prefetch_region);
|
||||
}
|
||||
|
||||
prefetch_info->region.outer = prefetch_info->region.first + uvm_va_block_num_cpu_pages(va_block);
|
||||
bitmap_tree->level_count = ilog2(roundup_pow_of_two(bitmap_tree->leaf_count)) + 1;
|
||||
|
||||
thrashing_pages = uvm_perf_thrashing_get_thrashing_pages(va_block);
|
||||
|
||||
// Assume big pages by default. Prefetch the rest of 4KB subregions within the big page
|
||||
// region unless there is thrashing.
|
||||
grow_fault_granularity(va_block, prefetch_info, faulted_pages, thrashing_pages);
|
||||
// Assume big pages by default. Prefetch the rest of 4KB subregions within
|
||||
// the big page region unless there is thrashing.
|
||||
grow_fault_granularity(bitmap_tree,
|
||||
big_page_size,
|
||||
big_pages_region,
|
||||
max_prefetch_region,
|
||||
faulted_pages,
|
||||
thrashing_pages);
|
||||
|
||||
// Do not compute prefetch regions with faults on pages that are thrashing
|
||||
if (thrashing_pages)
|
||||
uvm_page_mask_andnot(&prefetch_info->migrate_pages, faulted_pages, thrashing_pages);
|
||||
uvm_page_mask_andnot(&va_block_context->scratch_page_mask, faulted_pages, thrashing_pages);
|
||||
else
|
||||
uvm_page_mask_copy(&prefetch_info->migrate_pages, faulted_pages);
|
||||
uvm_page_mask_copy(&va_block_context->scratch_page_mask, faulted_pages);
|
||||
|
||||
// Update the tree using the migration mask to compute the pages to prefetch
|
||||
uvm_page_mask_zero(&prefetch_info->prefetch_pages);
|
||||
for_each_va_block_page_in_region_mask(page_index, &prefetch_info->migrate_pages, region) {
|
||||
uvm_va_block_region_t prefetch_region = compute_prefetch_region(page_index + prefetch_info->region.first,
|
||||
prefetch_info);
|
||||
uvm_page_mask_region_fill(&prefetch_info->prefetch_pages, prefetch_region);
|
||||
// Update the tree using the scratch mask to compute the pages to prefetch
|
||||
for_each_va_block_page_in_region_mask(page_index, &va_block_context->scratch_page_mask, faulted_region) {
|
||||
uvm_va_block_region_t region = compute_prefetch_region(page_index, bitmap_tree, max_prefetch_region);
|
||||
|
||||
uvm_page_mask_region_fill(prefetch_pages, region);
|
||||
|
||||
// Early out if we have already prefetched until the end of the VA block
|
||||
if (prefetch_region.outer == prefetch_info->region.outer)
|
||||
if (region.outer == max_prefetch_region.outer)
|
||||
break;
|
||||
}
|
||||
|
||||
// Adjust prefetching page mask
|
||||
if (prefetch_info->region.first > 0) {
|
||||
uvm_page_mask_shift_right(&prefetch_info->prefetch_pages,
|
||||
&prefetch_info->prefetch_pages,
|
||||
prefetch_info->region.first);
|
||||
}
|
||||
|
||||
done:
|
||||
// Do not prefetch pages that are going to be migrated/populated due to a
|
||||
// fault
|
||||
uvm_page_mask_andnot(&prefetch_info->prefetch_pages,
|
||||
&prefetch_info->prefetch_pages,
|
||||
faulted_pages);
|
||||
uvm_page_mask_andnot(prefetch_pages, prefetch_pages, faulted_pages);
|
||||
|
||||
// TODO: Bug 1765432: prefetching pages that are already mapped on the CPU
|
||||
// would trigger a remap, which may cause a large overhead. Therefore,
|
||||
// exclude them from the mask.
|
||||
if (UVM_ID_IS_CPU(new_residency)) {
|
||||
// For HMM, we don't know what pages are mapped by the CPU unless we try to
|
||||
// migrate them. Prefetch pages will only be opportunistically migrated.
|
||||
if (UVM_ID_IS_CPU(new_residency) && !uvm_va_block_is_hmm(va_block)) {
|
||||
uvm_page_mask_and(&va_block_context->scratch_page_mask,
|
||||
resident_mask,
|
||||
&va_block->cpu.pte_bits[UVM_PTE_BITS_CPU_READ]);
|
||||
uvm_page_mask_andnot(&prefetch_info->prefetch_pages,
|
||||
&prefetch_info->prefetch_pages,
|
||||
&va_block_context->scratch_page_mask);
|
||||
uvm_page_mask_andnot(prefetch_pages, prefetch_pages, &va_block_context->scratch_page_mask);
|
||||
}
|
||||
|
||||
// Avoid prefetching pages that are thrashing
|
||||
if (thrashing_pages) {
|
||||
uvm_page_mask_andnot(&prefetch_info->prefetch_pages,
|
||||
&prefetch_info->prefetch_pages,
|
||||
thrashing_pages);
|
||||
}
|
||||
if (thrashing_pages)
|
||||
uvm_page_mask_andnot(prefetch_pages, prefetch_pages, thrashing_pages);
|
||||
|
||||
prefetch_info->fault_migrations_to_last_proc += uvm_page_mask_region_weight(faulted_pages, region);
|
||||
prefetch_info->pending_prefetch_pages = uvm_page_mask_weight(&prefetch_info->prefetch_pages);
|
||||
va_block->prefetch_info.fault_migrations_to_last_proc += uvm_page_mask_region_weight(faulted_pages, faulted_region);
|
||||
|
||||
return uvm_page_mask_weight(prefetch_pages);
|
||||
}
|
||||
|
||||
uvm_perf_prefetch_hint_t uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
|
||||
const uvm_page_mask_t *new_residency_mask)
|
||||
void uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t new_residency,
|
||||
const uvm_page_mask_t *faulted_pages,
|
||||
uvm_va_block_region_t faulted_region,
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
uvm_perf_prefetch_hint_t *out_hint)
|
||||
{
|
||||
uvm_perf_prefetch_hint_t ret = UVM_PERF_PREFETCH_HINT_NONE();
|
||||
block_prefetch_info_t *prefetch_info;
|
||||
uvm_va_policy_t *policy = va_block_context->policy;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_page_mask_t *prefetch_pages = &out_hint->prefetch_pages_mask;
|
||||
NvU32 pending_prefetch_pages;
|
||||
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, faulted_region));
|
||||
UVM_ASSERT(uvm_hmm_va_block_context_vma_is_valid(va_block, va_block_context, faulted_region));
|
||||
|
||||
out_hint->residency = UVM_ID_INVALID;
|
||||
|
||||
if (!g_uvm_perf_prefetch_enable)
|
||||
return ret;
|
||||
return;
|
||||
|
||||
if (!va_space->test.page_prefetch_enabled)
|
||||
return ret;
|
||||
return;
|
||||
|
||||
prefetch_info = prefetch_info_get(va_block);
|
||||
if (!prefetch_info)
|
||||
return ret;
|
||||
pending_prefetch_pages = uvm_perf_prefetch_prenotify_fault_migrations(va_block,
|
||||
va_block_context,
|
||||
new_residency,
|
||||
faulted_pages,
|
||||
faulted_region,
|
||||
prefetch_pages,
|
||||
bitmap_tree);
|
||||
|
||||
if (prefetch_info->fault_migrations_to_last_proc >= g_uvm_perf_prefetch_min_faults &&
|
||||
prefetch_info->pending_prefetch_pages > 0) {
|
||||
if (va_block->prefetch_info.fault_migrations_to_last_proc >= g_uvm_perf_prefetch_min_faults &&
|
||||
pending_prefetch_pages > 0) {
|
||||
bool changed = false;
|
||||
uvm_range_group_range_t *rgr;
|
||||
|
||||
@ -402,62 +410,19 @@ uvm_perf_prefetch_hint_t uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
|
||||
max(rgr->node.start, va_block->start),
|
||||
min(rgr->node.end, va_block->end));
|
||||
|
||||
if (uvm_page_mask_region_empty(new_residency_mask, region) &&
|
||||
!uvm_page_mask_region_empty(&prefetch_info->prefetch_pages, region)) {
|
||||
uvm_page_mask_region_clear(&prefetch_info->prefetch_pages, region);
|
||||
if (uvm_page_mask_region_empty(faulted_pages, region) &&
|
||||
!uvm_page_mask_region_empty(prefetch_pages, region)) {
|
||||
uvm_page_mask_region_clear(prefetch_pages, region);
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (changed)
|
||||
prefetch_info->pending_prefetch_pages = uvm_page_mask_weight(&prefetch_info->prefetch_pages);
|
||||
pending_prefetch_pages = uvm_page_mask_weight(prefetch_pages);
|
||||
|
||||
if (prefetch_info->pending_prefetch_pages > 0) {
|
||||
ret.residency = prefetch_info->last_migration_proc_id;
|
||||
ret.prefetch_pages_mask = &prefetch_info->prefetch_pages;
|
||||
}
|
||||
if (pending_prefetch_pages > 0)
|
||||
out_hint->residency = va_block->prefetch_info.last_migration_proc_id;
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void prefetch_block_destroy_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_data)
|
||||
{
|
||||
uvm_va_block_t *va_block;
|
||||
|
||||
UVM_ASSERT(g_uvm_perf_prefetch_enable);
|
||||
|
||||
UVM_ASSERT(event_id == UVM_PERF_EVENT_BLOCK_DESTROY ||
|
||||
event_id == UVM_PERF_EVENT_MODULE_UNLOAD ||
|
||||
event_id == UVM_PERF_EVENT_BLOCK_SHRINK);
|
||||
|
||||
if (event_id == UVM_PERF_EVENT_BLOCK_DESTROY)
|
||||
va_block = event_data->block_destroy.block;
|
||||
else if (event_id == UVM_PERF_EVENT_BLOCK_SHRINK)
|
||||
va_block = event_data->block_shrink.block;
|
||||
else
|
||||
va_block = event_data->module_unload.block;
|
||||
|
||||
if (!va_block)
|
||||
return;
|
||||
|
||||
prefetch_info_destroy(va_block);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_perf_prefetch_load(uvm_va_space_t *va_space)
|
||||
{
|
||||
if (!g_uvm_perf_prefetch_enable)
|
||||
return NV_OK;
|
||||
|
||||
return uvm_perf_module_load(&g_module_prefetch, va_space);
|
||||
}
|
||||
|
||||
void uvm_perf_prefetch_unload(uvm_va_space_t *va_space)
|
||||
{
|
||||
if (!g_uvm_perf_prefetch_enable)
|
||||
return;
|
||||
|
||||
uvm_perf_module_unload(&g_module_prefetch, va_space);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_perf_prefetch_init()
|
||||
@ -467,13 +432,6 @@ NV_STATUS uvm_perf_prefetch_init()
|
||||
if (!g_uvm_perf_prefetch_enable)
|
||||
return NV_OK;
|
||||
|
||||
uvm_perf_module_init("perf_prefetch", UVM_PERF_MODULE_TYPE_PREFETCH, g_callbacks_prefetch,
|
||||
ARRAY_SIZE(g_callbacks_prefetch), &g_module_prefetch);
|
||||
|
||||
g_prefetch_info_cache = NV_KMEM_CACHE_CREATE("block_prefetch_info_t", block_prefetch_info_t);
|
||||
if (!g_prefetch_info_cache)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
if (uvm_perf_prefetch_threshold <= 100) {
|
||||
g_uvm_perf_prefetch_threshold = uvm_perf_prefetch_threshold;
|
||||
}
|
||||
@ -498,14 +456,6 @@ NV_STATUS uvm_perf_prefetch_init()
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_perf_prefetch_exit()
|
||||
{
|
||||
if (!g_uvm_perf_prefetch_enable)
|
||||
return;
|
||||
|
||||
kmem_cache_destroy_safe(&g_prefetch_info_cache);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_set_page_prefetch_policy(UVM_TEST_SET_PAGE_PREFETCH_POLICY_PARAMS *params, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2019 NVIDIA Corporation
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -30,32 +30,66 @@
|
||||
|
||||
typedef struct
{
    const uvm_page_mask_t *prefetch_pages_mask;
    uvm_page_mask_t prefetch_pages_mask;

    uvm_processor_id_t residency;
} uvm_perf_prefetch_hint_t;

// Global initialization/cleanup functions
// Encapsulates a counter tree built on top of a page mask bitmap in which each
// leaf represents a page in the block. It contains leaf_count and level_count
// so that it can use some macros for perf trees.
typedef struct
{
    uvm_page_mask_t pages;

    uvm_page_index_t offset;

    NvU16 leaf_count;

    NvU8 level_count;
} uvm_perf_prefetch_bitmap_tree_t;

// Iterator for the bitmap tree. It contains level_idx and node_idx so that it
// can use some macros for perf trees.
typedef struct
{
    s8 level_idx;

    uvm_page_index_t node_idx;
} uvm_perf_prefetch_bitmap_tree_iter_t;

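A worked example of how these fields are filled in by the prefetch code earlier in this change (numbers chosen for illustration only, assuming 4K base pages and a 64K big page size, i.e. 16 pages per big page):

    // Suppose max_prefetch_region covers block pages [3, 100) and the first
    // big-page-aligned page inside it is page 16. Then:
    //   offset      = 16 - (16 - 3)                      = 3
    //   leaf_count  = (100 - 3) + offset                 = 100
    //   level_count = ilog2(roundup_pow_of_two(100)) + 1 = 8
    // Block page P maps to tree leaf (P - 3) + offset, so page 16 lands on
    // leaf 16, which keeps big pages aligned within the bitmap tree.
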
// Global initialization function (no clean up needed).
NV_STATUS uvm_perf_prefetch_init(void);
void uvm_perf_prefetch_exit(void);

// VA space Initialization/cleanup functions
NV_STATUS uvm_perf_prefetch_load(uvm_va_space_t *va_space);
void uvm_perf_prefetch_unload(uvm_va_space_t *va_space);
// Return a hint with the pages that may be prefetched in the block.
// The faulted_pages mask and faulted_region are the pages being migrated to
// the given residency.
// va_block_context must not be NULL, va_block_context->policy must be valid,
// and if the va_block is a HMM block, va_block_context->hmm.vma must be valid
// which also means the va_block_context->mm is not NULL, retained, and locked
// for at least read.
// Locking: The caller must hold the va_space lock and va_block lock.
void uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
                                uvm_va_block_context_t *va_block_context,
                                uvm_processor_id_t new_residency,
                                const uvm_page_mask_t *faulted_pages,
                                uvm_va_block_region_t faulted_region,
                                uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
                                uvm_perf_prefetch_hint_t *out_hint);

// Obtain a hint with the pages that may be prefetched in the block
uvm_perf_prefetch_hint_t uvm_perf_prefetch_get_hint(uvm_va_block_t *va_block,
                                                    const uvm_page_mask_t *new_residency_mask);
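Illustrative only, not part of this commit: a sketch of the new out-parameter calling convention (the surrounding fault-service function and the merge step are hypothetical):

    uvm_perf_prefetch_hint_t hint;

    uvm_perf_prefetch_get_hint(va_block,
                               va_block_context,
                               new_residency,
                               faulted_pages,
                               faulted_region,
                               bitmap_tree,
                               &hint);

    // hint.residency is UVM_ID_INVALID when there is nothing to prefetch;
    // otherwise hint.prefetch_pages_mask lists the extra pages to migrate.
    if (!uvm_id_equal(hint.residency, UVM_ID_INVALID)) {
        // ... merge hint.prefetch_pages_mask into the pages being migrated ...
    }
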
void uvm_perf_prefetch_bitmap_tree_iter_init(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_perf_prefetch_bitmap_tree_iter_t *iter);
|
||||
uvm_va_block_region_t uvm_perf_prefetch_bitmap_tree_iter_get_range(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
const uvm_perf_prefetch_bitmap_tree_iter_t *iter);
|
||||
NvU16 uvm_perf_prefetch_bitmap_tree_iter_get_count(const uvm_perf_prefetch_bitmap_tree_t *bitmap_tree,
|
||||
const uvm_perf_prefetch_bitmap_tree_iter_t *iter);
|
||||
|
||||
// Notify that the given mask of pages within region is going to migrate to
|
||||
// the given residency. The caller must hold the va_space lock.
|
||||
void uvm_perf_prefetch_prenotify_fault_migrations(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t new_residency,
|
||||
const uvm_page_mask_t *migrate_pages,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
#define UVM_PERF_PREFETCH_HINT_NONE() \
|
||||
(uvm_perf_prefetch_hint_t){ NULL, UVM_ID_INVALID }
|
||||
#define uvm_perf_prefetch_bitmap_tree_traverse_counters(counter,tree,page,iter) \
|
||||
for (uvm_perf_prefetch_bitmap_tree_iter_init((tree), (page), (iter)), \
|
||||
(counter) = uvm_perf_prefetch_bitmap_tree_iter_get_count((tree), (iter)); \
|
||||
(iter)->level_idx >= 0; \
|
||||
(counter) = --(iter)->level_idx < 0? 0: \
|
||||
uvm_perf_prefetch_bitmap_tree_iter_get_count((tree), (iter)))
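
The traversal macro above visits the counter on each tree level along the path from the leaf for page up to the root. A minimal usage sketch, modeled on the bitmap-tree test later in this change (the leaf count and page numbers are illustrative):

uvm_perf_prefetch_bitmap_tree_t tree;
uvm_perf_prefetch_bitmap_tree_iter_t iter;
int value;

tree.leaf_count = 9;
tree.level_count = ilog2(roundup_pow_of_two(tree.leaf_count)) + 1;
uvm_page_mask_zero(&tree.pages);
uvm_page_mask_set(&tree.pages, 7);
uvm_page_mask_set(&tree.pages, 8);

// Walk from the leaf for page 6 up to the root; at each level, value is the
// count returned by uvm_perf_prefetch_bitmap_tree_iter_get_count() for the
// node covering page 6.
uvm_perf_prefetch_bitmap_tree_traverse_counters(value, &tree, 6, &iter) {
    // iter.level_idx runs from the deepest level (level_count - 1) down to 0.
}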
|
||||
|
||||
#endif
|
||||
|
@ -458,7 +458,7 @@ static void cpu_thrashing_stats_exit(void)
|
||||
{
|
||||
if (g_cpu_thrashing_stats.procfs_file) {
|
||||
UVM_ASSERT(uvm_procfs_is_debug_enabled());
|
||||
uvm_procfs_destroy_entry(g_cpu_thrashing_stats.procfs_file);
|
||||
proc_remove(g_cpu_thrashing_stats.procfs_file);
|
||||
g_cpu_thrashing_stats.procfs_file = NULL;
|
||||
}
|
||||
}
|
||||
@ -522,7 +522,7 @@ static void gpu_thrashing_stats_destroy(uvm_gpu_t *gpu)
|
||||
uvm_perf_module_type_unset_data(gpu->perf_modules_data, UVM_PERF_MODULE_TYPE_THRASHING);
|
||||
|
||||
if (gpu_thrashing->procfs_file)
|
||||
uvm_procfs_destroy_entry(gpu_thrashing->procfs_file);
|
||||
proc_remove(gpu_thrashing->procfs_file);
|
||||
|
||||
uvm_kvfree(gpu_thrashing);
|
||||
}
|
||||
@ -652,7 +652,6 @@ done:
|
||||
|
||||
static void thrashing_reset_pages_in_region(uvm_va_block_t *va_block, NvU64 address, NvU64 bytes);
|
||||
|
||||
// Destroy the thrashing detection struct for the given block
|
||||
void uvm_perf_thrashing_info_destroy(uvm_va_block_t *va_block)
|
||||
{
|
||||
block_thrashing_info_t *block_thrashing = thrashing_info_get(va_block);
|
||||
@ -1066,11 +1065,11 @@ static void thrashing_reset_pages_in_region(uvm_va_block_t *va_block, NvU64 addr
|
||||
|
||||
// Unmap remote mappings from the given processors on the pinned pages
|
||||
// described by region and block_thrashing->pinned_pages.
|
||||
static NV_STATUS unmap_remote_pinned_pages_from_processors(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
block_thrashing_info_t *block_thrashing,
|
||||
uvm_va_block_region_t region,
|
||||
const uvm_processor_mask_t *unmap_processors)
|
||||
static NV_STATUS unmap_remote_pinned_pages(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
block_thrashing_info_t *block_thrashing,
|
||||
uvm_va_block_region_t region,
|
||||
const uvm_processor_mask_t *unmap_processors)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS tracker_status;
|
||||
@ -1116,17 +1115,16 @@ static NV_STATUS unmap_remote_pinned_pages_from_processors(uvm_va_block_t *va_bl
|
||||
return status;
|
||||
}
|
||||
|
||||
// Unmap remote mappings from all processors on the pinned pages
|
||||
// described by region and block_thrashing->pinned pages.
|
||||
NV_STATUS unmap_remote_pinned_pages_from_all_processors(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region)
|
||||
NV_STATUS uvm_perf_thrashing_unmap_remote_pinned_pages_all(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region)
|
||||
{
|
||||
block_thrashing_info_t *block_thrashing;
|
||||
uvm_processor_mask_t unmap_processors;
|
||||
uvm_va_policy_t *policy;
|
||||
uvm_va_policy_t *policy = va_block_context->policy;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, region));
|
||||
|
||||
block_thrashing = thrashing_info_get(va_block);
|
||||
if (!block_thrashing || !block_thrashing->pages)
|
||||
@ -1137,15 +1135,9 @@ NV_STATUS unmap_remote_pinned_pages_from_all_processors(uvm_va_block_t *va_block
|
||||
|
||||
// Unmap all mapped processors (that are not SetAccessedBy) with
|
||||
// no copy of the page
|
||||
policy = uvm_va_policy_get(va_block, uvm_va_block_region_start(va_block, region));
|
||||
|
||||
uvm_processor_mask_andnot(&unmap_processors, &va_block->mapped, &policy->accessed_by);
|
||||
|
||||
return unmap_remote_pinned_pages_from_processors(va_block,
|
||||
va_block_context,
|
||||
block_thrashing,
|
||||
region,
|
||||
&unmap_processors);
|
||||
return unmap_remote_pinned_pages(va_block, va_block_context, block_thrashing, region, &unmap_processors);
|
||||
}
|
||||
|
||||
// Check that we are not migrating pages away from its pinned location and
|
||||
@ -1246,7 +1238,7 @@ void thrashing_event_cb(uvm_perf_event_t event_id, uvm_perf_event_data_t *event_
|
||||
if (!va_space_thrashing->params.enable)
|
||||
return;
|
||||
|
||||
// TODO: Bug 2046423: HMM will need to look up the policy when
|
||||
// TODO: Bug 3660922: HMM will need to look up the policy when
|
||||
// read duplication is supported.
|
||||
read_duplication = uvm_va_block_is_hmm(va_block) ?
|
||||
UVM_READ_DUPLICATION_UNSET :
|
||||
@ -1796,6 +1788,7 @@ static void thrashing_unpin_pages(struct work_struct *work)
|
||||
struct delayed_work *dwork = to_delayed_work(work);
|
||||
va_space_thrashing_info_t *va_space_thrashing = container_of(dwork, va_space_thrashing_info_t, pinned_pages.dwork);
|
||||
uvm_va_space_t *va_space = va_space_thrashing->va_space;
|
||||
uvm_va_block_context_t *va_block_context = &va_space_thrashing->pinned_pages.va_block_context;
|
||||
|
||||
UVM_ASSERT(uvm_va_space_initialized(va_space) == NV_OK);
|
||||
|
||||
@ -1857,12 +1850,13 @@ static void thrashing_unpin_pages(struct work_struct *work)
|
||||
UVM_ASSERT(block_thrashing);
|
||||
UVM_ASSERT(uvm_page_mask_test(&block_thrashing->pinned_pages.mask, page_index));
|
||||
|
||||
va_space_thrashing->pinned_pages.va_block_context.policy =
|
||||
uvm_va_block_context_init(va_block_context, NULL);
|
||||
va_block_context->policy =
|
||||
uvm_va_policy_get(va_block, uvm_va_block_cpu_page_address(va_block, page_index));
|
||||
|
||||
unmap_remote_pinned_pages_from_all_processors(va_block,
|
||||
&va_space_thrashing->pinned_pages.va_block_context,
|
||||
uvm_va_block_region_for_page(page_index));
|
||||
uvm_perf_thrashing_unmap_remote_pinned_pages_all(va_block,
|
||||
va_block_context,
|
||||
uvm_va_block_region_for_page(page_index));
|
||||
thrashing_reset_page(va_space_thrashing, va_block, block_thrashing, page_index);
|
||||
}
|
||||
|
||||
@ -2105,11 +2099,10 @@ NV_STATUS uvm_test_set_page_thrashing_policy(UVM_TEST_SET_PAGE_THRASHING_POLICY_
|
||||
|
||||
// Unmap may split PTEs and require a retry. Needs to be called
|
||||
// before the pinned pages information is destroyed.
|
||||
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
|
||||
NULL,
|
||||
unmap_remote_pinned_pages_from_all_processors(va_block,
|
||||
block_context,
|
||||
va_block_region));
|
||||
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, NULL,
|
||||
uvm_perf_thrashing_unmap_remote_pinned_pages_all(va_block,
|
||||
block_context,
|
||||
va_block_region));
|
||||
|
||||
uvm_perf_thrashing_info_destroy(va_block);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2019 NVIDIA Corporation
|
||||
Copyright (c) 2016-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -108,8 +108,11 @@ void uvm_perf_thrashing_info_destroy(uvm_va_block_t *va_block);
|
||||
|
||||
// Unmap remote mappings from all processors on the pinned pages
|
||||
// described by region and block_thrashing->pinned_pages.
|
||||
NV_STATUS unmap_remote_pinned_pages_from_all_processors(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region);
|
||||
// va_block_context must not be NULL and va_block_context->policy must be valid.
|
||||
// See the comments for uvm_va_block_check_policy_is_valid() in uvm_va_block.h.
|
||||
// Locking: the va_block lock must be held.
|
||||
NV_STATUS uvm_perf_thrashing_unmap_remote_pinned_pages_all(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region);
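
Callers are expected to do the context setup described above before calling. A condensed sketch of the pattern used by the pinned-page unpin worker elsewhere in this change (va_block, va_block_context and page_index are assumed, and the va_block lock is assumed to be held):

NV_STATUS status;

uvm_va_block_context_init(va_block_context, NULL);
va_block_context->policy =
    uvm_va_policy_get(va_block, uvm_va_block_cpu_page_address(va_block, page_index));

status = uvm_perf_thrashing_unmap_remote_pinned_pages_all(va_block,
                                                          va_block_context,
                                                          uvm_va_block_region_for_page(page_index));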
|
||||
|
||||
#endif
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015 NVIDIA Corporation
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -23,6 +23,7 @@
|
||||
|
||||
#include "uvm_perf_utils.h"
|
||||
#include "uvm_va_block.h"
|
||||
#include "uvm_perf_prefetch.h"
|
||||
#include "uvm_test.h"
|
||||
|
||||
static NV_STATUS test_saturating_counter_basic(void)
|
||||
@ -681,10 +682,12 @@ fail:
|
||||
static NV_STATUS test_bitmap_tree_traversal(void)
|
||||
{
|
||||
int value;
|
||||
uvm_va_block_bitmap_tree_t tree;
|
||||
uvm_va_block_bitmap_tree_iter_t iter;
|
||||
uvm_perf_prefetch_bitmap_tree_t tree;
|
||||
uvm_perf_prefetch_bitmap_tree_iter_t iter;
|
||||
|
||||
uvm_va_block_bitmap_tree_init_from_page_count(&tree, 9);
|
||||
tree.leaf_count = 9;
|
||||
tree.level_count = ilog2(roundup_pow_of_two(tree.leaf_count)) + 1;
|
||||
uvm_page_mask_zero(&tree.pages);
|
||||
|
||||
TEST_CHECK_RET(tree.level_count == 5);
|
||||
TEST_CHECK_RET(tree.leaf_count == 9);
|
||||
@ -695,7 +698,7 @@ static NV_STATUS test_bitmap_tree_traversal(void)
|
||||
uvm_page_mask_set(&tree.pages, 7);
|
||||
uvm_page_mask_set(&tree.pages, 8);
|
||||
|
||||
uvm_va_block_bitmap_tree_traverse_counters(value, &tree, 6, &iter) {
|
||||
uvm_perf_prefetch_bitmap_tree_traverse_counters(value, &tree, 6, &iter) {
|
||||
if (iter.level_idx == 4)
|
||||
TEST_CHECK_RET(value == 0);
|
||||
else if (iter.level_idx == 3)
|
||||
|
@ -591,19 +591,16 @@ error:
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
|
||||
size_t num_chunks,
|
||||
uvm_chunk_size_t chunk_size,
|
||||
uvm_pmm_alloc_flags_t flags,
|
||||
uvm_gpu_chunk_t **chunks,
|
||||
uvm_tracker_t *out_tracker)
|
||||
static NV_STATUS pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
|
||||
size_t num_chunks,
|
||||
uvm_chunk_size_t chunk_size,
|
||||
uvm_pmm_gpu_memory_type_t memory_type,
|
||||
uvm_pmm_alloc_flags_t flags,
|
||||
uvm_gpu_chunk_t **chunks,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
uvm_gpu_t *gpu = uvm_pmm_to_gpu(pmm);
|
||||
NV_STATUS status;
|
||||
size_t i;
|
||||
uvm_pmm_gpu_memory_type_t memory_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL;
|
||||
|
||||
status = uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, memory_type, flags, chunks, out_tracker);
|
||||
NV_STATUS status = uvm_pmm_gpu_alloc(pmm, num_chunks, chunk_size, memory_type, flags, chunks, out_tracker);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
@ -618,6 +615,18 @@ NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_pmm_gpu_alloc_kernel(uvm_pmm_gpu_t *pmm,
|
||||
size_t num_chunks,
|
||||
uvm_chunk_size_t chunk_size,
|
||||
uvm_pmm_alloc_flags_t flags,
|
||||
uvm_gpu_chunk_t **chunks,
|
||||
uvm_tracker_t *out_tracker)
|
||||
{
|
||||
uvm_pmm_gpu_memory_type_t memory_type = UVM_PMM_GPU_MEMORY_TYPE_KERNEL;
|
||||
|
||||
return pmm_gpu_alloc_kernel(pmm, num_chunks, chunk_size, memory_type, flags, chunks, out_tracker);
|
||||
}
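
For reference, a minimal allocation sketch using the wrapper above. pmm is assumed to be a valid uvm_pmm_gpu_t pointer, UVM_PMM_ALLOC_FLAGS_NONE is assumed to be the "no flags" value, and the chunk size is a placeholder:

uvm_gpu_chunk_t *chunks[4];
uvm_tracker_t tracker = UVM_TRACKER_INIT();
NV_STATUS status;

status = uvm_pmm_gpu_alloc_kernel(pmm,
                                  4,
                                  UVM_CHUNK_SIZE_MAX,
                                  UVM_PMM_ALLOC_FLAGS_NONE,
                                  chunks,
                                  &tracker);
if (status == NV_OK) {
    // The chunks are kernel-type allocations; they must eventually be
    // returned to the PMM through the matching free path.
}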
|
||||
|
||||
static void chunk_update_lists_locked(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chunk)
|
||||
{
|
||||
uvm_gpu_root_chunk_t *root_chunk = root_chunk_from_chunk(pmm, chunk);
|
||||
@ -1174,7 +1183,7 @@ static void root_chunk_unmap_indirect_peer(uvm_pmm_gpu_t *pmm, uvm_gpu_root_chun
|
||||
if (status != NV_OK)
|
||||
UVM_ASSERT(uvm_global_get_status() != NV_OK);
|
||||
|
||||
uvm_gpu_unmap_cpu_pages(other_gpu, indirect_peer->dma_addrs[index], UVM_CHUNK_SIZE_MAX);
|
||||
uvm_gpu_unmap_cpu_pages(other_gpu->parent, indirect_peer->dma_addrs[index], UVM_CHUNK_SIZE_MAX);
|
||||
uvm_processor_mask_clear(&root_chunk->indirect_peers_mapped, other_gpu->id);
|
||||
new_count = atomic64_dec_return(&indirect_peer->map_count);
|
||||
UVM_ASSERT(new_count >= 0);
|
||||
@ -1304,7 +1313,7 @@ NV_STATUS uvm_pmm_gpu_indirect_peer_map(uvm_pmm_gpu_t *pmm, uvm_gpu_chunk_t *chu
|
||||
root_chunk_lock(pmm, root_chunk);
|
||||
|
||||
if (!uvm_processor_mask_test(&root_chunk->indirect_peers_mapped, accessing_gpu->id)) {
|
||||
status = uvm_gpu_map_cpu_pages(accessing_gpu,
|
||||
status = uvm_gpu_map_cpu_pages(accessing_gpu->parent,
|
||||
uvm_gpu_chunk_to_page(pmm, &root_chunk->chunk),
|
||||
UVM_CHUNK_SIZE_MAX,
|
||||
&indirect_peer->dma_addrs[index]);
|
||||
@ -2705,7 +2714,8 @@ static NV_STATUS uvm_pmm_gpu_pma_evict_pages(void *void_pmm,
|
||||
NvU64 *pages,
|
||||
NvU32 num_pages_to_evict,
|
||||
NvU64 phys_start,
|
||||
NvU64 phys_end)
|
||||
NvU64 phys_end,
|
||||
UVM_PMA_GPU_MEMORY_TYPE mem_type)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_pmm_gpu_t *pmm = (uvm_pmm_gpu_t *)void_pmm;
|
||||
@ -2804,14 +2814,15 @@ static NV_STATUS uvm_pmm_gpu_pma_evict_pages_wrapper(void *void_pmm,
|
||||
NvU64 *pages,
|
||||
NvU32 num_pages_to_evict,
|
||||
NvU64 phys_start,
|
||||
NvU64 phys_end)
|
||||
NvU64 phys_end,
|
||||
UVM_PMA_GPU_MEMORY_TYPE mem_type)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
// RM invokes the eviction callbacks with its API lock held, but not its GPU
|
||||
// lock.
|
||||
uvm_record_lock_rm_api();
|
||||
status = uvm_pmm_gpu_pma_evict_pages(void_pmm, page_size, pages, num_pages_to_evict, phys_start, phys_end);
|
||||
status = uvm_pmm_gpu_pma_evict_pages(void_pmm, page_size, pages, num_pages_to_evict, phys_start, phys_end, mem_type);
|
||||
uvm_record_unlock_rm_api();
|
||||
return status;
|
||||
}
|
||||
@ -2821,19 +2832,24 @@ static NV_STATUS uvm_pmm_gpu_pma_evict_pages_wrapper_entry(void *void_pmm,
|
||||
NvU64 *pages,
|
||||
NvU32 num_pages_to_evict,
|
||||
NvU64 phys_start,
|
||||
NvU64 phys_end)
|
||||
NvU64 phys_end,
|
||||
UVM_PMA_GPU_MEMORY_TYPE mem_type)
|
||||
{
|
||||
UVM_ENTRY_RET(uvm_pmm_gpu_pma_evict_pages_wrapper(void_pmm,
|
||||
page_size,
|
||||
pages,
|
||||
num_pages_to_evict,
|
||||
phys_start,
|
||||
phys_end));
|
||||
phys_end,
|
||||
mem_type));
|
||||
}
|
||||
|
||||
// See the documentation of pmaEvictRangeCb_t in pma.h for details of the
|
||||
// expected semantics.
|
||||
static NV_STATUS uvm_pmm_gpu_pma_evict_range(void *void_pmm, NvU64 phys_begin, NvU64 phys_end)
|
||||
static NV_STATUS uvm_pmm_gpu_pma_evict_range(void *void_pmm,
|
||||
NvU64 phys_begin,
|
||||
NvU64 phys_end,
|
||||
UVM_PMA_GPU_MEMORY_TYPE mem_type)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_pmm_gpu_t *pmm = (uvm_pmm_gpu_t *)void_pmm;
|
||||
@ -2922,21 +2938,27 @@ static NV_STATUS uvm_pmm_gpu_pma_evict_range(void *void_pmm, NvU64 phys_begin, N
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_pmm_gpu_pma_evict_range_wrapper(void *void_pmm, NvU64 phys_begin, NvU64 phys_end)
|
||||
static NV_STATUS uvm_pmm_gpu_pma_evict_range_wrapper(void *void_pmm,
|
||||
NvU64 phys_begin,
|
||||
NvU64 phys_end,
|
||||
UVM_PMA_GPU_MEMORY_TYPE mem_type)
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
// RM invokes the eviction callbacks with its API lock held, but not its GPU
|
||||
// lock.
|
||||
uvm_record_lock_rm_api();
|
||||
status = uvm_pmm_gpu_pma_evict_range(void_pmm, phys_begin, phys_end);
|
||||
status = uvm_pmm_gpu_pma_evict_range(void_pmm, phys_begin, phys_end, mem_type);
|
||||
uvm_record_unlock_rm_api();
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_pmm_gpu_pma_evict_range_wrapper_entry(void *void_pmm, NvU64 phys_begin, NvU64 phys_end)
|
||||
static NV_STATUS uvm_pmm_gpu_pma_evict_range_wrapper_entry(void *void_pmm,
|
||||
NvU64 phys_begin,
|
||||
NvU64 phys_end,
|
||||
UVM_PMA_GPU_MEMORY_TYPE mem_type)
|
||||
{
|
||||
UVM_ENTRY_RET(uvm_pmm_gpu_pma_evict_range_wrapper(void_pmm, phys_begin, phys_end));
|
||||
UVM_ENTRY_RET(uvm_pmm_gpu_pma_evict_range_wrapper(void_pmm, phys_begin, phys_end, mem_type));
|
||||
}
|
||||
|
||||
static void deinit_chunk_split_cache(uvm_pmm_gpu_t *pmm)
|
||||
@ -3420,12 +3442,13 @@ NV_STATUS uvm_test_evict_chunk(UVM_TEST_EVICT_CHUNK_PARAMS *params, struct file
|
||||
params->evicted_physical_address = 0;
|
||||
params->chunk_size_backing_virtual = 0;
|
||||
|
||||
mm = uvm_va_space_mm_retain_lock(va_space);
|
||||
mm = uvm_va_space_mm_or_current_retain_lock(va_space);
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
gpu = uvm_va_space_get_gpu_by_uuid(va_space, ¶ms->gpu_uuid);
|
||||
if (!gpu || !uvm_gpu_supports_eviction(gpu)) {
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
}
|
||||
pmm = &gpu->pmm;
|
||||
@ -3436,13 +3459,24 @@ NV_STATUS uvm_test_evict_chunk(UVM_TEST_EVICT_CHUNK_PARAMS *params, struct file
|
||||
// For virtual mode, look up and retain the block first so that eviction can
|
||||
// be started without the VA space lock held.
|
||||
if (params->eviction_mode == UvmTestEvictModeVirtual) {
|
||||
status = uvm_va_block_find_create(va_space, mm, params->address, NULL, &block);
|
||||
if (status != NV_OK) {
|
||||
uvm_va_block_context_t *block_context;
|
||||
|
||||
block_context = uvm_va_block_context_alloc(mm);
|
||||
if (!block_context) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_release_unlock(va_space, mm);
|
||||
goto out;
|
||||
}
|
||||
|
||||
status = uvm_va_block_find_create(va_space, params->address, block_context, &block);
|
||||
uvm_va_block_context_free(block_context);
|
||||
if (status != NV_OK) {
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
|
||||
goto out;
|
||||
}
|
||||
|
||||
// Retain the block before unlocking the VA space lock so that we can
|
||||
// safely access it later.
|
||||
uvm_va_block_retain(block);
|
||||
@ -3451,7 +3485,7 @@ NV_STATUS uvm_test_evict_chunk(UVM_TEST_EVICT_CHUNK_PARAMS *params, struct file
|
||||
// Unlock the VA space to emulate real eviction better where a VA space lock
|
||||
// may not be held or may be held for a different VA space.
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_release_unlock(va_space, mm);
|
||||
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
|
||||
|
||||
if (params->eviction_mode == UvmTestEvictModeVirtual) {
|
||||
UVM_ASSERT(block);
|
||||
|
@ -428,10 +428,10 @@ uvm_chunk_sizes_mask_t uvm_cpu_chunk_get_allocation_sizes(void)
|
||||
return uvm_cpu_chunk_allocation_sizes & UVM_CPU_CHUNK_SIZES;
|
||||
}
|
||||
|
||||
static void uvm_cpu_chunk_set_phys_size(uvm_cpu_chunk_t *chunk, uvm_chunk_size_t size)
|
||||
static void uvm_cpu_chunk_set_size(uvm_cpu_chunk_t *chunk, uvm_chunk_size_t size)
|
||||
{
|
||||
#if !UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE()
|
||||
chunk->log2_phys_size = ilog2(size);
|
||||
chunk->log2_size = ilog2(size);
|
||||
#endif
|
||||
}
|
||||
|
||||
@ -440,13 +440,7 @@ uvm_chunk_size_t uvm_cpu_chunk_get_size(uvm_cpu_chunk_t *chunk)
|
||||
#if UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE()
|
||||
return PAGE_SIZE;
|
||||
#else
|
||||
uvm_chunk_size_t chunk_size;
|
||||
|
||||
UVM_ASSERT(chunk);
|
||||
UVM_ASSERT(uvm_cpu_chunk_get_phys_size(chunk));
|
||||
chunk_size = uvm_va_block_region_size(chunk->region);
|
||||
UVM_ASSERT(uvm_cpu_chunk_get_phys_size(chunk) >= chunk_size);
|
||||
return chunk_size;
|
||||
return ((uvm_chunk_size_t)1) << chunk->log2_size;
|
||||
#endif
|
||||
}
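
With the new field, the size helpers reduce to a log2 round trip. A sketch, assuming chunk is a valid physical CPU chunk and UVM_CHUNK_SIZE_64K is an allowed allocation size:

uvm_chunk_size_t size;

uvm_cpu_chunk_set_size(chunk, UVM_CHUNK_SIZE_64K);   // stores log2_size = 16
size = uvm_cpu_chunk_get_size(chunk);                // returns 1 << 16 == 64K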
|
||||
|
||||
@ -1036,8 +1030,7 @@ void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block, uvm_cpu_chunk_t *
|
||||
return;
|
||||
};
|
||||
|
||||
uvm_page_mask_region_clear(&va_block->cpu.allocated,
|
||||
uvm_va_block_region(page_index, page_index + uvm_cpu_chunk_num_pages(chunk)));
|
||||
uvm_page_mask_region_clear(&va_block->cpu.allocated, chunk->region);
|
||||
|
||||
if (uvm_page_mask_empty(&va_block->cpu.allocated)) {
|
||||
if (UVM_CPU_STORAGE_GET_TYPE(va_block) != UVM_CPU_CHUNK_STORAGE_CHUNK)
|
||||
@ -1191,7 +1184,7 @@ NV_STATUS uvm_cpu_chunk_alloc(uvm_va_block_t *va_block,
|
||||
}
|
||||
|
||||
chunk->page = page;
|
||||
uvm_cpu_chunk_set_phys_size(chunk, alloc_size);
|
||||
uvm_cpu_chunk_set_size(chunk, alloc_size);
|
||||
chunk->region = region;
|
||||
nv_kref_init(&chunk->refcount);
|
||||
uvm_spin_lock_init(&chunk->lock, UVM_LOCK_ORDER_LEAF);
|
||||
@ -1224,13 +1217,15 @@ error:
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_chunk_size_t new_size)
|
||||
NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block,
|
||||
uvm_cpu_chunk_t *chunk,
|
||||
uvm_chunk_size_t new_size,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_cpu_chunk_t **new_chunks)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS insert_status;
|
||||
uvm_cpu_chunk_t *new_chunk;
|
||||
uvm_page_index_t running_page_index = chunk->region.first;
|
||||
uvm_page_index_t next_page_index;
|
||||
uvm_page_index_t running_page_index = page_index;
|
||||
size_t num_new_chunks;
|
||||
size_t num_subchunk_pages;
|
||||
size_t i;
|
||||
@ -1238,21 +1233,13 @@ NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk,
|
||||
UVM_ASSERT(chunk);
|
||||
UVM_ASSERT(is_power_of_2(new_size));
|
||||
UVM_ASSERT(new_size < uvm_cpu_chunk_get_size(chunk));
|
||||
UVM_ASSERT(new_chunks);
|
||||
|
||||
// We subtract 1 from the computed number of subchunks because we always
|
||||
// keep the original chunk as the first in the block's list. This is so we
|
||||
// don't lose the physical chunk.
|
||||
// All new subchunks will point to the original chunk as their parent.
|
||||
num_new_chunks = (uvm_cpu_chunk_get_size(chunk) / new_size) - 1;
|
||||
num_new_chunks = uvm_cpu_chunk_get_size(chunk) / new_size;
|
||||
num_subchunk_pages = new_size / PAGE_SIZE;
|
||||
running_page_index += num_subchunk_pages;
|
||||
|
||||
// Remove the existing chunk from the block first. We re-insert it after
|
||||
// the split.
|
||||
uvm_cpu_chunk_remove_from_block(va_block, chunk, chunk->region.first);
|
||||
|
||||
for (i = 0; i < num_new_chunks; i++) {
|
||||
uvm_page_index_t relative_page_index = running_page_index - chunk->region.first;
|
||||
uvm_page_index_t relative_page_index = running_page_index - page_index;
|
||||
uvm_gpu_id_t id;
|
||||
|
||||
new_chunk = uvm_kvmalloc_zero(sizeof(*new_chunk));
|
||||
@ -1264,10 +1251,10 @@ NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk,
|
||||
new_chunk->page = chunk->page + relative_page_index;
|
||||
new_chunk->offset = chunk->offset + relative_page_index;
|
||||
new_chunk->region = uvm_va_block_region(running_page_index, running_page_index + num_subchunk_pages);
|
||||
uvm_cpu_chunk_set_phys_size(new_chunk, new_size);
|
||||
uvm_cpu_chunk_set_size(new_chunk, new_size);
|
||||
nv_kref_init(&new_chunk->refcount);
|
||||
|
||||
// This lock is unused for logical blocks but initialize it for
|
||||
// This lock is unused for logical chunks but initialize it for
|
||||
// consistency.
|
||||
uvm_spin_lock_init(&new_chunk->lock, UVM_LOCK_ORDER_LEAF);
|
||||
new_chunk->parent = chunk;
|
||||
@ -1286,109 +1273,64 @@ NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk,
|
||||
parent_dma_addr + (relative_page_index * PAGE_SIZE));
|
||||
}
|
||||
|
||||
status = uvm_cpu_chunk_insert_in_block(va_block, new_chunk, new_chunk->region.first);
|
||||
if (status != NV_OK) {
|
||||
uvm_cpu_chunk_put(new_chunk);
|
||||
goto error;
|
||||
}
|
||||
|
||||
new_chunks[i] = new_chunk;
|
||||
running_page_index += num_subchunk_pages;
|
||||
}
|
||||
|
||||
chunk->region = uvm_va_block_region(chunk->region.first, chunk->region.first + num_subchunk_pages);
|
||||
// Drop the original reference count on the parent (from its creation). This
|
||||
// is done so the parent's reference count goes to 0 when all the children
|
||||
// are released.
|
||||
uvm_cpu_chunk_put(chunk);
|
||||
|
||||
error:
|
||||
// Re-insert the split chunk. This is done unconditionally in both the
|
||||
// success and error paths. The difference is that on the success path,
|
||||
// the chunk's region has been updated.
|
||||
// This operation should never fail with NV_ERR_NO_MEMORY since all
|
||||
// state memory should already be allocated. Failing with other errors
|
||||
// is a programmer error.
|
||||
insert_status = uvm_cpu_chunk_insert_in_block(va_block, chunk, chunk->region.first);
|
||||
UVM_ASSERT(insert_status != NV_ERR_INVALID_ARGUMENT && insert_status != NV_ERR_INVALID_STATE);
|
||||
|
||||
if (status != NV_OK) {
|
||||
for_each_cpu_chunk_in_block_region_safe(new_chunk,
|
||||
running_page_index,
|
||||
next_page_index,
|
||||
va_block,
|
||||
chunk->region) {
|
||||
uvm_cpu_chunk_remove_from_block(va_block, new_chunk, new_chunk->region.first);
|
||||
while (i--)
|
||||
uvm_cpu_chunk_put(new_chunk);
|
||||
}
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
uvm_cpu_chunk_t *uvm_cpu_chunk_merge(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk)
|
||||
NV_STATUS uvm_cpu_chunk_merge(uvm_va_block_t *va_block,
|
||||
uvm_cpu_chunk_t **chunks,
|
||||
size_t num_merge_chunks,
|
||||
uvm_chunk_size_t merge_size,
|
||||
uvm_cpu_chunk_t **merged_chunk)
|
||||
{
|
||||
uvm_cpu_chunk_t *parent;
|
||||
uvm_cpu_chunk_t *subchunk;
|
||||
uvm_chunk_sizes_mask_t merge_sizes = uvm_cpu_chunk_get_allocation_sizes();
|
||||
uvm_chunk_size_t merge_chunk_size;
|
||||
uvm_chunk_size_t parent_phys_size;
|
||||
uvm_chunk_size_t chunk_size;
|
||||
uvm_va_block_region_t subchunk_region;
|
||||
uvm_page_index_t page_index;
|
||||
uvm_page_index_t next_page_index;
|
||||
NV_STATUS insert_status;
|
||||
size_t i;
|
||||
|
||||
UVM_ASSERT(chunk);
|
||||
parent = chunk->parent;
|
||||
UVM_ASSERT(chunks);
|
||||
UVM_ASSERT(num_merge_chunks > 0);
|
||||
UVM_ASSERT(merged_chunk);
|
||||
|
||||
// If the chunk does not have a parent, a merge cannot be done.
|
||||
parent = chunks[0]->parent;
|
||||
if (!parent)
|
||||
return NULL;
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
|
||||
chunk_size = uvm_cpu_chunk_get_size(chunk);
|
||||
parent_phys_size = uvm_cpu_chunk_get_phys_size(parent);
|
||||
chunk_size = uvm_cpu_chunk_get_size(chunks[0]);
|
||||
|
||||
// Remove all sizes above the parent's physical size.
|
||||
merge_sizes &= parent_phys_size | (parent_phys_size - 1);
|
||||
UVM_ASSERT(uvm_cpu_chunk_get_size(parent) == merge_size);
|
||||
UVM_ASSERT(merge_size > chunk_size);
|
||||
|
||||
// Remove all sizes including and below the chunk's current size.
|
||||
merge_sizes &= ~(chunk_size | (chunk_size - 1));
|
||||
for (i = 1; i < num_merge_chunks; i++) {
|
||||
if (chunks[i]->parent != parent || uvm_cpu_chunk_get_size(chunks[i]) != chunk_size)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
// Find the largest size that is fully contained within the VA block.
|
||||
for_each_chunk_size_rev(merge_chunk_size, merge_sizes) {
|
||||
NvU64 parent_start = uvm_cpu_chunk_get_virt_addr(va_block, parent);
|
||||
NvU64 parent_end = parent_start + parent_phys_size - 1;
|
||||
|
||||
if (uvm_va_block_contains_address(va_block, parent_start) &&
|
||||
uvm_va_block_contains_address(va_block, parent_start + merge_chunk_size - 1) &&
|
||||
IS_ALIGNED(parent_start, merge_chunk_size) &&
|
||||
IS_ALIGNED(parent_end + 1, merge_chunk_size))
|
||||
break;
|
||||
UVM_ASSERT(nv_kref_read(&chunks[i]->refcount) == 1);
|
||||
}
|
||||
|
||||
if (merge_chunk_size == UVM_CHUNK_SIZE_INVALID)
|
||||
return NULL;
|
||||
// Take a reference on the parent chunk so it doesn't get released when all
|
||||
// of the children are released below.
|
||||
uvm_cpu_chunk_get(parent);
|
||||
|
||||
if (uvm_cpu_chunk_get_size(parent) == merge_chunk_size)
|
||||
return NULL;
|
||||
for (i = 0; i < num_merge_chunks; i++)
|
||||
uvm_cpu_chunk_put(chunks[i]);
|
||||
|
||||
UVM_ASSERT(chunk_size == uvm_cpu_chunk_get_size(parent));
|
||||
UVM_ASSERT(IS_ALIGNED(merge_chunk_size, chunk_size));
|
||||
*merged_chunk = parent;
|
||||
|
||||
subchunk_region = uvm_va_block_region(parent->region.first + uvm_cpu_chunk_num_pages(parent),
|
||||
parent->region.first + (merge_chunk_size / PAGE_SIZE));
|
||||
|
||||
// Remove the first (parent) subchunk. It will be re-inserted later with an
|
||||
// updated region.
|
||||
uvm_cpu_chunk_remove_from_block(va_block, parent, parent->region.first);
|
||||
|
||||
for_each_cpu_chunk_in_block_region_safe(subchunk, page_index, next_page_index, va_block, subchunk_region) {
|
||||
UVM_ASSERT(subchunk);
|
||||
uvm_cpu_chunk_remove_from_block(va_block, subchunk, subchunk->region.first);
|
||||
uvm_cpu_chunk_put(subchunk);
|
||||
}
|
||||
|
||||
parent->region = uvm_va_block_region(parent->region.first, parent->region.first + (merge_chunk_size / PAGE_SIZE));
|
||||
insert_status = uvm_cpu_chunk_insert_in_block(va_block, parent, parent->region.first);
|
||||
UVM_ASSERT(insert_status != NV_ERR_INVALID_ARGUMENT && insert_status != NV_ERR_INVALID_STATE);
|
||||
|
||||
return parent;
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static uvm_cpu_chunk_t *get_parent_cpu_chunk(uvm_cpu_chunk_t *chunk)
|
||||
@ -1414,7 +1356,7 @@ static void check_cpu_dirty_flag(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_i
|
||||
// compound pages.
|
||||
page = chunk->page + page_index;
|
||||
if (PageDirty(page)) {
|
||||
bitmap_fill(chunk->dirty_bitmap, uvm_cpu_chunk_get_phys_size(chunk) / PAGE_SIZE);
|
||||
bitmap_fill(chunk->dirty_bitmap, uvm_cpu_chunk_get_size(chunk) / PAGE_SIZE);
|
||||
ClearPageDirty(page);
|
||||
}
|
||||
}
|
||||
@ -1432,7 +1374,7 @@ static uvm_cpu_chunk_t *get_parent_and_page_index(uvm_cpu_chunk_t *chunk, uvm_pa
|
||||
|
||||
page_index = chunk->offset + (page_index - chunk->region.first);
|
||||
parent = get_parent_cpu_chunk(chunk);
|
||||
UVM_ASSERT(page_index < uvm_cpu_chunk_get_phys_size(parent) / PAGE_SIZE);
|
||||
UVM_ASSERT(page_index < uvm_cpu_chunk_get_size(parent) / PAGE_SIZE);
|
||||
*out_page_index = page_index;
|
||||
return parent;
|
||||
}
|
||||
@ -1442,7 +1384,7 @@ void uvm_cpu_chunk_mark_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_inde
|
||||
uvm_cpu_chunk_t *parent;
|
||||
|
||||
parent = get_parent_and_page_index(chunk, &page_index);
|
||||
if (uvm_cpu_chunk_get_phys_size(parent) == PAGE_SIZE) {
|
||||
if (uvm_cpu_chunk_get_size(parent) == PAGE_SIZE) {
|
||||
SetPageDirty(parent->page);
|
||||
return;
|
||||
}
|
||||
@ -1457,7 +1399,7 @@ void uvm_cpu_chunk_mark_clean(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_inde
|
||||
uvm_cpu_chunk_t *parent;
|
||||
|
||||
parent = get_parent_and_page_index(chunk, &page_index);
|
||||
if (uvm_cpu_chunk_get_phys_size(parent) == PAGE_SIZE) {
|
||||
if (uvm_cpu_chunk_get_size(parent) == PAGE_SIZE) {
|
||||
ClearPageDirty(parent->page);
|
||||
return;
|
||||
}
|
||||
@ -1474,7 +1416,7 @@ bool uvm_cpu_chunk_is_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index)
|
||||
bool dirty;
|
||||
|
||||
parent = get_parent_and_page_index(chunk, &page_index);
|
||||
if (uvm_cpu_chunk_get_phys_size(parent) == PAGE_SIZE)
|
||||
if (uvm_cpu_chunk_get_size(parent) == PAGE_SIZE)
|
||||
return PageDirty(parent->page);
|
||||
|
||||
uvm_spin_lock(&parent->lock);
|
||||
|
@ -181,6 +181,9 @@ size_t uvm_pmm_sysmem_mappings_dma_to_virt(uvm_pmm_sysmem_mappings_t *sysmem_map
|
||||
#if UVM_CPU_CHUNK_SIZES == PAGE_SIZE
|
||||
#define UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE() 1
|
||||
typedef struct page uvm_cpu_chunk_t;
|
||||
|
||||
#define UVM_CPU_CHUNK_PAGE_INDEX(chunk, page_index) (page_index)
|
||||
|
||||
#else
|
||||
#define UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE() 0
|
||||
typedef struct uvm_cpu_chunk_struct uvm_cpu_chunk_t;
|
||||
@ -224,13 +227,10 @@ struct uvm_cpu_chunk_struct
|
||||
// parent.
|
||||
nv_kref_t refcount;
|
||||
|
||||
// Size of the chunk at the time of its creation.
|
||||
// For chunks, which are the result of a split, this
|
||||
// value will be the size of the chunk prior to the
|
||||
// split.
|
||||
// For chunks resulting from page allocations (physical),
|
||||
// Size of the chunk.
|
||||
// For chunks resulting from page allocations (physical chunks),
|
||||
// this value is the size of the physical allocation.
|
||||
size_t log2_phys_size : order_base_2(UVM_CHUNK_SIZE_MASK_SIZE);
|
||||
size_t log2_size : order_base_2(UVM_CHUNK_SIZE_MASK_SIZE);
|
||||
|
||||
struct {
|
||||
// Per-GPU array of DMA mapping addresses for the chunk.
|
||||
@ -252,6 +252,8 @@ struct uvm_cpu_chunk_struct
|
||||
// for logical chunks this will be NULL;
|
||||
unsigned long *dirty_bitmap;
|
||||
};
|
||||
|
||||
#define UVM_CPU_CHUNK_PAGE_INDEX(chunk, page_index) (chunk->region.first)
|
||||
#endif // UVM_CPU_CHUNK_SIZES == PAGE_SIZE
|
||||
|
||||
// Return the set of allowed CPU chunk allocation sizes.
|
||||
@ -302,22 +304,6 @@ void uvm_cpu_chunk_remove_from_block(uvm_va_block_t *va_block, uvm_cpu_chunk_t *
|
||||
// NULL is returned.
|
||||
uvm_cpu_chunk_t *uvm_cpu_chunk_get_chunk_for_page(uvm_va_block_t *block, uvm_page_index_t page_index);
|
||||
|
||||
// Return the physical size of the CPU chunk.
|
||||
// The physical size of the CPU chunk is the size of the physical CPU
|
||||
// memory backing the CPU chunk. It is set at CPU chunk allocation time
|
||||
static uvm_chunk_size_t uvm_cpu_chunk_get_phys_size(uvm_cpu_chunk_t *chunk)
|
||||
{
|
||||
#if UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE()
|
||||
return (uvm_chunk_size_t)PAGE_SIZE;
|
||||
#else
|
||||
return ((uvm_chunk_size_t)1) << chunk->log2_phys_size;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Return the size of the CPU chunk. While the physical size of the CPU
|
||||
// chunk reflects the size of the physical memory backing the chunk, this
|
||||
// size is the effective size of the chunk and changes as result of CPU
|
||||
// chunk splits.
|
||||
uvm_chunk_size_t uvm_cpu_chunk_get_size(uvm_cpu_chunk_t *chunk);
|
||||
|
||||
// Return the number of base system pages covered by the CPU chunk.
|
||||
@ -370,35 +356,27 @@ NvU64 uvm_cpu_chunk_get_gpu_mapping_addr(uvm_va_block_t *block,
|
||||
// new_size has to be one of the supported CPU chunk allocation sizes and has to
|
||||
// be smaller than the current size of chunk.
|
||||
//
|
||||
// On success, NV_OK is returned. All new chunks will have chunk as parent and
|
||||
// chunk's size will have been updated to new_size.
|
||||
//
|
||||
// Note that due to the way CPU chunks are managed and split, the number of
|
||||
// newly created chunks will be (size_of(chunk) / new_size) - 1.
|
||||
//
|
||||
// On failure NV_ERR_NO_MEMORY will be returned. chunk's size will not be
|
||||
// modified.
|
||||
NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_chunk_size_t new_size);
|
||||
// On success, NV_OK is returned. On failure NV_ERR_NO_MEMORY will be returned.
|
||||
NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block,
|
||||
uvm_cpu_chunk_t *chunk,
|
||||
uvm_chunk_size_t new_size,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_cpu_chunk_t **new_chunks);
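
A sketch of calling the new split interface; the subchunk count follows the size / new_size rule used by the implementation, and va_block, chunk, new_size and page_index (the chunk's first page in the block) are assumed from the caller:

size_t num_subchunks = uvm_cpu_chunk_get_size(chunk) / new_size;
uvm_cpu_chunk_t **subchunks = uvm_kvmalloc_zero(num_subchunks * sizeof(*subchunks));
NV_STATUS status = NV_ERR_NO_MEMORY;

if (subchunks)
    status = uvm_cpu_chunk_split(va_block, chunk, new_size, page_index, subchunks);

// On NV_OK each subchunks[i] is a new_size chunk whose parent is the original
// chunk. The array itself is only bookkeeping and can be freed once the caller
// has recorded the subchunks.
uvm_kvfree(subchunks);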
|
||||
|
||||
// Merge chunk's parent to the highest possible CPU chunk size fully contained
|
||||
// within the parent's owning VA block.
|
||||
// Merge chunks to merge_size.
|
||||
//
|
||||
// The size to which chunks are merged is determined by finding the largest
|
||||
// size from the set of allowed CPU chunk sizes that satisfies both criteria
|
||||
// below:
|
||||
// * The VA range of the parent chunk resulting from the merge has to be
|
||||
// fully contained within the VA block.
|
||||
// * The start and end VA addresses of the parent based on its physical
|
||||
// size have to be aligned to the merge size.
|
||||
// All input chunks must have the same parent and size. If not,
|
||||
// NV_ERR_INVALID_ARGUMENT is returned.
|
||||
//
|
||||
// It is possible that a merge cannot be done if chunk does not have a parent
|
||||
// (it is a physical chunk), chunk's owning VA block is not the same as
|
||||
// its parent's owning VA block, or there is no chunk size that satisfied both
|
||||
// the above criteria.
|
||||
// If a merge cannot be done, NV_WARN_NOTHING_TO_DO is returned.
|
||||
//
|
||||
// Return a pointer to the merged chunk. If a merge could not be done, return
|
||||
// NULL.
|
||||
uvm_cpu_chunk_t *uvm_cpu_chunk_merge(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk);
|
||||
// On success, NV_OK is returned and merged_chunk is set to point to the
|
||||
// merged chunk.
|
||||
NV_STATUS uvm_cpu_chunk_merge(uvm_va_block_t *va_block,
|
||||
uvm_cpu_chunk_t **chunks,
|
||||
size_t num_merge_chunks,
|
||||
uvm_chunk_size_t merge_size,
|
||||
uvm_cpu_chunk_t **merged_chunk);
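
And the reverse direction with the new merge interface; subchunks[] is assumed to hold every equally sized child of one parent, and merge_size is assumed to equal the parent's size:

uvm_cpu_chunk_t *merged = NULL;
NV_STATUS status;

status = uvm_cpu_chunk_merge(va_block, subchunks, num_subchunks, merge_size, &merged);
if (status == NV_OK) {
    // merged now points to the parent chunk covering merge_size.
}
else if (status == NV_WARN_NOTHING_TO_DO) {
    // subchunks[0] has no parent (it is a physical chunk), so nothing was merged.
}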
|
||||
|
||||
// Mark the CPU sub-page page_index in the CPU chunk as dirty.
|
||||
// page_index has to be a page within the chunk's region.
|
||||
@ -414,14 +392,22 @@ bool uvm_cpu_chunk_is_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index)
|
||||
|
||||
#else // UVM_CPU_CHUNK_SIZE_IS_PAGE_SIZE()
|
||||
|
||||
static NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk, uvm_chunk_size_t new_size)
|
||||
static NV_STATUS uvm_cpu_chunk_split(uvm_va_block_t *va_block,
|
||||
uvm_cpu_chunk_t *chunk,
|
||||
uvm_chunk_size_t new_size,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_cpu_chunk_t **new_chunks)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static uvm_cpu_chunk_t *uvm_cpu_chunk_merge(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk)
|
||||
static NV_STATUS uvm_cpu_chunk_merge(uvm_va_block_t *va_block,
|
||||
uvm_cpu_chunk_t **chunk,
|
||||
size_t num_merge_chunks,
|
||||
uvm_chunk_size_t merge_size,
|
||||
uvm_cpu_chunk_t **merged_chunk)
|
||||
{
|
||||
return NULL;
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
}
|
||||
|
||||
static void uvm_cpu_chunk_mark_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index)
|
||||
|
@ -101,7 +101,7 @@ static NV_STATUS split_as_needed(uvm_va_space_t *va_space,
|
||||
|
||||
UVM_ASSERT(PAGE_ALIGNED(addr));
|
||||
|
||||
// Look for UVM managed allocations first, then look for HMM policies.
|
||||
// Look for managed allocations first, then look for HMM policies.
|
||||
va_range = uvm_va_range_find(va_space, addr);
|
||||
if (!va_range)
|
||||
return uvm_hmm_split_as_needed(va_space, addr, split_needed_cb, data);
|
||||
@ -203,6 +203,10 @@ NV_STATUS uvm_va_block_set_preferred_location_locked(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context)
|
||||
{
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
// TODO: Bug 1750144: remove this restriction when HMM handles setting
|
||||
// the preferred location semantics instead of just recording the policy.
|
||||
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
|
||||
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
|
||||
|
||||
uvm_va_block_mark_cpu_dirty(va_block);
|
||||
|
||||
@ -432,10 +436,9 @@ NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
|
||||
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
|
||||
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
|
||||
|
||||
va_block_context->policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
|
||||
// Read duplication takes precedence over SetAccesedBy. Do not add mappings
|
||||
// Read duplication takes precedence over SetAccessedBy. Do not add mappings
|
||||
// if read duplication is enabled.
|
||||
if (uvm_va_policy_is_read_duplicate(va_block_context->policy, va_space))
|
||||
return NV_OK;
|
||||
@ -617,6 +620,10 @@ NV_STATUS uvm_va_block_set_read_duplication(uvm_va_block_t *va_block,
|
||||
NV_STATUS status;
|
||||
uvm_va_block_retry_t va_block_retry;
|
||||
|
||||
// TODO: Bug 3660922: need to implement HMM read duplication support.
|
||||
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
|
||||
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
|
||||
|
||||
status = UVM_VA_BLOCK_LOCK_RETRY(va_block, &va_block_retry,
|
||||
va_block_set_read_duplication_locked(va_block,
|
||||
&va_block_retry,
|
||||
@ -714,6 +721,9 @@ NV_STATUS uvm_va_block_unset_read_duplication(uvm_va_block_t *va_block,
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_tracker_t local_tracker = UVM_TRACKER_INIT();
|
||||
|
||||
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
|
||||
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
|
||||
|
||||
// Restore all SetAccessedBy mappings
|
||||
status = UVM_VA_BLOCK_LOCK_RETRY(va_block, &va_block_retry,
|
||||
va_block_unset_read_duplication_locked(va_block,
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2018-2021 NVIDIA Corporation
|
||||
Copyright (c) 2018-2022 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -54,7 +54,7 @@ NV_STATUS uvm_populate_pageable_vma(struct vm_area_struct *vma,
|
||||
{
|
||||
unsigned long vma_num_pages;
|
||||
unsigned long outer = start + length;
|
||||
const bool is_writable = is_write_populate(vma, populate_permissions);
|
||||
unsigned int gup_flags = is_write_populate(vma, populate_permissions) ? FOLL_WRITE : 0;
|
||||
struct mm_struct *mm = vma->vm_mm;
|
||||
unsigned long vm_flags = vma->vm_flags;
|
||||
bool uvm_managed_vma;
|
||||
@ -97,7 +97,10 @@ NV_STATUS uvm_populate_pageable_vma(struct vm_area_struct *vma,
|
||||
if (uvm_managed_vma)
|
||||
uvm_record_unlock_mmap_lock_read(mm);
|
||||
|
||||
ret = NV_GET_USER_PAGES_REMOTE(NULL, mm, start, vma_num_pages, is_writable, 0, pages, NULL);
|
||||
if (touch)
|
||||
ret = NV_PIN_USER_PAGES_REMOTE(mm, start, vma_num_pages, gup_flags, pages, NULL, NULL);
|
||||
else
|
||||
ret = NV_GET_USER_PAGES_REMOTE(mm, start, vma_num_pages, gup_flags, pages, NULL, NULL);
|
||||
|
||||
if (uvm_managed_vma)
|
||||
uvm_record_lock_mmap_lock_read(mm);
|
||||
@ -114,7 +117,7 @@ NV_STATUS uvm_populate_pageable_vma(struct vm_area_struct *vma,
|
||||
|
||||
for (i = 0; i < ret; i++) {
|
||||
UVM_ASSERT(pages[i]);
|
||||
put_page(pages[i]);
|
||||
NV_UNPIN_USER_PAGE(pages[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -127,7 +130,7 @@ NV_STATUS uvm_populate_pageable_vma(struct vm_area_struct *vma,
|
||||
|
||||
for (i = 0; i < vma_num_pages; i++) {
|
||||
uvm_touch_page(pages[i]);
|
||||
put_page(pages[i]);
|
||||
NV_UNPIN_USER_PAGE(pages[i]);
|
||||
}
|
||||
}
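
Condensed from the hunk above, the new population path pins pages only when they will be touched and releases them with the matching unpin helper; variable names are taken from the function, and this is a sketch rather than the full error handling:

if (touch)
    ret = NV_PIN_USER_PAGES_REMOTE(mm, start, vma_num_pages, gup_flags, pages, NULL, NULL);
else
    ret = NV_GET_USER_PAGES_REMOTE(mm, start, vma_num_pages, gup_flags, pages, NULL, NULL);

if (touch && ret == vma_num_pages) {
    unsigned long i;

    for (i = 0; i < vma_num_pages; i++) {
        uvm_touch_page(pages[i]);
        NV_UNPIN_USER_PAGE(pages[i]);
    }
}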
|
||||
|
||||
|
@ -68,31 +68,7 @@ NV_STATUS uvm_procfs_init()
|
||||
|
||||
void uvm_procfs_exit()
|
||||
{
|
||||
uvm_procfs_destroy_entry(uvm_proc_dir);
|
||||
}
|
||||
|
||||
// TODO: Bug 1767237: Copied from nv-procfs.c. Refactor it out to
|
||||
// nv-procfs-common.c.
|
||||
static void procfs_destroy_entry_with_root(struct proc_dir_entry *entry, struct proc_dir_entry *delimiter)
|
||||
{
|
||||
#if defined(NV_PROC_REMOVE_PRESENT)
|
||||
proc_remove(entry);
|
||||
#else
|
||||
while (entry) {
|
||||
struct proc_dir_entry *next = entry->next;
|
||||
if (entry->subdir)
|
||||
procfs_destroy_entry_with_root(entry->subdir, delimiter);
|
||||
remove_proc_entry(entry->name, entry->parent);
|
||||
if (entry == delimiter)
|
||||
break;
|
||||
entry = next;
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
void uvm_procfs_destroy_entry(struct proc_dir_entry *entry)
|
||||
{
|
||||
procfs_destroy_entry_with_root(entry, entry);
|
||||
proc_remove(uvm_proc_dir);
|
||||
}
|
||||
|
||||
struct proc_dir_entry *uvm_procfs_get_gpu_base_dir()
|
||||
|
@ -53,8 +53,6 @@ static bool uvm_procfs_is_debug_enabled(void)
|
||||
struct proc_dir_entry *uvm_procfs_get_gpu_base_dir(void);
|
||||
struct proc_dir_entry *uvm_procfs_get_cpu_base_dir(void);
|
||||
|
||||
void uvm_procfs_destroy_entry(struct proc_dir_entry *entry);
|
||||
|
||||
int uvm_procfs_open_callback(void);
|
||||
void uvm_procfs_close_callback(void);
|
||||
|
||||
|
@ -121,7 +121,7 @@ NV_STATUS uvm_pushbuffer_create(uvm_channel_manager_t *channel_manager, uvm_push
|
||||
goto error;
|
||||
|
||||
// Verify the GPU can access the pushbuffer.
|
||||
UVM_ASSERT(uvm_pushbuffer_get_gpu_va_base(pushbuffer) + UVM_PUSHBUFFER_SIZE < gpu->parent->max_host_va);
|
||||
UVM_ASSERT((uvm_pushbuffer_get_gpu_va_base(pushbuffer) + UVM_PUSHBUFFER_SIZE - 1) < gpu->parent->max_host_va);
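
The previous assert rejected a pushbuffer whose final byte was still addressable: the last byte sits at base + UVM_PUSHBUFFER_SIZE - 1, so that is the address that must stay below max_host_va. A worked case with illustrative numbers:

// Illustrative values only.
// base        = 0x3fc00000   (uvm_pushbuffer_get_gpu_va_base())
// size        = 0x00400000   (UVM_PUSHBUFFER_SIZE)
// max_host_va = 0x40000000
//
// Old check:  base + size < max_host_va       -> 0x40000000 < 0x40000000, fails,
//             even though the highest byte used is 0x3fffffff, which is in range.
// New check:  (base + size - 1) < max_host_va -> 0x3fffffff < 0x40000000, passes.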
|
||||
|
||||
bitmap_fill(pushbuffer->idle_chunks, UVM_PUSHBUFFER_CHUNKS);
|
||||
bitmap_fill(pushbuffer->available_chunks, UVM_PUSHBUFFER_CHUNKS);
|
||||
@ -372,7 +372,7 @@ void uvm_pushbuffer_destroy(uvm_pushbuffer_t *pushbuffer)
|
||||
if (pushbuffer == NULL)
|
||||
return;
|
||||
|
||||
uvm_procfs_destroy_entry(pushbuffer->procfs.info_file);
|
||||
proc_remove(pushbuffer->procfs.info_file);
|
||||
|
||||
uvm_rm_mem_free(pushbuffer->memory);
|
||||
uvm_kvfree(pushbuffer);
|
||||
@ -448,7 +448,7 @@ void uvm_pushbuffer_end_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm
|
||||
{
|
||||
uvm_pushbuffer_chunk_t *chunk = gpfifo_to_chunk(pushbuffer, gpfifo);
|
||||
|
||||
uvm_assert_spinlock_locked(&push->channel->pool->lock);
|
||||
uvm_channel_pool_assert_locked(push->channel->pool);
|
||||
|
||||
uvm_spin_lock(&pushbuffer->lock);
|
||||
|
||||
|
@ -166,30 +166,6 @@ void uvm_range_tree_shrink_node(uvm_range_tree_t *tree, uvm_range_tree_node_t *n
|
||||
node->end = new_end;
|
||||
}
|
||||
|
||||
void uvm_range_tree_adjust_interval(uvm_range_tree_t *tree,
|
||||
NvU64 addr,
|
||||
NvU64 *startp,
|
||||
NvU64 *endp)
|
||||
{
|
||||
uvm_range_tree_node_t *node;
|
||||
NvU64 start = *startp;
|
||||
NvU64 end = *endp;
|
||||
|
||||
uvm_range_tree_for_each_in(node, tree, start, end) {
|
||||
if (node->start > addr) {
|
||||
end = node->start - 1;
|
||||
break;
|
||||
}
|
||||
else if (node->end < addr)
|
||||
start = node->end + 1;
|
||||
else
|
||||
UVM_ASSERT_MSG(0, "Found node at address 0x%llx\n", addr);
|
||||
}
|
||||
|
||||
*startp = start;
|
||||
*endp = end;
|
||||
}
|
||||
|
||||
void uvm_range_tree_split(uvm_range_tree_t *tree,
|
||||
uvm_range_tree_node_t *existing,
|
||||
uvm_range_tree_node_t *new)
|
||||
@ -261,3 +237,55 @@ uvm_range_tree_node_t *uvm_range_tree_iter_first(uvm_range_tree_t *tree, NvU64 s
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_range_tree_find_hole(uvm_range_tree_t *tree, NvU64 addr, NvU64 *start, NvU64 *end)
|
||||
{
|
||||
uvm_range_tree_node_t *node;
|
||||
|
||||
// Find the first node on or after addr, if any
|
||||
node = uvm_range_tree_iter_first(tree, addr, ULLONG_MAX);
|
||||
if (node) {
|
||||
if (node->start <= addr)
|
||||
return NV_ERR_UVM_ADDRESS_IN_USE;
|
||||
|
||||
// node->start can't be 0, otherwise it would contain addr
|
||||
if (end)
|
||||
*end = node->start - 1;
|
||||
|
||||
node = uvm_range_tree_prev(tree, node);
|
||||
}
|
||||
else {
|
||||
// All nodes in the tree must come before addr, if any exist
|
||||
node = uvm_range_tree_last(tree);
|
||||
if (end)
|
||||
*end = ULLONG_MAX;
|
||||
}
|
||||
|
||||
if (start) {
|
||||
if (node)
|
||||
*start = node->end + 1;
|
||||
else
|
||||
*start = 0;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_range_tree_find_hole_in(uvm_range_tree_t *tree, NvU64 addr, NvU64 *start, NvU64 *end)
|
||||
{
|
||||
NvU64 temp_start, temp_end;
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(start);
|
||||
UVM_ASSERT(end);
|
||||
UVM_ASSERT(*start <= addr);
|
||||
UVM_ASSERT(*end >= addr);
|
||||
|
||||
status = uvm_range_tree_find_hole(tree, addr, &temp_start, &temp_end);
|
||||
if (status == NV_OK) {
|
||||
*start = max(temp_start, *start);
|
||||
*end = min(temp_end, *end);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
@ -73,11 +73,6 @@ static void uvm_range_tree_remove(uvm_range_tree_t *tree, uvm_range_tree_node_t
|
||||
// lesser or equal to node->end.
|
||||
void uvm_range_tree_shrink_node(uvm_range_tree_t *tree, uvm_range_tree_node_t *node, NvU64 new_start, NvU64 new_end);
|
||||
|
||||
// Adjust start and end to be the largest contiguous interval surrounding addr
|
||||
// between *startp and *endp and without overlapping an existing tree node.
|
||||
// This function assumes there is no node that includes addr.
|
||||
void uvm_range_tree_adjust_interval(uvm_range_tree_t *tree, NvU64 addr, NvU64 *startp, NvU64 *endp);
|
||||
|
||||
// Splits an existing node into two pieces, with the new node always after the
|
||||
// existing node. The caller must set new->start before calling this function.
|
||||
// existing should not be modified by the caller. On return, existing will
|
||||
@ -100,6 +95,16 @@ uvm_range_tree_node_t *uvm_range_tree_merge_next(uvm_range_tree_t *tree, uvm_ran
|
||||
// Returns the node containing addr, if any
|
||||
uvm_range_tree_node_t *uvm_range_tree_find(uvm_range_tree_t *tree, NvU64 addr);
|
||||
|
||||
// Find the largest hole containing addr but not containing any nodes. If addr
// is contained by a node, NV_ERR_UVM_ADDRESS_IN_USE is returned.
//
// start and end may be NULL.
NV_STATUS uvm_range_tree_find_hole(uvm_range_tree_t *tree, NvU64 addr, NvU64 *start, NvU64 *end);

// Like uvm_range_tree_find_hole, but start and end are in/out parameters that
// clamp the range.
NV_STATUS uvm_range_tree_find_hole_in(uvm_range_tree_t *tree, NvU64 addr, NvU64 *start, NvU64 *end);
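
A sketch of the difference between the two lookups, assuming a tree variable that currently holds the nodes [0x1000, 0x1fff] and [0x4000, 0x4fff]:

NvU64 start, end;
NV_STATUS status;

// The hole around 0x3000 is bounded by the two neighbouring nodes.
status = uvm_range_tree_find_hole(&tree, 0x3000, &start, &end);
// status == NV_OK, start == 0x2000, end == 0x3fff

// The _in variant clamps the hole to the caller's window, which must contain addr.
start = 0x2800;
end = 0x5000;
status = uvm_range_tree_find_hole_in(&tree, 0x3000, &start, &end);
// status == NV_OK, start == 0x2800, end == 0x3fff

// An address inside an existing node fails.
status = uvm_range_tree_find_hole(&tree, 0x1234, NULL, NULL);
// status == NV_ERR_UVM_ADDRESS_IN_USE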
|
||||
|
||||
// Returns the prev/next node in address order, or NULL if none exists
|
||||
static uvm_range_tree_node_t *uvm_range_tree_prev(uvm_range_tree_t *tree, uvm_range_tree_node_t *node)
|
||||
{
|
||||
@ -118,17 +123,6 @@ static uvm_range_tree_node_t *uvm_range_tree_next(uvm_range_tree_t *tree, uvm_ra
|
||||
// Returns the first node in the range [start, end], if any
|
||||
uvm_range_tree_node_t *uvm_range_tree_iter_first(uvm_range_tree_t *tree, NvU64 start, NvU64 end);
|
||||
|
||||
// Return true if the range tree is empty.
|
||||
static bool uvm_range_tree_empty(uvm_range_tree_t *tree)
|
||||
{
|
||||
return list_empty(&tree->head);
|
||||
}
|
||||
|
||||
static NvU64 uvm_range_tree_node_size(uvm_range_tree_node_t *node)
|
||||
{
|
||||
return node->end - node->start + 1;
|
||||
}
|
||||
|
||||
// Returns the node following the provided node in address order, if that node's
|
||||
// start <= the provided end.
|
||||
static uvm_range_tree_node_t *uvm_range_tree_iter_next(uvm_range_tree_t *tree, uvm_range_tree_node_t *node, NvU64 end)
|
||||
@ -139,6 +133,25 @@ static uvm_range_tree_node_t *uvm_range_tree_iter_next(uvm_range_tree_t *tree, u
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Return true if the range tree is empty.
|
||||
static bool uvm_range_tree_empty(uvm_range_tree_t *tree)
|
||||
{
|
||||
return list_empty(&tree->head);
|
||||
}
|
||||
|
||||
// Return the last node in the tree, or NULL if none exists
|
||||
static uvm_range_tree_node_t *uvm_range_tree_last(uvm_range_tree_t *tree)
|
||||
{
|
||||
if (list_empty(&tree->head))
|
||||
return NULL;
|
||||
return list_last_entry(&tree->head, uvm_range_tree_node_t, list);
|
||||
}
|
||||
|
||||
static NvU64 uvm_range_tree_node_size(uvm_range_tree_node_t *node)
|
||||
{
|
||||
return node->end - node->start + 1;
|
||||
}
|
||||
|
||||
#define uvm_range_tree_for_each(node, tree) list_for_each_entry((node), &(tree)->head, list)
|
||||
|
||||
#define uvm_range_tree_for_each_safe(node, next, tree) \
|
||||
|
@ -303,10 +303,93 @@ error:
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS rtt_check_between(rtt_state_t *state, uvm_range_tree_node_t *lower, uvm_range_tree_node_t *upper)
|
||||
{
|
||||
bool hole_exists = true;
|
||||
NvU64 hole_start = 0, hole_end = ULLONG_MAX;
|
||||
NvU64 test_start, test_end;
|
||||
|
||||
if (lower) {
|
||||
if (lower->end == ULLONG_MAX) {
|
||||
UVM_ASSERT(!upper);
|
||||
hole_exists = false;
|
||||
}
|
||||
else {
|
||||
hole_start = lower->end + 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (upper) {
|
||||
if (upper->start == 0) {
|
||||
UVM_ASSERT(!lower);
|
||||
hole_exists = false;
|
||||
}
|
||||
else {
|
||||
hole_end = upper->start - 1;
|
||||
}
|
||||
}
|
||||
|
||||
if (hole_start > hole_end)
|
||||
hole_exists = false;
|
||||
|
||||
if (hole_exists) {
|
||||
size_t i;
|
||||
NvU64 hole_mid = hole_start + ((hole_end - hole_start) / 2);
|
||||
NvU64 inputs[] = {hole_start, hole_mid, hole_end};
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(inputs); i++) {
|
||||
TEST_CHECK_RET(uvm_range_tree_find(&state->tree, inputs[i]) == NULL);
|
||||
|
||||
TEST_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, inputs[i], &test_start, &test_end));
|
||||
TEST_CHECK_RET(test_start == hole_start);
TEST_CHECK_RET(test_end == hole_end);

test_start = 0;
test_end = ULLONG_MAX;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, inputs[i], &test_start, &test_end));
TEST_CHECK_RET(test_start == hole_start);
TEST_CHECK_RET(test_end == hole_end);

test_start = hole_start;
test_end = inputs[i];
TEST_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, inputs[i], &test_start, &test_end));
TEST_CHECK_RET(test_start == hole_start);
TEST_CHECK_RET(test_end == inputs[i]);

test_start = inputs[i];
test_end = hole_end;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, inputs[i], &test_start, &test_end));
TEST_CHECK_RET(test_start == inputs[i]);
TEST_CHECK_RET(test_end == hole_end);
}
}
else {
test_start = 0;
test_end = ULLONG_MAX;

if (lower) {
MEM_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, lower->end, NULL, NULL),
NV_ERR_UVM_ADDRESS_IN_USE);
MEM_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, lower->end, &test_start, &test_end),
NV_ERR_UVM_ADDRESS_IN_USE);
}

if (upper) {
MEM_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, upper->start, NULL, NULL),
NV_ERR_UVM_ADDRESS_IN_USE);
MEM_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, upper->start, &test_start, &test_end),
NV_ERR_UVM_ADDRESS_IN_USE);
}
}

return NV_OK;
}

static NV_STATUS rtt_check_node(rtt_state_t *state, uvm_range_tree_node_t *node)
{
uvm_range_tree_node_t *temp, *prev, *next;
NvU64 start, mid, end;
NvU64 hole_start = 0, hole_end = ULLONG_MAX;

start = node->start;
end = node->end;
@ -320,6 +403,18 @@ static NV_STATUS rtt_check_node(rtt_state_t *state, uvm_range_tree_node_t *node)
TEST_CHECK_RET(uvm_range_tree_find(&state->tree, start) == node);
TEST_CHECK_RET(uvm_range_tree_find(&state->tree, mid) == node);
TEST_CHECK_RET(uvm_range_tree_find(&state->tree, end) == node);

MEM_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, start, NULL, NULL), NV_ERR_UVM_ADDRESS_IN_USE);
MEM_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, mid, NULL, NULL), NV_ERR_UVM_ADDRESS_IN_USE);
MEM_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, end, NULL, NULL), NV_ERR_UVM_ADDRESS_IN_USE);

MEM_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, start, &hole_start, &hole_end),
NV_ERR_UVM_ADDRESS_IN_USE);
MEM_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, mid, &hole_start, &hole_end),
NV_ERR_UVM_ADDRESS_IN_USE);
MEM_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, end, &hole_start, &hole_end),
NV_ERR_UVM_ADDRESS_IN_USE);

TEST_CHECK_RET(uvm_range_tree_node_size(node) == end - start + 1);

if (end < ULLONG_MAX)
@ -327,6 +422,8 @@ static NV_STATUS rtt_check_node(rtt_state_t *state, uvm_range_tree_node_t *node)

uvm_range_tree_for_each_in(temp, &state->tree, start, end)
TEST_CHECK_RET(temp == node);
uvm_range_tree_for_each_in_safe(temp, next, &state->tree, start, end)
TEST_CHECK_RET(temp == node);

prev = uvm_range_tree_prev(&state->tree, node);
if (prev) {
@ -341,11 +438,16 @@ static NV_STATUS rtt_check_node(rtt_state_t *state, uvm_range_tree_node_t *node)
if (next) {
TEST_CHECK_RET(node->end < next->start);
TEST_CHECK_RET(uvm_range_tree_prev(&state->tree, next) == node);
TEST_CHECK_RET(uvm_range_tree_last(&state->tree) != node);
}
else {
TEST_CHECK_RET(uvm_range_tree_iter_next(&state->tree, node, ULLONG_MAX) == NULL);
TEST_CHECK_RET(uvm_range_tree_last(&state->tree) == node);
}

TEST_NV_CHECK_RET(rtt_check_between(state, prev, node));
TEST_NV_CHECK_RET(rtt_check_between(state, node, next));

return NV_OK;
}
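
The checks above lean on the UVM test helper macros. As a rough, hypothetical sketch of their shape (the real definitions live in the driver's test headers and differ in detail), they simply bail out of the calling test function when a condition or NV_STATUS check fails:

// Illustrative only -- simplified stand-ins for the real macros.
#define TEST_CHECK_RET(cond)           do { if (!(cond)) return NV_ERR_INVALID_STATE; } while (0)
#define TEST_NV_CHECK_RET(call)        do { NV_STATUS s_ = (call); if (s_ != NV_OK) return s_; } while (0)
#define MEM_NV_CHECK_RET(call, expect) do { if ((call) != (expect)) return NV_ERR_INVALID_STATE; } while (0)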

@ -362,13 +464,17 @@ static NV_STATUS rtt_check_iterator_all(rtt_state_t *state)
TEST_CHECK_RET(prev->end < node->start);
TEST_CHECK_RET(uvm_range_tree_prev(&state->tree, node) == prev);

TEST_NV_CHECK_RET(rtt_check_between(state, prev, node));

++iter_count;
prev = node;
expected = uvm_range_tree_next(&state->tree, node);
}
TEST_CHECK_RET(expected == NULL);

TEST_CHECK_RET(expected == NULL);
TEST_CHECK_RET(uvm_range_tree_last(&state->tree) == prev);
TEST_CHECK_RET(iter_count == state->count);
TEST_NV_CHECK_RET(rtt_check_between(state, prev, NULL));

iter_count = 0;
expected = NULL;
@ -381,13 +487,17 @@ static NV_STATUS rtt_check_iterator_all(rtt_state_t *state)
TEST_CHECK_RET(prev->end < node->start);
TEST_CHECK_RET(uvm_range_tree_prev(&state->tree, node) == prev);

// Skip rtt_check_between since it was done in the loop above

++iter_count;
prev = node;
expected = uvm_range_tree_next(&state->tree, node);
}
TEST_CHECK_RET(expected == NULL);

TEST_CHECK_RET(expected == NULL);
TEST_CHECK_RET(uvm_range_tree_last(&state->tree) == prev);
TEST_CHECK_RET(iter_count == state->count);

return NV_OK;
}

@ -424,20 +534,32 @@ static NV_STATUS rtt_range_add_check(rtt_state_t *state, rtt_range_t *range)
}
}

status = rtt_range_add(state, range, &node);

// Verify tree state
if (overlap) {
// Verify failure
MEM_NV_CHECK_RET(status, NV_ERR_UVM_ADDRESS_IN_USE);

// The tree said there's already a range there. Check whether its
// internal state is consistent.
node = uvm_range_tree_iter_first(&state->tree, range->start, range->end);
TEST_CHECK_RET(node);
TEST_CHECK_RET(rtt_range_overlaps_node(node, range));
}
else {
// Verify success
NvU64 hole_start, hole_end;

TEST_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, range->start, &hole_start, &hole_end));
TEST_CHECK_RET(hole_start <= range->start);
TEST_CHECK_RET(hole_end >= range->end);

hole_start = range->start;
hole_end = range->end;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, range->start, &hole_start, &hole_end));
TEST_CHECK_RET(hole_start == range->start);
TEST_CHECK_RET(hole_end == range->end);
}

status = rtt_range_add(state, range, &node);

if (overlap) {
MEM_NV_CHECK_RET(status, NV_ERR_UVM_ADDRESS_IN_USE);
}
else {
MEM_NV_CHECK_RET(status, NV_OK);
status = rtt_check_node(state, node);
}
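
The pairing above exercises the difference between the two hole lookups: uvm_range_tree_find_hole() is expected to report the full hole containing an address, while uvm_range_tree_find_hole_in() clamps the result to the caller-provided bounds. A small illustrative scenario (hypothetical values, assuming a tree that holds only the range [100, 199]):

// Illustration of the expected semantics, not part of the diff.
// find_hole(50)                  -> hole [0, 99]
// find_hole(250)                 -> hole [200, ULLONG_MAX]
// find_hole_in(50, lo=30, hi=60) -> hole clamped to [30, 60]
// find_hole(150)                 -> NV_ERR_UVM_ADDRESS_IN_USE (150 is covered by [100, 199])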

@ -450,6 +572,7 @@ static NV_STATUS rtt_index_remove_check(rtt_state_t *state, size_t index)
{
uvm_range_tree_node_t *node, *prev, *next;
NvU64 start, end;
NvU64 hole_start, hole_end;
NV_STATUS status;

TEST_CHECK_RET(index < state->count);
@ -472,12 +595,35 @@ static NV_STATUS rtt_index_remove_check(rtt_state_t *state, size_t index)
TEST_CHECK_RET(uvm_range_tree_find(&state->tree, start) == NULL);
TEST_CHECK_RET(uvm_range_tree_find(&state->tree, end) == NULL);
TEST_CHECK_RET(uvm_range_tree_iter_first(&state->tree, start, end) == NULL);
if (prev)

hole_start = start;
hole_end = end;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, start, &hole_start, &hole_end));
TEST_CHECK_RET(hole_start == start);
TEST_CHECK_RET(hole_end == end);

TEST_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, start, &hole_start, &hole_end));
TEST_CHECK_RET(hole_start <= start);
TEST_CHECK_RET(hole_end >= end);

if (prev) {
TEST_CHECK_RET(uvm_range_tree_next(&state->tree, prev) == next);
if (next)
TEST_CHECK_RET(hole_start == prev->end + 1);
}

if (next) {
TEST_CHECK_RET(uvm_range_tree_prev(&state->tree, next) == prev);
TEST_CHECK_RET(hole_end == next->start - 1);
}
else {
TEST_CHECK_RET(uvm_range_tree_last(&state->tree) == prev);
}

if (!prev && !next) {
TEST_CHECK_RET(uvm_range_tree_empty(&state->tree));
TEST_CHECK_RET(uvm_range_tree_last(&state->tree) == NULL);
TEST_CHECK_RET(hole_start == 0);
TEST_CHECK_RET(hole_end == ULLONG_MAX);
TEST_CHECK_RET(state->count == 0);
}
else {
@ -749,10 +895,11 @@ static NV_STATUS rtt_index_merge_check_next_val(rtt_state_t *state, NvU64 addr)

static NV_STATUS rtt_directed(rtt_state_t *state)
{
uvm_range_tree_node_t *node;
uvm_range_tree_node_t *node, *next;

// Empty tree
TEST_CHECK_RET(uvm_range_tree_empty(&state->tree));
TEST_CHECK_RET(uvm_range_tree_last(&state->tree) == NULL);
TEST_CHECK_RET(uvm_range_tree_find(&state->tree, 0) == NULL);
TEST_CHECK_RET(uvm_range_tree_find(&state->tree, ULLONG_MAX) == NULL);
uvm_range_tree_for_each(node, &state->tree)
@ -763,6 +910,13 @@ static NV_STATUS rtt_directed(rtt_state_t *state)
TEST_CHECK_RET(0);
uvm_range_tree_for_each_in(node, &state->tree, ULLONG_MAX, ULLONG_MAX)
TEST_CHECK_RET(0);
uvm_range_tree_for_each_in_safe(node, next, &state->tree, 0, 0)
TEST_CHECK_RET(0);
uvm_range_tree_for_each_in_safe(node, next, &state->tree, 0, ULLONG_MAX)
TEST_CHECK_RET(0);
uvm_range_tree_for_each_in_safe(node, next, &state->tree, ULLONG_MAX, ULLONG_MAX)
TEST_CHECK_RET(0);
TEST_NV_CHECK_RET(rtt_check_between(state, NULL, NULL));

// Consume entire range
MEM_NV_CHECK_RET(rtt_range_add_check_val(state, 0, ULLONG_MAX), NV_OK);
@ -1038,8 +1192,8 @@ static NV_STATUS rtt_batch_remove(rtt_state_t *state, UVM_TEST_RANGE_TREE_RANDOM
return NV_OK;
}

// Attempts to shrink a randomly-selected range in the tree. On selecting a range
// of size 1, the attempt is repeated with another range up to the
// Attempts to shrink a randomly-selected range in the tree. On selecting a
// range of size 1, the attempt is repeated with another range up to the
// params->max_attempts threshold.
static NV_STATUS rtt_rand_shrink(rtt_state_t *state, UVM_TEST_RANGE_TREE_RANDOM_PARAMS *params)
{
@ -1151,11 +1305,12 @@ static NV_STATUS rtt_rand_split(rtt_state_t *state, UVM_TEST_RANGE_TREE_RANDOM_P
return NV_OK;
}

// Attempts to merge a randomly-selected range in the tree in a randomly-selected
// direction (next or prev). On selecting a range with a non-adjacent neighbor,
// the attempt is repeated with another range up to the params->max_attempts
// threshold. On reaching the attempt threshold the RNG probabilities are
// adjusted to prefer split operations and NV_ERR_BUSY_RETRY is returned.
// Attempts to merge a randomly-selected range in the tree in a randomly-
// selected direction (next or prev). On selecting a range with a non-adjacent
// neighbor, the attempt is repeated with another range up to the
// params->max_attempts threshold. On reaching the attempt threshold the RNG
// probabilities are adjusted to prefer split operations and NV_ERR_BUSY_RETRY
// is returned.
static NV_STATUS rtt_rand_merge(rtt_state_t *state, UVM_TEST_RANGE_TREE_RANDOM_PARAMS *params)
{
uvm_range_tree_node_t *node;
@ -1236,20 +1391,113 @@ static NV_STATUS rtt_rand_collision_check(rtt_state_t *state, NvU64 max_end)
// in that range in order.
static NV_STATUS rtt_rand_iterator_check(rtt_state_t *state, NvU64 max_end)
{
uvm_range_tree_node_t *node, *prev = NULL;
uvm_range_tree_node_t *node;
uvm_range_tree_node_t *prev = NULL, *first = NULL, *last = NULL, *next = NULL;
size_t i, target_count = 0, iter_count = 0;
NvU64 hole_start, hole_end, test_start, test_end;
rtt_range_t range;

// Generate the range to check
rtt_get_rand_range(&state->rng, max_end, &range);

// Phase 1: Iterate through the unordered list, counting how many nodes we
// ought to see from the tree iterator.
for (i = 0; i < state->count; i++)
target_count += rtt_range_overlaps_node(state->nodes[i], &range);
// ought to see from the tree iterator and finding the boundary nodes.
for (i = 0; i < state->count; i++) {
node = state->nodes[i];

if (rtt_range_overlaps_node(node, &range)) {
++target_count;

// first is the lowest node with any overlap
if (!first || first->start > node->start)
first = node;

// last is the highest node with any overlap
if (!last || last->end < node->end)
last = node;
}
else {
// prev is the highest node with end < range.start
if (node->end < range.start && (!prev || node->end > prev->end))
prev = node;

// next is the lowest node with start > range.end
if (node->start > range.end && (!next || node->start < next->start))
next = node;
}
}

// Phase 2: Use the tree iterators

// The holes between the nodes will be checked within the iterator loop.
// Here we check the holes at the start and end of the range, if any.
if (first) {
if (range.start < first->start) {
// Check hole at range.start
hole_start = prev ? prev->end + 1 : 0;
hole_end = first->start - 1;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, range.start, &test_start, &test_end));
TEST_CHECK_RET(test_start == hole_start);
TEST_CHECK_RET(test_end == hole_end);

test_start = range.start;
test_end = ULLONG_MAX;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, range.start, &test_start, &test_end));
TEST_CHECK_RET(test_start == range.start);
TEST_CHECK_RET(test_end == hole_end);
}

// Else, no hole at start
}
else {
// No nodes intersect the range
UVM_ASSERT(target_count == 0);
UVM_ASSERT(!last);

hole_start = prev ? prev->end + 1 : 0;
hole_end = next ? next->start - 1 : ULLONG_MAX;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, range.start, &test_start, &test_end));
TEST_CHECK_RET(test_start == hole_start);
TEST_CHECK_RET(test_end == hole_end);

test_start = range.start;
test_end = range.end;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, range.start, &test_start, &test_end));
TEST_CHECK_RET(test_start == range.start);
TEST_CHECK_RET(test_end == range.end);
}

if (last && range.end > last->end) {
// Check hole at range.end
hole_start = last->end + 1;
hole_end = next ? next->start - 1 : ULLONG_MAX;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole(&state->tree, range.end, &test_start, &test_end));
TEST_CHECK_RET(test_start == hole_start);
TEST_CHECK_RET(test_end == hole_end);

test_start = 0;
test_end = range.end;
TEST_NV_CHECK_RET(uvm_range_tree_find_hole_in(&state->tree, range.end, &test_start, &test_end));
TEST_CHECK_RET(test_start == hole_start);
TEST_CHECK_RET(test_end == range.end);
}

// Phase 2: Use the tree iterator
uvm_range_tree_for_each_in(node, &state->tree, range.start, range.end) {
TEST_CHECK_RET(rtt_range_overlaps_node(node, &range));
if (prev) {
TEST_CHECK_RET(prev->end < node->start);
TEST_NV_CHECK_RET(rtt_check_between(state, prev, node));
}

++iter_count;
prev = node;
}

TEST_CHECK_RET(iter_count == target_count);

prev = NULL;
iter_count = 0;
uvm_range_tree_for_each_in_safe(node, next, &state->tree, range.start, range.end) {
TEST_CHECK_RET(rtt_range_overlaps_node(node, &range));
if (prev)
TEST_CHECK_RET(prev->end < node->start);
@ -1277,9 +1525,9 @@ static rtt_op_t rtt_get_rand_op(rtt_state_t *state, UVM_TEST_RANGE_TREE_RANDOM_P
if (state->count == 1 && state->count == params->max_ranges)
return RTT_OP_REMOVE;

// r_group selects between the two groups of operations, either {add/remove/shrink}
// or {merge/split}. r_sub selects the sub operation within that group based
// on the current probability settings.
// r_group selects between the two groups of operations, either {add/remove/
// shrink} or {merge/split}. r_sub selects the sub operation within that
// group based on the current probability settings.
r_group = uvm_test_rng_range_32(&state->rng, 1, 100);
r_sub = uvm_test_rng_range_32(&state->rng, 1, 100);

@ -1287,7 +1535,9 @@ static rtt_op_t rtt_get_rand_op(rtt_state_t *state, UVM_TEST_RANGE_TREE_RANDOM_P
if (r_group <= params->add_remove_shrink_group_probability) {
if (r_sub <= state->shrink_probability)
return RTT_OP_SHRINK;
// After giving shrink a chance, redo the randomization for add/remove.

// After giving shrink a chance, redo the randomization for add/
// remove.
r_sub = uvm_test_rng_range_32(&state->rng, 1, 100);

if (r_sub <= state->add_chance)

@ -60,10 +60,22 @@ static NV_STATUS map_cpu(uvm_rm_mem_t *rm_mem)
return NV_OK;
}

static NV_STATUS check_alignment(uvm_rm_mem_t *rm_mem, uvm_gpu_t *gpu, NvU64 alignment)
{
// Alignment requirements only apply to mappings in the UVM-owned VA space
if (alignment != 0) {
bool is_proxy_va_space = false;
NvU64 gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, is_proxy_va_space);

TEST_CHECK_RET(IS_ALIGNED(gpu_va, alignment));
}

return NV_OK;
}

static NV_STATUS map_gpu_owner(uvm_rm_mem_t *rm_mem, NvU64 alignment)
{
uvm_gpu_t *gpu = rm_mem->gpu_owner;
NvU64 gpu_va;

// The memory should have been automatically mapped in the GPU owner
TEST_CHECK_RET(uvm_rm_mem_mapped_on_gpu(rm_mem, gpu));
@ -73,9 +85,7 @@ static NV_STATUS map_gpu_owner(uvm_rm_mem_t *rm_mem, NvU64 alignment)
// located in vidmem.
TEST_CHECK_RET(uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu) == uvm_gpu_uses_proxy_channel_pool(gpu));

gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));
if (alignment)
TEST_CHECK_RET(IS_ALIGNED(gpu_va, alignment));
TEST_NV_CHECK_RET(check_alignment(rm_mem, gpu, alignment));

// Explicitly mapping or unmapping to the GPU that owns the allocation is
// not allowed, so the testing related to GPU owners is simpler than that of
@ -87,7 +97,6 @@ static NV_STATUS map_other_gpus(uvm_rm_mem_t *rm_mem, uvm_va_space_t *va_space,
{
uvm_gpu_t *gpu_owner = rm_mem->gpu_owner;
uvm_gpu_t *gpu;
NvU64 gpu_va;

for_each_va_space_gpu(gpu, va_space) {
if (gpu == gpu_owner)
@ -119,9 +128,7 @@ static NV_STATUS map_other_gpus(uvm_rm_mem_t *rm_mem, uvm_va_space_t *va_space,

TEST_CHECK_RET(uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu) == uvm_gpu_uses_proxy_channel_pool(gpu));

gpu_va = uvm_rm_mem_get_gpu_va(rm_mem, gpu, uvm_rm_mem_mapped_on_gpu_proxy(rm_mem, gpu));
if (alignment)
TEST_CHECK_RET(IS_ALIGNED(gpu_va, alignment));
TEST_NV_CHECK_RET(check_alignment(rm_mem, gpu, alignment));
}

return NV_OK;

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVIDIA Corporation
Copyright (c) 2015-2022 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -247,6 +247,7 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_CHANNEL_STRESS, uvm_test_channel_stress);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_CE_SANITY, uvm_test_ce_sanity);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_HOST_SANITY, uvm_test_host_sanity);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_VA_SPACE_MM_OR_CURRENT_RETAIN, uvm_test_va_space_mm_or_current_retain);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_VA_BLOCK_INFO, uvm_test_va_block_info);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_LOCK_SANITY, uvm_test_lock_sanity);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_PERF_UTILS_SANITY, uvm_test_perf_utils_sanity);
@ -328,6 +329,8 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
uvm_test_va_range_inject_add_gpu_va_space_error);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_DESTROY_GPU_VA_SPACE_DELAY, uvm_test_destroy_gpu_va_space_delay);
UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TEST_CGROUP_ACCOUNTING_SUPPORTED, uvm_test_cgroup_accounting_supported);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_HMM_INIT, uvm_test_hmm_init);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SPLIT_INVALIDATE_DELAY, uvm_test_split_invalidate_delay);
}

return -EINVAL;

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2021 NVidia Corporation
Copyright (c) 2015-2022 NVidia Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -23,9 +23,7 @@
#ifndef __UVM_TEST_IOCTL_H__
#define __UVM_TEST_IOCTL_H__

#ifndef __KERNEL__

#endif
#include "uvm_types.h"
#include "uvm_ioctl.h"
#include "nv_uvm_types.h"
@ -151,6 +149,14 @@ typedef enum
UVM_TEST_VA_RANGE_TYPE_MAX
} UVM_TEST_VA_RANGE_TYPE;

typedef enum
{
UVM_TEST_RANGE_SUBTYPE_INVALID = 0,
UVM_TEST_RANGE_SUBTYPE_UVM,
UVM_TEST_RANGE_SUBTYPE_HMM,
UVM_TEST_RANGE_SUBTYPE_MAX
} UVM_TEST_RANGE_SUBTYPE;

// Keep this in sync with uvm_read_duplication_t in uvm_va_range.h
typedef enum
{
@ -169,6 +175,7 @@ typedef struct
NvBool is_zombie; // Out
// Note: if this is a zombie, this field is meaningless.
NvBool owned_by_calling_process; // Out
NvU32 subtype; // Out (UVM_TEST_RANGE_SUBTYPE)
} UVM_TEST_VA_RANGE_INFO_MANAGED;

#define UVM_TEST_VA_RANGE_INFO UVM_TEST_IOCTL_BASE(4)
@ -176,6 +183,10 @@ typedef struct
{
NvU64 lookup_address NV_ALIGN_BYTES(8); // In

// For HMM ranges va_range_start/end will contain the lookup address but not
// necessarily the maximal range over which the returned policy applies.
// For example there could be adjacent ranges with the same policy, implying
// the returned range could be as small as a page in the worst case for HMM.
NvU64 va_range_start NV_ALIGN_BYTES(8); // Out
NvU64 va_range_end NV_ALIGN_BYTES(8); // Out, inclusive
NvU32 read_duplication; // Out (UVM_TEST_READ_DUPLICATION_POLICY)
@ -536,12 +547,14 @@ typedef struct
// If user_pages_allocation_retry_force_count is non-0 then the next count user
// memory allocations under the VA block will be forced to do allocation-retry.
//
// If cpu_pages_allocation_error_count is not zero, the subsequent operations
// that need to allocate CPU pages will fail with NV_ERR_NO_MEMORY for
// cpu_pages_allocation_error_count times. If cpu_pages_allocation_error_count
// is equal to ~0U, the count is infinite.
//
// If eviction_failure is NV_TRUE, the next eviction attempt from the VA block
// will fail with NV_ERR_NO_MEMORY.
//
// If cpu_pages_allocation_error is NV_TRUE, the subsequent operations that
// need to allocate CPU pages will fail with NV_ERR_NO_MEMORY.
//
// If populate_failure is NV_TRUE, a retry error will be injected after the next
// successful user memory allocation under the VA block but before that
// allocation is used by the block. This is similar to
@ -558,8 +571,8 @@ typedef struct
NvU32 page_table_allocation_retry_force_count; // In
NvU32 user_pages_allocation_retry_force_count; // In
NvU32 cpu_chunk_allocation_size_mask; // In
NvU32 cpu_pages_allocation_error_count; // In
NvBool eviction_error; // In
NvBool cpu_pages_allocation_error; // In
NvBool populate_error; // In
NV_STATUS rmStatus; // Out
} UVM_TEST_VA_BLOCK_INJECT_ERROR_PARAMS;
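
As a usage illustration only (the field values and the lookup_address field are assumptions based on the surrounding struct, not shown in this hunk), a test that wants the next three CPU page allocations under a VA block to fail could fill the params like this before issuing the ioctl:

// Hypothetical sketch of filling the error-injection params.
UVM_TEST_VA_BLOCK_INJECT_ERROR_PARAMS params = {0};
params.lookup_address = test_address;        // assumed: address identifying the VA block
params.cpu_pages_allocation_error_count = 3; // fail the next 3 CPU page allocations
// Setting the count to ~(NvU32)0 would make the injected failure persistent.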

@ -1111,10 +1124,14 @@ typedef struct
//
// If migrate_vma_allocation_fail_nth is greater than 0, the nth page
// allocation within migrate_vma will fail.
//
// If va_block_allocation_fail_nth is greater than 0, the nth call to
// uvm_va_block_find_create() will fail with NV_ERR_NO_MEMORY.
#define UVM_TEST_VA_SPACE_INJECT_ERROR UVM_TEST_IOCTL_BASE(72)
typedef struct
{
NvU32 migrate_vma_allocation_fail_nth; // In
NvU32 va_block_allocation_fail_nth; // In

NV_STATUS rmStatus; // Out
} UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS;
@ -1341,6 +1358,28 @@ typedef struct
NV_STATUS rmStatus; // Out
} UVM_TEST_HOST_SANITY_PARAMS;

// Calls uvm_va_space_mm_or_current_retain() on a VA space,
// then releases the va_space_mm and returns.
#define UVM_TEST_VA_SPACE_MM_OR_CURRENT_RETAIN UVM_TEST_IOCTL_BASE(89)
typedef struct
{
// User address of a flag to act as a semaphore. If non-NULL, the address
// is set to 1 after successful retain but before the sleep.
NvU64 retain_done_ptr NV_ALIGN_BYTES(8); // In

// Approximate duration for which to sleep with the va_space_mm retained.
NvU64 sleep_us NV_ALIGN_BYTES(8); // In

// On success, this contains the value of mm->mm_users before mmput() is
// called.
NvU64 mm_users NV_ALIGN_BYTES(8); // Out

// NV_ERR_PAGE_TABLE_NOT_AVAIL Could not retain va_space_mm
// (uvm_va_space_mm_or_current_retain returned
// NULL)
NV_STATUS rmStatus; // Out
} UVM_TEST_VA_SPACE_MM_OR_CURRENT_RETAIN_PARAMS;
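
A hedged sketch of how a user-space test might drive this ioctl; the device file descriptor and ioctl plumbing shown here are assumptions for illustration, not something this diff defines:

// Hypothetical user-space usage of the new test ioctl.
UVM_TEST_VA_SPACE_MM_OR_CURRENT_RETAIN_PARAMS p = {0};
p.retain_done_ptr = (NvU64)(uintptr_t)&done_flag; // optional flag set to 1 once the retain succeeds
p.sleep_us = 100000;                              // hold the va_space_mm for roughly 100 ms
ioctl(uvm_fd, UVM_TEST_VA_SPACE_MM_OR_CURRENT_RETAIN, &p);
// On success, p.mm_users reports mm->mm_users as sampled before mmput().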

#define UVM_TEST_GET_USER_SPACE_END_ADDRESS UVM_TEST_IOCTL_BASE(90)
typedef struct
{
@ -1396,6 +1435,19 @@ typedef struct
NV_STATUS rmStatus; // Out
} UVM_TEST_CGROUP_ACCOUNTING_SUPPORTED_PARAMS;

#define UVM_TEST_HMM_INIT UVM_TEST_IOCTL_BASE(97)
typedef struct
{
NV_STATUS rmStatus; // Out
} UVM_TEST_HMM_INIT_PARAMS;

#define UVM_TEST_SPLIT_INVALIDATE_DELAY UVM_TEST_IOCTL_BASE(98)
typedef struct
{
NvU64 delay_us; // In
NV_STATUS rmStatus; // Out
} UVM_TEST_SPLIT_INVALIDATE_DELAY_PARAMS;

#ifdef __cplusplus
}
#endif
@ -430,10 +430,12 @@ static bool thread_context_non_interrupt_add(uvm_thread_context_t *thread_contex
if (thread_context->array_index == UVM_THREAD_CONTEXT_ARRAY_SIZE) {
NvU64 old = atomic64_cmpxchg(&array_entry->task, 0, task);

// Task already added a different thread context. There is nothing
// to undo because the current thread context has not been inserted.
if (old == task)
// Task already added a different thread context. The current thread
// context has not been inserted but needs to be freed.
if (old == task) {
thread_context_non_interrupt_deinit(thread_context);
return false;
}

// Speculatively add the current thread context.
if (old == 0)
@ -444,6 +446,7 @@ static bool thread_context_non_interrupt_add(uvm_thread_context_t *thread_contex
// Task already added a different thread context to the array, so
// undo the speculative insertion
atomic64_set(&table_entry->array[thread_context->array_index].task, 0);
thread_context_non_interrupt_deinit(thread_context);

return false;
}
@ -474,6 +477,9 @@ static bool thread_context_non_interrupt_add(uvm_thread_context_t *thread_contex
added = true;
}

if (!added)
thread_context_non_interrupt_deinit(thread_context);

spin_unlock_irqrestore(&table_entry->tree_lock, flags);
return added;
}
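
The fix above frees the speculatively created thread context on every path that fails to publish it. Reduced to a hedged, stand-alone sketch of the pattern (names are illustrative, not the driver's):

// Claim an empty slot with cmpxchg; whoever loses the race, or later rolls the
// speculative insertion back, must also free what it allocated, since nothing
// about the losing context was ever published.
old = atomic64_cmpxchg(&slot->task, 0, task);
if (old == task) {              // this task already registered another context
    context_free(ctx);          // nothing to undo in the table, just free ours
    return false;
}
if (old == 0) {                 // we won: the entry is speculatively visible
    if (!commit_ok(slot)) {     // validation failed: roll back and free
        atomic64_set(&slot->task, 0);
        context_free(ctx);
        return false;
    }
}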

@ -218,7 +218,7 @@ static void uvm_put_user_pages_dirty(struct page **pages, NvU64 page_count)

for (i = 0; i < page_count; i++) {
set_page_dirty(pages[i]);
put_page(pages[i]);
NV_UNPIN_USER_PAGE(pages[i]);
}
}

@ -262,7 +262,7 @@ static NV_STATUS map_user_pages(NvU64 user_va, NvU64 size, void **addr, struct p
}

nv_mmap_read_lock(current->mm);
ret = NV_GET_USER_PAGES(user_va, num_pages, 1, 0, *pages, vmas);
ret = NV_PIN_USER_PAGES(user_va, num_pages, FOLL_WRITE, *pages, vmas);
nv_mmap_read_unlock(current->mm);
if (ret != num_pages) {
status = NV_ERR_INVALID_ARGUMENT;
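
The change in these two hunks moves the test from the older get_user_pages()/put_page() pairing to the pinned-pages API, so every page taken via the pin wrapper is released via the unpin wrapper. NV_PIN_USER_PAGES and NV_UNPIN_USER_PAGE appear to be the driver's compatibility wrappers; in plain kernel terms the pairing looks roughly like this (a sketch, not the driver's code):

// Pin for write under the mmap read lock, later mark dirty and unpin.
mmap_read_lock(current->mm);
ret = pin_user_pages(user_va, num_pages, FOLL_WRITE, pages, NULL);
mmap_read_unlock(current->mm);
/* ... use the pages ... */
for (i = 0; i < num_pages; i++) {
    set_page_dirty(pages[i]);
    unpin_user_page(pages[i]);
}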

@ -1116,6 +1116,19 @@ void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
uvm_tools_broadcast_event(&entry);
}

void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space)
{
UvmEventEntry entry;

if (!va_space->tools.enabled)
return;

entry.testEventData.splitInvalidate.eventType = UvmEventTypeTestHmmSplitInvalidate;
uvm_down_read(&va_space->tools.lock);
uvm_tools_record_event(va_space, &entry);
uvm_up_read(&va_space->tools.lock);
}

// This function is used as a begin marker to group all migrations within a VA
// block that are performed in the same call to
// block_copy_resident_pages_between. All of these are pushed to the same
@ -2101,8 +2114,7 @@ exit:

uvm_global_mask_release(retained_global_gpus);

if (mm)
uvm_va_space_mm_or_current_release(va_space, mm);
uvm_va_space_mm_or_current_release(va_space, mm);

uvm_kvfree(global_gpus);
uvm_kvfree(retained_global_gpus);

@ -115,6 +115,8 @@ void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *buffer_entry,
bool on_managed);

void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space);

// schedules completed events and then waits for them to be dispatched
void uvm_tools_flush_events(void);

@ -34,9 +34,6 @@
#include "nvstatus.h"
#include "nvCpuUuid.h"

#ifndef __KERNEL__

#endif

/*******************************************************************************
UVM stream types
@ -359,9 +356,10 @@ typedef enum
UvmEventNumTypes,

// ---- Private event types for uvm tests
UvmEventTestTypesFirst = 63,
UvmEventTestTypesFirst = 62,

UvmEventTypeTestAccessCounter = UvmEventTestTypesFirst,
UvmEventTypeTestHmmSplitInvalidate = UvmEventTestTypesFirst,
UvmEventTypeTestAccessCounter = UvmEventTestTypesFirst + 1,

UvmEventTestTypesLast = UvmEventTypeTestAccessCounter,

@ -387,6 +385,7 @@ typedef enum
#define UVM_EVENT_ENABLE_MAP_REMOTE ((NvU64)1 << UvmEventTypeMapRemote)
#define UVM_EVENT_ENABLE_EVICTION ((NvU64)1 << UvmEventTypeEviction)
#define UVM_EVENT_ENABLE_TEST_ACCESS_COUNTER ((NvU64)1 << UvmEventTypeTestAccessCounter)
#define UVM_EVENT_ENABLE_TEST_HMM_SPLIT_INVALIDATE ((NvU64)1 << UvmEventTypeTestHmmSplitInvalidate)
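
Moving UvmEventTestTypesFirst from 63 to 62 keeps both test event types addressable as single bits of the NvU64 enable mask; with the new numbering the two test events presumably occupy bits 62 and 63. A small illustration of what the defines above imply:

// Illustrative only; bit positions implied by the enum change above.
// UvmEventTypeTestHmmSplitInvalidate = 62 -> enable bit (NvU64)1 << 62
// UvmEventTypeTestAccessCounter      = 63 -> enable bit (NvU64)1 << 63
NvU64 enable_both = UVM_EVENT_ENABLE_TEST_ACCESS_COUNTER | UVM_EVENT_ENABLE_TEST_HMM_SPLIT_INVALIDATE;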

//------------------------------------------------------------------------------
// Information associated with a memory violation event
@ -977,6 +976,11 @@ typedef struct
NvU64 instancePtr;
} UvmEventTestAccessCounterInfo;

typedef struct
{
NvU8 eventType;
} UvmEventTestSplitInvalidateInfo;

//------------------------------------------------------------------------------
// Entry added in the event queue buffer when an enabled event occurs. For
// compatibility with all tools ensure that this structure is 64 bit aligned.
@ -1010,6 +1014,7 @@ typedef struct
NvU8 eventType;

UvmEventTestAccessCounterInfo accessCounter;
UvmEventTestSplitInvalidateInfo splitInvalidate;
} testEventData;
};
} UvmEventEntry;

@ -618,7 +618,7 @@ static NV_STATUS uvm_register_channel(uvm_va_space_t *va_space,
uvm_va_space_up_read_rm(va_space);

// The mm needs to be locked in order to remove stale HMM va_blocks.
mm = uvm_va_space_mm_retain_lock(va_space);
mm = uvm_va_space_mm_or_current_retain_lock(va_space);

// We have the RM objects now so we know what the VA range layout should be.
// Re-take the VA space lock in write mode to create and insert them.
@ -653,10 +653,8 @@ static NV_STATUS uvm_register_channel(uvm_va_space_t *va_space,
if (status != NV_OK)
goto error_under_write;

if (mm) {
if (mm)
uvm_up_read_mmap_lock_out_of_order(mm);
uvm_va_space_mm_release(va_space);
}

// The subsequent mappings will need to call into RM, which means we must
// downgrade the VA space lock to read mode. Although we're in read mode no
@ -681,6 +679,7 @@ static NV_STATUS uvm_register_channel(uvm_va_space_t *va_space,
goto error_under_read;

uvm_va_space_up_read_rm(va_space);
uvm_va_space_mm_or_current_release(va_space, mm);
uvm_gpu_release(gpu);
return NV_OK;

@ -688,7 +687,7 @@ error_under_write:
if (user_channel->gpu_va_space)
uvm_user_channel_detach(user_channel, &deferred_free_list);
uvm_va_space_up_write(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
uvm_deferred_free_object_list(&deferred_free_list);
uvm_gpu_release(gpu);
return status;
@ -714,10 +713,12 @@ error_under_read:
if (user_channel->gpu_va_space) {
uvm_user_channel_detach(user_channel, &deferred_free_list);
uvm_va_space_up_write(va_space);
uvm_va_space_mm_or_current_release(va_space, mm);
uvm_deferred_free_object_list(&deferred_free_list);
}
else {
uvm_va_space_up_write(va_space);
uvm_va_space_mm_or_current_release(va_space, mm);
}

uvm_user_channel_release(user_channel);

@ -105,6 +105,36 @@ uvm_va_space_t *uvm_va_block_get_va_space(uvm_va_block_t *va_block)
return va_space;
}

bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
uvm_va_policy_t *policy,
uvm_va_block_region_t region)
{
uvm_assert_mutex_locked(&va_block->lock);

if (uvm_va_block_is_hmm(va_block)) {
uvm_va_policy_node_t *node;

if (policy == &uvm_va_policy_default) {
// There should only be the default policy within the region.
node = uvm_va_policy_node_iter_first(va_block,
uvm_va_block_region_start(va_block, region),
uvm_va_block_region_end(va_block, region));
UVM_ASSERT(!node);
}
else {
// The policy node should cover the region.
node = uvm_va_policy_node_from_policy(policy);
UVM_ASSERT(node->node.start <= uvm_va_block_region_start(va_block, region));
UVM_ASSERT(node->node.end >= uvm_va_block_region_end(va_block, region));
}
}
else {
UVM_ASSERT(policy == uvm_va_range_get_policy(va_block->va_range));
}

return true;
}

static NvU64 block_gpu_pte_flag_cacheable(uvm_va_block_t *block, uvm_gpu_t *gpu, uvm_processor_id_t resident_id)
{
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
@ -589,7 +619,7 @@ NV_STATUS uvm_va_block_create(uvm_va_range_t *va_range,
UVM_ASSERT(size <= UVM_VA_BLOCK_SIZE);

if (va_range) {
// Create a UVM managed va_block.
// Create a managed va_block.
UVM_ASSERT(start >= va_range->node.start);
UVM_ASSERT(end <= va_range->node.end);
UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
@ -617,6 +647,7 @@ NV_STATUS uvm_va_block_create(uvm_va_range_t *va_range,
block->end = end;
block->va_range = va_range;
uvm_tracker_init(&block->tracker);
block->prefetch_info.last_migration_proc_id = UVM_ID_INVALID;

nv_kthread_q_item_init(&block->eviction_mappings_q_item, block_deferred_eviction_mappings_entry, block);

@ -636,7 +667,7 @@ static void block_gpu_unmap_phys_all_cpu_pages(uvm_va_block_t *block, uvm_gpu_t
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_mapping_addr(block, page_index, chunk, gpu->id);
if (gpu_mapping_addr != 0) {
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings, gpu_mapping_addr);
uvm_gpu_unmap_cpu_pages(gpu, gpu_mapping_addr, uvm_cpu_chunk_get_size(chunk));
uvm_gpu_unmap_cpu_pages(gpu->parent, gpu_mapping_addr, uvm_cpu_chunk_get_size(chunk));
uvm_cpu_chunk_set_gpu_mapping_addr(block, page_index, chunk, gpu->id, 0);
}
}
@ -657,7 +688,7 @@ static NV_STATUS block_gpu_map_phys_all_cpu_pages(uvm_va_block_t *block, uvm_gpu

UVM_ASSERT_MSG(gpu_mapping_addr == 0, "GPU%u DMA address 0x%llx\n", uvm_id_value(gpu->id), gpu_mapping_addr);

status = uvm_gpu_map_cpu_pages(gpu,
status = uvm_gpu_map_cpu_pages(gpu->parent,
uvm_cpu_chunk_get_cpu_page(block, chunk, page_index),
chunk_size,
&gpu_mapping_addr);
@ -846,7 +877,7 @@ static void block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *block, uvm_cpu_chunk_t

gpu = block_get_gpu(block, id);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings, gpu_mapping_addr);
uvm_gpu_unmap_cpu_pages(gpu, gpu_mapping_addr, uvm_cpu_chunk_get_size(chunk));
uvm_gpu_unmap_cpu_pages(gpu->parent, gpu_mapping_addr, uvm_cpu_chunk_get_size(chunk));
uvm_cpu_chunk_set_gpu_mapping_addr(block, page_index, chunk, id, 0);
}
}
@ -880,7 +911,7 @@ static NV_STATUS block_map_cpu_chunk_on_gpus(uvm_va_block_t *block, uvm_page_ind
UVM_ASSERT_MSG(gpu_mapping_addr == 0, "GPU%u DMA address 0x%llx\n", uvm_id_value(id), gpu_mapping_addr);

gpu = block_get_gpu(block, id);
status = uvm_gpu_map_cpu_pages(gpu,
status = uvm_gpu_map_cpu_pages(gpu->parent,
uvm_cpu_chunk_get_cpu_page(block, chunk, chunk_region.first),
chunk_size,
&gpu_mapping_addr);
@ -1014,9 +1045,14 @@ static NV_STATUS block_populate_page_cpu(uvm_va_block_t *block, uvm_page_index_t
UVM_ASSERT(!uvm_page_mask_test(&block->cpu.resident, page_index));

// Return out of memory error if the tests have requested it. As opposed to
// other error injection settings, this one is persistent.
if (block_test && block_test->inject_cpu_pages_allocation_error)
// other error injection settings, this one fails N times and then succeeds.
// TODO: Bug 3701182: This will print a warning in Linux kernels newer than
// 5.16.0-rc1+.
if (block_test && block_test->inject_cpu_pages_allocation_error_count) {
if (block_test->inject_cpu_pages_allocation_error_count != ~(NvU32)0)
block_test->inject_cpu_pages_allocation_error_count--;
return NV_ERR_NO_MEMORY;
}

status = uvm_cpu_chunk_alloc(block, page_index, mm, &chunk);
if (status != NV_OK)
@ -1178,6 +1214,26 @@ uvm_va_block_region_t uvm_va_block_big_page_region_all(uvm_va_block_t *va_block,
return range_big_page_region_all(va_block->start, va_block->end, big_page_size);
}

uvm_va_block_region_t uvm_va_block_big_page_region_subset(uvm_va_block_t *va_block,
uvm_va_block_region_t region,
NvU32 big_page_size)
{
NvU64 start = uvm_va_block_region_start(va_block, region);
NvU64 end = uvm_va_block_region_end(va_block, region);
uvm_va_block_region_t big_region;

UVM_ASSERT(start < va_block->end);
UVM_ASSERT(end <= va_block->end);

big_region = range_big_page_region_all(start, end, big_page_size);
if (big_region.outer) {
big_region.first += region.first;
big_region.outer += region.first;
}

return big_region;
}
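
A hedged worked example of the arithmetic above (values are illustrative and assume aligned boundaries): range_big_page_region_all() reports big-page-backed page indices relative to the start it was given, so the helper shifts them by region.first to express them relative to the whole block.

// Suppose big_page_size covers 16 small pages and 'region' spans block pages [16, 48).
// range_big_page_region_all(start, end, big_page_size) -> {first = 0,  outer = 32}  (region-relative)
// after the += region.first adjustment                 -> {first = 16, outer = 48}  (block-relative)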

size_t uvm_va_block_num_big_pages(uvm_va_block_t *va_block, NvU32 big_page_size)
{
return range_num_big_pages(va_block->start, va_block->end, big_page_size);
@ -2159,13 +2215,21 @@ static uvm_gpu_address_t block_phys_page_copy_address(uvm_va_block_t *block,
return copy_addr;
}

uvm_gpu_phys_address_t uvm_va_block_gpu_phys_page_address(uvm_va_block_t *va_block,
uvm_gpu_phys_address_t uvm_va_block_res_phys_page_address(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_processor_id_t residency,
uvm_gpu_t *gpu)
{
uvm_assert_mutex_locked(&va_block->lock);

return block_phys_page_address(va_block, block_phys_page(gpu->id, page_index), gpu);
return block_phys_page_address(va_block, block_phys_page(residency, page_index), gpu);
}

uvm_gpu_phys_address_t uvm_va_block_gpu_phys_page_address(uvm_va_block_t *va_block,
uvm_page_index_t page_index,
uvm_gpu_t *gpu)
{
return uvm_va_block_res_phys_page_address(va_block, page_index, gpu->id, gpu);
}

// Begin a push appropriate for copying data from src_id processor to dst_id processor.
@ -2327,12 +2391,17 @@ typedef enum
BLOCK_TRANSFER_MODE_INTERNAL_MOVE_TO_STAGE = 3,
BLOCK_TRANSFER_MODE_INTERNAL_MOVE_FROM_STAGE = 4,
BLOCK_TRANSFER_MODE_INTERNAL_COPY_TO_STAGE = 5,
BLOCK_TRANSFER_MODE_INTERNAL_COPY_FROM_STAGE = 6
BLOCK_TRANSFER_MODE_INTERNAL_COPY_FROM_STAGE = 6,
BLOCK_TRANSFER_MODE_INTERNAL_COPY_ONLY = 7
} block_transfer_mode_internal_t;

static uvm_va_block_transfer_mode_t get_block_transfer_mode_from_internal(block_transfer_mode_internal_t transfer_mode)
{
switch (transfer_mode) {
// For HMM, BLOCK_TRANSFER_MODE_INTERNAL_COPY_ONLY is just part of a
// two phase move. First the pages are copied, then after
// migrate_vma_pages() succeeds, residency and mapping are updated.
case BLOCK_TRANSFER_MODE_INTERNAL_COPY_ONLY:
case BLOCK_TRANSFER_MODE_INTERNAL_MOVE:
case BLOCK_TRANSFER_MODE_INTERNAL_MOVE_TO_STAGE:
case BLOCK_TRANSFER_MODE_INTERNAL_MOVE_FROM_STAGE:
@ -2391,6 +2460,57 @@ static uvm_va_block_region_t block_phys_contig_region(uvm_va_block_t *block,
}
}

typedef struct
{
// Location of the memory
uvm_processor_id_t id;

// Whether the whole block has a single physically-contiguous chunk of
// storage on the processor.
bool is_block_contig;

// Starting address of the physically-contiguous allocation, from the view
// of the copying GPU. Valid only if is_block_contig.
uvm_gpu_address_t address;
} block_copy_addr_t;

typedef struct
{
block_copy_addr_t src;
block_copy_addr_t dst;
} block_copy_state_t;

// Like block_phys_page_copy_address, but uses the address cached in bca when
// possible.
static uvm_gpu_address_t block_copy_get_address(uvm_va_block_t *block,
block_copy_addr_t *bca,
uvm_page_index_t page_index,
uvm_gpu_t *copying_gpu)
{
if (bca->is_block_contig) {
uvm_gpu_address_t addr = bca->address;
addr.address += page_index * PAGE_SIZE;
UVM_ASSERT(block_phys_copy_contig_check(block, page_index, &bca->address, bca->id, copying_gpu));
return addr;
}

return block_phys_page_copy_address(block, block_phys_page(bca->id, page_index), copying_gpu);
}

static void block_copy_push(uvm_va_block_t *block,
block_copy_state_t *state,
uvm_va_block_region_t region,
uvm_push_t *push)
{
uvm_gpu_t *copying_gpu = uvm_push_get_gpu(push);

uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
copying_gpu->parent->ce_hal->memcopy(push,
block_copy_get_address(block, &state->dst, region.first, copying_gpu),
block_copy_get_address(block, &state->src, region.first, copying_gpu),
uvm_va_block_region_size(region));
}
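
block_copy_push() centralizes what the per-page and consolidated copy paths previously duplicated: it resolves source and destination copy addresses through block_copy_get_address() (using the cached block-contiguous base address when available) and issues a single CE memcopy for the region. The two call shapes that appear later in this diff:

// Consolidated copy when both sides are physically contiguous for the block:
//     block_copy_push(block, &state, contig_region, &push);
// Per-page copy otherwise:
//     block_copy_push(block, &state, uvm_va_block_region_for_page(page_index), &push);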

// Copies pages resident on the src_id processor to the dst_id processor
//
// The function adds the pages that were successfully copied to the output
@ -2403,7 +2523,7 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
uvm_processor_id_t dst_id,
uvm_processor_id_t src_id,
uvm_va_block_region_t region,
const uvm_page_mask_t *page_mask,
uvm_page_mask_t *copy_mask,
const uvm_page_mask_t *prefetch_page_mask,
block_transfer_mode_internal_t transfer_mode,
uvm_page_mask_t *migrated_pages,
@ -2418,7 +2538,6 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
uvm_page_index_t page_index;
uvm_page_index_t contig_start_index = region.outer;
uvm_page_index_t last_index = region.outer;
uvm_page_mask_t *copy_mask = &block_context->make_resident.copy_resident_pages_between_mask;
uvm_range_group_range_t *rgr = NULL;
bool rgr_has_changed = false;
uvm_make_resident_cause_t cause = block_context->make_resident.cause;
@ -2426,26 +2545,21 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
const bool may_prefetch = (cause == UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT ||
cause == UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT ||
cause == UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER) && !!prefetch_page_mask;
const bool is_src_phys_contig = is_block_phys_contig(block, src_id);
const bool is_dst_phys_contig = is_block_phys_contig(block, dst_id);
uvm_gpu_address_t contig_src_address = {0};
uvm_gpu_address_t contig_dst_address = {0};
block_copy_state_t state = {0};
uvm_va_range_t *va_range = block->va_range;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
const uvm_va_block_transfer_mode_t block_transfer_mode = get_block_transfer_mode_from_internal(transfer_mode);

state.src.id = src_id;
state.dst.id = dst_id;
state.src.is_block_contig = is_block_phys_contig(block, src_id);
state.dst.is_block_contig = is_block_phys_contig(block, dst_id);

*copied_pages = 0;

if (uvm_id_equal(dst_id, src_id))
return NV_OK;

uvm_page_mask_init_from_region(copy_mask, region, src_resident_mask);

if (page_mask)
uvm_page_mask_and(copy_mask, copy_mask, page_mask);

// If there are not pages to be copied, exit early
if (!uvm_page_mask_andnot(copy_mask, copy_mask, dst_resident_mask))
// If there are no pages to be copied, exit early
if (!uvm_page_mask_andnot(copy_mask, copy_mask, dst_resident_mask) ||
!uvm_page_mask_andnot(copy_mask, copy_mask, migrated_pages))
return NV_OK;

// uvm_range_group_range_iter_first should only be called when the va_space
@ -2458,6 +2572,7 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
rgr_has_changed = true;
}

// TODO: Bug 3745051: This function is complicated and needs refactoring
for_each_va_block_page_in_region_mask(page_index, copy_mask, region) {
NvU64 page_start = uvm_va_block_cpu_page_address(block, page_index);
uvm_make_resident_cause_t page_cause = (may_prefetch && uvm_page_mask_test(prefetch_page_mask, page_index))?
@ -2553,29 +2668,19 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
// NVLINK links. Therefore, for physically-contiguous block
// storage, we cache the start address and compute the page address
// using the page index.
if (is_src_phys_contig)
contig_src_address = block_phys_page_copy_address(block, block_phys_page(src_id, 0), copying_gpu);
if (is_dst_phys_contig)
contig_dst_address = block_phys_page_copy_address(block, block_phys_page(dst_id, 0), copying_gpu);
if (state.src.is_block_contig)
state.src.address = block_phys_page_copy_address(block, block_phys_page(src_id, 0), copying_gpu);
if (state.dst.is_block_contig)
state.dst.address = block_phys_page_copy_address(block, block_phys_page(dst_id, 0), copying_gpu);
}
else if ((page_index != last_index + 1) || contig_cause != page_cause) {
uvm_va_block_region_t contig_region = uvm_va_block_region(contig_start_index, last_index + 1);
size_t contig_region_size = uvm_va_block_region_size(contig_region);
UVM_ASSERT(uvm_va_block_region_contains_region(region, contig_region));

// If both src and dst are physically-contiguous, consolidate copies
// of contiguous pages into a single method.
if (is_src_phys_contig && is_dst_phys_contig) {
uvm_gpu_address_t src_address = contig_src_address;
uvm_gpu_address_t dst_address = contig_dst_address;

src_address.address += contig_start_index * PAGE_SIZE;
dst_address.address += contig_start_index * PAGE_SIZE;

uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);

copying_gpu->parent->ce_hal->memcopy(&push, dst_address, src_address, contig_region_size);
}
if (state.src.is_block_contig && state.dst.is_block_contig)
block_copy_push(block, &state, contig_region, &push);

uvm_perf_event_notify_migration(&va_space->perf_events,
&push,
@ -2583,7 +2688,7 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
dst_id,
src_id,
uvm_va_block_region_start(block, contig_region),
contig_region_size,
uvm_va_block_region_size(contig_region),
block_transfer_mode,
contig_cause,
&block_context->make_resident);
@ -2592,34 +2697,8 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
contig_cause = page_cause;
}

if (is_src_phys_contig)
UVM_ASSERT(block_phys_copy_contig_check(block, page_index, &contig_src_address, src_id, copying_gpu));
if (is_dst_phys_contig)
UVM_ASSERT(block_phys_copy_contig_check(block, page_index, &contig_dst_address, dst_id, copying_gpu));

if (!is_src_phys_contig || !is_dst_phys_contig) {
uvm_gpu_address_t src_address;
uvm_gpu_address_t dst_address;

if (is_src_phys_contig) {
src_address = contig_src_address;
src_address.address += page_index * PAGE_SIZE;
}
else {
src_address = block_phys_page_copy_address(block, block_phys_page(src_id, page_index), copying_gpu);
}

if (is_dst_phys_contig) {
dst_address = contig_dst_address;
dst_address.address += page_index * PAGE_SIZE;
}
else {
dst_address = block_phys_page_copy_address(block, block_phys_page(dst_id, page_index), copying_gpu);
}

uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
copying_gpu->parent->ce_hal->memcopy(&push, dst_address, src_address, PAGE_SIZE);
}
if (!state.src.is_block_contig || !state.dst.is_block_contig)
block_copy_push(block, &state, uvm_va_block_region_for_page(page_index), &push);

last_index = page_index;
}
@ -2627,19 +2706,10 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
// Copy the remaining pages
if (copying_gpu) {
uvm_va_block_region_t contig_region = uvm_va_block_region(contig_start_index, last_index + 1);
size_t contig_region_size = uvm_va_block_region_size(contig_region);
UVM_ASSERT(uvm_va_block_region_contains_region(region, contig_region));

if (is_src_phys_contig && is_dst_phys_contig) {
uvm_gpu_address_t src_address = contig_src_address;
uvm_gpu_address_t dst_address = contig_dst_address;

src_address.address += contig_start_index * PAGE_SIZE;
dst_address.address += contig_start_index * PAGE_SIZE;

uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
copying_gpu->parent->ce_hal->memcopy(&push, dst_address, src_address, contig_region_size);
}
if (state.src.is_block_contig && state.dst.is_block_contig)
block_copy_push(block, &state, contig_region, &push);

uvm_perf_event_notify_migration(&va_space->perf_events,
&push,
@ -2647,7 +2717,7 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
dst_id,
src_id,
uvm_va_block_region_start(block, contig_region),
contig_region_size,
uvm_va_block_region_size(contig_region),
block_transfer_mode,
contig_cause,
&block_context->make_resident);
@ -2673,8 +2743,11 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
if (*copied_pages) {
uvm_page_mask_or(migrated_pages, migrated_pages, copy_mask);

uvm_page_mask_or(dst_resident_mask, dst_resident_mask, copy_mask);
block_set_resident_processor(block, dst_id);
// For HMM, the residency is updated after migrate_vma_pages() succeeds.
if (transfer_mode != BLOCK_TRANSFER_MODE_INTERNAL_COPY_ONLY) {
uvm_page_mask_or(dst_resident_mask, dst_resident_mask, copy_mask);
block_set_resident_processor(block, dst_id);
}

if (transfer_mode == BLOCK_TRANSFER_MODE_INTERNAL_MOVE_FROM_STAGE) {
// Check whether there are any resident pages left on src
@ -2682,7 +2755,8 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
block_clear_resident_processor(block, src_id);
}

// If we are staging the copy due to read duplication, we keep the copy there
// If we are staging the copy due to read duplication, we keep the copy
// there
if (transfer_mode == BLOCK_TRANSFER_MODE_INTERNAL_COPY ||
transfer_mode == BLOCK_TRANSFER_MODE_INTERNAL_COPY_TO_STAGE)
uvm_page_mask_or(&block->read_duplicated_pages, &block->read_duplicated_pages, copy_mask);
@ -2746,23 +2820,30 @@ static NV_STATUS block_copy_resident_pages_mask(uvm_va_block_t *block,
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
uvm_processor_id_t src_id;
uvm_processor_mask_t search_mask;
uvm_page_mask_t *copy_mask = &block_context->make_resident.copy_resident_pages_between_mask;

uvm_processor_mask_copy(&search_mask, src_processor_mask);

*copied_pages_out = 0;

for_each_closest_id(src_id, &search_mask, dst_id, va_space) {
uvm_page_mask_t *src_resident_mask = uvm_va_block_resident_mask_get(block, src_id);
NV_STATUS status;
NvU32 copied_pages_from_src;

UVM_ASSERT(!uvm_id_equal(src_id, dst_id));

uvm_page_mask_init_from_region(copy_mask, region, src_resident_mask);

if (page_mask)
uvm_page_mask_and(copy_mask, copy_mask, page_mask);

status = block_copy_resident_pages_between(block,
block_context,
dst_id,
src_id,
region,
page_mask,
copy_mask,
prefetch_page_mask,
transfer_mode,
migrated_pages,
@ -2852,7 +2933,10 @@ static void block_copy_set_first_touch_residency(uvm_va_block_t *block,
//
// If UVM_VA_BLOCK_TRANSFER_MODE_COPY is passed, processors that already have a
// copy of the page will keep it. Conversely, if UVM_VA_BLOCK_TRANSFER_MODE_MOVE
// is passed, the page will no longer be resident in any processor other than dst_id.
// is passed, the page will no longer be resident in any processor other than
// dst_id. If UVM_VA_BLOCK_TRANSFER_MODE_COPY_ONLY is passed, the destination
// pages are copied into but the residency bits for source and destination are
// not updated.
static NV_STATUS block_copy_resident_pages(uvm_va_block_t *block,
uvm_va_block_context_t *block_context,
uvm_processor_id_t dst_id,
@ -2903,6 +2987,13 @@ static NV_STATUS block_copy_resident_pages(uvm_va_block_t *block,
uvm_processor_mask_and(&src_processor_mask, block_get_can_copy_from_mask(block, dst_id), &block->resident);
uvm_processor_mask_clear(&src_processor_mask, dst_id);

if (transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_MOVE)
transfer_mode_internal = BLOCK_TRANSFER_MODE_INTERNAL_MOVE;
else if (transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_COPY)
transfer_mode_internal = BLOCK_TRANSFER_MODE_INTERNAL_COPY;
else
transfer_mode_internal = BLOCK_TRANSFER_MODE_INTERNAL_COPY_ONLY;

status = block_copy_resident_pages_mask(block,
block_context,
dst_id,
@ -2910,9 +3001,7 @@ static NV_STATUS block_copy_resident_pages(uvm_va_block_t *block,
region,
copy_page_mask,
prefetch_page_mask,
transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_COPY?
BLOCK_TRANSFER_MODE_INTERNAL_COPY:
BLOCK_TRANSFER_MODE_INTERNAL_MOVE,
transfer_mode_internal,
missing_pages_count,
migrated_pages,
&pages_copied,
@ -2940,7 +3029,9 @@ static NV_STATUS block_copy_resident_pages(uvm_va_block_t *block,

uvm_page_mask_zero(staged_pages);

if (UVM_ID_IS_CPU(dst_id)) {
if (transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_COPY_ONLY)
transfer_mode_internal = BLOCK_TRANSFER_MODE_INTERNAL_COPY_ONLY;
else if (UVM_ID_IS_CPU(dst_id)) {
transfer_mode_internal = transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_COPY?
BLOCK_TRANSFER_MODE_INTERNAL_COPY:
BLOCK_TRANSFER_MODE_INTERNAL_MOVE;
@ -2981,6 +3072,13 @@ static NV_STATUS block_copy_resident_pages(uvm_va_block_t *block,
goto out;
uvm_tracker_clear(&local_tracker);

if (transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_MOVE)
transfer_mode_internal = BLOCK_TRANSFER_MODE_INTERNAL_MOVE_FROM_STAGE;
else if (transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_COPY)
transfer_mode_internal = BLOCK_TRANSFER_MODE_INTERNAL_COPY_FROM_STAGE;
else
transfer_mode_internal = BLOCK_TRANSFER_MODE_INTERNAL_COPY_ONLY;

// Now copy staged pages from the CPU to the destination.
status = block_copy_resident_pages_between(block,
block_context,
@ -2989,9 +3087,7 @@ static NV_STATUS block_copy_resident_pages(uvm_va_block_t *block,
region,
staged_pages,
prefetch_page_mask,
transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_COPY?
BLOCK_TRANSFER_MODE_INTERNAL_COPY_FROM_STAGE:
BLOCK_TRANSFER_MODE_INTERNAL_MOVE_FROM_STAGE,
transfer_mode_internal,
migrated_pages,
&pages_copied,
&local_tracker);
@ -3010,7 +3106,8 @@ out:
// Pages that weren't resident anywhere else were populated at the
// destination directly. Mark them as resident now. We only do it if there
// have been no errors because we cannot identify which pages failed.
if (status == NV_OK && missing_pages_count > 0)
// For HMM, don't do this until migrate_vma_pages() succeeds.
if (status == NV_OK && missing_pages_count > 0 && transfer_mode != UVM_VA_BLOCK_TRANSFER_MODE_COPY_ONLY)
block_copy_set_first_touch_residency(block, block_context, dst_id, region, page_mask);

// Break read duplication
@ -3048,14 +3145,15 @@ out:
return status == NV_OK ? tracker_status : status;
}

NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
static NV_STATUS block_make_resident(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t dest_id,
uvm_va_block_region_t region,
const uvm_page_mask_t *page_mask,
const uvm_page_mask_t *prefetch_page_mask,
uvm_make_resident_cause_t cause)
uvm_make_resident_cause_t cause,
uvm_va_block_transfer_mode_t transfer_mode)
{
NV_STATUS status;
uvm_processor_mask_t unmap_processor_mask;
@ -3073,6 +3171,7 @@ NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,

uvm_assert_mutex_locked(&va_block->lock);
UVM_ASSERT(uvm_va_block_is_hmm(va_block) || va_block->va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));

resident_mask = block_resident_mask_get_alloc(va_block, dest_id);
if (!resident_mask)
@ -3122,7 +3221,7 @@ NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
region,
page_mask,
prefetch_page_mask,
UVM_VA_BLOCK_TRANSFER_MODE_MOVE);
transfer_mode);
if (status != NV_OK)
return status;

@ -3132,12 +3231,95 @@ NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
//
// Skip this if we didn't do anything (the input region and/or page mask was
// empty).
if (uvm_processor_mask_test(&va_block->resident, dest_id))
if (transfer_mode == UVM_VA_BLOCK_TRANSFER_MODE_MOVE && uvm_processor_mask_test(&va_block->resident, dest_id))
block_mark_memory_used(va_block, dest_id);

return NV_OK;
}

NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t dest_id,
uvm_va_block_region_t region,
const uvm_page_mask_t *page_mask,
const uvm_page_mask_t *prefetch_page_mask,
uvm_make_resident_cause_t cause)
{
return block_make_resident(va_block,
va_block_retry,
va_block_context,
dest_id,
region,
page_mask,
prefetch_page_mask,
cause,
UVM_VA_BLOCK_TRANSFER_MODE_MOVE);
}
|
||||
NV_STATUS uvm_va_block_make_resident_pre(uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t dest_id,
|
||||
uvm_va_block_region_t region,
|
||||
const uvm_page_mask_t *page_mask,
|
||||
const uvm_page_mask_t *prefetch_page_mask,
|
||||
uvm_make_resident_cause_t cause)
|
||||
{
|
||||
return block_make_resident(va_block,
|
||||
va_block_retry,
|
||||
va_block_context,
|
||||
dest_id,
|
||||
region,
|
||||
page_mask,
|
||||
prefetch_page_mask,
|
||||
cause,
|
||||
UVM_VA_BLOCK_TRANSFER_MODE_COPY_ONLY);
|
||||
}
|
||||
|
||||
void uvm_va_block_make_resident_post(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_region_t region,
|
||||
const uvm_page_mask_t *page_mask)
|
||||
{
|
||||
uvm_page_mask_t *migrated_pages = &va_block_context->make_resident.pages_migrated;
|
||||
uvm_processor_id_t dst_id = va_block_context->make_resident.dest_id;
|
||||
uvm_page_mask_t *dst_resident_mask = uvm_va_block_resident_mask_get(va_block, dst_id);
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
if (page_mask)
|
||||
uvm_page_mask_and(migrated_pages, migrated_pages, page_mask);
|
||||
|
||||
if (!uvm_page_mask_empty(migrated_pages)) {
|
||||
// The migrated pages are now resident on the destination.
|
||||
uvm_page_mask_or(dst_resident_mask, dst_resident_mask, migrated_pages);
|
||||
block_set_resident_processor(va_block, dst_id);
|
||||
}
|
||||
|
||||
// Pages that weren't resident anywhere else were populated at the
|
||||
// destination directly. Mark them as resident now. We only do it if there
|
||||
// have been no errors because we cannot identify which pages failed.
|
||||
// For HMM, don't do this until migrate_vma_pages() succeeds.
|
||||
block_copy_set_first_touch_residency(va_block, va_block_context, dst_id, region, page_mask);
|
||||
|
||||
// Any move operation implies that mappings have been removed from all
|
||||
// non-UVM-Lite GPUs.
|
||||
uvm_page_mask_andnot(&va_block->maybe_mapped_pages, &va_block->maybe_mapped_pages, migrated_pages);
|
||||
|
||||
// Break read duplication and clear residency from other processors.
|
||||
break_read_duplication_in_region(va_block, va_block_context, dst_id, region, page_mask);
|
||||
|
||||
// Update eviction heuristics, if needed. Notably this could repeat the call
|
||||
// done in block_set_resident_processor(), but that doesn't do anything bad
|
||||
// and it's simpler to keep it in both places.
|
||||
//
|
||||
// Skip this if we didn't do anything (the input region and/or page mask was
|
||||
// empty).
|
||||
if (uvm_processor_mask_test(&va_block->resident, dst_id))
|
||||
block_mark_memory_used(va_block, dst_id);
|
||||
}
|
||||
|
||||
// Combination function which prepares the input {region, page_mask} for
|
||||
// entering read-duplication. It:
|
||||
// - Unmaps all processors but revoke_id
|
||||
@ -3198,6 +3380,10 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_processor_id_t src_id;
|
||||
|
||||
// TODO: Bug 3660922: need to implement HMM read duplication support.
|
||||
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
|
||||
UVM_ASSERT(va_block_context->policy == uvm_va_range_get_policy(va_block->va_range));
|
||||
|
||||
va_block_context->make_resident.dest_id = dest_id;
|
||||
va_block_context->make_resident.cause = cause;
|
||||
|
||||
@ -3568,7 +3754,7 @@ static bool block_check_mappings_page(uvm_va_block_t *block, uvm_page_index_t pa
|
||||
// Processors with mappings must have access to the processor that
|
||||
// has the valid copy
|
||||
UVM_ASSERT_MSG(uvm_processor_mask_subset(&read_mappings, residency_accessible_from),
|
||||
"Not all processors have access to %s\n",
|
||||
"Not all processors have access to %s\n"
|
||||
"Resident: 0x%lx - Mappings R: 0x%lx W: 0x%lx A: 0x%lx -"
|
||||
"Access: 0x%lx - Native Atomics: 0x%lx - SWA: 0x%lx\n",
|
||||
uvm_va_space_processor_name(va_space, residency),
|
||||
@ -3909,9 +4095,12 @@ static void block_unmap_cpu(uvm_va_block_t *block, uvm_va_block_region_t region,
|
||||
if (!block_has_valid_mapping_cpu(block, subregion))
|
||||
continue;
|
||||
|
||||
unmap_mapping_range(&va_space->mapping,
|
||||
uvm_va_block_region_start(block, subregion),
|
||||
uvm_va_block_region_size(subregion), 1);
|
||||
// We can't actually unmap HMM ranges from the CPU here.
|
||||
// It happens as part of migrate_vma_setup().
|
||||
if (!uvm_va_block_is_hmm(block))
|
||||
unmap_mapping_range(&va_space->mapping,
|
||||
uvm_va_block_region_start(block, subregion),
|
||||
uvm_va_block_region_size(subregion), 1);
|
||||
|
||||
for (pte_bit = 0; pte_bit < UVM_PTE_BITS_CPU_MAX; pte_bit++)
|
||||
uvm_page_mask_region_clear(&block->cpu.pte_bits[pte_bit], subregion);
|
||||
@ -5406,7 +5595,7 @@ static void block_gpu_compute_new_pte_state(uvm_va_block_t *block,
|
||||
uvm_page_index_t page_index;
|
||||
size_t big_page_index;
|
||||
DECLARE_BITMAP(big_ptes_not_covered, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
|
||||
bool can_make_new_big_ptes, region_full;
|
||||
bool can_make_new_big_ptes;
|
||||
|
||||
memset(new_pte_state, 0, sizeof(*new_pte_state));
|
||||
new_pte_state->needs_4k = true;
|
||||
@ -5469,14 +5658,11 @@ static void block_gpu_compute_new_pte_state(uvm_va_block_t *block,
|
||||
|
||||
__set_bit(big_page_index, new_pte_state->big_ptes_covered);
|
||||
|
||||
region_full = uvm_page_mask_region_full(page_mask_after, big_page_region);
|
||||
if (region_full && UVM_ID_IS_INVALID(resident_id))
|
||||
__set_bit(big_page_index, new_pte_state->big_ptes_fully_unmapped);
|
||||
|
||||
// When mapping sysmem, we can use big pages only if we are mapping all pages
|
||||
// in the big page subregion and the CPU pages backing the subregion are
|
||||
// physically contiguous.
|
||||
if (can_make_new_big_ptes && region_full &&
|
||||
// When mapping sysmem, we can use big pages only if we are mapping all
|
||||
// pages in the big page subregion and the CPU pages backing the
|
||||
// subregion are physically contiguous.
|
||||
if (can_make_new_big_ptes &&
|
||||
uvm_page_mask_region_full(page_mask_after, big_page_region) &&
|
||||
(!UVM_ID_IS_CPU(resident_id) ||
|
||||
(contig_region.first <= big_page_region.first && contig_region.outer >= big_page_region.outer))) {
|
||||
__set_bit(big_page_index, new_pte_state->big_ptes);
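// Editor's illustration (not part of the driver source): a standalone sketch
// of the big-PTE eligibility test above, with hypothetical simplified types.
// A big PTE is only considered when every page in the big-page subregion is
// being mapped and, for sysmem (CPU-resident) pages, the backing pages are
// physically contiguous across the whole subregion.
#include <stdbool.h>
#include <stdio.h>

typedef struct { unsigned first, outer; } toy_region_t;  // [first, outer)

static bool region_covers(toy_region_t big, toy_region_t contig)
{
    return contig.first <= big.first && contig.outer >= big.outer;
}

static bool can_use_big_pte(bool region_fully_mapped,
                            bool resident_on_cpu,
                            toy_region_t big_page_region,
                            toy_region_t contig_region)
{
    if (!region_fully_mapped)
        return false;
    // Video memory needs no contiguity check here; only sysmem does.
    return !resident_on_cpu || region_covers(big_page_region, contig_region);
}

int main(void)
{
    toy_region_t big = { 16, 32 };
    printf("%d\n", can_use_big_pte(true, true, big, (toy_region_t){ 16, 48 }));  // 1
    printf("%d\n", can_use_big_pte(true, true, big, (toy_region_t){ 20, 48 }));  // 0
    return 0;
}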
|
||||
@ -5988,6 +6174,42 @@ static NV_STATUS uvm_cpu_insert_page(struct vm_area_struct *vma,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static uvm_prot_t compute_logical_prot(uvm_va_block_t *va_block,
                                       uvm_va_block_context_t *va_block_context,
                                       uvm_page_index_t page_index)
{
    struct vm_area_struct *vma;
    uvm_prot_t logical_prot;

    if (uvm_va_block_is_hmm(va_block)) {
        NvU64 addr = uvm_va_block_cpu_page_address(va_block, page_index);

        logical_prot = uvm_hmm_compute_logical_prot(va_block, va_block_context, addr);
    }
    else {
        uvm_va_range_t *va_range = va_block->va_range;

        UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);

        // Zombified VA ranges no longer have a vma, so they have no permissions
        if (uvm_va_range_is_managed_zombie(va_range)) {
            logical_prot = UVM_PROT_NONE;
        }
        else {
            vma = uvm_va_range_vma(va_range);

            if (!(vma->vm_flags & VM_READ))
                logical_prot = UVM_PROT_NONE;
            else if (!(vma->vm_flags & VM_WRITE))
                logical_prot = UVM_PROT_READ_ONLY;
            else
                logical_prot = UVM_PROT_READ_WRITE_ATOMIC;
        }
    }

    return logical_prot;
}
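// Editor's illustration (not part of the driver source): a self-contained
// model of the vm_flags -> logical protection mapping used by
// compute_logical_prot() above. VM_READ/VM_WRITE and the protection levels
// are redefined locally as hypothetical stand-ins so this compiles outside
// the kernel.
#include <stdio.h>

#define TOY_VM_READ  0x1
#define TOY_VM_WRITE 0x2

typedef enum {
    TOY_PROT_NONE,
    TOY_PROT_READ_ONLY,
    TOY_PROT_READ_WRITE_ATOMIC,
} toy_prot_t;

static toy_prot_t toy_logical_prot(unsigned long vm_flags)
{
    if (!(vm_flags & TOY_VM_READ))
        return TOY_PROT_NONE;            // no read permission: nothing is allowed
    if (!(vm_flags & TOY_VM_WRITE))
        return TOY_PROT_READ_ONLY;       // read-only VMA
    return TOY_PROT_READ_WRITE_ATOMIC;   // writable VMA allows full access
}

int main(void)
{
    printf("%d %d %d\n",
           toy_logical_prot(0),
           toy_logical_prot(TOY_VM_READ),
           toy_logical_prot(TOY_VM_READ | TOY_VM_WRITE));  // 0 1 2
    return 0;
}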
|
||||
|
||||
// Creates or upgrades a CPU mapping for the given page, updating the block's
|
||||
// mapping and pte_bits bitmaps as appropriate. Upon successful return, the page
|
||||
// will be mapped with at least new_prot permissions.
|
||||
@ -6008,6 +6230,7 @@ static NV_STATUS uvm_cpu_insert_page(struct vm_area_struct *vma,
|
||||
// - Ensure that the block hasn't been killed (block->va_range is present)
|
||||
// - Update the pte/mapping tracking state on success
|
||||
static NV_STATUS block_map_cpu_page_to(uvm_va_block_t *block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t resident_id,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_prot_t new_prot)
|
||||
@ -6041,7 +6264,7 @@ static NV_STATUS block_map_cpu_page_to(uvm_va_block_t *block,
|
||||
|
||||
// Check for existing VMA permissions. They could have been modified after
|
||||
// the initial mmap by mprotect.
|
||||
if (!uvm_va_block_is_hmm(block) && new_prot > uvm_va_range_logical_prot(va_range))
|
||||
if (new_prot > compute_logical_prot(block, va_block_context, page_index))
|
||||
return NV_ERR_INVALID_ACCESS_TYPE;
|
||||
|
||||
if (uvm_va_block_is_hmm(block)) {
|
||||
@ -6155,6 +6378,7 @@ static NV_STATUS block_map_cpu_to(uvm_va_block_t *block,
|
||||
|
||||
for_each_va_block_page_in_region_mask(page_index, pages_to_map, region) {
|
||||
status = block_map_cpu_page_to(block,
|
||||
block_context,
|
||||
resident_id,
|
||||
page_index,
|
||||
new_prot);
|
||||
@ -6389,6 +6613,7 @@ NV_STATUS uvm_va_block_map(uvm_va_block_t *va_block,
|
||||
UVM_ASSERT(new_prot != UVM_PROT_NONE);
|
||||
UVM_ASSERT(new_prot < UVM_PROT_MAX);
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
|
||||
|
||||
// Mapping is not supported on the eviction path that doesn't hold the VA
|
||||
// space lock.
|
||||
@ -6730,6 +6955,8 @@ NV_STATUS uvm_va_block_map_mask(uvm_va_block_t *va_block,
|
||||
NV_STATUS tracker_status;
|
||||
uvm_processor_id_t id;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
|
||||
|
||||
for_each_id_in_mask(id, map_processor_mask) {
|
||||
status = uvm_va_block_map(va_block,
|
||||
va_block_context,
|
||||
@ -7176,28 +7403,8 @@ static NV_STATUS block_evict_pages_from_gpu(uvm_va_block_t *va_block, uvm_gpu_t
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, mm);
|
||||
|
||||
if (!uvm_va_block_is_hmm(va_block))
|
||||
block_context->policy = uvm_va_range_get_policy(va_block->va_range);
|
||||
|
||||
// Move all subregions resident on the GPU to the CPU
|
||||
for_each_va_block_subregion_in_mask(subregion, resident, region) {
|
||||
// Need to set block_context->policy for HMM.
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
uvm_va_policy_node_t *node;
|
||||
|
||||
node = uvm_va_policy_node_find(va_block, uvm_va_block_region_start(va_block, subregion));
|
||||
if (node) {
|
||||
uvm_page_index_t outer = uvm_va_block_cpu_page_index(va_block,
|
||||
node->node.end) + 1;
|
||||
// If the policy doesn't cover the subregion, truncate the
|
||||
// subregion.
|
||||
if (subregion.outer > outer)
|
||||
subregion.outer = outer;
|
||||
block_context->policy = &node->policy;
|
||||
}
|
||||
else
|
||||
block_context->policy = &uvm_va_policy_default;
|
||||
}
|
||||
status = uvm_va_block_migrate_locked(va_block,
|
||||
NULL,
|
||||
block_context,
|
||||
@ -7630,13 +7837,82 @@ error:
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS block_split_cpu_chunk_to_size(uvm_va_block_t *block,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_cpu_chunk_t *chunk,
|
||||
uvm_chunk_size_t new_size)
|
||||
{
|
||||
size_t num_new_chunks = uvm_cpu_chunk_get_size(chunk) / new_size;
|
||||
uvm_cpu_chunk_t **new_chunks = NULL;
|
||||
uvm_gpu_t *gpu;
|
||||
NvU64 gpu_mapping_addr;
|
||||
uvm_processor_mask_t gpu_split_mask;
|
||||
uvm_gpu_id_t id;
|
||||
NV_STATUS status;
|
||||
size_t i;
|
||||
|
||||
UVM_ASSERT(IS_ALIGNED(uvm_cpu_chunk_get_size(chunk), new_size));
|
||||
|
||||
uvm_processor_mask_zero(&gpu_split_mask);
|
||||
for_each_gpu_id(id) {
|
||||
if (!uvm_va_block_gpu_state_get(block, id))
|
||||
continue;
|
||||
|
||||
// If the parent chunk has not been mapped, there is nothing to split.
|
||||
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_mapping_addr(block, page_index, chunk, id);
|
||||
if (gpu_mapping_addr == 0)
|
||||
continue;
|
||||
|
||||
gpu = block_get_gpu(block, id);
|
||||
status = uvm_pmm_sysmem_mappings_split_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
|
||||
gpu_mapping_addr,
|
||||
new_size);
|
||||
if (status != NV_OK)
|
||||
goto merge;
|
||||
|
||||
uvm_processor_mask_set(&gpu_split_mask, id);
|
||||
}
|
||||
|
||||
uvm_cpu_chunk_remove_from_block(block, chunk, page_index);
|
||||
new_chunks = uvm_kvmalloc(num_new_chunks * sizeof(*new_chunks));
|
||||
if (new_chunks)
|
||||
status = uvm_cpu_chunk_split(block, chunk, new_size, UVM_CPU_CHUNK_PAGE_INDEX(chunk, page_index), new_chunks);
|
||||
else
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
|
||||
if (status != NV_OK) {
|
||||
uvm_cpu_chunk_insert_in_block(block, chunk, UVM_CPU_CHUNK_PAGE_INDEX(chunk, page_index));
|
||||
|
||||
merge:
|
||||
for_each_gpu_id_in_mask (id, &gpu_split_mask) {
|
||||
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_mapping_addr(block, page_index, chunk, id);
|
||||
gpu = block_get_gpu(block, id);
|
||||
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
|
||||
gpu_mapping_addr,
|
||||
uvm_cpu_chunk_get_size(chunk));
|
||||
}
|
||||
} else {
|
||||
for (i = 0; i < num_new_chunks; i++) {
|
||||
status = uvm_cpu_chunk_insert_in_block(block,
|
||||
new_chunks[i],
|
||||
UVM_CPU_CHUNK_PAGE_INDEX(new_chunks[i],
|
||||
(page_index +
|
||||
(i * (new_size / PAGE_SIZE)))));
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
}
|
||||
}
|
||||
|
||||
uvm_kvfree(new_chunks);
|
||||
return status;
|
||||
}
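// Editor's illustration (not part of the driver source): the
// remove -> split -> reinsert-or-rollback pattern used by
// block_split_cpu_chunk_to_size() above, reduced to a toy chunk type so it
// runs standalone. Sizes and helpers here are hypothetical.
#include <stdlib.h>
#include <stdio.h>

typedef struct { size_t size; } toy_chunk_t;

// Pretend split: carve "parent" into parent->size / new_size children.
static int toy_split(toy_chunk_t *parent, size_t new_size, toy_chunk_t **out, size_t n)
{
    for (size_t i = 0; i < n; i++) {
        out[i] = malloc(sizeof(*out[i]));
        if (!out[i]) {
            while (i--)
                free(out[i]);
            return -1;            // caller rolls back to the unsplit parent
        }
        out[i]->size = new_size;
    }
    (void)parent;
    return 0;
}

int main(void)
{
    toy_chunk_t parent = { .size = 64 * 1024 };
    size_t new_size = 4 * 1024;
    size_t n = parent.size / new_size;
    toy_chunk_t **children = calloc(n, sizeof(*children));

    if (children && toy_split(&parent, new_size, children, n) == 0) {
        printf("split into %zu chunks of %zu bytes\n", n, new_size);
        for (size_t i = 0; i < n; i++)
            free(children[i]);
    }
    else {
        // Rollback path: the parent chunk is left exactly as it was, which is
        // what the "merge:" label above restores for the GPU mappings.
        printf("split failed; parent kept intact\n");
    }
    free(children);
    return 0;
}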
|
||||
|
||||
// Perform any CPU chunk splitting that may be required for this block split.
|
||||
// Just like block_presplit_gpu_chunks, no chunks are moved to the new block.
|
||||
static NV_STATUS block_presplit_cpu_chunks(uvm_va_block_t *existing, uvm_va_block_t *new)
|
||||
{
|
||||
uvm_page_index_t page_index = uvm_va_block_cpu_page_index(existing, new->start);
|
||||
uvm_cpu_chunk_t *splitting_chunk;
|
||||
uvm_chunk_size_t split_sizes = uvm_cpu_chunk_get_allocation_sizes();
|
||||
uvm_chunk_sizes_mask_t split_sizes = uvm_cpu_chunk_get_allocation_sizes();
|
||||
uvm_chunk_size_t subchunk_size;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
@ -7660,32 +7936,7 @@ static NV_STATUS block_presplit_cpu_chunks(uvm_va_block_t *existing, uvm_va_bloc
|
||||
split_sizes &= ~(IS_ALIGNED(new->start, UVM_CHUNK_SIZE_64K) ? UVM_CHUNK_SIZE_64K - 1 : 0);
|
||||
|
||||
for_each_chunk_size_rev(subchunk_size, split_sizes) {
|
||||
uvm_gpu_id_t id;
|
||||
|
||||
UVM_ASSERT(IS_ALIGNED(uvm_cpu_chunk_get_size(splitting_chunk), subchunk_size));
|
||||
|
||||
for_each_gpu_id(id) {
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
if (!uvm_va_block_gpu_state_get(existing, id))
|
||||
continue;
|
||||
|
||||
// If the parent chunk has not been mapped, there is nothing to split.
|
||||
if (uvm_cpu_chunk_get_gpu_mapping_addr(existing, page_index, splitting_chunk, id) == 0)
|
||||
continue;
|
||||
|
||||
gpu = block_get_gpu(existing, id);
|
||||
status = uvm_pmm_sysmem_mappings_split_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
|
||||
uvm_cpu_chunk_get_gpu_mapping_addr(existing,
|
||||
page_index,
|
||||
splitting_chunk,
|
||||
id),
|
||||
subchunk_size);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
status = uvm_cpu_chunk_split(existing, splitting_chunk, subchunk_size);
|
||||
status = block_split_cpu_chunk_to_size(existing, page_index, splitting_chunk, subchunk_size);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
@ -7695,36 +7946,114 @@ static NV_STATUS block_presplit_cpu_chunks(uvm_va_block_t *existing, uvm_va_bloc
|
||||
return NV_OK;
|
||||
}
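// Editor's illustration (not part of the driver source): how a bitmask of
// candidate chunk sizes can be trimmed by the alignment of the split address,
// in the spirit of the split_sizes manipulation in block_presplit_cpu_chunks()
// above. The sizes and macros below are hypothetical stand-ins.
#include <stdint.h>
#include <stdio.h>

#define TOY_SIZE_4K   (1u << 12)
#define TOY_SIZE_64K  (1u << 16)
#define TOY_SIZE_2M   (1u << 21)
#define TOY_IS_ALIGNED(x, a) (((x) & ((a) - 1)) == 0)

int main(void)
{
    // Each set bit is a supported chunk size (the mask uses the sizes
    // themselves as bit positions, like uvm_chunk_sizes_mask_t).
    uint32_t split_sizes = TOY_SIZE_4K | TOY_SIZE_64K | TOY_SIZE_2M;
    uint64_t split_addr = 0x40000000ull + TOY_SIZE_64K;  // 64K-aligned, not 2M-aligned

    // If the split point is already 64K-aligned, no split below 64K is needed:
    // clear every size bit smaller than 64K.
    split_sizes &= ~(TOY_IS_ALIGNED(split_addr, TOY_SIZE_64K) ? TOY_SIZE_64K - 1 : 0);

    printf("remaining split sizes mask: 0x%x\n", split_sizes);  // 64K and 2M only
    return 0;
}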
|
||||
|
||||
static void block_merge_cpu_chunks(uvm_va_block_t *existing, uvm_va_block_t *new)
|
||||
static NV_STATUS block_merge_cpu_chunks_to_size(uvm_va_block_t *block,
|
||||
uvm_chunk_size_t size,
|
||||
uvm_page_index_t page_index)
|
||||
{
|
||||
uvm_page_index_t page_index = uvm_va_block_cpu_page_index(existing, new->start);
|
||||
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(existing, page_index);
|
||||
uvm_va_space_t *va_space = existing->va_range->va_space;
|
||||
uvm_cpu_chunk_t *chunk;
|
||||
size_t num_merge_chunks;
|
||||
uvm_chunk_size_t chunk_size;
|
||||
uvm_cpu_chunk_t **merge_chunks;
|
||||
uvm_gpu_id_t id;
|
||||
size_t i;
|
||||
NV_STATUS status;
|
||||
|
||||
if (!chunk)
|
||||
return;
|
||||
chunk = uvm_cpu_chunk_get_chunk_for_page(block, page_index);
|
||||
chunk_size = uvm_cpu_chunk_get_size(chunk);
|
||||
num_merge_chunks = size / chunk_size;
|
||||
|
||||
// Merge the CPU chunk. If a merge was not done, nothing else needs to be done.
|
||||
chunk = uvm_cpu_chunk_merge(existing, chunk);
|
||||
if (!chunk)
|
||||
return;
|
||||
// It's OK if we can't merge here. We know that the CPU chunk split
|
||||
// operation completed successfully. Therefore, the CPU chunks are in a
|
||||
// sane state.
|
||||
merge_chunks = uvm_kvmalloc(num_merge_chunks * sizeof(*merge_chunks));
|
||||
if (!merge_chunks)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
for (i = 0; i < num_merge_chunks; i++) {
|
||||
merge_chunks[i] = uvm_cpu_chunk_get_chunk_for_page(block, page_index + (i * (chunk_size / PAGE_SIZE)));
|
||||
UVM_ASSERT(merge_chunks[i]);
|
||||
UVM_ASSERT(uvm_cpu_chunk_get_size(merge_chunks[i]) == chunk_size);
|
||||
uvm_cpu_chunk_remove_from_block(block, merge_chunks[i], page_index + (i * (chunk_size / PAGE_SIZE)));
|
||||
}
|
||||
|
||||
// Merge the CPU chunk. If a merge was not done, re-insert the original chunks.
|
||||
status = uvm_cpu_chunk_merge(block, merge_chunks, num_merge_chunks, size, &chunk);
|
||||
if (status == NV_WARN_NOTHING_TO_DO) {
|
||||
for (i = 0; i < num_merge_chunks; i++)
|
||||
uvm_cpu_chunk_insert_in_block(block, merge_chunks[i], page_index + (i * (chunk_size / PAGE_SIZE)));
|
||||
|
||||
goto done;
|
||||
}
|
||||
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
status = uvm_cpu_chunk_insert_in_block(block, chunk, page_index);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
for_each_gpu_id(id) {
|
||||
NvU64 gpu_mapping_addr;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
if (!uvm_va_block_gpu_state_get(existing, id))
|
||||
if (!uvm_va_block_gpu_state_get(block, id))
|
||||
continue;
|
||||
|
||||
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_mapping_addr(existing, page_index, chunk, id);
|
||||
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_mapping_addr(block, page_index, chunk, id);
|
||||
if (gpu_mapping_addr == 0)
|
||||
continue;
|
||||
|
||||
gpu = uvm_va_space_get_gpu(va_space, id);
|
||||
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
|
||||
gpu_mapping_addr,
|
||||
uvm_cpu_chunk_get_size(chunk));
|
||||
gpu = block_get_gpu(block, id);
|
||||
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings, gpu_mapping_addr, size);
|
||||
}
|
||||
|
||||
done:
|
||||
uvm_kvfree(merge_chunks);
|
||||
return status;
|
||||
}
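// Editor's illustration (not part of the driver source): the
// gather -> merge -> reinsert control flow of block_merge_cpu_chunks_to_size()
// above, with a toy merge that can report "nothing to do", loosely mirroring
// NV_WARN_NOTHING_TO_DO. All names are simplified stand-ins.
#include <stdio.h>
#include <stdbool.h>

#define TOY_N 4

typedef struct { int present; } toy_chunk_t;

// Pretend merge: succeeds only if all children are present.
static bool toy_merge(toy_chunk_t children[TOY_N], toy_chunk_t *merged)
{
    for (int i = 0; i < TOY_N; i++) {
        if (!children[i].present)
            return false;  // nothing to do; caller re-inserts the children
    }
    merged->present = 1;
    return true;
}

int main(void)
{
    toy_chunk_t children[TOY_N] = { {1}, {1}, {1}, {1} };
    toy_chunk_t merged = { 0 };

    // 1) Remove the children from the block (here: just remember them).
    // 2) Try to merge them into one larger chunk.
    // 3) On "nothing to do", put the original children back unchanged.
    if (toy_merge(children, &merged))
        printf("merged into one chunk\n");
    else
        printf("merge skipped; children re-inserted as-is\n");
    return 0;
}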
|
||||
|
||||
static void block_merge_cpu_chunks(uvm_va_block_t *existing, uvm_va_block_t *new)
|
||||
{
|
||||
uvm_page_index_t page_index = uvm_va_block_cpu_page_index(existing, new->start);
|
||||
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(existing, page_index);
|
||||
uvm_chunk_sizes_mask_t merge_sizes = uvm_cpu_chunk_get_allocation_sizes();
|
||||
uvm_chunk_size_t largest_size;
|
||||
uvm_chunk_size_t chunk_size;
|
||||
uvm_chunk_size_t merge_size;
|
||||
size_t block_size = uvm_va_block_size(existing);
|
||||
NV_STATUS status;
|
||||
|
||||
if (!chunk)
|
||||
return;
|
||||
|
||||
chunk_size = uvm_cpu_chunk_get_size(chunk);
|
||||
|
||||
    // Remove all CPU chunk sizes above the size of the existing VA block.
    // Since block sizes are not always powers of 2, use the largest power of 2
    // less than or equal to the block size since we can't merge to a size
    // larger than the block's size.
    largest_size = rounddown_pow_of_two(block_size);
    merge_sizes &= (largest_size | (largest_size - 1));

    // Remove all CPU chunk sizes smaller than or equal to the size of the
    // chunk being merged up.
    merge_sizes &= ~(chunk_size | (chunk_size - 1));
|
||||
|
||||
for_each_chunk_size(merge_size, merge_sizes) {
|
||||
uvm_va_block_region_t chunk_region;
|
||||
|
||||
// The block has to fully contain the VA range after the merge.
|
||||
if (!uvm_va_block_contains_address(existing, UVM_ALIGN_DOWN(new->start, merge_size)) ||
|
||||
!uvm_va_block_contains_address(existing, UVM_ALIGN_DOWN(new->start, merge_size) + merge_size - 1))
|
||||
break;
|
||||
|
||||
chunk_region = uvm_va_block_chunk_region(existing, merge_size, page_index);
|
||||
|
||||
// If not all pages in the region covered by the chunk are allocated,
|
||||
// we can't merge.
|
||||
if (!uvm_page_mask_region_full(&existing->cpu.allocated, chunk_region))
|
||||
break;
|
||||
|
||||
status = block_merge_cpu_chunks_to_size(existing, merge_size, chunk_region.first);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
}
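// Editor's illustration (not part of the driver source): the two mask tricks
// used by block_merge_cpu_chunks() above, shown with concrete numbers.
// `largest | (largest - 1)` keeps every size bit up to and including
// `largest`; `~(chunk | (chunk - 1))` clears every size bit up to and
// including `chunk`, leaving only the strictly larger merge candidates.
#include <stdint.h>
#include <stdio.h>

// Round down to a power of two (stand-in for the kernel's rounddown_pow_of_two()).
static uint64_t toy_rounddown_pow2(uint64_t x)
{
    while (x & (x - 1))
        x &= x - 1;
    return x;
}

int main(void)
{
    uint64_t merge_sizes = (1u << 12) | (1u << 16) | (1u << 21);  // 4K, 64K, 2M
    uint64_t block_size  = 1536 * 1024;   // 1.5M block: not a power of two
    uint64_t chunk_size  = 1u << 12;      // currently a 4K chunk

    uint64_t largest = toy_rounddown_pow2(block_size);      // 1M
    merge_sizes &= (largest | (largest - 1));                // drops 2M
    merge_sizes &= ~(chunk_size | (chunk_size - 1));         // drops 4K

    printf("candidate merge sizes: 0x%llx\n", (unsigned long long)merge_sizes);
    // Only the 64K bit survives: it is <= the block size and > the chunk size.
    return 0;
}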
|
||||
|
||||
@ -7737,7 +8066,7 @@ static NV_STATUS block_split_preallocate_no_retry(uvm_va_block_t *existing, uvm_
|
||||
uvm_gpu_t *gpu;
|
||||
uvm_gpu_id_t id;
|
||||
uvm_page_index_t split_page_index;
|
||||
uvm_va_range_t *existing_va_range = existing->va_range;
|
||||
uvm_va_block_test_t *block_test;
|
||||
|
||||
status = block_presplit_cpu_chunks(existing, new);
|
||||
if (status != NV_OK)
|
||||
@ -7759,8 +8088,13 @@ static NV_STATUS block_split_preallocate_no_retry(uvm_va_block_t *existing, uvm_
|
||||
}
|
||||
}
|
||||
|
||||
if (existing_va_range && existing_va_range->inject_split_error) {
|
||||
existing_va_range->inject_split_error = false;
|
||||
block_test = uvm_va_block_get_test(existing);
|
||||
if (block_test && block_test->inject_split_error) {
|
||||
block_test->inject_split_error = false;
|
||||
if (!uvm_va_block_is_hmm(existing)) {
|
||||
UVM_ASSERT(existing->va_range->inject_split_error);
|
||||
existing->va_range->inject_split_error = false;
|
||||
}
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto error;
|
||||
}
|
||||
@ -8330,8 +8664,10 @@ NV_STATUS uvm_va_block_split_locked(uvm_va_block_t *existing_va_block,
|
||||
block_set_processor_masks(existing_va_block);
|
||||
block_set_processor_masks(new_block);
|
||||
|
||||
if (uvm_va_block_is_hmm(existing_va_block))
|
||||
if (uvm_va_block_is_hmm(existing_va_block)) {
|
||||
uvm_hmm_va_block_split_tree(existing_va_block, new_block);
|
||||
uvm_va_policy_node_split_move(existing_va_block, new_block);
|
||||
}
|
||||
|
||||
out:
|
||||
// Run checks on existing_va_block even on failure, since an error must
|
||||
@ -8363,7 +8699,7 @@ static bool block_region_might_read_duplicate(uvm_va_block_t *va_block,
|
||||
if (!uvm_va_space_can_read_duplicate(va_space, NULL))
|
||||
return false;
|
||||
|
||||
// TODO: Bug 2046423: need to implement read duplication support in Linux.
|
||||
// TODO: Bug 3660922: need to implement HMM read duplication support.
|
||||
if (uvm_va_block_is_hmm(va_block) ||
|
||||
uvm_va_range_get_policy(va_range)->read_duplication == UVM_READ_DUPLICATION_DISABLED)
|
||||
return false;
|
||||
@ -8382,22 +8718,20 @@ static bool block_region_might_read_duplicate(uvm_va_block_t *va_block,
|
||||
// could be changed in the future to optimize multiple faults/counters on
|
||||
// contiguous pages.
|
||||
static uvm_prot_t compute_new_permission(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_processor_id_t fault_processor_id,
|
||||
uvm_processor_id_t new_residency,
|
||||
uvm_fault_access_type_t access_type)
|
||||
{
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_prot_t logical_prot, new_prot;
|
||||
|
||||
// TODO: Bug 1766432: Refactor into policies. Current policy is
|
||||
// query_promote: upgrade access privileges to avoid future faults IF
|
||||
// they don't trigger further revocations.
|
||||
va_range = va_block->va_range;
|
||||
|
||||
new_prot = uvm_fault_access_type_to_prot(access_type);
|
||||
logical_prot = uvm_va_range_logical_prot(va_range);
|
||||
logical_prot = compute_logical_prot(va_block, va_block_context, page_index);
|
||||
|
||||
UVM_ASSERT(logical_prot >= new_prot);
|
||||
|
||||
@ -8542,7 +8876,10 @@ NV_STATUS uvm_va_block_add_mappings_after_migration(uvm_va_block_t *va_block,
|
||||
uvm_va_policy_t *policy = va_block_context->policy;
|
||||
uvm_processor_id_t preferred_location;
|
||||
|
||||
// Read duplication takes precedence over SetAccesedBy.
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, policy, region));
|
||||
|
||||
// Read duplication takes precedence over SetAccessedBy.
|
||||
//
|
||||
// Exclude ranges with read duplication set...
|
||||
if (uvm_va_policy_is_read_duplicate(policy, va_space)) {
|
||||
@ -8764,6 +9101,8 @@ NV_STATUS uvm_va_block_add_mappings(uvm_va_block_t *va_block,
|
||||
uvm_range_group_range_iter_t iter;
|
||||
uvm_prot_t prot_to_map;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block, va_block_context->policy, region));
|
||||
|
||||
if (UVM_ID_IS_CPU(processor_id) && !uvm_va_block_is_hmm(va_block)) {
|
||||
if (!uvm_va_range_vma_check(va_range, va_block_context->mm))
|
||||
return NV_OK;
|
||||
@ -8778,7 +9117,7 @@ NV_STATUS uvm_va_block_add_mappings(uvm_va_block_t *va_block,
|
||||
va_block_context->mask_by_prot[prot_to_map - 1].count = 0;
|
||||
|
||||
for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
|
||||
// Read duplication takes precedence over SetAccesedBy. Exclude pages
|
||||
// Read duplication takes precedence over SetAccessedBy. Exclude pages
|
||||
// read-duplicated by performance heuristics
|
||||
if (uvm_page_mask_test(&va_block->read_duplicated_pages, page_index))
|
||||
continue;
|
||||
@ -8882,6 +9221,7 @@ static bool map_remote_on_atomic_fault(uvm_va_space_t *va_space,
|
||||
// could be changed in the future to optimize multiple faults or access
|
||||
// counter notifications on contiguous pages.
|
||||
static uvm_processor_id_t block_select_residency(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_processor_id_t processor_id,
|
||||
NvU32 access_type_mask,
|
||||
@ -8895,7 +9235,9 @@ static uvm_processor_id_t block_select_residency(uvm_va_block_t *va_block,
|
||||
bool may_read_duplicate;
|
||||
uvm_processor_id_t preferred_location;
|
||||
|
||||
if (is_uvm_fault_force_sysmem_set()) {
|
||||
// TODO: Bug 3660968: Remove uvm_hmm_force_sysmem_set() check as soon as
|
||||
// HMM migration is implemented for VMAs other than anonymous memory.
|
||||
if (is_uvm_fault_force_sysmem_set() || uvm_hmm_must_use_sysmem(va_block, va_block_context)) {
|
||||
*read_duplicate = false;
|
||||
return UVM_ID_CPU;
|
||||
}
|
||||
@ -8990,6 +9332,7 @@ static uvm_processor_id_t block_select_residency(uvm_va_block_t *va_block,
|
||||
}
|
||||
|
||||
uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_processor_id_t processor_id,
|
||||
NvU32 access_type_mask,
|
||||
@ -8998,14 +9341,24 @@ uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
|
||||
uvm_service_operation_t operation,
|
||||
bool *read_duplicate)
|
||||
{
|
||||
uvm_processor_id_t id = block_select_residency(va_block,
|
||||
page_index,
|
||||
processor_id,
|
||||
access_type_mask,
|
||||
policy,
|
||||
thrashing_hint,
|
||||
operation,
|
||||
read_duplicate);
|
||||
uvm_processor_id_t id;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block,
|
||||
va_block_context->policy,
|
||||
uvm_va_block_region_for_page(page_index)));
|
||||
UVM_ASSERT(uvm_hmm_va_block_context_vma_is_valid(va_block,
|
||||
va_block_context,
|
||||
uvm_va_block_region_for_page(page_index)));
|
||||
|
||||
id = block_select_residency(va_block,
|
||||
va_block_context,
|
||||
page_index,
|
||||
processor_id,
|
||||
access_type_mask,
|
||||
policy,
|
||||
thrashing_hint,
|
||||
operation,
|
||||
read_duplicate);
|
||||
|
||||
// If the intended residency doesn't have memory, fall back to the CPU.
|
||||
if (!block_processor_has_memory(va_block, id)) {
|
||||
@ -9035,32 +9388,13 @@ static bool check_access_counters_dont_revoke(uvm_va_block_t *block,
|
||||
return true;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *block_retry,
|
||||
uvm_service_block_context_t *service_context)
|
||||
// Update service_context->prefetch_hint, service_context->per_processor_masks,
|
||||
// and service_context->region.
|
||||
static void uvm_va_block_get_prefetch_hint(uvm_va_block_t *va_block,
|
||||
uvm_service_block_context_t *service_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_processor_id_t new_residency;
|
||||
uvm_prot_t new_prot;
|
||||
uvm_va_range_t *va_range = va_block->va_range;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_perf_prefetch_hint_t prefetch_hint = UVM_PERF_PREFETCH_HINT_NONE();
|
||||
uvm_processor_mask_t processors_involved_in_cpu_migration;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
|
||||
|
||||
// GPU fault servicing must be done under the VA space read lock. GPU fault
|
||||
// servicing is required for RM to make forward progress, and we allow other
|
||||
// threads to call into RM while holding the VA space lock in read mode. If
|
||||
// we took the VA space lock in write mode on the GPU fault service path,
|
||||
// we could deadlock because the thread in RM which holds the VA space lock
|
||||
// for read wouldn't be able to complete until fault servicing completes.
|
||||
if (service_context->operation != UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS || UVM_ID_IS_CPU(processor_id))
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
else
|
||||
uvm_assert_rwsem_locked_read(&va_space->lock);
|
||||
|
||||
// Performance heuristics policy: we only consider prefetching when there
|
||||
// are migrations to a single processor, only.
|
||||
@ -9074,20 +9408,20 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
|
||||
// Update prefetch tracking structure with the pages that will migrate
|
||||
// due to faults
|
||||
uvm_perf_prefetch_prenotify_fault_migrations(va_block,
|
||||
&service_context->block_context,
|
||||
new_residency,
|
||||
new_residency_mask,
|
||||
service_context->region);
|
||||
|
||||
prefetch_hint = uvm_perf_prefetch_get_hint(va_block, new_residency_mask);
|
||||
uvm_perf_prefetch_get_hint(va_block,
|
||||
&service_context->block_context,
|
||||
new_residency,
|
||||
new_residency_mask,
|
||||
service_context->region,
|
||||
&service_context->prefetch_bitmap_tree,
|
||||
&service_context->prefetch_hint);
|
||||
|
||||
// Obtain the prefetch hint and give a fake fault access type to the
|
||||
// prefetched pages
|
||||
if (UVM_ID_IS_VALID(prefetch_hint.residency)) {
|
||||
UVM_ASSERT(prefetch_hint.prefetch_pages_mask != NULL);
|
||||
if (UVM_ID_IS_VALID(service_context->prefetch_hint.residency)) {
|
||||
const uvm_page_mask_t *prefetch_pages_mask = &service_context->prefetch_hint.prefetch_pages_mask;
|
||||
|
||||
for_each_va_block_page_in_mask(page_index, prefetch_hint.prefetch_pages_mask, va_block) {
|
||||
for_each_va_block_page_in_mask(page_index, prefetch_pages_mask, va_block) {
|
||||
UVM_ASSERT(!uvm_page_mask_test(new_residency_mask, page_index));
|
||||
|
||||
service_context->access_type[page_index] = UVM_FAULT_ACCESS_TYPE_PREFETCH;
|
||||
@ -9102,9 +9436,43 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
}
|
||||
}
|
||||
|
||||
service_context->region = uvm_va_block_region_from_block(va_block);
|
||||
uvm_page_mask_or(new_residency_mask, new_residency_mask, prefetch_pages_mask);
|
||||
service_context->region = uvm_va_block_region_from_mask(va_block, new_residency_mask);
|
||||
}
|
||||
}
|
||||
else {
|
||||
service_context->prefetch_hint.residency = UVM_ID_INVALID;
|
||||
}
|
||||
}
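// Editor's illustration (not part of the driver source): what the prefetch
// hint handling above does to the residency mask and the serviced region,
// using a 64-bit mask as a stand-in for uvm_page_mask_t and a [first, outer)
// pair as a stand-in for uvm_va_block_region_t.
#include <stdint.h>
#include <stdio.h>

typedef struct { int first, outer; } toy_region_t;

static toy_region_t toy_region_from_mask(uint64_t mask)
{
    toy_region_t r = { 0, 0 };
    if (!mask)
        return r;
    r.first = __builtin_ctzll(mask);           // lowest set page index
    r.outer = 64 - __builtin_clzll(mask);      // one past the highest set page
    return r;
}

int main(void)
{
    uint64_t new_residency_mask = 0x00f0;  // pages 4-7 faulted
    uint64_t prefetch_pages     = 0x0f00;  // pages 8-11 picked by the heuristic

    // Prefetched pages are folded into the pages that will change residency,
    // and the serviced region is recomputed to cover the combined mask.
    new_residency_mask |= prefetch_pages;
    toy_region_t region = toy_region_from_mask(new_residency_mask);

    printf("region = [%d, %d)\n", region.first, region.outer);  // [4, 12)
    return 0;
}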
|
||||
|
||||
NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *block_retry,
|
||||
uvm_service_block_context_t *service_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_processor_id_t new_residency;
|
||||
uvm_prot_t new_prot;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_processor_mask_t processors_involved_in_cpu_migration;
|
||||
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block,
|
||||
service_context->block_context.policy,
|
||||
service_context->region));
|
||||
|
||||
// GPU fault servicing must be done under the VA space read lock. GPU fault
|
||||
// servicing is required for RM to make forward progress, and we allow other
|
||||
// threads to call into RM while holding the VA space lock in read mode. If
|
||||
// we took the VA space lock in write mode on the GPU fault service path,
|
||||
// we could deadlock because the thread in RM which holds the VA space lock
|
||||
// for read wouldn't be able to complete until fault servicing completes.
|
||||
if (service_context->operation != UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS || UVM_ID_IS_CPU(processor_id))
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
else
|
||||
uvm_assert_rwsem_locked_read(&va_space->lock);
|
||||
|
||||
uvm_va_block_get_prefetch_hint(va_block, service_context);
|
||||
|
||||
for (new_prot = UVM_PROT_READ_ONLY; new_prot < UVM_PROT_MAX; ++new_prot)
|
||||
service_context->mappings_by_prot[new_prot-1].count = 0;
|
||||
@ -9137,11 +9505,10 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
uvm_page_mask_zero(did_migrate_mask);
|
||||
uvm_processor_mask_zero(all_involved_processors);
|
||||
|
||||
if (UVM_ID_IS_VALID(prefetch_hint.residency)) {
|
||||
UVM_ASSERT(uvm_id_equal(prefetch_hint.residency, new_residency));
|
||||
UVM_ASSERT(prefetch_hint.prefetch_pages_mask != NULL);
|
||||
if (UVM_ID_IS_VALID(service_context->prefetch_hint.residency)) {
|
||||
UVM_ASSERT(uvm_id_equal(service_context->prefetch_hint.residency, new_residency));
|
||||
|
||||
uvm_page_mask_or(new_residency_mask, new_residency_mask, prefetch_hint.prefetch_pages_mask);
|
||||
uvm_page_mask_or(new_residency_mask, new_residency_mask, &service_context->prefetch_hint.prefetch_pages_mask);
|
||||
}
|
||||
|
||||
if (service_context->read_duplicate_count == 0 ||
|
||||
@ -9156,7 +9523,7 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
service_context->read_duplicate_count == 0?
|
||||
new_residency_mask:
|
||||
&service_context->block_context.caller_page_mask,
|
||||
prefetch_hint.prefetch_pages_mask,
|
||||
&service_context->prefetch_hint.prefetch_pages_mask,
|
||||
cause);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@ -9172,7 +9539,7 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
new_residency,
|
||||
service_context->region,
|
||||
&service_context->block_context.caller_page_mask,
|
||||
prefetch_hint.prefetch_pages_mask,
|
||||
&service_context->prefetch_hint.prefetch_pages_mask,
|
||||
cause);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@ -9193,6 +9560,7 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
// the new residency
|
||||
for_each_va_block_page_in_region_mask(page_index, new_residency_mask, service_context->region) {
|
||||
new_prot = compute_new_permission(va_block,
|
||||
&service_context->block_context,
|
||||
page_index,
|
||||
processor_id,
|
||||
new_residency,
|
||||
@ -9465,6 +9833,65 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t processor_id,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_fault_type_t access_type,
|
||||
bool allow_migration)
|
||||
{
|
||||
uvm_va_range_t *va_range = va_block->va_range;
|
||||
uvm_prot_t access_prot = uvm_fault_access_type_to_prot(access_type);
|
||||
|
||||
UVM_ASSERT(uvm_va_block_check_policy_is_valid(va_block,
|
||||
va_block_context->policy,
|
||||
uvm_va_block_region_for_page(page_index)));
|
||||
UVM_ASSERT(uvm_hmm_va_block_context_vma_is_valid(va_block,
|
||||
va_block_context,
|
||||
uvm_va_block_region_for_page(page_index)));
|
||||
|
||||
// CPU permissions are checked later by block_map_cpu_page.
|
||||
//
|
||||
// TODO: Bug 1766124: permissions are checked by block_map_cpu_page because
|
||||
// it can also be called from change_pte. Make change_pte call this
|
||||
// function and only check CPU permissions here.
|
||||
if (UVM_ID_IS_GPU(processor_id)) {
|
||||
if (va_range && uvm_va_range_is_managed_zombie(va_range))
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
|
||||
// GPU faults only check vma permissions if a mm is registered with the
|
||||
// VA space (ie. uvm_va_space_mm_retain_lock(va_space) != NULL) or if
|
||||
// uvm_enable_builtin_tests is set, because the Linux kernel can change
|
||||
// vm_flags at any moment (for example on mprotect) and here we are not
|
||||
// guaranteed to have vma->vm_mm->mmap_lock. During tests we ensure that
|
||||
// this scenario does not happen.
|
||||
if ((va_block_context->mm || uvm_enable_builtin_tests) &&
|
||||
(access_prot > compute_logical_prot(va_block, va_block_context, page_index)))
|
||||
return NV_ERR_INVALID_ACCESS_TYPE;
|
||||
}
|
||||
|
||||
// Non-migratable range:
|
||||
// - CPU accesses are always fatal, regardless of the VA range residency
|
||||
// - GPU accesses are fatal if the GPU can't map the preferred location
|
||||
if (!allow_migration) {
|
||||
UVM_ASSERT(!uvm_va_block_is_hmm(va_block));
|
||||
|
||||
if (UVM_ID_IS_CPU(processor_id)) {
|
||||
return NV_ERR_INVALID_OPERATION;
|
||||
}
|
||||
else {
|
||||
uvm_va_space_t *va_space = va_range->va_space;
|
||||
|
||||
return uvm_processor_mask_test(
|
||||
&va_space->accessible_from[uvm_id_value(uvm_va_range_get_policy(va_range)->preferred_location)],
|
||||
processor_id)?
|
||||
NV_OK : NV_ERR_INVALID_ACCESS_TYPE;
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
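// Editor's illustration (not part of the driver source): the core comparison
// made by uvm_va_block_check_logical_permissions() above, reduced to ordered
// protection levels. Names are simplified; the real function also handles
// zombie ranges, HMM blocks and non-migratable ranges.
#include <stdio.h>

typedef enum {
    TOY_PROT_NONE,
    TOY_PROT_READ_ONLY,
    TOY_PROT_READ_WRITE,
    TOY_PROT_READ_WRITE_ATOMIC,
} toy_prot_t;

// 0 on success, -1 when the requested access exceeds what the VMA allows.
static int toy_check_access(toy_prot_t requested, toy_prot_t logical)
{
    return requested > logical ? -1 : 0;
}

int main(void)
{
    // A write fault against a read-only VMA is rejected; a read fault is not.
    printf("%d\n", toy_check_access(TOY_PROT_READ_WRITE, TOY_PROT_READ_ONLY));  // -1
    printf("%d\n", toy_check_access(TOY_PROT_READ_ONLY, TOY_PROT_READ_ONLY));   // 0
    return 0;
}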
|
||||
|
||||
// Check if we are faulting on a page with valid permissions to check if we can
|
||||
// skip fault handling. See uvm_va_block_t::cpu::fault_authorized for more
|
||||
// details
|
||||
@ -9528,19 +9955,6 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
|
||||
UVM_ASSERT(fault_addr >= va_block->start);
|
||||
UVM_ASSERT(fault_addr <= va_block->end);
|
||||
|
||||
// There are up to three mm_structs to worry about, and they might all be
|
||||
// different:
|
||||
//
|
||||
// 1) vma->vm_mm
|
||||
// 2) current->mm
|
||||
// 3) va_space->va_space_mm.mm (though note that if this is valid, then it
|
||||
// must match vma->vm_mm).
|
||||
//
|
||||
// The kernel guarantees that vma->vm_mm has a reference taken with
|
||||
// mmap_lock held on the CPU fault path, so tell the fault handler to use
|
||||
// that one. current->mm might differ if we're on the access_process_vm
|
||||
// (ptrace) path or if another driver is calling get_user_pages.
|
||||
service_context->block_context.mm = uvm_va_range_vma(va_range)->vm_mm;
|
||||
uvm_assert_mmap_lock_locked(service_context->block_context.mm);
|
||||
|
||||
service_context->block_context.policy = uvm_va_policy_get(va_block, fault_addr);
|
||||
@ -9556,8 +9970,11 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
|
||||
}
|
||||
|
||||
// Check logical permissions
|
||||
status = uvm_va_range_check_logical_permissions(va_block->va_range,
|
||||
page_index = uvm_va_block_cpu_page_index(va_block, fault_addr);
|
||||
status = uvm_va_block_check_logical_permissions(va_block,
|
||||
&service_context->block_context,
|
||||
UVM_ID_CPU,
|
||||
page_index,
|
||||
fault_access_type,
|
||||
uvm_range_group_address_migratable(va_space, fault_addr));
|
||||
if (status != NV_OK)
|
||||
@ -9565,7 +9982,6 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
|
||||
|
||||
uvm_processor_mask_zero(&service_context->cpu_fault.gpus_to_check_for_ecc);
|
||||
|
||||
page_index = uvm_va_block_cpu_page_index(va_block, fault_addr);
|
||||
if (skip_cpu_fault_with_valid_permissions(va_block, page_index, fault_access_type))
|
||||
return NV_OK;
|
||||
|
||||
@ -9588,6 +10004,7 @@ static NV_STATUS block_cpu_fault_locked(uvm_va_block_t *va_block,
|
||||
|
||||
// Compute new residency and update the masks
|
||||
new_residency = uvm_va_block_select_residency(va_block,
|
||||
&service_context->block_context,
|
||||
page_index,
|
||||
UVM_ID_CPU,
|
||||
uvm_fault_access_type_mask_bit(fault_access_type),
|
||||
@ -9689,7 +10106,6 @@ NV_STATUS uvm_va_block_find(uvm_va_space_t *va_space, NvU64 addr, uvm_va_block_t
|
||||
}
|
||||
|
||||
NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
struct mm_struct *mm,
|
||||
NvU64 addr,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_va_block_t **out_block)
|
||||
@ -9697,9 +10113,12 @@ NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
|
||||
uvm_va_range_t *va_range;
|
||||
size_t index;
|
||||
|
||||
if (uvm_enable_builtin_tests && atomic_dec_if_positive(&va_space->test.va_block_allocation_fail_nth) == 0)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
va_range = uvm_va_range_find(va_space, addr);
|
||||
if (!va_range) {
|
||||
if (!mm)
|
||||
if (!va_block_context || !va_block_context->mm)
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
return uvm_hmm_va_block_find_create(va_space, addr, va_block_context, out_block);
|
||||
}
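// Editor's illustration (not part of the driver source): the lookup/fallback
// decision made by uvm_va_block_find_create() above. The managed-range and
// HMM paths are stubbed out; only the control flow is modeled.
#include <stdbool.h>
#include <stdio.h>

typedef enum { TOY_OK, TOY_ERR_INVALID_ADDRESS } toy_status_t;

static toy_status_t toy_find_create(bool addr_in_managed_range, bool have_mm)
{
    if (!addr_in_managed_range) {
        // No managed VA range covers the address: only an HMM block can back
        // it, and that requires a usable mm in the block context.
        if (!have_mm)
            return TOY_ERR_INVALID_ADDRESS;
        return TOY_OK;  // would call the HMM find/create path here
    }
    return TOY_OK;      // would look up or create the managed block here
}

int main(void)
{
    printf("%d %d %d\n",
           toy_find_create(true, false),    // managed range: OK either way
           toy_find_create(false, true),    // HMM path available
           toy_find_create(false, false));  // no range, no mm -> invalid
    return 0;
}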
|
||||
@ -9738,6 +10157,8 @@ NV_STATUS uvm_va_block_write_from_cpu(uvm_va_block_t *va_block,
|
||||
if (UVM_ID_IS_INVALID(proc))
|
||||
proc = UVM_ID_CPU;
|
||||
|
||||
block_context->policy = uvm_va_policy_get(va_block, dst);
|
||||
|
||||
// Use make_resident() in all cases to break read-duplication, but
|
||||
// block_retry can be NULL as if the page is not resident yet we will make
|
||||
// it resident on the CPU.
|
||||
@ -10222,12 +10643,19 @@ NV_STATUS uvm_test_va_block_inject_error(UVM_TEST_VA_BLOCK_INJECT_ERROR_PARAMS *
|
||||
struct mm_struct *mm;
|
||||
uvm_va_block_t *va_block;
|
||||
uvm_va_block_test_t *va_block_test;
|
||||
uvm_va_block_context_t *block_context = NULL;
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
mm = uvm_va_space_mm_retain_lock(va_space);
|
||||
mm = uvm_va_space_mm_or_current_retain_lock(va_space);
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
status = uvm_va_block_find_create(va_space, mm, params->lookup_address, NULL, &va_block);
|
||||
block_context = uvm_va_block_context_alloc(mm);
|
||||
if (!block_context) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto out;
|
||||
}
|
||||
|
||||
status = uvm_va_block_find_create(va_space, params->lookup_address, block_context, &va_block);
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
@ -10248,8 +10676,8 @@ NV_STATUS uvm_test_va_block_inject_error(UVM_TEST_VA_BLOCK_INJECT_ERROR_PARAMS *
|
||||
if (params->eviction_error)
|
||||
va_block_test->inject_eviction_error = params->eviction_error;
|
||||
|
||||
if (params->cpu_pages_allocation_error)
|
||||
va_block_test->inject_cpu_pages_allocation_error = params->cpu_pages_allocation_error;
|
||||
if (params->cpu_pages_allocation_error_count)
|
||||
va_block_test->inject_cpu_pages_allocation_error_count = params->cpu_pages_allocation_error_count;
|
||||
|
||||
if (params->populate_error)
|
||||
va_block_test->inject_populate_error = params->populate_error;
|
||||
@ -10258,7 +10686,8 @@ NV_STATUS uvm_test_va_block_inject_error(UVM_TEST_VA_BLOCK_INJECT_ERROR_PARAMS *
|
||||
|
||||
out:
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_release_unlock(va_space, mm);
|
||||
uvm_va_space_mm_or_current_release_unlock(va_space, mm);
|
||||
uvm_va_block_context_free(block_context);
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -10329,7 +10758,7 @@ NV_STATUS uvm_test_change_pte_mapping(UVM_TEST_CHANGE_PTE_MAPPING_PARAMS *params
|
||||
goto out;
|
||||
}
|
||||
|
||||
status = uvm_va_block_find_create(va_space, mm, params->va, block_context, &block);
|
||||
status = uvm_va_block_find_create(va_space, params->va, block_context, &block);
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
|
@ -249,7 +249,7 @@ struct uvm_va_block_struct
|
||||
// Lock protecting the block. See the comment at the top of uvm.c.
|
||||
uvm_mutex_t lock;
|
||||
|
||||
// Parent VA range. UVM managed blocks have this set. HMM blocks will have
|
||||
// Parent VA range. Managed blocks have this set. HMM blocks will have
|
||||
// va_range set to NULL and hmm.va_space set instead. Dead blocks that are
|
||||
// waiting for the last ref count to be removed have va_range and
|
||||
// hmm.va_space set to NULL (could be either type of block).
|
||||
@ -437,13 +437,22 @@ struct uvm_va_block_struct
|
||||
|
||||
uvm_perf_module_data_desc_t perf_modules_data[UVM_PERF_MODULE_TYPE_COUNT];
|
||||
|
||||
    // Prefetch information that is updated while holding the va_block lock but
    // records state while the lock is not held.
    struct
    {
        uvm_processor_id_t last_migration_proc_id;

        NvU16 fault_migrations_to_last_proc;
    } prefetch_info;
|
||||
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
struct
|
||||
{
|
||||
// The MMU notifier is registered per va_block.
|
||||
struct mmu_interval_notifier notifier;
|
||||
|
||||
// Parent VA space pointer. It is NULL for UVM managed blocks or if
|
||||
// Parent VA space pointer. It is NULL for managed blocks or if
|
||||
// the HMM block is dead. This field can be read while holding the
|
||||
// block lock and is only modified while holding the va_space write
|
||||
// lock and va_block lock (same as the va_range pointer).
|
||||
@ -488,21 +497,27 @@ struct uvm_va_block_wrapper_struct
|
||||
// uvm_cpu_chunk_allocation_sizes module parameter.
|
||||
NvU32 cpu_chunk_allocation_size_mask;
|
||||
|
||||
// Force the next eviction attempt on this block to fail. Used for
|
||||
// testing only.
|
||||
bool inject_eviction_error;
|
||||
|
||||
        // Subsequent operations that need to allocate CPU pages will fail. As
        // opposed to other error injection settings, this one is persistent.
        // opposed to other error injection settings, this one fails N times
        // and then succeeds instead of failing on the Nth try. A value of ~0u
        // means fail indefinitely.
        // This is because this error is supposed to be fatal and tests verify
        // the state of the VA blocks after the failure. However, some tests
        // use kernels to trigger migrations and a fault replay could trigger
        // a successful migration if this error flag is cleared.
        bool inject_cpu_pages_allocation_error;
        NvU32 inject_cpu_pages_allocation_error_count;
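// Editor's illustration (not part of the driver source): the counter
// semantics described above, where an injected allocation error fires N times
// and then stops, with ~0u meaning "fail every time". The allocation itself
// is stubbed out and the counter name is a simplified stand-in.
#include <stdio.h>

static unsigned inject_error_count;  // stands in for inject_cpu_pages_allocation_error_count

// Returns 0 on success, -1 when an error is injected.
static int toy_alloc_cpu_pages(void)
{
    if (inject_error_count) {
        if (inject_error_count != ~0u)
            inject_error_count--;
        return -1;  // injected failure
    }
    return 0;       // real allocation would happen here
}

int main(void)
{
    inject_error_count = 2;
    for (int i = 0; i < 4; i++)
        printf("attempt %d -> %d\n", i, toy_alloc_cpu_pages());  // -1, -1, 0, 0
    return 0;
}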
|
||||
|
||||
// Force the next eviction attempt on this block to fail. Used for
|
||||
// testing only.
|
||||
bool inject_eviction_error;
|
||||
|
||||
// Force the next successful chunk allocation to then fail. Used for testing
|
||||
// only to simulate driver metadata allocation failure.
|
||||
bool inject_populate_error;
|
||||
|
||||
// Force the next split on this block to fail.
|
||||
// Set by error injection ioctl for testing purposes only.
|
||||
bool inject_split_error;
|
||||
} test;
|
||||
};
|
||||
|
||||
@ -639,8 +654,18 @@ static void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context,
|
||||
memset(va_block_context, 0xff, sizeof(*va_block_context));
|
||||
|
||||
va_block_context->mm = mm;
|
||||
#if UVM_IS_CONFIG_HMM()
|
||||
va_block_context->hmm.vma = NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Check that a single policy covers the given region for the given va_block.
|
||||
// This always returns true and is intended to only be used with UVM_ASSERT().
|
||||
// Locking: the va_block lock must be held.
|
||||
bool uvm_va_block_check_policy_is_valid(uvm_va_block_t *va_block,
|
||||
uvm_va_policy_t *policy,
|
||||
uvm_va_block_region_t region);
|
||||
|
||||
// TODO: Bug 1766480: Using only page masks instead of a combination of regions
|
||||
// and page masks could simplify the below APIs and their implementations
|
||||
// at the cost of having to scan the whole mask for small regions.
|
||||
@ -651,8 +676,10 @@ static void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context,
|
||||
// pages in the region which are present in the mask.
|
||||
//
|
||||
// prefetch_page_mask may be passed as a subset of page_mask when cause is
// UVM_MAKE_RESIDENT_CAUSE_FAULT to indicate pages that have been pulled due
// to automatic page prefetching heuristics. For pages in this mask,
// UVM_MAKE_RESIDENT_CAUSE_REPLAYABLE_FAULT,
// UVM_MAKE_RESIDENT_CAUSE_NON_REPLAYABLE_FAULT, or
// UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER to indicate pages that have been
// pulled due to automatic page prefetching heuristics. For pages in this mask,
// UVM_MAKE_RESIDENT_CAUSE_PREFETCH will be reported in migration events,
// instead.
//
|
||||
@ -674,20 +701,24 @@ static void uvm_va_block_context_init(uvm_va_block_context_t *va_block_context,
|
||||
// block's lock has been unlocked and relocked as part of the call and that the
|
||||
// whole sequence of operations performed under the block's lock needs to be
|
||||
// attempted again. To facilitate that, the caller needs to provide the same
|
||||
// va_block_retry struct for each attempt that has been initialized before the first
|
||||
// attempt and needs to be deinitialized after the last one. Most callers can
|
||||
// just use UVM_VA_BLOCK_LOCK_RETRY() that takes care of that for the caller.
|
||||
// va_block_retry struct for each attempt that has been initialized before the
|
||||
// first attempt and needs to be deinitialized after the last one. Most callers
|
||||
// can just use UVM_VA_BLOCK_LOCK_RETRY() that takes care of that for the
|
||||
// caller.
|
||||
//
|
||||
// If dest_id is the CPU then va_block_retry can be NULL and allocation-retry of
|
||||
// user memory is guaranteed not to happen. Allocation-retry of page tables can
|
||||
// still occur though.
|
||||
//
|
||||
// va_block_context must be non-NULL. This function will set a bit in
|
||||
// va_block_context must not be NULL. This function will set a bit in
|
||||
// va_block_context->make_resident.pages_changed_residency for each page that
|
||||
// changed residency (due to a migration or first population) as a result of the
|
||||
// operation. This function only sets bits in that mask. It is the caller's
|
||||
// responsibility to zero the mask or not first.
|
||||
//
|
||||
// va_block_context->policy must also be set by the caller for the given region.
|
||||
// See the comments for uvm_va_block_check_policy_is_valid().
|
||||
//
|
||||
// Notably any status other than NV_OK indicates that the block's lock might
|
||||
// have been unlocked and relocked.
|
||||
//
|
||||
@ -710,6 +741,8 @@ NV_STATUS uvm_va_block_make_resident(uvm_va_block_t *va_block,
|
||||
// where they are unmapped
|
||||
// - All remote mappings (due to either SetAccessedBy or performance heuristics)
|
||||
// are broken
|
||||
// - Only managed va_blocks are supported.
|
||||
// TODO: Bug 3660922: need to implement HMM read duplication support.
|
||||
// - LOCKING: If va_block_context->mm != NULL, va_block_context->mm->mmap_lock
|
||||
// must be held in at least read mode.
|
||||
NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
|
||||
@ -721,6 +754,34 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
|
||||
const uvm_page_mask_t *prefetch_page_mask,
|
||||
uvm_make_resident_cause_t cause);
|
||||
|
||||
// Similar to uvm_va_block_make_resident() (read documentation there). The
// difference is that source pages are only copied to the destination and the
// residency is not updated until uvm_va_block_make_resident_post() is called.
// Otherwise, the combination of uvm_va_block_make_resident_pre() and
// uvm_va_block_make_resident_post() should be the same as just calling
// uvm_va_block_make_resident().
// This split is needed when using migrate_vma_setup() and migrate_vma_pages()
// so that when migrate_vma_pages() indicates a page is not migrating, the
// va_block state is not updated.
// LOCKING: The caller must hold the va_block lock.
NV_STATUS uvm_va_block_make_resident_pre(uvm_va_block_t *va_block,
                                         uvm_va_block_retry_t *va_block_retry,
                                         uvm_va_block_context_t *va_block_context,
                                         uvm_processor_id_t dest_id,
                                         uvm_va_block_region_t region,
                                         const uvm_page_mask_t *page_mask,
                                         const uvm_page_mask_t *prefetch_page_mask,
                                         uvm_make_resident_cause_t cause);

// The page_mask must be the same or a subset of the page_mask passed to
// uvm_va_block_make_resident_pre(). This step updates the residency and breaks
// read duplication.
// LOCKING: The caller must hold the va_block lock.
void uvm_va_block_make_resident_post(uvm_va_block_t *va_block,
                                     uvm_va_block_context_t *va_block_context,
                                     uvm_va_block_region_t region,
                                     const uvm_page_mask_t *page_mask);
|
||||
|
||||
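A rough sketch of how the pre/post split lines up with the migrate_vma_*() sequence (illustrative only; the function name, the page-mask bookkeeping, and the exact error handling are assumptions based on the declarations above, not the driver's actual implementation, and the caller is assumed to hold the va_block lock):

// Hypothetical two-phase migration sketch around migrate_vma_pages().
static NV_STATUS example_two_phase_migrate(uvm_va_block_t *va_block,
                                           uvm_va_block_retry_t *retry,
                                           uvm_va_block_context_t *ctx,
                                           uvm_processor_id_t dest_id,
                                           uvm_va_block_region_t region,
                                           uvm_page_mask_t *migrate_mask,
                                           struct migrate_vma *args)
{
    NV_STATUS status;

    // Phase 1: copy data for the candidate pages but leave the va_block
    // residency tracking untouched.
    status = uvm_va_block_make_resident_pre(va_block, retry, ctx, dest_id, region,
                                            migrate_mask, NULL,
                                            UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE);
    if (status != NV_OK)
        return status;

    // migrate_vma_pages() reports which pages actually migrated; the caller
    // would clear the non-migrating pages from migrate_mask here so the post
    // step skips them.
    migrate_vma_pages(args);

    // Phase 2: commit the residency change only for the pages that migrated.
    uvm_va_block_make_resident_post(va_block, ctx, region, migrate_mask);
    return NV_OK;
}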
// Creates or upgrades a mapping from the input processor to the given virtual
// address region. Pages which already have new_prot permissions or higher are
// skipped, so this call ensures that the range is mapped with at least new_prot
@@ -749,7 +810,8 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
// pages because the earlier operation can cause a PTE split or merge which is
// assumed by the later operation.
//
// va_block_context must not be NULL.
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// If allocation-retry was required as part of the operation and was successful,
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the entries in the
@@ -805,7 +867,7 @@ NV_STATUS uvm_va_block_map_mask(uvm_va_block_t *va_block,
// pages because the earlier operation can cause a PTE split or merge which is
// assumed by the later operation.
//
// va_block_context must not be NULL.
// va_block_context must not be NULL. The va_block_context->policy is unused.
//
// If allocation-retry was required as part of the operation and was successful,
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the entries in the
@@ -837,12 +899,20 @@ NV_STATUS uvm_va_block_unmap_mask(uvm_va_block_t *va_block,
//   up-to-date data.
// - Unmap the preferred location's processor from any pages in this region
//   which are not resident on the preferred location.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: The caller must hold the VA block lock.
NV_STATUS uvm_va_block_set_preferred_location_locked(uvm_va_block_t *va_block,
                                                     uvm_va_block_context_t *va_block_context);

// Maps the given processor to all resident pages in this block, as allowed by
// location and policy. Waits for the operation to complete before returning.
// This function should only be called with managed va_blocks.
//
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: This takes and releases the VA block lock. If va_block_context->mm
//          != NULL, va_block_context->mm->mmap_lock must be held in at least
@@ -852,8 +922,10 @@ NV_STATUS uvm_va_block_set_accessed_by(uvm_va_block_t *va_block,
                                       uvm_processor_id_t processor_id);

// Breaks SetAccessedBy and remote mappings
// This function should only be called with managed va_blocks.
//
// va_block_context must NOT be NULL
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: This takes and releases the VA block lock. If va_block_context->mm
//          != NULL, va_block_context->mm->mmap_lock must be held in at least
@@ -862,8 +934,10 @@ NV_STATUS uvm_va_block_set_read_duplication(uvm_va_block_t *va_block,
                                            uvm_va_block_context_t *va_block_context);

// Restores SetAccessedBy mappings
// This function should only be called with managed va_blocks.
//
// va_block_context must NOT be NULL
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: This takes and releases the VA block lock. If va_block_context->mm
//          != NULL, va_block_context->mm->mmap_lock must be held in at least
@@ -871,6 +945,29 @@ NV_STATUS uvm_va_block_set_read_duplication(uvm_va_block_t *va_block,
NV_STATUS uvm_va_block_unset_read_duplication(uvm_va_block_t *va_block,
                                              uvm_va_block_context_t *va_block_context);

// Check if processor_id is allowed to access the va_block with access_type
// permissions. Return values:
//
// NV_ERR_INVALID_ADDRESS       The VA block is logically dead (zombie)
// NV_ERR_INVALID_ACCESS_TYPE   The vma corresponding to the VA range does not
//                              allow access_type permissions, or migration is
//                              disallowed and processor_id cannot access the
//                              range remotely (UVM-Lite).
// NV_ERR_INVALID_OPERATION     The access would violate the policies specified
//                              by UvmPreventMigrationRangeGroups.
//
// va_block_context must not be NULL, va_block_context->policy must be valid,
// and if the va_block is an HMM block, va_block_context->hmm.vma must be valid,
// which also means the va_block_context->mm is not NULL, retained, and locked
// for at least read.
// Locking: the va_block lock must be held.
NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
                                                 uvm_va_block_context_t *va_block_context,
                                                 uvm_processor_id_t processor_id,
                                                 uvm_page_index_t page_index,
                                                 uvm_fault_type_t access_type,
                                                 bool allow_migration);

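As a quick illustration (not from the diff), a fault-servicing path might gate a migration on this check roughly as follows; the helper name, the UVM_FAULT_TYPE_WRITE enumerator, and the surrounding error handling are assumptions for the sketch:

// Hypothetical permission check before servicing a write fault on one page.
static NV_STATUS example_check_write_fault(uvm_va_block_t *va_block,
                                           uvm_va_block_context_t *va_block_context,
                                           uvm_processor_id_t processor_id,
                                           uvm_page_index_t page_index)
{
    // Reject the fault early if the VMA permissions or the range-group
    // migration policy would make the access illegal.
    return uvm_va_block_check_logical_permissions(va_block,
                                                  va_block_context,
                                                  processor_id,
                                                  page_index,
                                                  UVM_FAULT_TYPE_WRITE,
                                                  true);
}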
// API for access privilege revocation
//
// Revoke prot_to_revoke access permissions for the given processor.
@@ -898,7 +995,7 @@ NV_STATUS uvm_va_block_unset_read_duplication(uvm_va_block_t *va_block,
// different pages because the earlier operation can cause a PTE split or merge
// which is assumed by the later operation.
//
// va_block_context must not be NULL.
// va_block_context must not be NULL. The va_block_context->policy is unused.
//
// If allocation-retry was required as part of the operation and was successful,
// NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case, the entries in the
@@ -938,7 +1035,8 @@ NV_STATUS uvm_va_block_revoke_prot_mask(uvm_va_block_t *va_block,
// processor_id, which triggered the migration and should have already been
// mapped).
//
// va_block_context must not be NULL.
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// This function acquires/waits for the va_block tracker and updates that
// tracker with any new work pushed.
@@ -968,7 +1066,8 @@ NV_STATUS uvm_va_block_add_mappings_after_migration(uvm_va_block_t *va_block,
// Note that this can return NV_ERR_MORE_PROCESSING_REQUIRED just like
// uvm_va_block_map() indicating that the operation needs to be retried.
//
// va_block_context must not be NULL.
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: The caller must hold the va block lock. If va_block_context->mm !=
//          NULL, va_block_context->mm->mmap_lock must be held in at least read
@@ -989,6 +1088,8 @@ NV_STATUS uvm_va_block_add_gpu_va_space(uvm_va_block_t *va_block, uvm_gpu_va_spa
// If mm != NULL, that mm is used for any CPU mappings which may be created as
// a result of this call. See uvm_va_block_context_t::mm for details.
//
// va_block_context must not be NULL. The va_block_context->policy is unused.
//
// LOCKING: The caller must hold the va_block lock. If block_context->mm is not
//          NULL, the caller must hold mm->mmap_lock in at least read mode.
void uvm_va_block_remove_gpu_va_space(uvm_va_block_t *va_block,
@@ -1057,10 +1158,7 @@ NV_STATUS uvm_va_block_split(uvm_va_block_t *existing_va_block,
// Exactly the same split semantics as uvm_va_block_split, including error
// handling, except that the existing_va_block block lock needs to be held and
// the new_va_block has to be preallocated.
//
// new_va_block's va_range is set to new_va_range before any reverse mapping is
// established to the new block, but the caller is responsible for inserting the
// new block into the range.
// Also note that the existing_va_block lock may be dropped and re-acquired.
NV_STATUS uvm_va_block_split_locked(uvm_va_block_t *existing_va_block,
                                    NvU64 new_end,
                                    uvm_va_block_t *new_va_block,
@@ -1076,6 +1174,7 @@ NV_STATUS uvm_va_block_split_locked(uvm_va_block_t *existing_va_block,
// - va_space lock must be held in at least read mode
//
// service_context->block_context.mm is ignored and vma->vm_mm is used instead.
// service_context->block_context.policy is set by this function.
//
// Returns NV_ERR_INVALID_ACCESS_TYPE if a CPU mapping to fault_addr cannot be
// accessed, for example because it's within a range group which is non-
@@ -1089,6 +1188,8 @@ NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,
// (migrations, cache invalidates, etc.) in response to the given service block
// context
//
// service_context->block_context.policy is set by this function.
//
// Locking:
// - service_context->block_context.mm->mmap_lock must be held in at least
//   read mode, if valid.
@@ -1132,10 +1233,18 @@ static inline NvU64 uvm_va_block_cpu_page_address(uvm_va_block_t *block, uvm_pag
    return block->start + PAGE_SIZE * page_index;
}

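Since uvm_va_block_cpu_page_address() above computes block->start + PAGE_SIZE * page_index, the inverse mapping is a simple division; a hedged sketch (the helper name is hypothetical and the address is assumed to have already been checked to lie within the block):

// Hypothetical inverse of uvm_va_block_cpu_page_address().
static uvm_page_index_t example_cpu_page_index(uvm_va_block_t *block, NvU64 address)
{
    // Assumes block->start <= address < block end (caller-checked), so the
    // result is a valid page index within the block.
    return (uvm_page_index_t)((address - block->start) / PAGE_SIZE);
}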
// Get the physical address on the given GPU for given residency
uvm_gpu_phys_address_t uvm_va_block_res_phys_page_address(uvm_va_block_t *va_block,
                                                          uvm_page_index_t page_index,
                                                          uvm_processor_id_t residency,
                                                          uvm_gpu_t *gpu);

// Get the page physical address on the given GPU
//
// This will assert that GPU state is indeed present.
uvm_gpu_phys_address_t uvm_va_block_gpu_phys_page_address(uvm_va_block_t *va_block, uvm_page_index_t page_index, uvm_gpu_t *gpu);
uvm_gpu_phys_address_t uvm_va_block_gpu_phys_page_address(uvm_va_block_t *va_block,
                                                          uvm_page_index_t page_index,
                                                          uvm_gpu_t *gpu);

static bool uvm_va_block_contains_address(uvm_va_block_t *block, NvU64 address)
{
@@ -1191,26 +1300,28 @@ NV_STATUS uvm_va_block_find(uvm_va_space_t *va_space, NvU64 addr, uvm_va_block_t

// Same as uvm_va_block_find except that the block is created if not found.
// If addr is covered by a UVM_VA_RANGE_TYPE_MANAGED va_range, a managed block
// will be created. Otherwise, if addr is not covered by any va_range, mm is
// non-NULL, and HMM is enabled in the va_space, an HMM block will be created.
// In either case, if mm is non-NULL, it must be retained and locked in at
// least read mode. Return values:
// will be created. Otherwise, if addr is not covered by any va_range, HMM is
// enabled in the va_space, and va_block_context and va_block_context->mm are
// non-NULL, then an HMM block will be created and va_block_context->hmm.vma is
// set to the VMA covering 'addr'. The va_block_context->policy field is left
// unchanged.
// In either case, if va_block_context->mm is non-NULL, it must be retained and
// locked in at least read mode. Return values:
// NV_ERR_INVALID_ADDRESS addr is not a UVM_VA_RANGE_TYPE_MANAGED va_range nor
//                        an HMM-enabled VMA.
// NV_ERR_NO_MEMORY       memory could not be allocated.
NV_STATUS uvm_va_block_find_create(uvm_va_space_t *va_space,
                                   struct mm_struct *mm,
                                   NvU64 addr,
                                   uvm_va_block_context_t *va_block_context,
                                   uvm_va_block_t **out_block);

// Same as uvm_va_block_find_create except that only UVM managed va_blocks are
// Same as uvm_va_block_find_create except that only managed va_blocks are
// created if not already present in the VA range.
static NV_STATUS uvm_va_block_find_create_managed(uvm_va_space_t *va_space,
                                                  NvU64 addr,
                                                  uvm_va_block_t **out_block)
{
    return uvm_va_block_find_create(va_space, NULL, addr, NULL, out_block);
    return uvm_va_block_find_create(va_space, addr, NULL, out_block);
}

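For the HMM path described above, a caller-side sketch might look like the following (illustrative only; it assumes the caller already owns a uvm_va_block_context_t whose mm field points to a retained mm_struct held in read mode, matching the new four-argument uvm_va_block_find_create() signature shown in this diff):

// Hypothetical lookup that allows HMM block creation.
static NV_STATUS example_find_or_create_block(uvm_va_space_t *va_space,
                                              NvU64 addr,
                                              uvm_va_block_context_t *va_block_context,
                                              uvm_va_block_t **out_block)
{
    // Passing a context with a non-NULL mm allows an HMM block to be created
    // when addr is not covered by any managed va_range; on success,
    // va_block_context->hmm.vma is set to the VMA covering addr.
    return uvm_va_block_find_create(va_space, addr, va_block_context, out_block);
}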
// Look up a chunk backing a specific address within the VA block. Returns NULL if none.
@@ -1232,7 +1343,8 @@ typedef enum
// The caller needs to handle allocation-retry. va_block_retry can be NULL if
// the destination is the CPU.
//
// va_block_context must not be NULL.
// va_block_context must not be NULL and va_block_context->policy must be valid.
// See the comments for uvm_va_block_check_policy_is_valid().
//
// LOCKING: The caller must hold the va_block lock. If va_block_context->mm !=
//          NULL, va_block_context->mm->mmap_lock must be held in at least
@@ -1249,6 +1361,9 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
//
// The [dst, dst + size) range has to fit within a single PAGE_SIZE page.
//
// va_block_context must not be NULL. The caller is not required to set
// va_block_context->policy.
//
// The caller needs to support allocation-retry of page tables.
//
// LOCKING: The caller must hold the va_block lock
@@ -1317,6 +1432,8 @@ void uvm_va_block_mark_cpu_dirty(uvm_va_block_t *va_block);
// successful, NV_ERR_MORE_PROCESSING_REQUIRED is returned. In this case the
// block's lock was unlocked and relocked.
//
// va_block_context must not be NULL. The va_block_context->policy is unused.
//
// LOCKING: The caller must hold the va_block lock.
NV_STATUS uvm_va_block_set_cancel(uvm_va_block_t *va_block, uvm_va_block_context_t *block_context, uvm_gpu_t *gpu);

@@ -1396,6 +1513,26 @@ static uvm_va_block_region_t uvm_va_block_region_from_block(uvm_va_b
    return uvm_va_block_region(0, uvm_va_block_num_cpu_pages(va_block));
}

// Create a block region from a va block and page mask. Note that the region
// covers the first through the last set bit and may have unset bits in between.
static uvm_va_block_region_t uvm_va_block_region_from_mask(uvm_va_block_t *va_block, const uvm_page_mask_t *page_mask)
{
    uvm_va_block_region_t region;
    uvm_page_index_t outer = uvm_va_block_num_cpu_pages(va_block);

    region.first = find_first_bit(page_mask->bitmap, outer);
    if (region.first >= outer) {
        region = uvm_va_block_region(0, 0);
    }
    else {
        // At least one bit is set so find_last_bit() should not return 'outer'.
        region.outer = find_last_bit(page_mask->bitmap, outer) + 1;
        UVM_ASSERT(region.outer <= outer);
    }

    return region;
}

static bool uvm_page_mask_test(const uvm_page_mask_t *mask, uvm_page_index_t page_index)
{
    UVM_ASSERT(page_index < PAGES_PER_UVM_VA_BLOCK);
@@ -1715,61 +1852,6 @@ static NvU64 uvm_reverse_map_end(const uvm_reverse_map_t *reverse_map)
#define for_each_va_block_page(page_index, va_block) \
    for_each_va_block_page_in_region((page_index), uvm_va_block_region_from_block(va_block))

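A small usage sketch for the helpers above (not part of the diff; the function name is hypothetical, and it assumes for_each_va_block_page_in_region() iterates page_index over the region, which is how the wrapper macro above uses it):

// Hypothetical example: count how many of the block's pages are set in a mask.
static size_t example_count_pages_in_mask(uvm_va_block_t *va_block, const uvm_page_mask_t *mask)
{
    uvm_page_index_t page_index;
    size_t count = 0;

    for_each_va_block_page(page_index, va_block) {
        if (uvm_page_mask_test(mask, page_index))
            ++count;
    }

    return count;
}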
static void uvm_va_block_bitmap_tree_init_from_page_count(uvm_va_block_bitmap_tree_t *bitmap_tree, size_t page_count)
{
    bitmap_tree->leaf_count = page_count;
    bitmap_tree->level_count = ilog2(roundup_pow_of_two(page_count)) + 1;
    uvm_page_mask_zero(&bitmap_tree->pages);
}

static void uvm_va_block_bitmap_tree_init(uvm_va_block_bitmap_tree_t *bitmap_tree, uvm_va_block_t *va_block)
{
    size_t num_pages = uvm_va_block_num_cpu_pages(va_block);
    uvm_va_block_bitmap_tree_init_from_page_count(bitmap_tree, num_pages);
}

static void uvm_va_block_bitmap_tree_iter_init(const uvm_va_block_bitmap_tree_t *bitmap_tree,
                                               uvm_page_index_t page_index,
                                               uvm_va_block_bitmap_tree_iter_t *iter)
{
    UVM_ASSERT(bitmap_tree->level_count > 0);
    UVM_ASSERT_MSG(page_index < bitmap_tree->leaf_count,
                   "%zd vs %zd",
                   (size_t)page_index,
                   (size_t)bitmap_tree->leaf_count);

    iter->level_idx = bitmap_tree->level_count - 1;
    iter->node_idx = page_index;
}

static uvm_va_block_region_t uvm_va_block_bitmap_tree_iter_get_range(const uvm_va_block_bitmap_tree_t *bitmap_tree,
                                                                     const uvm_va_block_bitmap_tree_iter_t *iter)
{
    NvU16 range_leaves = uvm_perf_tree_iter_leaf_range(bitmap_tree, iter);
    NvU16 range_start = uvm_perf_tree_iter_leaf_range_start(bitmap_tree, iter);
    uvm_va_block_region_t subregion = uvm_va_block_region(range_start, range_start + range_leaves);

    UVM_ASSERT(iter->level_idx >= 0);
    UVM_ASSERT(iter->level_idx < bitmap_tree->level_count);

    return subregion;
}

static NvU16 uvm_va_block_bitmap_tree_iter_get_count(const uvm_va_block_bitmap_tree_t *bitmap_tree,
                                                     const uvm_va_block_bitmap_tree_iter_t *iter)
{
    uvm_va_block_region_t subregion = uvm_va_block_bitmap_tree_iter_get_range(bitmap_tree, iter);

    return uvm_page_mask_region_weight(&bitmap_tree->pages, subregion);
}

#define uvm_va_block_bitmap_tree_traverse_counters(counter,tree,page,iter)            \
    for (uvm_va_block_bitmap_tree_iter_init((tree), (page), (iter)),                  \
             (counter) = uvm_va_block_bitmap_tree_iter_get_count((tree), (iter));     \
         (iter)->level_idx >= 0;                                                      \
         (counter) = --(iter)->level_idx < 0? 0:                                      \
                         uvm_va_block_bitmap_tree_iter_get_count((tree), (iter)))

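The traversal macro above walks the counter tree from the leaf level toward the root for a given page; a hedged usage sketch follows (the function name is hypothetical, and the tree is assumed to be already initialized with uvm_va_block_bitmap_tree_init() and populated by setting bits in tree->pages):

// Hypothetical example: find the largest per-level population count seen on
// the path from a page's leaf up to the root of the bitmap tree.
static NvU16 example_max_count_on_path(uvm_va_block_bitmap_tree_t *tree, uvm_page_index_t page_index)
{
    uvm_va_block_bitmap_tree_iter_t iter;
    NvU16 count;
    NvU16 max_count = 0;

    uvm_va_block_bitmap_tree_traverse_counters(count, tree, page_index, &iter) {
        if (count > max_count)
            max_count = count;
    }

    return max_count;
}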
// Return the block region covered by the given chunk size. page_index must be
// any page within the block known to be covered by the chunk.
static uvm_va_block_region_t uvm_va_block_chunk_region(uvm_va_block_t *block,
@@ -1898,6 +1980,12 @@ uvm_va_block_region_t uvm_va_block_big_page_region(uvm_va_block_t *va_block,
// returned.
uvm_va_block_region_t uvm_va_block_big_page_region_all(uvm_va_block_t *va_block, NvU32 big_page_size);

// Returns the largest sub-region of 'region' which can fit big pages.
// If the region cannot fit any big pages, an invalid region (0, 0) is returned.
uvm_va_block_region_t uvm_va_block_big_page_region_subset(uvm_va_block_t *va_block,
                                                          uvm_va_block_region_t region,
                                                          NvU32 big_page_size);

// Returns the big page index (the bit index within
// uvm_va_block_gpu_state_t::big_ptes) corresponding to page_index. If
// page_index cannot be covered by a big PTE due to alignment or block size,
@@ -1907,7 +1995,14 @@ size_t uvm_va_block_big_page_index(uvm_va_block_t *va_block, uvm_page_index_t pa
// Returns the new residency for a page that faulted or triggered access
// counter notifications. The read_duplicate output parameter indicates if the
// page meets the requirements to be read-duplicated.
// va_block_context must not be NULL, va_block_context->policy must be valid,
// and if the va_block is an HMM block, va_block_context->hmm.vma must be valid,
// which also means the va_block_context->mm is not NULL, retained, and locked
// for at least read. See the comments for uvm_va_block_check_policy_is_valid()
// and uvm_hmm_va_block_context_vma_is_valid() in uvm_hmm.h.
// Locking: the va_block lock must be held.
uvm_processor_id_t uvm_va_block_select_residency(uvm_va_block_t *va_block,
                                                 uvm_va_block_context_t *va_block_context,
                                                 uvm_page_index_t page_index,
                                                 uvm_processor_id_t processor_id,
                                                 NvU32 access_type_mask,

@@ -75,28 +75,6 @@ typedef struct
    DECLARE_BITMAP(bitmap, PAGES_PER_UVM_VA_BLOCK);
} uvm_page_mask_t;

// Encapsulates a counter tree built on top of a page mask bitmap in
// which each leaf represents a page in the block. It contains
// leaf_count and level_count so that it can use some macros for
// perf trees
typedef struct
{
    uvm_page_mask_t pages;

    NvU16 leaf_count;

    NvU8 level_count;
} uvm_va_block_bitmap_tree_t;

// Iterator for the bitmap tree. It contains level_idx and node_idx so
// that it can use some macros for perf trees
typedef struct
{
    s8 level_idx;

    uvm_page_index_t node_idx;
} uvm_va_block_bitmap_tree_iter_t;

// When updating GPU PTEs, this struct describes the new arrangement of PTE
// sizes. It is calculated before the operation is applied so we know which PTE
// sizes to allocate.
@@ -127,11 +105,6 @@ typedef struct
    // that region should be 4k, and that some of those 4k PTEs will be written
    // by the operation.
    DECLARE_BITMAP(big_ptes_covered, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);

    // These are the big PTE regions which will no longer have any valid
    // mappings after the operation. Only the bits which are set in
    // big_ptes_covered are valid.
    DECLARE_BITMAP(big_ptes_fully_unmapped, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
} uvm_va_block_new_pte_state_t;

// Event that triggered the call to uvm_va_block_make_resident/
@@ -269,7 +242,8 @@ typedef struct
typedef enum
{
    UVM_VA_BLOCK_TRANSFER_MODE_MOVE = 1,
    UVM_VA_BLOCK_TRANSFER_MODE_COPY = 2
    UVM_VA_BLOCK_TRANSFER_MODE_COPY = 2,
    UVM_VA_BLOCK_TRANSFER_MODE_COPY_ONLY = 3
} uvm_va_block_transfer_mode_t;

struct uvm_reverse_map_struct
@@ -49,8 +49,9 @@ uvm_va_policy_t *uvm_va_policy_get(uvm_va_block_t *va_block, NvU64 addr)

        return node ? &node->policy : &uvm_va_policy_default;
    }
    else
    else {
        return uvm_va_range_get_policy(va_block->va_range);
    }
}

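A brief usage sketch for the lookup above (illustrative, not from the diff; it combines uvm_va_policy_get() with uvm_va_policy_is_read_duplicate(), whose declarations appear in the uvm_va_policy.h hunks later in this diff, and assumes the caller already holds the va_block lock):

// Hypothetical helper: does the effective policy ask for read duplication at addr?
static bool example_addr_wants_read_duplication(uvm_va_block_t *va_block,
                                                uvm_va_space_t *va_space,
                                                NvU64 addr)
{
    // Under the va_block lock, fetch the policy node covering addr (or the
    // managed range's policy) and evaluate the read-duplication setting.
    uvm_va_policy_t *policy = uvm_va_policy_get(va_block, addr);

    return uvm_va_policy_is_read_duplicate(policy, va_space);
}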
#if UVM_IS_CONFIG_HMM()
@@ -50,7 +50,7 @@ typedef enum
//
// A policy covers one or more contiguous Linux VMAs or a portion of a VMA and
// does not cover non-existent VMAs.
// The VA range is determined from either the uvm_va_range_t for UVM managed
// The VA range is determined from either the uvm_va_range_t for managed
// allocations or the uvm_va_policy_node_t for HMM allocations.
//
typedef struct uvm_va_policy_struct
@@ -94,6 +94,12 @@ bool uvm_va_policy_is_read_duplicate(uvm_va_policy_t *policy, uvm_va_space_t *va
// Locking: The va_block lock must be held.
uvm_va_policy_t *uvm_va_policy_get(uvm_va_block_t *va_block, NvU64 addr);

// Return a uvm_va_policy_node_t given a uvm_va_policy_t pointer.
static uvm_va_policy_node_t *uvm_va_policy_node_from_policy(uvm_va_policy_t *policy)
{
    return container_of(policy, uvm_va_policy_node_t, policy);
}

#if UVM_IS_CONFIG_HMM()

// Module load/exit
@@ -239,6 +245,11 @@ static NV_STATUS uvm_va_policy_set_range(uvm_va_block_t *va_block,
    return NV_OK;
}

static uvm_va_policy_node_t *uvm_va_policy_node_iter_first(uvm_va_block_t *va_block, NvU64 start, NvU64 end)
{
    return NULL;
}

#endif // UVM_IS_CONFIG_HMM()

#endif // __UVM_VA_POLICY_H__
Some files were not shown because too many files have changed in this diff.