535.43.24

This commit is contained in:
russellcnv 2024-01-31 14:02:06 -08:00
parent 2a3b58b8c8
commit e558660fc2
No known key found for this signature in database
GPG Key ID: 0F15D664965FBC5A
267 changed files with 89045 additions and 82824 deletions

View File

@ -2,7 +2,7 @@
## Release 535 Entries
### [535.54.03] 2023-06-14
### [535.43.24] 2024-01-31
### [535.43.23] 2024-01-24
@ -30,6 +30,7 @@
#### Fixed
- Fixed building main against current centos stream 8 fails, [#550](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/550) by @airlied
- Fixed console restore with traditional VGA consoles.
#### Added
@ -58,6 +59,14 @@
## Release 525 Entries
### [525.147.05] 2023-10-31
#### Fixed
- Fix nvidia_p2p_get_pages(): Fix double-free in register-callback error path, [#557](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/557) by @BrendanCunningham
### [525.125.06] 2023-06-26
### [525.116.04] 2023-05-09
### [525.116.03] 2023-04-25

View File

@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
version 535.43.23.
version 535.43.24.
## How to Build
@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
535.43.23 driver release. This can be achieved by installing
535.43.24 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
@ -180,7 +180,7 @@ software applications.
## Compatible GPUs
The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 535.43.23 release,
(see the table below). However, in the 535.43.24 release,
GeForce and Workstation support is still considered alpha-quality.
To enable use of the open kernel modules on GeForce and Workstation GPUs,
@ -188,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.43.23/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.43.24/README/kernel_open.html
In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI

View File

@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.43.23\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.43.24\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
@ -123,6 +123,9 @@ ifneq ($(wildcard /proc/sgi_uv),)
EXTRA_CFLAGS += -DNV_CONFIG_X86_UV
endif
ifdef VGX_FORCE_VFIO_PCI_CORE
EXTRA_CFLAGS += -DNV_VGPU_FORCE_VFIO_PCI_CORE
endif
#
# The conftest.sh script tests various aspects of the target kernel.

View File

@ -2067,4 +2067,7 @@ typedef enum
#include <linux/clk-provider.h>
#endif
#define NV_EXPORT_SYMBOL(symbol) EXPORT_SYMBOL_GPL(symbol)
#define NV_CHECK_EXPORT_SYMBOL(symbol) NV_IS_EXPORT_SYMBOL_PRESENT_##symbol
#endif /* _NV_LINUX_H_ */

View File

@ -924,6 +924,7 @@ NV_STATUS NV_API_CALL rm_ioctl (nvidia_stack_t *, nv_state_t *
NvBool NV_API_CALL rm_isr (nvidia_stack_t *, nv_state_t *, NvU32 *);
void NV_API_CALL rm_isr_bh (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_isr_bh_unlocked (nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_is_msix_allowed (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_power_management (nvidia_stack_t *, nv_state_t *, nv_pm_action_t);
NV_STATUS NV_API_CALL rm_stop_user_channels (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_restart_user_channels (nvidia_stack_t *, nv_state_t *);

View File

@ -207,9 +207,13 @@ enum os_pci_req_atomics_type {
OS_INTF_PCIE_REQ_ATOMICS_128BIT
};
NV_STATUS NV_API_CALL os_enable_pci_req_atomics (void *, enum os_pci_req_atomics_type);
NV_STATUS NV_API_CALL os_get_numa_node_memory_usage (NvS32, NvU64 *, NvU64 *);
NV_STATUS NV_API_CALL os_numa_add_gpu_memory (void *, NvU64, NvU64, NvU32 *);
NV_STATUS NV_API_CALL os_numa_remove_gpu_memory (void *, NvU64, NvU64, NvU32);
NV_STATUS NV_API_CALL os_offline_page_at_address(NvU64 address);
void* NV_API_CALL os_get_pid_info(void);
void NV_API_CALL os_put_pid_info(void *pid_info);
NV_STATUS NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid);
extern NvU32 os_page_size;
extern NvU64 os_page_mask;

View File

@ -316,7 +316,7 @@ export_symbol_present_conftest() {
SYMBOL="$1"
TAB=' '
if grep -e "${TAB}${SYMBOL}${TAB}.*${TAB}EXPORT_SYMBOL.*\$" \
if grep -e "${TAB}${SYMBOL}${TAB}.*${TAB}EXPORT_SYMBOL\(_GPL\)\?\s*\$" \
"$OUTPUT/Module.symvers" >/dev/null 2>&1; then
echo "#define NV_IS_EXPORT_SYMBOL_PRESENT_$SYMBOL 1" |
append_conftest "symbols"
@ -337,7 +337,7 @@ export_symbol_gpl_conftest() {
SYMBOL="$1"
TAB=' '
if grep -e "${TAB}${SYMBOL}${TAB}.*${TAB}EXPORT_\(UNUSED_\)*SYMBOL_GPL\$" \
if grep -e "${TAB}${SYMBOL}${TAB}.*${TAB}EXPORT_\(UNUSED_\)*SYMBOL_GPL\s*\$" \
"$OUTPUT/Module.symvers" >/dev/null 2>&1; then
echo "#define NV_IS_EXPORT_SYMBOL_GPL_$SYMBOL 1" |
append_conftest "symbols"
@ -4468,6 +4468,24 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE" "" "types"
;;
mmu_notifier_ops_arch_invalidate_secondary_tlbs)
#
# Determine if the mmu_notifier_ops struct has the
# 'arch_invalidate_secondary_tlbs' member.
#
# struct mmu_notifier_ops.invalidate_range was renamed to
# arch_invalidate_secondary_tlbs by commit 1af5a8109904
# ("mmu_notifiers: rename invalidate_range notifier") due to be
# added in v6.6
CODE="
#include <linux/mmu_notifier.h>
int conftest_mmu_notifier_ops_arch_invalidate_secondary_tlbs(void) {
return offsetof(struct mmu_notifier_ops, arch_invalidate_secondary_tlbs);
}"
compile_check_conftest "$CODE" "NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS" "" "types"
;;
drm_format_num_planes)
#
# Determine if drm_format_num_planes() function is present.
@ -5636,23 +5654,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_GPIO_TO_IRQ_PRESENT" "" "functions"
;;
migrate_vma_setup)
#
# Determine if migrate_vma_setup() function is present
#
# migrate_vma_setup() function was added by commit
# a7d1f22bb74f32cf3cd93f52776007e161f1a738 ("mm: turn migrate_vma
# upside down) in v5.4.
# (2019-08-20).
CODE="
#include <linux/migrate.h>
int conftest_migrate_vma_setup(void) {
migrate_vma_setup();
}"
compile_check_conftest "$CODE" "NV_MIGRATE_VMA_SETUP_PRESENT" "" "functions"
;;
migrate_vma_added_flags)
#
# Determine if migrate_vma structure has flags
@ -5743,23 +5744,25 @@ compile_test() {
compile_check_conftest "$CODE" "NV_IOASID_GET_PRESENT" "" "functions"
;;
mm_pasid_set)
mm_pasid_drop)
#
# Determine if mm_pasid_set() function is present
# Determine if mm_pasid_drop() function is present
#
# Added by commit 701fac40384f ("iommu/sva: Assign a PASID to mm
# on PASID allocation and free it on mm exit") in v5.18.
# Moved to linux/iommu.h in commit cd3891158a77 ("iommu/sva: Move
# PASID helpers to sva code") in v6.4.
#
# mm_pasid_set() function was added by commit
# 701fac40384f07197b106136012804c3cae0b3de (iommu/sva: Assign a
# PASID to mm on PASID allocation and free it on mm exit) in v5.18.
# (2022-02-15).
CODE="
#if defined(NV_LINUX_SCHED_MM_H_PRESENT)
#include <linux/sched/mm.h>
#endif
void conftest_mm_pasid_set(void) {
mm_pasid_set();
#include <linux/iommu.h>
void conftest_mm_pasid_drop(void) {
mm_pasid_drop();
}"
compile_check_conftest "$CODE" "NV_MM_PASID_SET_PRESENT" "" "functions"
compile_check_conftest "$CODE" "NV_MM_PASID_DROP_PRESENT" "" "functions"
;;
drm_crtc_state_has_no_vblank)
@ -6279,6 +6282,21 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MEMORY_FAILURE_MF_SW_SIMULATED_DEFINED" "" "types"
;;
crypto_tfm_ctx_aligned)
# Determine if 'crypto_tfm_ctx_aligned' is defined.
#
# Removed by commit 25c74a39e0f6 ("crypto: hmac - remove unnecessary
# alignment logic") in v6.7.
#
CODE="
#include <crypto/algapi.h>
void conftest_crypto_tfm_ctx_aligned(void) {
(void)crypto_tfm_ctx_aligned();
}"
compile_check_conftest "$CODE" "NV_CRYPTO_TFM_CTX_ALIGNED_PRESENT" "" "functions"
;;
crypto)
#
# Determine if we support various crypto functions.
@ -6341,6 +6359,22 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MEMPOLICY_HAS_HOME_NODE" "" "types"
;;
mpol_preferred_many_present)
#
# Determine if MPOL_PREFERRED_MANY enum is present or not
#
# Added by commit b27abaccf8e8b ("mm/mempolicy: add
# MPOL_PREFERRED_MANY for multiple preferred nodes") in
# v5.15
#
CODE="
#include <linux/mempolicy.h>
int mpol_preferred_many = MPOL_PREFERRED_MANY;
"
compile_check_conftest "$CODE" "NV_MPOL_PREFERRED_MANY_PRESENT" "" "types"
;;
mmu_interval_notifier)
#
# Determine if mmu_interval_notifier struct is present or not
@ -6356,6 +6390,21 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MMU_INTERVAL_NOTIFIER" "" "types"
;;
drm_unlocked_ioctl_flag_present)
# Determine if DRM_UNLOCKED IOCTL flag is present.
#
# DRM_UNLOCKED was removed by commit 2798ffcc1d6a ("drm: Remove
# locking for legacy ioctls and DRM_UNLOCKED") in Linux
# next-20231208.
CODE="
#if defined(NV_DRM_DRM_IOCTL_H_PRESENT)
#include <drm/drm_ioctl.h>
#endif
int flags = DRM_UNLOCKED;"
compile_check_conftest "$CODE" "NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT" "" "types"
;;
# When adding a new conftest entry, please use the correct format for
# specifying the relevant upstream Linux kernel commit.
#
@ -6680,18 +6729,9 @@ case "$5" in
VFIO_PCI_CORE_PRESENT=1
fi
# When this sanity check is run via nvidia-installer, it sets ARCH as aarch64.
# But, when it is run via Kbuild, ARCH is set as arm64
if [ "$ARCH" = "aarch64" ]; then
ARCH="arm64"
fi
if [ "$VFIO_IOMMU_PRESENT" != "0" ] && [ "$KVM_PRESENT" != "0" ] ; then
# On x86_64, vGPU requires MDEV framework to be present.
# On aarch64, vGPU requires MDEV or vfio-pci-core framework to be present.
if ([ "$ARCH" = "arm64" ] && ([ "$VFIO_MDEV_PRESENT" != "0" ] || [ "$VFIO_PCI_CORE_PRESENT" != "0" ])) ||
([ "$ARCH" = "x86_64" ] && [ "$VFIO_MDEV_PRESENT" != "0" ];) then
# vGPU requires either MDEV or vfio-pci-core framework to be present.
if [ "$VFIO_MDEV_PRESENT" != "0" ] || [ "$VFIO_PCI_CORE_PRESENT" != "0" ]; then
exit 0
fi
fi
@ -6702,14 +6742,10 @@ case "$5" in
echo "CONFIG_VFIO_IOMMU_TYPE1";
fi
if [ "$ARCH" = "arm64" ] && [ "$VFIO_MDEV_PRESENT" = "0" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
if [ "$VFIO_MDEV_PRESENT" = "0" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
echo "either CONFIG_VFIO_MDEV or CONFIG_VFIO_PCI_CORE";
fi
if [ "$ARCH" = "x86_64" ] && [ "$VFIO_MDEV_PRESENT" = "0" ]; then
echo "CONFIG_VFIO_MDEV";
fi
if [ "$KVM_PRESENT" = "0" ]; then
echo "CONFIG_KVM";
fi

View File

@ -1312,9 +1312,21 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
DRM_RENDER_ALLOW|DRM_UNLOCKED),
#endif
/*
* DRM_UNLOCKED is implicit for all non-legacy DRM driver IOCTLs since Linux
* v4.10 commit fa5386459f06 "drm: Used DRM_LEGACY for all legacy functions"
* (Linux v4.4 commit ea487835e887 "drm: Enforce unlocked ioctl operation
* for kms driver ioctls" previously did it only for drivers that set the
* DRM_MODESET flag), so this will race with SET_CLIENT_CAP. Linux v4.11
* commit dcf727ab5d17 "drm: setclientcap doesn't need the drm BKL" also
* removed locking from SET_CLIENT_CAP so there is no use attempting to lock
* manually. The latter commit acknowledges that this can expose userspace
* to inconsistent behavior when racing with itself, but accepts that risk.
*/
DRM_IOCTL_DEF_DRV(NVIDIA_GET_CLIENT_CAPABILITY,
nv_drm_get_client_capability_ioctl,
0),
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
DRM_IOCTL_DEF_DRV(NVIDIA_GET_CRTC_CRC32,
nv_drm_get_crtc_crc32_ioctl,

View File

@ -243,6 +243,15 @@ static int __nv_drm_nvkms_gem_obj_init(
NvU64 *pages = NULL;
NvU32 numPages = 0;
if ((size % PAGE_SIZE) != 0) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"NvKmsKapiMemory 0x%p size should be in a multiple of page size to "
"create a gem object",
pMemory);
return -EINVAL;
}
nv_nvkms_memory->pPhysicalAddress = NULL;
nv_nvkms_memory->pWriteCombinedIORemapAddress = NULL;
nv_nvkms_memory->physically_mapped = false;

View File

@ -582,6 +582,19 @@ static inline int nv_drm_format_num_planes(uint32_t format)
#endif /* defined(NV_DRM_FORMAT_MODIFIERS_PRESENT) */
/*
* DRM_UNLOCKED was removed with linux-next commit 2798ffcc1d6a ("drm: Remove
* locking for legacy ioctls and DRM_UNLOCKED"), but it was previously made
* implicit for all non-legacy DRM driver IOCTLs since Linux v4.10 commit
* fa5386459f06 "drm: Used DRM_LEGACY for all legacy functions" (Linux v4.4
* commit ea487835e887 "drm: Enforce unlocked ioctl operation for kms driver
* ioctls" previously did it only for drivers that set the DRM_MODESET flag), so
* it was effectively a no-op anyway.
*/
#if !defined(NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT)
#define DRM_UNLOCKED 0
#endif
/*
* drm_vma_offset_exact_lookup_locked() were added
* by kernel commit 2225cfe46bcc which was Signed-off-by:

View File

@ -133,3 +133,4 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_lookup
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present

View File

@ -68,6 +68,9 @@ module_param_named(output_rounding_fix, output_rounding_fix, bool, 0400);
static bool disable_vrr_memclk_switch = false;
module_param_named(disable_vrr_memclk_switch, disable_vrr_memclk_switch, bool, 0400);
static bool opportunistic_display_sync = true;
module_param_named(opportunistic_display_sync, opportunistic_display_sync, bool, 0400);
/* These parameters are used for fault injection tests. Normally the defaults
* should be used. */
MODULE_PARM_DESC(fail_malloc, "Fail the Nth call to nvkms_alloc");
@ -99,6 +102,11 @@ NvBool nvkms_disable_vrr_memclk_switch(void)
return disable_vrr_memclk_switch;
}
NvBool nvkms_opportunistic_display_sync(void)
{
return opportunistic_display_sync;
}
#define NVKMS_SYNCPT_STUBS_NEEDED
/*************************************************************************
@ -200,9 +208,23 @@ static inline int nvkms_read_trylock_pm_lock(void)
static inline void nvkms_read_lock_pm_lock(void)
{
while (!down_read_trylock(&nvkms_pm_lock)) {
try_to_freeze();
cond_resched();
if ((current->flags & PF_NOFREEZE)) {
/*
* Non-freezable tasks (i.e. kthreads in this case) don't have to worry
* about being frozen during system suspend, but do need to block so
* that the CPU can go idle during s2idle. Do a normal uninterruptible
* blocking wait for the PM lock.
*/
down_read(&nvkms_pm_lock);
} else {
/*
* For freezable tasks, make sure we give the kernel an opportunity to
* freeze if taking the PM lock fails.
*/
while (!down_read_trylock(&nvkms_pm_lock)) {
try_to_freeze();
cond_resched();
}
}
}

View File

@ -99,6 +99,7 @@ typedef struct {
NvBool nvkms_output_rounding_fix(void);
NvBool nvkms_disable_vrr_memclk_switch(void);
NvBool nvkms_opportunistic_display_sync(void);
void nvkms_call_rm (void *ops);
void* nvkms_alloc (size_t size,

View File

@ -1,20 +1,25 @@
/* SPDX-License-Identifier: Linux-OpenIB */
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
@ -43,7 +48,9 @@
MODULE_AUTHOR("Yishai Hadas");
MODULE_DESCRIPTION("NVIDIA GPU memory plug-in");
MODULE_LICENSE("Linux-OpenIB");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION(DRV_VERSION);
enum {
NV_MEM_PEERDIRECT_SUPPORT_DEFAULT = 0,
@ -53,7 +60,13 @@ static int peerdirect_support = NV_MEM_PEERDIRECT_SUPPORT_DEFAULT;
module_param(peerdirect_support, int, S_IRUGO);
MODULE_PARM_DESC(peerdirect_support, "Set level of support for Peer-direct, 0 [default] or 1 [legacy, for example MLNX_OFED 4.9 LTS]");
#define peer_err(FMT, ARGS...) printk(KERN_ERR "nvidia-peermem" " %s:%d " FMT, __FUNCTION__, __LINE__, ## ARGS)
#define peer_err(FMT, ARGS...) printk(KERN_ERR "nvidia-peermem" " %s:%d ERROR " FMT, __FUNCTION__, __LINE__, ## ARGS)
#ifdef NV_MEM_DEBUG
#define peer_trace(FMT, ARGS...) printk(KERN_DEBUG "nvidia-peermem" " %s:%d TRACE " FMT, __FUNCTION__, __LINE__, ## ARGS)
#else
#define peer_trace(FMT, ARGS...) do {} while (0)
#endif
#if defined(NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)
@ -74,7 +87,10 @@ invalidate_peer_memory mem_invalidate_callback;
static void *reg_handle = NULL;
static void *reg_handle_nc = NULL;
#define NV_MEM_CONTEXT_MAGIC ((u64)0xF1F4F1D0FEF0DAD0ULL)
struct nv_mem_context {
u64 pad1;
struct nvidia_p2p_page_table *page_table;
struct nvidia_p2p_dma_mapping *dma_mapping;
u64 core_context;
@ -86,8 +102,22 @@ struct nv_mem_context {
struct task_struct *callback_task;
int sg_allocated;
struct sg_table sg_head;
u64 pad2;
};
#define NV_MEM_CONTEXT_CHECK_OK(MC) ({ \
struct nv_mem_context *mc = (MC); \
int rc = ((0 != mc) && \
(READ_ONCE(mc->pad1) == NV_MEM_CONTEXT_MAGIC) && \
(READ_ONCE(mc->pad2) == NV_MEM_CONTEXT_MAGIC)); \
if (!rc) { \
peer_trace("invalid nv_mem_context=%px pad1=%016llx pad2=%016llx\n", \
mc, \
mc?mc->pad1:0, \
mc?mc->pad2:0); \
} \
rc; \
})
static void nv_get_p2p_free_callback(void *data)
{
@ -97,8 +127,9 @@ static void nv_get_p2p_free_callback(void *data)
struct nvidia_p2p_dma_mapping *dma_mapping = NULL;
__module_get(THIS_MODULE);
if (!nv_mem_context) {
peer_err("nv_get_p2p_free_callback -- invalid nv_mem_context\n");
if (!NV_MEM_CONTEXT_CHECK_OK(nv_mem_context)) {
peer_err("detected invalid context, skipping further processing\n");
goto out;
}
@ -169,9 +200,11 @@ static int nv_mem_acquire(unsigned long addr, size_t size, void *peer_mem_privat
/* Error case handled as not mine */
return 0;
nv_mem_context->pad1 = NV_MEM_CONTEXT_MAGIC;
nv_mem_context->page_virt_start = addr & GPU_PAGE_MASK;
nv_mem_context->page_virt_end = (addr + size + GPU_PAGE_SIZE - 1) & GPU_PAGE_MASK;
nv_mem_context->mapped_size = nv_mem_context->page_virt_end - nv_mem_context->page_virt_start;
nv_mem_context->pad2 = NV_MEM_CONTEXT_MAGIC;
ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
&nv_mem_context->page_table, nv_mem_dummy_callback, nv_mem_context);
@ -195,6 +228,7 @@ static int nv_mem_acquire(unsigned long addr, size_t size, void *peer_mem_privat
return 1;
err:
memset(nv_mem_context, 0, sizeof(*nv_mem_context));
kfree(nv_mem_context);
/* Error case handled as not mine */
@ -342,6 +376,7 @@ static void nv_mem_release(void *context)
sg_free_table(&nv_mem_context->sg_head);
nv_mem_context->sg_allocated = 0;
}
memset(nv_mem_context, 0, sizeof(*nv_mem_context));
kfree(nv_mem_context);
module_put(THIS_MODULE);
return;

View File

@ -81,8 +81,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_set
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_drop
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmgrab
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg
@ -100,6 +99,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += kmem_cache_has_kobj_remove_work
NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_invalidate_range
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_arch_invalidate_secondary_tlbs
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock
@ -110,6 +110,8 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_mm_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup

View File

@ -571,7 +571,6 @@ static void uvm_vm_open_managed_entry(struct vm_area_struct *vma)
static void uvm_vm_close_managed(struct vm_area_struct *vma)
{
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
uvm_processor_id_t gpu_id;
bool make_zombie = false;
if (current->mm != NULL)
@ -606,12 +605,6 @@ static void uvm_vm_close_managed(struct vm_area_struct *vma)
uvm_destroy_vma_managed(vma, make_zombie);
// Notify GPU address spaces that the fault buffer needs to be flushed to
// avoid finding stale entries that can be attributed to new VA ranges
// reallocated at the same address.
for_each_gpu_id_in_mask(gpu_id, &va_space->registered_gpu_va_spaces) {
uvm_processor_mask_set_atomic(&va_space->needs_fault_buffer_flush, gpu_id);
}
uvm_va_space_up_write(va_space);
if (current->mm != NULL)

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Copyright (c) 2021-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -94,4 +94,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->map_remap_larger_page_promotion = false;
parent_gpu->plc_supported = true;
parent_gpu->no_ats_range_required = false;
}

View File

@ -101,4 +101,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->map_remap_larger_page_promotion = false;
parent_gpu->plc_supported = true;
parent_gpu->no_ats_range_required = false;
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018 NVIDIA Corporation
Copyright (c) 2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -107,10 +107,10 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
return status;
}
static void flush_tlb_write_faults(uvm_gpu_va_space_t *gpu_va_space,
NvU64 addr,
size_t size,
uvm_fault_client_type_t client_type)
static void flush_tlb_va_region(uvm_gpu_va_space_t *gpu_va_space,
NvU64 addr,
size_t size,
uvm_fault_client_type_t client_type)
{
uvm_ats_fault_invalidate_t *ats_invalidate;
@ -119,12 +119,12 @@ static void flush_tlb_write_faults(uvm_gpu_va_space_t *gpu_va_space,
else
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;
if (!ats_invalidate->write_faults_in_batch) {
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->write_faults_tlb_batch);
ats_invalidate->write_faults_in_batch = true;
if (!ats_invalidate->tlb_batch_pending) {
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->tlb_batch);
ats_invalidate->tlb_batch_pending = true;
}
uvm_tlb_batch_invalidate(&ats_invalidate->write_faults_tlb_batch, addr, size, PAGE_SIZE, UVM_MEMBAR_NONE);
uvm_tlb_batch_invalidate(&ats_invalidate->tlb_batch, addr, size, PAGE_SIZE, UVM_MEMBAR_NONE);
}
static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
@ -149,7 +149,11 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,
mode = vma_policy->mode;
if ((mode == MPOL_BIND) || (mode == MPOL_PREFERRED_MANY) || (mode == MPOL_PREFERRED)) {
if ((mode == MPOL_BIND)
#if defined(NV_MPOL_PREFERRED_MANY_PRESENT)
|| (mode == MPOL_PREFERRED_MANY)
#endif
|| (mode == MPOL_PREFERRED)) {
int home_node = NUMA_NO_NODE;
#if defined(NV_MEMPOLICY_HAS_HOME_NODE)
@ -467,6 +471,10 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
uvm_page_mask_and(write_fault_mask, write_fault_mask, read_fault_mask);
else
uvm_page_mask_zero(write_fault_mask);
// There are no pending faults beyond write faults to RO region.
if (uvm_page_mask_empty(read_fault_mask))
return status;
}
ats_batch_select_residency(gpu_va_space, vma, ats_context);
@ -489,6 +497,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
if (vma->vm_flags & VM_WRITE) {
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
uvm_ats_smmu_invalidate_tlbs(gpu_va_space, start, length);
// The Linux kernel never invalidates TLB entries on mapping
// permission upgrade. This is a problem if the GPU has cached
@ -499,7 +508,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
// infinite loop because we just forward the fault to the Linux
// kernel and it will see that the permissions in the page table are
// correct. Therefore, we flush TLB entries on ATS write faults.
flush_tlb_write_faults(gpu_va_space, start, length, client_type);
flush_tlb_va_region(gpu_va_space, start, length, client_type);
}
else {
uvm_page_mask_region_fill(reads_serviced_mask, subregion);
@ -522,6 +531,15 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
return status;
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
// Similarly to permission upgrade scenario, discussed above, GPU
// will not re-fetch the entry if the PTE is invalid and page size
// is 4K. To avoid infinite faulting loop, invalidate TLB for every
// new translation written explicitly like in the case of permission
// upgrade.
if (PAGE_SIZE == UVM_PAGE_SIZE_4K)
flush_tlb_va_region(gpu_va_space, start, length, client_type);
}
return status;
@ -556,7 +574,7 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
NV_STATUS status;
uvm_push_t push;
if (!ats_invalidate->write_faults_in_batch)
if (!ats_invalidate->tlb_batch_pending)
return NV_OK;
UVM_ASSERT(gpu_va_space);
@ -568,7 +586,7 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
"Invalidate ATS entries");
if (status == NV_OK) {
uvm_tlb_batch_end(&ats_invalidate->write_faults_tlb_batch, &push, UVM_MEMBAR_NONE);
uvm_tlb_batch_end(&ats_invalidate->tlb_batch, &push, UVM_MEMBAR_NONE);
uvm_push_end(&push);
// Add this push to the GPU's tracker so that fault replays/clears can
@ -576,8 +594,7 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
status = uvm_tracker_add_push_safe(out_tracker, &push);
}
ats_invalidate->write_faults_in_batch = false;
ats_invalidate->tlb_batch_pending = false;
return status;
}

View File

@ -52,7 +52,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next);
// This function performs pending TLB invalidations for ATS and clears the
// ats_invalidate->write_faults_in_batch flag
// ats_invalidate->tlb_batch_pending flag
NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_invalidate_t *ats_invalidate,
uvm_tracker_t *out_tracker);

View File

@ -29,8 +29,13 @@
#include "uvm_va_space.h"
#include "uvm_va_space_mm.h"
#include <asm/io.h>
#include <linux/log2.h>
#include <linux/iommu.h>
#include <linux/mm_types.h>
#include <linux/acpi.h>
#include <linux/device.h>
#include <linux/mmu_context.h>
// linux/sched/mm.h is needed for mmget_not_zero and mmput to get the mm
// reference required for the iommu_sva_bind_device() call. This header is not
@ -46,17 +51,276 @@
#define UVM_IOMMU_SVA_BIND_DEVICE(dev, mm) iommu_sva_bind_device(dev, mm)
#endif
// Type to represent a 128-bit SMMU command queue command.
struct smmu_cmd {
NvU64 low;
NvU64 high;
};
// Base address of SMMU CMDQ-V for GSMMU0.
#define SMMU_CMDQV_BASE_ADDR(smmu_base) (smmu_base + 0x200000)
#define SMMU_CMDQV_BASE_LEN 0x00830000
// CMDQV configuration is done by firmware but we check status here.
#define SMMU_CMDQV_CONFIG 0x0
#define SMMU_CMDQV_CONFIG_CMDQV_EN BIT(0)
// Used to map a particular VCMDQ to a VINTF.
#define SMMU_CMDQV_CMDQ_ALLOC_MAP(vcmdq_id) (0x200 + 0x4 * (vcmdq_id))
#define SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC BIT(0)
// Shift for the field containing the index of the virtual interface
// owning the VCMDQ.
#define SMMU_CMDQV_CMDQ_ALLOC_MAP_VIRT_INTF_INDX_SHIFT 15
// Base address for the VINTF registers.
#define SMMU_VINTF_BASE_ADDR(cmdqv_base_addr, vintf_id) (cmdqv_base_addr + 0x1000 + 0x100 * (vintf_id))
// Virtual interface (VINTF) configuration registers. The WAR only
// works on baremetal so we need to configure ourselves as the
// hypervisor owner.
#define SMMU_VINTF_CONFIG 0x0
#define SMMU_VINTF_CONFIG_ENABLE BIT(0)
#define SMMU_VINTF_CONFIG_HYP_OWN BIT(17)
#define SMMU_VINTF_STATUS 0x0
#define SMMU_VINTF_STATUS_ENABLED BIT(0)
// Caclulates the base address for a particular VCMDQ instance.
#define SMMU_VCMDQ_BASE_ADDR(cmdqv_base_addr, vcmdq_id) (cmdqv_base_addr + 0x10000 + 0x80 * (vcmdq_id))
// SMMU command queue consumer index register. Updated by SMMU
// when commands are consumed.
#define SMMU_VCMDQ_CONS 0x0
// SMMU command queue producer index register. Updated by UVM when
// commands are added to the queue.
#define SMMU_VCMDQ_PROD 0x4
// Configuration register used to enable a VCMDQ.
#define SMMU_VCMDQ_CONFIG 0x8
#define SMMU_VCMDQ_CONFIG_ENABLE BIT(0)
// Status register used to check the VCMDQ is enabled.
#define SMMU_VCMDQ_STATUS 0xc
#define SMMU_VCMDQ_STATUS_ENABLED BIT(0)
// Base address offset for the VCMDQ registers.
#define SMMU_VCMDQ_CMDQ_BASE 0x10000
// Size of the command queue. Each command is 16 bytes and we can't
// have a command queue greater than one page in size.
#define SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE (PAGE_SHIFT - ilog2(sizeof(struct smmu_cmd)))
#define SMMU_VCMDQ_CMDQ_ENTRIES (1UL << SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE)
// We always use VINTF63 for the WAR
#define VINTF 63
static void smmu_vintf_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val)
{
iowrite32(val, SMMU_VINTF_BASE_ADDR(smmu_cmdqv_base, VINTF) + reg);
}
static NvU32 smmu_vintf_read32(void __iomem *smmu_cmdqv_base, int reg)
{
return ioread32(SMMU_VINTF_BASE_ADDR(smmu_cmdqv_base, VINTF) + reg);
}
// We always use VCMDQ127 for the WAR
#define VCMDQ 127
void smmu_vcmdq_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val)
{
iowrite32(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
}
NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg)
{
return ioread32(SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
}
static void smmu_vcmdq_write64(void __iomem *smmu_cmdqv_base, int reg, NvU64 val)
{
iowrite64(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
}
// Fix for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU
// TLB invalidates on read-only to read-write upgrades
static NV_STATUS uvm_ats_smmu_war_init(uvm_parent_gpu_t *parent_gpu)
{
uvm_spin_loop_t spin;
NV_STATUS status;
unsigned long cmdqv_config;
void __iomem *smmu_cmdqv_base;
struct acpi_iort_node *node;
struct acpi_iort_smmu_v3 *iort_smmu;
node = *(struct acpi_iort_node **) dev_get_platdata(parent_gpu->pci_dev->dev.iommu->iommu_dev->dev->parent);
iort_smmu = (struct acpi_iort_smmu_v3 *) node->node_data;
smmu_cmdqv_base = ioremap(SMMU_CMDQV_BASE_ADDR(iort_smmu->base_address), SMMU_CMDQV_BASE_LEN);
if (!smmu_cmdqv_base)
return NV_ERR_NO_MEMORY;
parent_gpu->smmu_war.smmu_cmdqv_base = smmu_cmdqv_base;
cmdqv_config = ioread32(smmu_cmdqv_base + SMMU_CMDQV_CONFIG);
if (!(cmdqv_config & SMMU_CMDQV_CONFIG_CMDQV_EN)) {
status = NV_ERR_OBJECT_NOT_FOUND;
goto out;
}
// Allocate SMMU CMDQ pages for WAR
parent_gpu->smmu_war.smmu_cmdq = alloc_page(NV_UVM_GFP_FLAGS | __GFP_ZERO);
if (!parent_gpu->smmu_war.smmu_cmdq) {
status = NV_ERR_NO_MEMORY;
goto out;
}
// Initialise VINTF for the WAR
smmu_vintf_write32(smmu_cmdqv_base, SMMU_VINTF_CONFIG, SMMU_VINTF_CONFIG_ENABLE | SMMU_VINTF_CONFIG_HYP_OWN);
UVM_SPIN_WHILE(!(smmu_vintf_read32(smmu_cmdqv_base, SMMU_VINTF_STATUS) & SMMU_VINTF_STATUS_ENABLED), &spin);
// Allocate VCMDQ to VINTF
iowrite32((VINTF << SMMU_CMDQV_CMDQ_ALLOC_MAP_VIRT_INTF_INDX_SHIFT) | SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC,
smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
smmu_vcmdq_write64(smmu_cmdqv_base, SMMU_VCMDQ_CMDQ_BASE,
page_to_phys(parent_gpu->smmu_war.smmu_cmdq) | SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE);
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONS, 0);
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_PROD, 0);
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONFIG, SMMU_VCMDQ_CONFIG_ENABLE);
UVM_SPIN_WHILE(!(smmu_vcmdq_read32(smmu_cmdqv_base, SMMU_VCMDQ_STATUS) & SMMU_VCMDQ_STATUS_ENABLED), &spin);
uvm_mutex_init(&parent_gpu->smmu_war.smmu_lock, UVM_LOCK_ORDER_LEAF);
parent_gpu->smmu_war.smmu_prod = 0;
parent_gpu->smmu_war.smmu_cons = 0;
return NV_OK;
out:
iounmap(parent_gpu->smmu_war.smmu_cmdqv_base);
parent_gpu->smmu_war.smmu_cmdqv_base = NULL;
return status;
}
static void uvm_ats_smmu_war_deinit(uvm_parent_gpu_t *parent_gpu)
{
void __iomem *smmu_cmdqv_base = parent_gpu->smmu_war.smmu_cmdqv_base;
NvU32 cmdq_alloc_map;
if (parent_gpu->smmu_war.smmu_cmdqv_base) {
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONFIG, 0);
cmdq_alloc_map = ioread32(smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
iowrite32(cmdq_alloc_map & SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC, smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
smmu_vintf_write32(smmu_cmdqv_base, SMMU_VINTF_CONFIG, 0);
}
if (parent_gpu->smmu_war.smmu_cmdq)
__free_page(parent_gpu->smmu_war.smmu_cmdq);
if (parent_gpu->smmu_war.smmu_cmdqv_base)
iounmap(parent_gpu->smmu_war.smmu_cmdqv_base);
}
// The SMMU on ARM64 can run under different translation regimes depending on
// what features the OS and CPU variant support. The CPU for GH180 supports
// virtualisation extensions and starts the kernel at EL2 meaning SMMU operates
// under the NS-EL2-E2H translation regime. Therefore we need to use the
// TLBI_EL2_* commands which invalidate TLB entries created under this
// translation regime.
#define CMDQ_OP_TLBI_EL2_ASID 0x21;
#define CMDQ_OP_TLBI_EL2_VA 0x22;
#define CMDQ_OP_CMD_SYNC 0x46
// Use the same maximum as used for MAX_TLBI_OPS in the upstream
// kernel.
#define UVM_MAX_TLBI_OPS (1UL << (PAGE_SHIFT - 3))
#if UVM_ATS_SMMU_WAR_REQUIRED()
void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size)
{
struct mm_struct *mm = gpu_va_space->va_space->va_space_mm.mm;
uvm_parent_gpu_t *parent_gpu = gpu_va_space->gpu->parent;
struct {
NvU64 low;
NvU64 high;
} *vcmdq;
unsigned long vcmdq_prod;
NvU64 end;
uvm_spin_loop_t spin;
NvU16 asid;
if (!parent_gpu->smmu_war.smmu_cmdqv_base)
return;
asid = arm64_mm_context_get(mm);
vcmdq = kmap(parent_gpu->smmu_war.smmu_cmdq);
uvm_mutex_lock(&parent_gpu->smmu_war.smmu_lock);
vcmdq_prod = parent_gpu->smmu_war.smmu_prod;
// Our queue management is very simple. The mutex prevents multiple
// producers writing to the queue and all our commands require waiting for
// the queue to drain so we know it's empty. If we can't fit enough commands
// in the queue we just invalidate the whole ASID.
//
// The command queue is a cirular buffer with the MSB representing a wrap
// bit that must toggle on each wrap. See the SMMU architecture
// specification for more details.
//
// SMMU_VCMDQ_CMDQ_ENTRIES - 1 because we need to leave space for the
// CMD_SYNC.
if ((size >> PAGE_SHIFT) > min(UVM_MAX_TLBI_OPS, SMMU_VCMDQ_CMDQ_ENTRIES - 1)) {
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low = CMDQ_OP_TLBI_EL2_ASID;
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low |= (NvU64) asid << 48;
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].high = 0;
vcmdq_prod++;
}
else {
for (end = addr + size; addr < end; addr += PAGE_SIZE) {
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low = CMDQ_OP_TLBI_EL2_VA;
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low |= (NvU64) asid << 48;
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].high = addr & ~((1UL << 12) - 1);
vcmdq_prod++;
}
}
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low = CMDQ_OP_CMD_SYNC;
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].high = 0x0;
vcmdq_prod++;
// MSB is the wrap bit
vcmdq_prod &= (1UL << (SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE + 1)) - 1;
parent_gpu->smmu_war.smmu_prod = vcmdq_prod;
smmu_vcmdq_write32(parent_gpu->smmu_war.smmu_cmdqv_base, SMMU_VCMDQ_PROD, parent_gpu->smmu_war.smmu_prod);
UVM_SPIN_WHILE(
(smmu_vcmdq_read32(parent_gpu->smmu_war.smmu_cmdqv_base, SMMU_VCMDQ_CONS) & GENMASK(19, 0)) != vcmdq_prod,
&spin);
uvm_mutex_unlock(&parent_gpu->smmu_war.smmu_lock);
kunmap(parent_gpu->smmu_war.smmu_cmdq);
arm64_mm_context_put(mm);
}
#endif
NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
int ret;
ret = iommu_dev_enable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
if (ret)
return errno_to_nv_status(ret);
return errno_to_nv_status(ret);
if (UVM_ATS_SMMU_WAR_REQUIRED())
return uvm_ats_smmu_war_init(parent_gpu);
else
return NV_OK;
}
void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
if (UVM_ATS_SMMU_WAR_REQUIRED())
uvm_ats_smmu_war_deinit(parent_gpu);
iommu_dev_disable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
}

View File

@ -32,23 +32,38 @@
// For ATS support on aarch64, arm_smmu_sva_bind() is needed for
// iommu_sva_bind_device() calls. Unfortunately, arm_smmu_sva_bind() is not
// conftest-able. We instead look for the presence of ioasid_get() or
// mm_pasid_set(). ioasid_get() was added in the same patch series as
// arm_smmu_sva_bind() and removed in v6.0. mm_pasid_set() was added in the
// mm_pasid_drop(). ioasid_get() was added in the same patch series as
// arm_smmu_sva_bind() and removed in v6.0. mm_pasid_drop() was added in the
// same patch as the removal of ioasid_get(). We assume the presence of
// arm_smmu_sva_bind() if ioasid_get(v5.11 - v5.17) or mm_pasid_set(v5.18+) is
// arm_smmu_sva_bind() if ioasid_get(v5.11 - v5.17) or mm_pasid_drop(v5.18+) is
// present.
//
// arm_smmu_sva_bind() was added with commit
// 32784a9562fb0518b12e9797ee2aec52214adf6f and ioasid_get() was added with
// commit cb4789b0d19ff231ce9f73376a023341300aed96 (11/23/2020). Commit
// 701fac40384f07197b106136012804c3cae0b3de (02/15/2022) removed ioasid_get()
// and added mm_pasid_set().
#if UVM_CAN_USE_MMU_NOTIFIERS() && (defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_SET_PRESENT))
#define UVM_ATS_SVA_SUPPORTED() 1
// and added mm_pasid_drop().
#if UVM_CAN_USE_MMU_NOTIFIERS() && (defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_DROP_PRESENT))
#if defined(CONFIG_IOMMU_SVA)
#define UVM_ATS_SVA_SUPPORTED() 1
#else
#define UVM_ATS_SVA_SUPPORTED() 0
#endif
#else
#define UVM_ATS_SVA_SUPPORTED() 0
#endif
// If NV_ARCH_INVALIDATE_SECONDARY_TLBS is defined it means the upstream fix is
// in place so no need for the WAR from Bug 4130089: [GH180][r535] WAR for
// kernel not issuing SMMU TLB invalidates on read-only
#if defined(NV_ARCH_INVALIDATE_SECONDARY_TLBS)
#define UVM_ATS_SMMU_WAR_REQUIRED() 0
#elif NVCPU_IS_AARCH64
#define UVM_ATS_SMMU_WAR_REQUIRED() 1
#else
#define UVM_ATS_SMMU_WAR_REQUIRED() 0
#endif
typedef struct
{
int placeholder;
@ -77,6 +92,17 @@ typedef struct
// LOCKING: None
void uvm_ats_sva_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);
// Fix for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU
// TLB invalidates on read-only to read-write upgrades
#if UVM_ATS_SMMU_WAR_REQUIRED()
void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size);
#else
static void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size)
{
}
#endif
#else
static NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
@ -107,6 +133,11 @@ typedef struct
{
}
static void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size)
{
}
#endif // UVM_ATS_SVA_SUPPORTED
#endif // __UVM_ATS_SVA_H__

View File

@ -191,7 +191,7 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)
for (i = 0; i < REDUCTIONS; ++i) {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, host_mem_gpu_va, REDUCTIONS + 1);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, host_mem_gpu_va, REDUCTIONS);
}
// Without a sys membar the channel tracking semaphore can and does complete
@ -577,7 +577,7 @@ static NV_STATUS test_semaphore_reduction_inc(uvm_gpu_t *gpu)
for (i = 0; i < REDUCTIONS; i++) {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va, i+1);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va, REDUCTIONS);
}
status = uvm_push_end_and_wait(&push);

View File

@ -21,8 +21,8 @@
*******************************************************************************/
#ifndef _UVM_COMMON_H
#define _UVM_COMMON_H
#ifndef __UVM_COMMON_H__
#define __UVM_COMMON_H__
#ifdef DEBUG
#define UVM_IS_DEBUG() 1
@ -413,4 +413,40 @@ static inline void uvm_touch_page(struct page *page)
// Return true if the VMA is one used by UVM managed allocations.
bool uvm_vma_is_managed(struct vm_area_struct *vma);
#endif /* _UVM_COMMON_H */
static bool uvm_platform_uses_canonical_form_address(void)
{
if (NVCPU_IS_PPC64LE)
return false;
return true;
}
// Similar to the GPU MMU HAL num_va_bits(), it returns the CPU's num_va_bits().
static NvU32 uvm_cpu_num_va_bits(void)
{
return fls64(TASK_SIZE - 1) + 1;
}
// Return the unaddressable range in a num_va_bits-wide VA space, [first, outer)
static void uvm_get_unaddressable_range(NvU32 num_va_bits, NvU64 *first, NvU64 *outer)
{
UVM_ASSERT(num_va_bits < 64);
UVM_ASSERT(first);
UVM_ASSERT(outer);
if (uvm_platform_uses_canonical_form_address()) {
*first = 1ULL << (num_va_bits - 1);
*outer = (NvU64)((NvS64)(1ULL << 63) >> (64 - num_va_bits));
}
else {
*first = 1ULL << num_va_bits;
*outer = ~0Ull;
}
}
static void uvm_cpu_get_unaddressable_range(NvU64 *first, NvU64 *outer)
{
return uvm_get_unaddressable_range(uvm_cpu_num_va_bits(), first, outer);
}
#endif /* __UVM_COMMON_H__ */

View File

@ -218,19 +218,12 @@ static bool gpu_supports_uvm(uvm_parent_gpu_t *parent_gpu)
return parent_gpu->rm_info.subdeviceCount == 1;
}
static bool platform_uses_canonical_form_address(void)
{
if (NVCPU_IS_PPC64LE)
return false;
return true;
}
bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
{
// Lower and upper address spaces are typically found in platforms that use
// the canonical address form.
NvU64 max_va_lower;
NvU64 min_va_upper;
NvU64 addr_end = addr + size - 1;
NvU8 gpu_addr_shift;
NvU8 cpu_addr_shift;
@ -243,7 +236,7 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
UVM_ASSERT(size > 0);
gpu_addr_shift = gpu->address_space_tree.hal->num_va_bits();
cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
cpu_addr_shift = uvm_cpu_num_va_bits();
addr_shift = gpu_addr_shift;
// Pascal+ GPUs are capable of accessing kernel pointers in various modes
@ -279,9 +272,7 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
// 0 +----------------+ 0 +----------------+
// On canonical form address platforms and Pascal+ GPUs.
if (platform_uses_canonical_form_address() && gpu_addr_shift > 40) {
NvU64 min_va_upper;
if (uvm_platform_uses_canonical_form_address() && gpu_addr_shift > 40) {
// On x86, when cpu_addr_shift > gpu_addr_shift, it means the CPU uses
// 5-level paging and the GPU is pre-Hopper. On Pascal-Ada GPUs (49b
// wide VA) we set addr_shift to match a 4-level paging x86 (48b wide).
@ -292,15 +283,11 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
addr_shift = gpu_addr_shift;
else
addr_shift = cpu_addr_shift;
}
min_va_upper = (NvU64)((NvS64)(1ULL << 63) >> (64 - addr_shift));
max_va_lower = 1ULL << (addr_shift - 1);
return (addr_end < max_va_lower) || (addr >= min_va_upper);
}
else {
max_va_lower = 1ULL << addr_shift;
return addr_end < max_va_lower;
}
uvm_get_unaddressable_range(addr_shift, &max_va_lower, &min_va_upper);
return (addr_end < max_va_lower) || (addr >= min_va_upper);
}
// The internal UVM VAS does not use canonical form addresses.
@ -326,14 +313,14 @@ NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
NvU8 addr_shift;
NvU64 input_addr = addr;
if (platform_uses_canonical_form_address()) {
if (uvm_platform_uses_canonical_form_address()) {
// When the CPU VA width is larger than GPU's, it means that:
// On ARM: the CPU is on LVA mode and the GPU is pre-Hopper.
// On x86: the CPU uses 5-level paging and the GPU is pre-Hopper.
// We sign-extend on the 48b on ARM and on the 47b on x86 to mirror the
// behavior of CPUs with smaller (than GPU) VA widths.
gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
cpu_addr_shift = uvm_cpu_num_va_bits();
if (cpu_addr_shift > gpu_addr_shift)
addr_shift = NVCPU_IS_X86_64 ? 48 : 49;

View File

@ -57,14 +57,16 @@
typedef struct
{
// Number of faults from this uTLB that have been fetched but have not been serviced yet
// Number of faults from this uTLB that have been fetched but have not been
// serviced yet.
NvU32 num_pending_faults;
// Whether the uTLB contains fatal faults
bool has_fatal_faults;
// We have issued a replay of type START_ACK_ALL while containing fatal faults. This puts
// the uTLB in lockdown mode and no new translations are accepted
// We have issued a replay of type START_ACK_ALL while containing fatal
// faults. This puts the uTLB in lockdown mode and no new translations are
// accepted.
bool in_lockdown;
// We have issued a cancel on this uTLB
@ -126,8 +128,8 @@ struct uvm_service_block_context_struct
struct list_head service_context_list;
// A mask of GPUs that need to be checked for ECC errors before the CPU
// fault handler returns, but after the VA space lock has been unlocked to
// avoid the RM/UVM VA space lock deadlocks.
// fault handler returns, but after the VA space lock has been unlocked
// to avoid the RM/UVM VA space lock deadlocks.
uvm_processor_mask_t gpus_to_check_for_ecc;
// This is set to throttle page fault thrashing.
@ -160,9 +162,9 @@ struct uvm_service_block_context_struct
struct
{
// Per-processor mask with the pages that will be resident after servicing.
// We need one mask per processor because we may coalesce faults that
// trigger migrations to different processors.
// Per-processor mask with the pages that will be resident after
// servicing. We need one mask per processor because we may coalesce
// faults that trigger migrations to different processors.
uvm_page_mask_t new_residency;
} per_processor_masks[UVM_ID_MAX_PROCESSORS];
@ -263,7 +265,10 @@ struct uvm_fault_service_batch_context_struct
NvU32 num_coalesced_faults;
bool has_fatal_faults;
// One of the VA spaces in this batch which had fatal faults. If NULL, no
// faults were fatal. More than one VA space could have fatal faults, but we
// pick one to be the target of the cancel sequence.
uvm_va_space_t *fatal_va_space;
bool has_throttled_faults;
@ -291,11 +296,8 @@ struct uvm_fault_service_batch_context_struct
struct uvm_ats_fault_invalidate_struct
{
// Whether the TLB batch contains any information
bool write_faults_in_batch;
// Batch of TLB entries to be invalidated
uvm_tlb_batch_t write_faults_tlb_batch;
bool tlb_batch_pending;
uvm_tlb_batch_t tlb_batch;
};
typedef struct
@ -440,20 +442,9 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 num_notifications;
// Boolean used to avoid sorting the fault batch by instance_ptr if we
// determine at fetch time that all the access counter notifications in the
// batch report the same instance_ptr
// determine at fetch time that all the access counter notifications in
// the batch report the same instance_ptr
bool is_single_instance_ptr;
// Scratch space, used to generate artificial physically addressed notifications.
// Virtual address notifications are always aligned to 64k. This means up to 16
// different physical locations could have been accessed to trigger one notification.
// The sub-granularity mask can correspond to any of them.
struct
{
uvm_processor_id_t resident_processors[16];
uvm_gpu_phys_address_t phys_addresses[16];
uvm_access_counter_buffer_entry_t phys_entry;
} scratch;
} virt;
struct
@ -464,8 +455,8 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 num_notifications;
// Boolean used to avoid sorting the fault batch by aperture if we
// determine at fetch time that all the access counter notifications in the
// batch report the same aperture
// determine at fetch time that all the access counter notifications in
// the batch report the same aperture
bool is_single_aperture;
} phys;
@ -661,8 +652,8 @@ struct uvm_gpu_struct
struct
{
// Big page size used by the internal UVM VA space
// Notably it may be different than the big page size used by a user's VA
// space in general.
// Notably it may be different than the big page size used by a user's
// VA space in general.
NvU32 internal_size;
} big_page;
@ -688,8 +679,8 @@ struct uvm_gpu_struct
// lazily-populated array of peer GPUs, indexed by the peer's GPU index
uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];
// Leaf spinlock used to synchronize access to the peer_gpus table so that
// it can be safely accessed from the access counters bottom half
// Leaf spinlock used to synchronize access to the peer_gpus table so
// that it can be safely accessed from the access counters bottom half
uvm_spinlock_t peer_gpus_lock;
} peer_info;
@ -980,6 +971,10 @@ struct uvm_parent_gpu_struct
bool plc_supported;
// If true, page_tree initialization pre-populates no_ats_ranges. It only
// affects ATS systems.
bool no_ats_range_required;
// Parameters used by the TLB batching API
struct
{
@ -1051,14 +1046,16 @@ struct uvm_parent_gpu_struct
// Interrupt handling state and locks
uvm_isr_info_t isr;
// Fault buffer info. This is only valid if supports_replayable_faults is set to true
// Fault buffer info. This is only valid if supports_replayable_faults is
// set to true.
uvm_fault_buffer_info_t fault_buffer_info;
// PMM lazy free processing queue.
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
nv_kthread_q_t lazy_free_q;
// Access counter buffer info. This is only valid if supports_access_counters is set to true
// Access counter buffer info. This is only valid if
// supports_access_counters is set to true.
uvm_access_counter_buffer_info_t access_counter_buffer_info;
// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
@ -1108,7 +1105,7 @@ struct uvm_parent_gpu_struct
uvm_rb_tree_t instance_ptr_table;
uvm_spinlock_t instance_ptr_table_lock;
// This is set to true if the GPU belongs to an SLI group. Else, set to false.
// This is set to true if the GPU belongs to an SLI group.
bool sli_enabled;
struct
@ -1135,8 +1132,8 @@ struct uvm_parent_gpu_struct
// environment, rather than using the peer-id field of the PTE (which can
// only address 8 gpus), all gpus are assigned a 47-bit physical address
// space by the fabric manager. Any physical address access to these
// physical address spaces are routed through the switch to the corresponding
// peer.
// physical address spaces are routed through the switch to the
// corresponding peer.
struct
{
bool is_nvswitch_connected;
@ -1162,6 +1159,16 @@ struct uvm_parent_gpu_struct
NvU64 memory_window_start;
NvU64 memory_window_end;
} system_bus;
// WAR to issue ATS TLB invalidation commands ourselves.
struct
{
uvm_mutex_t smmu_lock;
struct page *smmu_cmdq;
void __iomem *smmu_cmdqv_base;
unsigned long smmu_prod;
unsigned long smmu_cons;
} smmu_war;
};
static const char *uvm_gpu_name(uvm_gpu_t *gpu)
@ -1351,7 +1358,8 @@ void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
// They must not be the same gpu.
uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu);
// Get the processor id accessible by the given GPU for the given physical address
// Get the processor id accessible by the given GPU for the given physical
// address.
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);
// Get the P2P capabilities between the gpus with the given indexes
@ -1448,9 +1456,9 @@ NV_STATUS uvm_gpu_check_ecc_error(uvm_gpu_t *gpu);
// Check for ECC errors without calling into RM
//
// Calling into RM is problematic in many places, this check is always safe to do.
// Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an ECC error and
// it's required to call uvm_gpu_check_ecc_error() to be sure.
// Calling into RM is problematic in many places, this check is always safe to
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an ECC error
// and it's required to call uvm_gpu_check_ecc_error() to be sure.
NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
// Map size bytes of contiguous sysmem on the GPU for physical access
@ -1507,6 +1515,8 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
// The GPU must be initialized before calling this function.
bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
bool uvm_platform_uses_canonical_form_address(void);
// Returns addr's canonical form for host systems that use canonical form
// addresses.
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
@ -1553,8 +1563,9 @@ uvm_aperture_t uvm_gpu_page_tree_init_location(const uvm_gpu_t *gpu);
// Debug print of GPU properties
void uvm_gpu_print(uvm_gpu_t *gpu);
// Add the given instance pointer -> user_channel mapping to this GPU. The bottom
// half GPU page fault handler uses this to look up the VA space for GPU faults.
// Add the given instance pointer -> user_channel mapping to this GPU. The
// bottom half GPU page fault handler uses this to look up the VA space for GPU
// faults.
NV_STATUS uvm_gpu_add_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
void uvm_gpu_remove_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);

View File

@ -33,17 +33,17 @@
#include "uvm_va_space_mm.h"
#include "uvm_pmm_sysmem.h"
#include "uvm_perf_module.h"
#include "uvm_ats_ibm.h"
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN 1
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_DEFAULT 256
#define UVM_PERF_ACCESS_COUNTER_GRANULARITY_DEFAULT "2m"
#define UVM_PERF_ACCESS_COUNTER_GRANULARITY UVM_ACCESS_COUNTER_GRANULARITY_2M
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN 1
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX ((1 << 16) - 1)
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT 256
#define UVM_ACCESS_COUNTER_ACTION_NOTIFY 0x1
#define UVM_ACCESS_COUNTER_ACTION_CLEAR 0x2
#define UVM_ACCESS_COUNTER_ON_MANAGED 0x4
#define UVM_ACCESS_COUNTER_ACTION_CLEAR 0x1
#define UVM_ACCESS_COUNTER_PHYS_ON_MANAGED 0x2
// Each page in a tracked physical range may belong to a different VA Block. We
// preallocate an array of reverse map translations. However, access counter
@ -54,12 +54,6 @@
#define UVM_MAX_TRANSLATION_SIZE (2 * 1024 * 1024ULL)
#define UVM_SUB_GRANULARITY_REGIONS 32
// The GPU offers the following tracking granularities: 64K, 2M, 16M, 16G
//
// Use the largest granularity to minimize the number of access counter
// notifications. This is fine because we simply drop the notifications during
// normal operation, and tests override these values.
static UVM_ACCESS_COUNTER_GRANULARITY g_uvm_access_counter_granularity;
static unsigned g_uvm_access_counter_threshold;
// Per-VA space access counters information
@ -87,7 +81,6 @@ static int uvm_perf_access_counter_momc_migration_enable = -1;
static unsigned uvm_perf_access_counter_batch_count = UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_DEFAULT;
// See module param documentation below
static char *uvm_perf_access_counter_granularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY_DEFAULT;
static unsigned uvm_perf_access_counter_threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT;
// Module parameters for the tunables
@ -100,10 +93,6 @@ MODULE_PARM_DESC(uvm_perf_access_counter_momc_migration_enable,
"Whether MOMC access counters will trigger migrations."
"Valid values: <= -1 (default policy), 0 (off), >= 1 (on)");
module_param(uvm_perf_access_counter_batch_count, uint, S_IRUGO);
module_param(uvm_perf_access_counter_granularity, charp, S_IRUGO);
MODULE_PARM_DESC(uvm_perf_access_counter_granularity,
"Size of the physical memory region tracked by each counter. Valid values as"
"of Volta: 64k, 2m, 16m, 16g");
module_param(uvm_perf_access_counter_threshold, uint, S_IRUGO);
MODULE_PARM_DESC(uvm_perf_access_counter_threshold,
"Number of remote accesses on a region required to trigger a notification."
@ -136,7 +125,7 @@ static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va
// Whether access counter migrations are enabled or not. The policy is as
// follows:
// - MIMC migrations are enabled by default on P9 systems with ATS support
// - MIMC migrations are disabled by default on all systems except P9.
// - MOMC migrations are disabled by default on all systems
// - Users can override this policy by specifying on/off
static bool is_migration_enabled(uvm_access_counter_type_t type)
@ -159,7 +148,10 @@ static bool is_migration_enabled(uvm_access_counter_type_t type)
if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
return false;
return g_uvm_global.ats.supported;
if (UVM_ATS_IBM_SUPPORTED())
return g_uvm_global.ats.supported;
return false;
}
// Create the access counters tracking struct for the given VA space
@ -225,30 +217,18 @@ static NV_STATUS config_granularity_to_bytes(UVM_ACCESS_COUNTER_GRANULARITY gran
return NV_OK;
}
// Clear the given access counter and add it to the per-GPU clear tracker
static NV_STATUS access_counter_clear_targeted(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *entry)
// Clear the access counter notifications and add it to the per-GPU clear
// tracker.
static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,
uvm_access_counter_buffer_entry_t **notification_start,
NvU32 num_notifications)
{
NvU32 i;
NV_STATUS status;
uvm_push_t push;
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
if (entry->address.is_virtual) {
status = uvm_push_begin(gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
&push,
"Clear access counter with virtual address: 0x%llx",
entry->address.address);
}
else {
status = uvm_push_begin(gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
&push,
"Clear access counter with physical address: 0x%llx:%s",
entry->address.address,
uvm_aperture_string(entry->address.aperture));
}
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch");
if (status != NV_OK) {
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s\n",
nvstatusToString(status),
@ -256,7 +236,8 @@ static NV_STATUS access_counter_clear_targeted(uvm_gpu_t *gpu,
return status;
}
gpu->parent->host_hal->access_counter_clear_targeted(&push, entry);
for (i = 0; i < num_notifications; i++)
gpu->parent->host_hal->access_counter_clear_targeted(&push, notification_start[i]);
uvm_push_end(&push);
@ -381,25 +362,6 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
g_uvm_access_counter_threshold = uvm_perf_access_counter_threshold;
}
if (strcmp(uvm_perf_access_counter_granularity, "64k") == 0) {
g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_64K;
}
else if (strcmp(uvm_perf_access_counter_granularity, "2m") == 0) {
g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_2M;
}
else if (strcmp(uvm_perf_access_counter_granularity, "16m") == 0) {
g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_16M;
}
else if (strcmp(uvm_perf_access_counter_granularity, "16g") == 0) {
g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_16G;
}
else {
g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_2M;
pr_info("Invalid value '%s' for uvm_perf_access_counter_granularity, using '%s' instead",
uvm_perf_access_counter_granularity,
UVM_PERF_ACCESS_COUNTER_GRANULARITY_DEFAULT);
}
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
UVM_ASSERT(parent_gpu->access_counter_buffer_hal != NULL);
@ -422,7 +384,7 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(access_counters->rm_info.bufferSize %
parent_gpu->access_counter_buffer_hal->entry_size(parent_gpu) == 0);
status = config_granularity_to_bytes(g_uvm_access_counter_granularity, &granularity_bytes);
status = config_granularity_to_bytes(UVM_PERF_ACCESS_COUNTER_GRANULARITY, &granularity_bytes);
UVM_ASSERT(status == NV_OK);
if (granularity_bytes > UVM_MAX_TRANSLATION_SIZE)
UVM_ASSERT(granularity_bytes % UVM_MAX_TRANSLATION_SIZE == 0);
@ -641,8 +603,8 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
else {
UvmGpuAccessCntrConfig default_config =
{
.mimcGranularity = g_uvm_access_counter_granularity,
.momcGranularity = g_uvm_access_counter_granularity,
.mimcGranularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
.momcGranularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
.mimcUseLimit = UVM_ACCESS_COUNTER_USE_LIMIT_FULL,
.momcUseLimit = UVM_ACCESS_COUNTER_USE_LIMIT_FULL,
.threshold = g_uvm_access_counter_threshold,
@ -767,6 +729,22 @@ static int cmp_sort_virt_notifications_by_instance_ptr(const void *_a, const voi
return cmp_access_counter_instance_ptr(a, b);
}
// Sort comparator for pointers to GVA access counter notification buffer
// entries that sorts by va_space, and fault address.
static int cmp_sort_virt_notifications_by_va_space_address(const void *_a, const void *_b)
{
const uvm_access_counter_buffer_entry_t **a = (const uvm_access_counter_buffer_entry_t **)_a;
const uvm_access_counter_buffer_entry_t **b = (const uvm_access_counter_buffer_entry_t **)_b;
int result;
result = UVM_CMP_DEFAULT((*a)->virtual_info.va_space, (*b)->virtual_info.va_space);
if (result != 0)
return result;
return UVM_CMP_DEFAULT((*a)->address.address, (*b)->address.address);
}
// Sort comparator for pointers to GPA access counter notification buffer
// entries that sorts by physical address' aperture
static int cmp_sort_phys_notifications_by_processor_id(const void *_a, const void *_b)
@ -924,12 +902,11 @@ static void translate_virt_notifications_instance_ptrs(uvm_gpu_t *gpu,
// GVA notifications provide an instance_ptr and ve_id that can be directly
// translated to a VA space. In order to minimize translations, we sort the
// entries by instance_ptr.
// entries by instance_ptr, va_space and notification address in that order.
static void preprocess_virt_notifications(uvm_gpu_t *gpu,
uvm_access_counter_service_batch_context_t *batch_context)
{
if (!batch_context->virt.is_single_instance_ptr) {
// Sort by instance_ptr
sort(batch_context->virt.notifications,
batch_context->virt.num_notifications,
sizeof(*batch_context->virt.notifications),
@ -938,6 +915,12 @@ static void preprocess_virt_notifications(uvm_gpu_t *gpu,
}
translate_virt_notifications_instance_ptrs(gpu, batch_context);
sort(batch_context->virt.notifications,
batch_context->virt.num_notifications,
sizeof(*batch_context->virt.notifications),
cmp_sort_virt_notifications_by_va_space_address,
NULL);
}
// GPA notifications provide a physical address and an aperture. Sort
@ -946,7 +929,6 @@ static void preprocess_virt_notifications(uvm_gpu_t *gpu,
static void preprocess_phys_notifications(uvm_access_counter_service_batch_context_t *batch_context)
{
if (!batch_context->phys.is_single_aperture) {
// Sort by instance_ptr
sort(batch_context->phys.notifications,
batch_context->phys.num_notifications,
sizeof(*batch_context->phys.notifications),
@ -955,6 +937,28 @@ static void preprocess_phys_notifications(uvm_access_counter_service_batch_conte
}
}
static NV_STATUS notify_tools_and_process_flags(uvm_gpu_t *gpu,
uvm_access_counter_buffer_entry_t **notification_start,
NvU32 num_entries,
NvU32 flags)
{
NV_STATUS status = NV_OK;
if (uvm_enable_builtin_tests) {
// TODO: Bug 4310744: [UVM][TOOLS] Attribute access counter tools events
// to va_space instead of broadcasting.
NvU32 i;
for (i = 0; i < num_entries; i++)
uvm_tools_broadcast_access_counter(gpu, notification_start[i], flags & UVM_ACCESS_COUNTER_PHYS_ON_MANAGED);
}
if (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR)
status = access_counter_clear_notifications(gpu, notification_start, num_entries);
return status;
}
static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
uvm_va_block_t *va_block,
uvm_va_block_retry_t *va_block_retry,
@ -1163,7 +1167,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *current_entry,
const uvm_reverse_map_t *reverse_mappings,
size_t num_reverse_mappings,
unsigned *out_flags)
NvU32 *out_flags)
{
size_t index;
uvm_va_block_t *va_block = reverse_mappings[0].va_block;
@ -1190,7 +1194,6 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
// If an mm is registered with the VA space, we have to retain it
// in order to lock it before locking the VA space.
mm = uvm_va_space_mm_retain_lock(va_space);
uvm_va_space_down_read(va_space);
// Re-check that the VA block is valid after taking the VA block lock.
@ -1251,7 +1254,7 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *current_entry,
const uvm_reverse_map_t *reverse_mappings,
size_t num_reverse_mappings,
unsigned *out_flags)
NvU32 *out_flags)
{
NV_STATUS status = NV_OK;
size_t index;
@ -1259,7 +1262,7 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
for (index = 0; index < num_reverse_mappings; ++index) {
unsigned out_flags_local = 0;
NvU32 out_flags_local = 0;
status = service_phys_single_va_block(gpu,
batch_context,
current_entry,
@ -1318,7 +1321,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
NvU64 address,
unsigned long sub_granularity,
size_t *num_reverse_mappings,
unsigned *out_flags)
NvU32 *out_flags)
{
NV_STATUS status;
NvU32 region_start, region_end;
@ -1327,7 +1330,10 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
// Get the reverse_map translations for all the regions set in the
// sub_granularity field of the counter.
for_each_sub_granularity_region(region_start, region_end, sub_granularity, config->sub_granularity_regions_per_translation) {
for_each_sub_granularity_region(region_start,
region_end,
sub_granularity,
config->sub_granularity_regions_per_translation) {
NvU64 local_address = address + region_start * config->sub_granularity_region_size;
NvU32 local_translation_size = (region_end - region_start) * config->sub_granularity_region_size;
uvm_reverse_map_t *local_reverse_mappings = batch_context->phys.translations + *num_reverse_mappings;
@ -1376,7 +1382,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
uvm_access_counter_service_batch_context_t *batch_context,
const uvm_access_counter_buffer_entry_t *current_entry,
unsigned *out_flags)
NvU32 *out_flags)
{
NvU64 address;
NvU64 translation_index;
@ -1387,7 +1393,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
size_t total_reverse_mappings = 0;
uvm_gpu_t *resident_gpu = NULL;
NV_STATUS status = NV_OK;
unsigned flags = 0;
NvU32 flags = 0;
address = current_entry->address.address;
UVM_ASSERT(address % config->translation_size == 0);
@ -1415,7 +1421,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
for (translation_index = 0; translation_index < config->translations_per_counter; ++translation_index) {
size_t num_reverse_mappings;
unsigned out_flags_local = 0;
NvU32 out_flags_local = 0;
status = service_phys_notification_translation(gpu,
resident_gpu,
batch_context,
@ -1437,11 +1443,8 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
sub_granularity = sub_granularity >> config->sub_granularity_regions_per_translation;
}
// Currently we only report events for our tests, not for tools
if (uvm_enable_builtin_tests) {
*out_flags |= UVM_ACCESS_COUNTER_ACTION_NOTIFY;
*out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_ON_MANAGED : 0);
}
if (uvm_enable_builtin_tests)
*out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_PHYS_ON_MANAGED : 0);
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
@ -1454,22 +1457,21 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
uvm_access_counter_service_batch_context_t *batch_context)
{
NvU32 i;
uvm_access_counter_buffer_entry_t **notifications = batch_context->phys.notifications;
preprocess_phys_notifications(batch_context);
for (i = 0; i < batch_context->phys.num_notifications; ++i) {
NV_STATUS status;
uvm_access_counter_buffer_entry_t *current_entry = batch_context->phys.notifications[i];
unsigned flags = 0;
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
NvU32 flags = 0;
if (!UVM_ID_IS_VALID(current_entry->physical_info.resident_id))
continue;
status = service_phys_notification(gpu, batch_context, current_entry, &flags);
if (flags & UVM_ACCESS_COUNTER_ACTION_NOTIFY)
uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
status = access_counter_clear_targeted(gpu, current_entry);
notify_tools_and_process_flags(gpu, &notifications[i], 1, flags);
if (status != NV_OK)
return status;
@ -1478,152 +1480,218 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
return NV_OK;
}
static int cmp_sort_gpu_phys_addr(const void *_a, const void *_b)
static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
uvm_va_block_t *va_block,
uvm_processor_id_t processor,
uvm_access_counter_service_batch_context_t *batch_context)
{
return uvm_gpu_phys_addr_cmp(*(uvm_gpu_phys_address_t*)_a,
*(uvm_gpu_phys_address_t*)_b);
}
uvm_va_block_retry_t va_block_retry;
uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
uvm_service_block_context_t *service_context = &batch_context->block_service_context;
static bool gpu_phys_same_region(uvm_gpu_phys_address_t a, uvm_gpu_phys_address_t b, NvU64 granularity)
{
if (a.aperture != b.aperture)
return false;
UVM_ASSERT(is_power_of_2(granularity));
return UVM_ALIGN_DOWN(a.address, granularity) == UVM_ALIGN_DOWN(b.address, granularity);
}
static bool phys_address_in_accessed_sub_region(uvm_gpu_phys_address_t address,
NvU64 region_size,
NvU64 sub_region_size,
NvU32 accessed_mask)
{
const unsigned accessed_index = (address.address % region_size) / sub_region_size;
// accessed_mask is only filled for tracking granularities larger than 64K
if (region_size == UVM_PAGE_SIZE_64K)
return true;
UVM_ASSERT(accessed_index < 32);
return ((1 << accessed_index) & accessed_mask) != 0;
}
static NV_STATUS service_virt_notification(uvm_gpu_t *gpu,
uvm_access_counter_service_batch_context_t *batch_context,
const uvm_access_counter_buffer_entry_t *current_entry,
unsigned *out_flags)
{
NV_STATUS status = NV_OK;
NvU64 notification_size;
NvU64 address;
uvm_processor_id_t *resident_processors = batch_context->virt.scratch.resident_processors;
uvm_gpu_phys_address_t *phys_addresses = batch_context->virt.scratch.phys_addresses;
int num_addresses = 0;
int i;
// Virtual address notifications are always 64K aligned
NvU64 region_start = current_entry->address.address;
NvU64 region_end = current_entry->address.address + UVM_PAGE_SIZE_64K;
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
uvm_access_counter_type_t counter_type = current_entry->counter_type;
const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters, counter_type);
uvm_va_space_t *va_space = current_entry->virtual_info.va_space;
UVM_ASSERT(counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC);
// Entries with NULL va_space are simply dropped.
if (!va_space)
if (uvm_page_mask_empty(accessed_pages))
return NV_OK;
status = config_granularity_to_bytes(config->rm.granularity, &notification_size);
if (status != NV_OK)
return status;
uvm_assert_mutex_locked(&va_block->lock);
// Collect physical locations that could have been touched
// in the reported 64K VA region. The notification mask can
// correspond to any of them.
uvm_va_space_down_read(va_space);
for (address = region_start; address < region_end;) {
uvm_va_block_t *va_block;
service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
service_context->num_retries = 0;
service_context->block_context.mm = mm;
NV_STATUS local_status = uvm_va_block_find(va_space, address, &va_block);
if (local_status == NV_ERR_INVALID_ADDRESS || local_status == NV_ERR_OBJECT_NOT_FOUND) {
address += PAGE_SIZE;
continue;
}
return UVM_VA_BLOCK_RETRY_LOCKED(va_block,
&va_block_retry,
service_va_block_locked(processor,
va_block,
&va_block_retry,
service_context,
accessed_pages));
}
uvm_mutex_lock(&va_block->lock);
while (address < va_block->end && address < region_end) {
const unsigned page_index = uvm_va_block_cpu_page_index(va_block, address);
static void expand_notification_block(struct mm_struct *mm,
uvm_gpu_va_space_t *gpu_va_space,
uvm_va_block_t *va_block,
uvm_page_mask_t *accessed_pages,
const uvm_access_counter_buffer_entry_t *current_entry)
{
NvU64 addr;
NvU64 granularity = 0;
uvm_gpu_t *resident_gpu = NULL;
uvm_processor_id_t resident_id;
uvm_page_index_t page_index;
uvm_gpu_t *gpu = gpu_va_space->gpu;
const uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters,
UVM_ACCESS_COUNTER_TYPE_MIMC);
// UVM va_block always maps the closest resident location to processor
const uvm_processor_id_t res_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);
config_granularity_to_bytes(config->rm.granularity, &granularity);
// Add physical location if it's valid and not local vidmem
if (UVM_ID_IS_VALID(res_id) && !uvm_id_equal(res_id, gpu->id)) {
uvm_gpu_phys_address_t phys_address = uvm_va_block_res_phys_page_address(va_block, page_index, res_id, gpu);
if (phys_address_in_accessed_sub_region(phys_address,
notification_size,
config->sub_granularity_region_size,
current_entry->sub_granularity)) {
resident_processors[num_addresses] = res_id;
phys_addresses[num_addresses] = phys_address;
++num_addresses;
}
else {
UVM_DBG_PRINT_RL("Skipping phys address %llx:%s, because it couldn't have been accessed in mask %x",
phys_address.address,
uvm_aperture_string(phys_address.aperture),
current_entry->sub_granularity);
}
}
// Granularities other than 2MB can only be enabled by UVM tests. Do nothing
// in that case.
if (granularity != UVM_PAGE_SIZE_2M)
return;
address += PAGE_SIZE;
}
uvm_mutex_unlock(&va_block->lock);
addr = current_entry->address.address;
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
uvm_assert_mutex_locked(&va_block->lock);
page_index = uvm_va_block_cpu_page_index(va_block, addr);
resident_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);
// resident_id might be invalid or might already be the same as the GPU
// which received the notification if the memory was already migrated before
// acquiring the locks either during the servicing of previous notifications
// or during faults or because of explicit migrations or if the VA range was
// freed after receving the notification. Return NV_OK in such cases.
if (!UVM_ID_IS_VALID(resident_id) || uvm_id_equal(resident_id, gpu->id))
return;
if (UVM_ID_IS_GPU(resident_id))
resident_gpu = uvm_va_space_get_gpu(gpu_va_space->va_space, resident_id);
if (uvm_va_block_get_physical_size(va_block, resident_id, page_index) != granularity) {
uvm_page_mask_set(accessed_pages, page_index);
}
uvm_va_space_up_read(va_space);
else {
NvU32 region_start;
NvU32 region_end;
unsigned long sub_granularity = current_entry->sub_granularity;
NvU32 num_regions = config->sub_granularity_regions_per_translation;
NvU32 num_sub_pages = config->sub_granularity_region_size / PAGE_SIZE;
uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, resident_id);
// The addresses need to be sorted to aid coalescing.
sort(phys_addresses,
num_addresses,
sizeof(*phys_addresses),
cmp_sort_gpu_phys_addr,
NULL);
UVM_ASSERT(num_sub_pages >= 1);
for (i = 0; i < num_addresses; ++i) {
uvm_access_counter_buffer_entry_t *fake_entry = &batch_context->virt.scratch.phys_entry;
// Skip the current pointer if the physical region was already handled
if (i > 0 && gpu_phys_same_region(phys_addresses[i - 1], phys_addresses[i], notification_size)) {
UVM_ASSERT(uvm_id_equal(resident_processors[i - 1], resident_processors[i]));
continue;
// region_start and region_end refer to sub_granularity indices, not
// page_indices.
for_each_sub_granularity_region(region_start, region_end, sub_granularity, num_regions) {
uvm_page_mask_region_fill(accessed_pages,
uvm_va_block_region(region_start * num_sub_pages,
region_end * num_sub_pages));
}
UVM_DBG_PRINT_RL("Faking MIMC address[%i/%i]: %llx (granularity mask: %llx) in aperture %s on device %s\n",
i,
num_addresses,
phys_addresses[i].address,
notification_size - 1,
uvm_aperture_string(phys_addresses[i].aperture),
uvm_gpu_name(gpu));
// Construct a fake phys addr AC entry
fake_entry->counter_type = current_entry->counter_type;
fake_entry->address.address = UVM_ALIGN_DOWN(phys_addresses[i].address, notification_size);
fake_entry->address.aperture = phys_addresses[i].aperture;
fake_entry->address.is_virtual = false;
fake_entry->physical_info.resident_id = resident_processors[i];
fake_entry->counter_value = current_entry->counter_value;
fake_entry->sub_granularity = current_entry->sub_granularity;
// Remove pages in the va_block which are not resident on resident_id.
// If the GPU is heavily accessing those pages, future access counter
// migrations will migrate them to the GPU.
uvm_page_mask_and(accessed_pages, accessed_pages, resident_mask);
}
}
status = service_phys_notification(gpu, batch_context, fake_entry, out_flags);
if (status != NV_OK)
static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
uvm_gpu_va_space_t *gpu_va_space,
uvm_va_block_t *va_block,
uvm_access_counter_service_batch_context_t *batch_context,
NvU32 index,
NvU32 *out_index)
{
NvU32 i = index;
NvU32 flags = 0;
NV_STATUS status = NV_OK;
NV_STATUS flags_status;
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
UVM_ASSERT(va_block);
UVM_ASSERT(i < batch_context->virt.num_notifications);
uvm_assert_rwsem_locked(&va_space->lock);
uvm_page_mask_zero(accessed_pages);
uvm_mutex_lock(&va_block->lock);
while (i < batch_context->virt.num_notifications) {
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
NvU64 address = current_entry->address.address;
if ((current_entry->virtual_info.va_space != va_space) || (address > va_block->end)) {
*out_index = i;
break;
}
expand_notification_block(mm, gpu_va_space, va_block, accessed_pages, current_entry);
i++;
*out_index = i;
}
status = service_notification_va_block_helper(mm, va_block, gpu->id, batch_context);
uvm_mutex_unlock(&va_block->lock);
// Atleast one notification should have been processed.
UVM_ASSERT(index < *out_index);
if (status == NV_OK)
flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
flags_status = notify_tools_and_process_flags(gpu, &notifications[index], *out_index - index, flags);
if ((status == NV_OK) && (flags_status != NV_OK))
status = flags_status;
return status;
}
static NV_STATUS service_virt_notifications_batch(struct mm_struct *mm,
uvm_gpu_va_space_t *gpu_va_space,
uvm_access_counter_service_batch_context_t *batch_context,
NvU32 index,
NvU32 *out_index)
{
NV_STATUS status;
uvm_va_block_t *va_block;
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[index];
NvU64 address = current_entry->address.address;
UVM_ASSERT(va_space);
uvm_assert_rwsem_locked(&va_space->lock);
// Virtual address notifications are always 64K aligned
UVM_ASSERT(IS_ALIGNED(address, UVM_PAGE_SIZE_64K));
// TODO: Bug 4309292: [UVM][HMM] Re-enable access counter HMM block
// migrations for virtual notifications on configs with
// 4KB page size
status = uvm_va_block_find(va_space, address, &va_block);
if ((status == NV_OK) && !uvm_va_block_is_hmm(va_block)) {
UVM_ASSERT(va_block);
status = service_virt_notifications_in_block(mm, gpu_va_space, va_block, batch_context, index, out_index);
}
else {
NvU32 flags = 0;
UVM_ASSERT((status == NV_ERR_OBJECT_NOT_FOUND) ||
(status == NV_ERR_INVALID_ADDRESS) ||
uvm_va_block_is_hmm(va_block));
// NV_ERR_OBJECT_NOT_FOUND is returned if the VA range is valid but no
// VA block has been allocated yet. This can happen if there are stale
// notifications in the batch. A new VA range may have been allocated in
// that range. So, clear the notification entry to continue getting
// notifications for the new VA range.
if (status == NV_ERR_OBJECT_NOT_FOUND)
flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
// NV_ERR_INVALID_ADDRESS is returned if the corresponding VA range
// doesn't exist or it's not a managed range. Access counter migrations
// are not currently supported on such ranges.
//
// TODO: Bug 1990466: [uvm] Use access counters to trigger migrations
// When support for SAM migrations is addded, clear the notification
// entry if the VA range doesn't exist in order to receive notifications
// when a new VA range is allocated in that region.
status = notify_tools_and_process_flags(gpu_va_space->gpu, &batch_context->virt.notifications[index], 1, flags);
*out_index = index + 1;
status = NV_OK;
}
return status;
@ -1632,33 +1700,67 @@ static NV_STATUS service_virt_notification(uvm_gpu_t *gpu,
static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
uvm_access_counter_service_batch_context_t *batch_context)
{
NvU32 i;
NvU32 i = 0;
NV_STATUS status = NV_OK;
struct mm_struct *mm = NULL;
uvm_va_space_t *va_space = NULL;
uvm_va_space_t *prev_va_space = NULL;
uvm_gpu_va_space_t *gpu_va_space = NULL;
// TODO: Bug 4299018 : Add support for virtual access counter migrations on
// 4K page sizes.
if (PAGE_SIZE == UVM_PAGE_SIZE_4K) {
return notify_tools_and_process_flags(gpu,
batch_context->virt.notifications,
batch_context->virt.num_notifications,
0);
}
preprocess_virt_notifications(gpu, batch_context);
for (i = 0; i < batch_context->virt.num_notifications; ++i) {
unsigned flags = 0;
while (i < batch_context->virt.num_notifications) {
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];
va_space = current_entry->virtual_info.va_space;
status = service_virt_notification(gpu, batch_context, current_entry, &flags);
if (va_space != prev_va_space) {
UVM_DBG_PRINT_RL("Processed virt access counter (%d/%d): %sMANAGED (status: %d) clear: %s\n",
i + 1,
batch_context->virt.num_notifications,
(flags & UVM_ACCESS_COUNTER_ON_MANAGED) ? "" : "NOT ",
status,
(flags & UVM_ACCESS_COUNTER_ACTION_CLEAR) ? "YES" : "NO");
// New va_space detected, drop locks of the old va_space.
if (prev_va_space) {
uvm_va_space_up_read(prev_va_space);
uvm_va_space_mm_release_unlock(prev_va_space, mm);
if (uvm_enable_builtin_tests)
uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);
mm = NULL;
gpu_va_space = NULL;
}
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
status = access_counter_clear_targeted(gpu, current_entry);
// Acquire locks for the new va_space.
if (va_space) {
mm = uvm_va_space_mm_retain_lock(va_space);
uvm_va_space_down_read(va_space);
gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
}
prev_va_space = va_space;
}
if (va_space && gpu_va_space && uvm_va_space_has_access_counter_migrations(va_space)) {
status = service_virt_notifications_batch(mm, gpu_va_space, batch_context, i, &i);
}
else {
status = notify_tools_and_process_flags(gpu, &batch_context->virt.notifications[i], 1, 0);
i++;
}
if (status != NV_OK)
break;
}
if (va_space) {
uvm_va_space_up_read(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
}
return status;
}
@ -1941,6 +2043,7 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
}
else {
uvm_access_counter_buffer_entry_t entry = { 0 };
uvm_access_counter_buffer_entry_t *notification = &entry;
if (params->counter_type == UVM_TEST_ACCESS_COUNTER_TYPE_MIMC)
entry.counter_type = UVM_ACCESS_COUNTER_TYPE_MIMC;
@ -1950,7 +2053,7 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
entry.bank = params->bank;
entry.tag = params->tag;
status = access_counter_clear_targeted(gpu, &entry);
status = access_counter_clear_notifications(gpu, &notification, 1);
}
if (status == NV_OK)

View File

@ -235,17 +235,27 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
return NV_OK;
}
// In SRIOV, the UVM (guest) driver does not have access to the privileged
// registers used to clear the faulted bit. Instead, UVM requests host RM to do
// the clearing on its behalf, using a SW method.
static bool use_clear_faulted_channel_sw_method(uvm_gpu_t *gpu)
{
if (uvm_gpu_is_virt_mode_sriov(gpu)) {
UVM_ASSERT(gpu->parent->has_clear_faulted_channel_sw_method);
return true;
}
// If true, UVM uses a SW method to request RM to do the clearing on its
// behalf.
bool use_sw_method = false;
return false;
// In SRIOV, the UVM (guest) driver does not have access to the privileged
// registers used to clear the faulted bit.
if (uvm_gpu_is_virt_mode_sriov(gpu))
use_sw_method = true;
// In Confidential Computing access to the privileged registers is blocked,
// in order to prevent interference between guests, or between the
// (untrusted) host and the guests.
if (g_uvm_global.conf_computing_enabled)
use_sw_method = true;
if (use_sw_method)
UVM_ASSERT(gpu->parent->has_clear_faulted_channel_sw_method);
return use_sw_method;
}
static NV_STATUS clear_faulted_method_on_gpu(uvm_gpu_t *gpu,
@ -570,7 +580,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
ats_context->client_type = UVM_FAULT_CLIENT_TYPE_HUB;
ats_invalidate->write_faults_in_batch = false;
ats_invalidate->tlb_batch_pending = false;
va_range_next = uvm_va_space_iter_first(gpu_va_space->va_space, fault_entry->fault_address, ~0ULL);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -362,7 +362,8 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
"Cancel targeting instance_ptr {0x%llx:%s}\n",
instance_ptr.address,
uvm_aperture_string(instance_ptr.aperture));
} else {
}
else {
status = uvm_push_begin_acquire(gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
&replayable_faults->replay_tracker,
@ -697,9 +698,6 @@ static inline int cmp_access_type(uvm_fault_access_type_t a, uvm_fault_access_ty
typedef enum
{
// Fetch a batch of faults from the buffer.
FAULT_FETCH_MODE_BATCH_ALL,
// Fetch a batch of faults from the buffer. Stop at the first entry that is
// not ready yet
FAULT_FETCH_MODE_BATCH_READY,
@ -857,9 +855,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_gpu_t *gpu,
// written out of order
UVM_SPIN_WHILE(!gpu->parent->fault_buffer_hal->entry_is_valid(gpu->parent, get), &spin) {
// We have some entry to work on. Let's do the rest later.
if (fetch_mode != FAULT_FETCH_MODE_ALL &&
fetch_mode != FAULT_FETCH_MODE_BATCH_ALL &&
fault_index > 0)
if (fetch_mode == FAULT_FETCH_MODE_BATCH_READY && fault_index > 0)
goto done;
}
@ -888,6 +884,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_gpu_t *gpu,
current_entry->va_space = NULL;
current_entry->filtered = false;
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
if (current_entry->fault_source.utlb_id > batch_context->max_utlb_id) {
UVM_ASSERT(current_entry->fault_source.utlb_id < replayable_faults->utlb_count);
@ -1184,7 +1181,11 @@ static void mark_fault_fatal(uvm_fault_service_batch_context_t *batch_context,
fault_entry->replayable.cancel_va_mode = cancel_va_mode;
utlb->has_fatal_faults = true;
batch_context->has_fatal_faults = true;
if (!batch_context->fatal_va_space) {
UVM_ASSERT(fault_entry->va_space);
batch_context->fatal_va_space = fault_entry->va_space;
}
}
static void fault_entry_duplicate_flags(uvm_fault_service_batch_context_t *batch_context,
@ -1378,7 +1379,10 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
UVM_ASSERT(current_entry->fault_access_type ==
uvm_fault_access_type_mask_highest(current_entry->access_type_mask));
current_entry->is_fatal = false;
// Unserviceable faults were already skipped by the caller. There are no
// unserviceable fault types that could be in the same VA block as a
// serviceable fault.
UVM_ASSERT(!current_entry->is_fatal);
current_entry->is_throttled = false;
current_entry->is_invalid_prefetch = false;
@ -1512,7 +1516,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
++block_context->num_retries;
if (status == NV_OK && batch_context->has_fatal_faults)
if (status == NV_OK && batch_context->fatal_va_space)
status = uvm_va_block_set_cancel(va_block, &block_context->block_context, gpu);
return status;
@ -1676,7 +1680,8 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
if (access_type <= UVM_FAULT_ACCESS_TYPE_READ) {
cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
}
else if (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE) {
else {
UVM_ASSERT(access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
if (uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ) &&
!uvm_page_mask_test(reads_serviced_mask, page_index))
cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
@ -1735,6 +1740,10 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
uvm_fault_access_type_t access_type = current_entry->fault_access_type;
bool is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);
// ATS faults can't be unserviceable, since unserviceable faults require
// GMMU PTEs.
UVM_ASSERT(!current_entry->is_fatal);
i++;
update_batch_and_notify_fault(gpu_va_space->gpu,
@ -1934,14 +1943,198 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
return status;
}
// Called when a fault in the batch has been marked fatal. Flush the buffer
// under the VA and mmap locks to remove any potential stale fatal faults, then
// service all new faults for just that VA space and cancel those which are
// fatal. Faults in other VA spaces are replayed when done and will be processed
// when normal fault servicing resumes.
static NV_STATUS service_fault_batch_for_cancel(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
{
NV_STATUS status = NV_OK;
NvU32 i;
uvm_va_space_t *va_space = batch_context->fatal_va_space;
uvm_gpu_va_space_t *gpu_va_space = NULL;
struct mm_struct *mm;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.replayable.block_service_context;
uvm_va_block_context_t *va_block_context = &service_context->block_context;
UVM_ASSERT(gpu->parent->replayable_faults_supported);
UVM_ASSERT(va_space);
// Perform the flush and re-fetch while holding the mmap_lock and the
// VA space lock. This avoids stale faults because it prevents any vma
// modifications (mmap, munmap, mprotect) from happening between the time HW
// takes the fault and we cancel it.
mm = uvm_va_space_mm_retain_lock(va_space);
va_block_context->mm = mm;
uvm_va_space_down_read(va_space);
// We saw fatal faults in this VA space before. Flush while holding
// mmap_lock to make sure those faults come back (aren't stale).
//
// We need to wait until all old fault messages have arrived before
// flushing, hence UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT.
status = fault_buffer_flush_locked(gpu,
UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
UVM_FAULT_REPLAY_TYPE_START,
batch_context);
if (status != NV_OK)
goto done;
// Wait for the flush's replay to finish to give the legitimate faults a
// chance to show up in the buffer again.
status = uvm_tracker_wait(&replayable_faults->replay_tracker);
if (status != NV_OK)
goto done;
// We expect all replayed faults to have arrived in the buffer so we can re-
// service them. The replay-and-wait sequence above will ensure they're all
// in the HW buffer. When GSP owns the HW buffer, we also have to wait for
// GSP to copy all available faults from the HW buffer into the shadow
// buffer.
//
// TODO: Bug 2533557: This flush does not actually guarantee that GSP will
// copy over all faults.
status = hw_fault_buffer_flush_locked(gpu->parent);
if (status != NV_OK)
goto done;
// If there is no GPU VA space for the GPU, ignore all faults in the VA
// space. This can happen if the GPU VA space has been destroyed since we
// unlocked the VA space in service_fault_batch. That means the fatal faults
// are stale, because unregistering the GPU VA space requires preempting the
// context and detaching all channels in that VA space. Restart fault
// servicing from the top.
gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
if (!gpu_va_space)
goto done;
// Re-parse the new faults
batch_context->num_invalid_prefetch_faults = 0;
batch_context->num_duplicate_faults = 0;
batch_context->num_replays = 0;
batch_context->fatal_va_space = NULL;
batch_context->has_throttled_faults = false;
status = fetch_fault_buffer_entries(gpu, batch_context, FAULT_FETCH_MODE_ALL);
if (status != NV_OK)
goto done;
// No more faults left. Either the previously-seen fatal entry was stale, or
// RM killed the context underneath us.
if (batch_context->num_cached_faults == 0)
goto done;
++batch_context->batch_id;
status = preprocess_fault_batch(gpu, batch_context);
if (status != NV_OK) {
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
// Another flush happened due to stale faults or a context-fatal
// error. The previously-seen fatal fault might not exist anymore,
// so restart fault servicing from the top.
status = NV_OK;
}
goto done;
}
// Search for the target VA space
for (i = 0; i < batch_context->num_coalesced_faults; i++) {
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
UVM_ASSERT(current_entry->va_space);
if (current_entry->va_space == va_space)
break;
}
while (i < batch_context->num_coalesced_faults) {
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
if (current_entry->va_space != va_space)
break;
// service_fault_batch_dispatch() doesn't expect unserviceable faults.
// Just cancel them directly.
if (current_entry->is_fatal) {
status = cancel_fault_precise_va(gpu, current_entry, UVM_FAULT_CANCEL_VA_MODE_ALL);
if (status != NV_OK)
break;
++i;
}
else {
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
NvU32 block_faults;
ats_invalidate->tlb_batch_pending = false;
uvm_hmm_service_context_init(service_context);
// Service all the faults that we can. We only really need to search
// for fatal faults, but attempting to service all is the easiest
// way to do that.
status = service_fault_batch_dispatch(va_space, gpu_va_space, batch_context, i, &block_faults, false);
if (status != NV_OK) {
// TODO: Bug 3900733: clean up locking in service_fault_batch().
// We need to drop lock and retry. That means flushing and
// starting over.
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
status = NV_OK;
break;
}
// Invalidate TLBs before cancel to ensure that fatal faults don't
// get stuck in HW behind non-fatal faults to the same line.
status = uvm_ats_invalidate_tlbs(gpu_va_space, ats_invalidate, &batch_context->tracker);
if (status != NV_OK)
break;
while (block_faults-- > 0) {
current_entry = batch_context->ordered_fault_cache[i];
if (current_entry->is_fatal) {
status = cancel_fault_precise_va(gpu, current_entry, current_entry->replayable.cancel_va_mode);
if (status != NV_OK)
break;
}
++i;
}
}
}
done:
uvm_va_space_up_read(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
if (status == NV_OK) {
// There are two reasons to flush the fault buffer here.
//
// 1) Functional. We need to replay both the serviced non-fatal faults
// and the skipped faults in other VA spaces. The former need to be
// restarted and the latter need to be replayed so the normal fault
// service mechanism can fetch and process them.
//
// 2) Performance. After cancelling the fatal faults, a flush removes
// any potential duplicated fault that may have been added while
// processing the faults in this batch. This flush also avoids doing
// unnecessary processing after the fatal faults have been cancelled,
// so all the rest are unlikely to remain after a replay because the
// context is probably in the process of dying.
status = fault_buffer_flush_locked(gpu,
UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
UVM_FAULT_REPLAY_TYPE_START,
batch_context);
}
return status;
}
// Scan the ordered view of faults and group them by different va_blocks
// (managed faults) and service faults for each va_block, in batch.
// Service non-managed faults one at a time as they are encountered during the
// scan.
//
// This function returns NV_WARN_MORE_PROCESSING_REQUIRED if the fault buffer
// was flushed because the needs_fault_buffer_flush flag was set on some GPU VA
// space
// Fatal faults are marked for later processing by the caller.
static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
fault_service_mode_t service_mode,
uvm_fault_service_batch_context_t *batch_context)
@ -1960,7 +2153,7 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
UVM_ASSERT(gpu->parent->replayable_faults_supported);
ats_invalidate->write_faults_in_batch = false;
ats_invalidate->tlb_batch_pending = false;
uvm_hmm_service_context_init(service_context);
for (i = 0; i < batch_context->num_coalesced_faults;) {
@ -1995,38 +2188,25 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
va_block_context->mm = mm;
uvm_va_space_down_read(va_space);
gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
if (uvm_processor_mask_test_and_clear_atomic(&va_space->needs_fault_buffer_flush, gpu->id)) {
status = fault_buffer_flush_locked(gpu,
UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
UVM_FAULT_REPLAY_TYPE_START,
batch_context);
if (status == NV_OK)
status = NV_WARN_MORE_PROCESSING_REQUIRED;
break;
}
// The case where there is no valid GPU VA space for the GPU in this
// VA space is handled next
}
// Some faults could be already fatal if they cannot be handled by
// the UVM driver
if (current_entry->is_fatal) {
++i;
batch_context->has_fatal_faults = true;
if (!batch_context->fatal_va_space)
batch_context->fatal_va_space = va_space;
utlb->has_fatal_faults = true;
UVM_ASSERT(utlb->num_pending_faults > 0);
continue;
}
if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id)) {
if (!gpu_va_space) {
// If there is no GPU VA space for the GPU, ignore the fault. This
// can happen if a GPU VA space is destroyed without explicitly
// freeing all memory ranges (destroying the VA range triggers a
// flush of the fault buffer) and there are stale entries in the
// freeing all memory ranges and there are stale entries in the
// buffer that got fixed by the servicing in a previous batch.
++i;
continue;
@ -2044,15 +2224,17 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
uvm_va_space_mm_release_unlock(va_space, mm);
mm = NULL;
va_space = NULL;
status = NV_OK;
continue;
}
if (status != NV_OK)
goto fail;
i += block_faults;
// Don't issue replays in cancel mode
if (replay_per_va_block && !batch_context->has_fatal_faults) {
if (replay_per_va_block && !batch_context->fatal_va_space) {
status = push_replay_on_gpu(gpu, UVM_FAULT_REPLAY_TYPE_START, batch_context);
if (status != NV_OK)
goto fail;
@ -2064,8 +2246,6 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
}
}
// Only clobber status if invalidate_status != NV_OK, since status may also
// contain NV_WARN_MORE_PROCESSING_REQUIRED.
if (va_space != NULL) {
NV_STATUS invalidate_status = uvm_ats_invalidate_tlbs(gpu_va_space, ats_invalidate, &batch_context->tracker);
if (invalidate_status != NV_OK)
@ -2273,77 +2453,48 @@ static NvU32 is_fatal_fault_in_buffer(uvm_fault_service_batch_context_t *batch_c
return false;
}
typedef enum
{
// Only cancel faults flagged as fatal
FAULT_CANCEL_MODE_FATAL,
// Cancel all faults in the batch unconditionally
FAULT_CANCEL_MODE_ALL,
} fault_cancel_mode_t;
// Cancel faults in the given fault service batch context. The function provides
// two different modes depending on the value of cancel_mode:
// - If cancel_mode == FAULT_CANCEL_MODE_FATAL, only faults flagged as fatal
// will be cancelled. In this case, the reason reported to tools is the one
// contained in the fault entry itself.
// - If cancel_mode == FAULT_CANCEL_MODE_ALL, all faults will be cancelled
// unconditionally. In this case, the reason reported to tools for non-fatal
// faults is the one passed to this function.
static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu,
uvm_fault_service_batch_context_t *batch_context,
fault_cancel_mode_t cancel_mode,
UvmEventFatalReason reason)
// Cancel all faults in the given fault service batch context, even those not
// marked as fatal.
static NV_STATUS cancel_faults_all(uvm_gpu_t *gpu,
uvm_fault_service_batch_context_t *batch_context,
UvmEventFatalReason reason)
{
NV_STATUS status = NV_OK;
NV_STATUS fault_status;
uvm_va_space_t *va_space = NULL;
NvU32 i;
NvU32 i = 0;
UVM_ASSERT(gpu->parent->fault_cancel_va_supported);
if (cancel_mode == FAULT_CANCEL_MODE_ALL)
UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
for (i = 0; i < batch_context->num_coalesced_faults; ++i) {
while (i < batch_context->num_coalesced_faults && status == NV_OK) {
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
uvm_va_space_t *va_space = current_entry->va_space;
bool skip_va_space;
UVM_ASSERT(current_entry->va_space);
UVM_ASSERT(va_space);
if (current_entry->va_space != va_space) {
// Fault on a different va_space, drop the lock of the old one...
if (va_space != NULL)
uvm_va_space_up_read(va_space);
uvm_va_space_down_read(va_space);
va_space = current_entry->va_space;
// If there is no GPU VA space for the GPU, ignore all faults in
// that VA space. This can happen if the GPU VA space has been
// destroyed since we unlocked the VA space in service_fault_batch.
// Ignoring the fault avoids targetting a PDB that might have been
// reused by another process.
skip_va_space = !uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
// ... and take the lock of the new one
uvm_va_space_down_read(va_space);
for (;
i < batch_context->num_coalesced_faults && current_entry->va_space == va_space;
current_entry = batch_context->ordered_fault_cache[++i]) {
uvm_fault_cancel_va_mode_t cancel_va_mode;
// We don't need to check whether a buffer flush is required
// (due to VA range destruction).
// - For cancel_mode == FAULT_CANCEL_MODE_FATAL, once a fault is
// flagged as fatal we need to cancel it, even if its VA range no
// longer exists.
// - For cancel_mode == FAULT_CANCEL_MODE_ALL we don't care about
// any of this, we just want to trigger RC in RM.
}
if (skip_va_space)
continue;
if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id)) {
// If there is no GPU VA space for the GPU, ignore the fault.
// This can happen if the GPU VA did not exist in
// service_fault_batch(), or it was destroyed since then.
// This is to avoid targetting a PDB that might have been reused
// by another process.
continue;
}
// Cancel the fault
if (cancel_mode == FAULT_CANCEL_MODE_ALL || current_entry->is_fatal) {
uvm_fault_cancel_va_mode_t cancel_va_mode = current_entry->replayable.cancel_va_mode;
// If cancelling unconditionally and the fault was not fatal,
// set the cancel reason passed to this function
if (!current_entry->is_fatal) {
if (current_entry->is_fatal) {
UVM_ASSERT(current_entry->fatal_reason != UvmEventFatalReasonInvalid);
cancel_va_mode = current_entry->replayable.cancel_va_mode;
}
else {
current_entry->fatal_reason = reason;
cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
}
@ -2352,17 +2503,13 @@ static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu,
if (status != NV_OK)
break;
}
uvm_va_space_up_read(va_space);
}
if (va_space != NULL)
uvm_va_space_up_read(va_space);
// After cancelling the fatal faults, the fault buffer is flushed to remove
// any potential duplicated fault that may have been added while processing
// the faults in this batch. This flush also avoids doing unnecessary
// processing after the fatal faults have been cancelled, so all the rest
// are unlikely to remain after a replay because the context is probably in
// the process of dying.
// Because each cancel itself triggers a replay, there may be a large number
// of new duplicated faults in the buffer after cancelling all the known
// ones. Flushing the buffer discards them to avoid unnecessary processing.
fault_status = fault_buffer_flush_locked(gpu,
UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
UVM_FAULT_REPLAY_TYPE_START,
@ -2410,12 +2557,12 @@ static void cancel_fault_batch(uvm_gpu_t *gpu,
uvm_fault_service_batch_context_t *batch_context,
UvmEventFatalReason reason)
{
if (gpu->parent->fault_cancel_va_supported) {
cancel_faults_precise_va(gpu, batch_context, FAULT_CANCEL_MODE_ALL, reason);
return;
}
cancel_fault_batch_tlb(gpu, batch_context, reason);
// Return code is ignored since we're on a global error path and wouldn't be
// able to recover anyway.
if (gpu->parent->fault_cancel_va_supported)
cancel_faults_all(gpu, batch_context, reason);
else
cancel_fault_batch_tlb(gpu, batch_context, reason);
}
@ -2502,7 +2649,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
batch_context->num_invalid_prefetch_faults = 0;
batch_context->num_replays = 0;
batch_context->has_fatal_faults = false;
batch_context->fatal_va_space = NULL;
batch_context->has_throttled_faults = false;
// 5) Fetch all faults from buffer
@ -2549,9 +2696,6 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
// 8) Service all non-fatal faults and mark all non-serviceable faults
// as fatal
status = service_fault_batch(gpu, FAULT_SERVICE_MODE_CANCEL, batch_context);
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
continue;
UVM_ASSERT(batch_context->num_replays == 0);
if (status == NV_ERR_NO_MEMORY)
continue;
@ -2559,7 +2703,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
break;
// No more fatal faults left, we are done
if (!batch_context->has_fatal_faults)
if (!batch_context->fatal_va_space)
break;
// 9) Search for uTLBs that contain fatal faults and meet the
@ -2581,13 +2725,9 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
static NV_STATUS cancel_faults_precise(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
{
UVM_ASSERT(batch_context->has_fatal_faults);
if (gpu->parent->fault_cancel_va_supported) {
return cancel_faults_precise_va(gpu,
batch_context,
FAULT_CANCEL_MODE_FATAL,
UvmEventFatalReasonInvalid);
}
UVM_ASSERT(batch_context->fatal_va_space);
if (gpu->parent->fault_cancel_va_supported)
return service_fault_batch_for_cancel(gpu, batch_context);
return cancel_faults_precise_tlb(gpu, batch_context);
}
@ -2643,7 +2783,7 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
batch_context->num_invalid_prefetch_faults = 0;
batch_context->num_duplicate_faults = 0;
batch_context->num_replays = 0;
batch_context->has_fatal_faults = false;
batch_context->fatal_va_space = NULL;
batch_context->has_throttled_faults = false;
status = fetch_fault_buffer_entries(gpu, batch_context, FAULT_FETCH_MODE_BATCH_READY);
@ -2671,9 +2811,6 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
// was flushed
num_replays += batch_context->num_replays;
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
continue;
enable_disable_prefetch_faults(gpu->parent, batch_context);
if (status != NV_OK) {
@ -2687,10 +2824,17 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
break;
}
if (batch_context->has_fatal_faults) {
if (batch_context->fatal_va_space) {
status = uvm_tracker_wait(&batch_context->tracker);
if (status == NV_OK)
if (status == NV_OK) {
status = cancel_faults_precise(gpu, batch_context);
if (status == NV_OK) {
// Cancel handling should've issued at least one replay
UVM_ASSERT(batch_context->num_replays > 0);
++num_batches;
continue;
}
}
break;
}

View File

@ -103,5 +103,7 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->map_remap_larger_page_promotion = false;
parent_gpu->plc_supported = true;
parent_gpu->no_ats_range_required = true;
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2020-2022 NVIDIA Corporation
Copyright (c) 2020-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -33,6 +33,7 @@
#include "uvm_types.h"
#include "uvm_global.h"
#include "uvm_common.h"
#include "uvm_hal.h"
#include "uvm_hal_types.h"
#include "uvm_hopper_fault_buffer.h"
@ -42,6 +43,10 @@
#define MMU_BIG 0
#define MMU_SMALL 1
// Used in pde_pcf().
#define ATS_ALLOWED 0
#define ATS_NOT_ALLOWED 1
uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id)
{
if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST44)
@ -260,7 +265,108 @@ static NvU64 poisoned_pte_hopper(void)
return WRITE_HWCONST64(pte_bits, _MMU_VER3, PTE, PCF, PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD);
}
static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, NvU32 depth)
typedef enum
{
PDE_TYPE_SINGLE,
PDE_TYPE_DUAL_BIG,
PDE_TYPE_DUAL_SMALL,
PDE_TYPE_COUNT,
} pde_type_t;
static const NvU8 valid_pcf[][2] = { { NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED,
NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED },
{ NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED,
NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED },
{ NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED,
NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED } };
static const NvU8 invalid_pcf[][2] = { { NV_MMU_VER3_PDE_PCF_INVALID_ATS_ALLOWED,
NV_MMU_VER3_PDE_PCF_INVALID_ATS_NOT_ALLOWED },
{ NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_ALLOWED,
NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_NOT_ALLOWED },
{ NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_ALLOWED,
NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_NOT_ALLOWED } };
static const NvU8 va_base[] = { 56, 47, 38, 29, 21 };
static bool is_ats_range_valid(uvm_page_directory_t *dir, NvU32 child_index)
{
NvU64 pde_base_va;
NvU64 min_va_upper;
NvU64 max_va_lower;
NvU32 index_in_dir;
uvm_cpu_get_unaddressable_range(&max_va_lower, &min_va_upper);
UVM_ASSERT(dir->depth < ARRAY_SIZE(va_base));
// We can use UVM_PAGE_SIZE_AGNOSTIC because page_size is only used in
// index_bits_hopper() for PTE table, i.e., depth 5+, which does not use a
// PDE PCF or an ATS_ALLOWED/NOT_ALLOWED setting.
UVM_ASSERT(child_index < (1ull << index_bits_hopper(dir->depth, UVM_PAGE_SIZE_AGNOSTIC)));
pde_base_va = 0;
index_in_dir = child_index;
while (dir) {
pde_base_va += index_in_dir * (1ull << va_base[dir->depth]);
index_in_dir = dir->index_in_parent;
dir = dir->host_parent;
}
pde_base_va = (NvU64)((NvS64)(pde_base_va << (64 - num_va_bits_hopper())) >> (64 - num_va_bits_hopper()));
if (pde_base_va < max_va_lower || pde_base_va >= min_va_upper)
return true;
return false;
}
// PDE Permission Control Flags
static NvU32 pde_pcf(bool valid, pde_type_t pde_type, uvm_page_directory_t *dir, NvU32 child_index)
{
const NvU8 (*pcf)[2] = valid ? valid_pcf : invalid_pcf;
NvU8 depth = dir->depth;
UVM_ASSERT(pde_type < PDE_TYPE_COUNT);
UVM_ASSERT(depth < 5);
// On non-ATS systems, PDE PCF only sets the valid and volatile/cache bits.
if (!g_uvm_global.ats.enabled)
return pcf[pde_type][ATS_ALLOWED];
// We assume all supported ATS platforms use canonical form address.
// See comments in uvm_gpu.c:uvm_gpu_can_address() and in
// uvm_mmu.c:page_tree_ats_init();
UVM_ASSERT(uvm_platform_uses_canonical_form_address());
// Hopper GPUs on ATS-enabled systems, perform a parallel lookup on both
// ATS and GMMU page tables. For managed memory we need to prevent this
// parallel lookup since we would not get any GPU fault if the CPU has
// a valid mapping. Also, for external ranges that are known to be
// mapped entirely on the GMMU page table we can skip the ATS lookup
// for performance reasons. Parallel ATS lookup is disabled in PDE1
// (depth 3) and, therefore, it applies to the underlying 512MB VA
// range.
//
// UVM sets ATS_NOT_ALLOWED for all Hopper+ mappings on ATS systems.
// This is fine because CUDA ensures that all managed and external
// allocations are properly compartmentalized in 512MB-aligned VA
// regions. For cudaHostRegister CUDA cannot control the VA range, but
// we rely on ATS for those allocations so they can't choose the
// ATS_NOT_ALLOWED mode.
// TODO: Bug 3254055: Relax the NO_ATS setting from 512MB (pde1) range to
// PTEs.
// HW complies with the leaf PDE's ATS_ALLOWED/ATS_NOT_ALLOWED settings,
// enabling us to treat any upper-level PDE as a don't care as long as there
// are leaf PDEs for the entire upper-level PDE range. We assume PDE4
// entries (depth == 0) are always ATS enabled, and the no_ats_range is in
// PDE3 or lower.
if (depth == 0 || (!valid && is_ats_range_valid(dir, child_index)))
return pcf[pde_type][ATS_ALLOWED];
return pcf[pde_type][ATS_NOT_ALLOWED];
}
static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_directory_t *dir, NvU32 child_index)
{
NvU64 pde_bits = 0;
@ -280,38 +386,17 @@ static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, NvU32 dep
break;
}
// PCF (permission control flags) 5:3
// Hopper GPUs on ATS-enabled systems, perform a parallel lookup on both
// ATS and GMMU page tables. For managed memory we need to prevent this
// parallel lookup since we would not get any GPU fault if the CPU has
// a valid mapping. Also, for external ranges that are known to be
// mapped entirely on the GMMU page table we can skip the ATS lookup
// for performance reasons. Parallel ATS lookup is disabled in PDE1
// (depth 3) and, therefore, it applies to the underlying 512MB VA
// range.
//
// UVM sets ATS_NOT_ALLOWED for all Hopper+ mappings on ATS systems.
// This is fine because CUDA ensures that all managed and external
// allocations are properly compartmentalized in 512MB-aligned VA
// regions. For cudaHostRegister CUDA cannot control the VA range, but
// we rely on ATS for those allocations so they can't choose the
// ATS_NOT_ALLOWED mode.
//
// TODO: Bug 3254055: Relax the NO_ATS setting from 512MB (pde1) range
// to PTEs.
if (depth == 3 && g_uvm_global.ats.enabled)
pde_bits |= HWCONST64(_MMU_VER3, PDE, PCF, VALID_UNCACHED_ATS_NOT_ALLOWED);
else
pde_bits |= HWCONST64(_MMU_VER3, PDE, PCF, VALID_UNCACHED_ATS_ALLOWED);
// address 51:12
pde_bits |= HWVALUE64(_MMU_VER3, PDE, ADDRESS, address);
}
// PCF (permission control flags) 5:3
pde_bits |= HWVALUE64(_MMU_VER3, PDE, PCF, pde_pcf(phys_alloc != NULL, PDE_TYPE_SINGLE, dir, child_index));
return pde_bits;
}
static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_directory_t *dir, NvU32 child_index)
{
NvU64 pde_bits = 0;
@ -330,17 +415,20 @@ static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
break;
}
// PCF (permission control flags) 5:3
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, PCF_BIG, VALID_UNCACHED_ATS_NOT_ALLOWED);
// address 51:8
pde_bits |= HWVALUE64(_MMU_VER3, DUAL_PDE, ADDRESS_BIG, address);
}
// PCF (permission control flags) 5:3
pde_bits |= HWVALUE64(_MMU_VER3,
DUAL_PDE,
PCF_BIG,
pde_pcf(phys_alloc != NULL, PDE_TYPE_DUAL_BIG, dir, child_index));
return pde_bits;
}
static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_directory_t *dir, NvU32 child_index)
{
NvU64 pde_bits = 0;
@ -359,29 +447,40 @@ static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
break;
}
// PCF (permission control flags) 69:67 [5:3]
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, PCF_SMALL, VALID_UNCACHED_ATS_NOT_ALLOWED);
// address 115:76 [51:12]
pde_bits |= HWVALUE64(_MMU_VER3, DUAL_PDE, ADDRESS_SMALL, address);
}
// PCF (permission control flags) 69:67 [5:3]
pde_bits |= HWVALUE64(_MMU_VER3,
DUAL_PDE,
PCF_SMALL,
pde_pcf(phys_alloc != NULL, PDE_TYPE_DUAL_SMALL, dir, child_index));
return pde_bits;
}
static void make_pde_hopper(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
static void make_pde_hopper(void *entry,
uvm_mmu_page_table_alloc_t **phys_allocs,
uvm_page_directory_t *dir,
NvU32 child_index)
{
NvU32 entry_count = entries_per_index_hopper(depth);
NvU32 entry_count;
NvU64 *entry_bits = (NvU64 *)entry;
UVM_ASSERT(dir);
entry_count = entries_per_index_hopper(dir->depth);
if (entry_count == 1) {
*entry_bits = single_pde_hopper(*phys_allocs, depth);
*entry_bits = single_pde_hopper(*phys_allocs, dir, child_index);
}
else if (entry_count == 2) {
entry_bits[MMU_BIG] = big_half_pde_hopper(phys_allocs[MMU_BIG]);
entry_bits[MMU_SMALL] = small_half_pde_hopper(phys_allocs[MMU_SMALL]);
entry_bits[MMU_BIG] = big_half_pde_hopper(phys_allocs[MMU_BIG], dir, child_index);
entry_bits[MMU_SMALL] = small_half_pde_hopper(phys_allocs[MMU_SMALL], dir, child_index);
// This entry applies to the whole dual PDE but is stored in the lower
// bits
// bits.
entry_bits[MMU_BIG] |= HWCONST64(_MMU_VER3, DUAL_PDE, IS_PTE, FALSE);
}
else {

View File

@ -128,8 +128,9 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
// present if we see the callback.
//
// The callback was added in commit 0f0a327fa12cd55de5e7f8c05a70ac3d047f405e,
// v3.19 (2014-11-13).
#if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE)
// v3.19 (2014-11-13) and renamed in commit 1af5a8109904.
#if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE) || \
defined(NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS)
#define UVM_CAN_USE_MMU_NOTIFIERS() 1
#else
#define UVM_CAN_USE_MMU_NOTIFIERS() 0
@ -153,10 +154,6 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
#define VM_MIXEDMAP 0x00000000
#endif
#if !defined(MPOL_PREFERRED_MANY)
#define MPOL_PREFERRED_MANY 5
#endif
//
// printk.h already defined pr_fmt, so we have to redefine it so the pr_*
// routines pick up our version

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -71,4 +71,6 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->smc.supported = false;
parent_gpu->plc_supported = false;
parent_gpu->no_ats_range_required = false;
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -106,10 +106,16 @@ static NvU64 small_half_pde_maxwell(uvm_mmu_page_table_alloc_t *phys_alloc)
return pde_bits;
}
static void make_pde_maxwell(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
static void make_pde_maxwell(void *entry,
uvm_mmu_page_table_alloc_t **phys_allocs,
uvm_page_directory_t *dir,
NvU32 child_index)
{
NvU64 pde_bits = 0;
UVM_ASSERT(depth == 0);
UVM_ASSERT(dir);
UVM_ASSERT(dir->depth == 0);
pde_bits |= HWCONST64(_MMU, PDE, SIZE, FULL);
pde_bits |= big_half_pde_maxwell(phys_allocs[MMU_BIG]) | small_half_pde_maxwell(phys_allocs[MMU_SMALL]);

View File

@ -672,14 +672,6 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
.finalize_and_map = uvm_migrate_vma_finalize_and_map_helper,
};
// WAR for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
// invalidates on read-only to read-write upgrades
//
// This code path isn't used on GH180 but we need to maintain consistent
// behaviour on systems that do.
if (!vma_is_anonymous(args->vma))
return NV_WARN_NOTHING_TO_DO;
ret = migrate_vma(&uvm_migrate_vma_ops, args->vma, args->start, args->end, args->src, args->dst, state);
if (ret < 0)
return errno_to_nv_status(ret);
@ -693,24 +685,6 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
if (ret < 0)
return errno_to_nv_status(ret);
// TODO: Bug 2419180: support file-backed pages in migrate_vma, when
// support for it is added to the Linux kernel
//
// A side-effect of migrate_vma_setup() is it calls mmu notifiers even if a
// page can't be migrated (eg. because it's a non-anonymous mapping). We
// need this side-effect for SMMU on GH180 to ensure any cached read-only
// entries are flushed from SMMU on permission upgrade.
//
// TODO: Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
// invalidates on read-only to read-write upgrades
//
// The above WAR doesn't work for HugeTLBfs mappings because
// migrate_vma_setup() will fail in that case.
if (!vma_is_anonymous(args->vma)) {
migrate_vma_finalize(args);
return NV_WARN_NOTHING_TO_DO;
}
uvm_migrate_vma_alloc_and_copy(args, state);
if (state->status == NV_OK) {
migrate_vma_pages(args);
@ -862,6 +836,17 @@ static NV_STATUS migrate_pageable_vma_region(struct vm_area_struct *vma,
return NV_OK;
}
NV_STATUS uvm_test_skip_migrate_vma(UVM_TEST_SKIP_MIGRATE_VMA_PARAMS *params, struct file *filp)
{
uvm_va_space_t *va_space = uvm_va_space_get(filp);
uvm_va_space_down_write(va_space);
va_space->test.skip_migrate_vma = params->skip;
uvm_va_space_up_write(va_space);
return NV_OK;
}
static NV_STATUS migrate_pageable_vma(struct vm_area_struct *vma,
unsigned long start,
unsigned long outer,
@ -884,13 +869,12 @@ static NV_STATUS migrate_pageable_vma(struct vm_area_struct *vma,
start = max(start, vma->vm_start);
outer = min(outer, vma->vm_end);
// migrate_vma only supports anonymous VMAs. We check for those after
// calling migrate_vma_setup() to workaround Bug 4130089. We need to check
// for HugeTLB VMAs here because migrate_vma_setup() will return a fatal
// error for those.
// TODO: Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
// invalidates on read-only to read-write upgrades
if (is_vm_hugetlb_page(vma))
if (va_space->test.skip_migrate_vma)
return NV_WARN_NOTHING_TO_DO;
// TODO: Bug 2419180: support file-backed pages in migrate_vma, when
// support for it is added to the Linux kernel
if (!vma_is_anonymous(vma))
return NV_WARN_NOTHING_TO_DO;
if (uvm_processor_mask_empty(&va_space->registered_gpus))
@ -950,7 +934,9 @@ static NV_STATUS migrate_pageable(migrate_vma_state_t *state)
bool touch = uvm_migrate_args->touch;
uvm_populate_permissions_t populate_permissions = uvm_migrate_args->populate_permissions;
UVM_ASSERT(!vma_is_anonymous(vma) || uvm_processor_mask_empty(&va_space->registered_gpus));
UVM_ASSERT(va_space->test.skip_migrate_vma ||
!vma_is_anonymous(vma) ||
uvm_processor_mask_empty(&va_space->registered_gpus));
// We can't use migrate_vma to move the pages as desired. Normally
// this fallback path is supposed to populate the memory then inform

View File

@ -51,7 +51,7 @@ typedef struct
#if defined(CONFIG_MIGRATE_VMA_HELPER)
#define UVM_MIGRATE_VMA_SUPPORTED 1
#else
#if defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MIGRATE_VMA_SETUP_PRESENT)
#if NV_IS_EXPORT_SYMBOL_PRESENT_migrate_vma_setup
#define UVM_MIGRATE_VMA_SUPPORTED 1
#endif
#endif
@ -218,6 +218,9 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args);
NV_STATUS uvm_migrate_pageable_init(void);
void uvm_migrate_pageable_exit(void);
NV_STATUS uvm_test_skip_migrate_vma(UVM_TEST_SKIP_MIGRATE_VMA_PARAMS *params, struct file *filp);
#else // UVM_MIGRATE_VMA_SUPPORTED
static NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
@ -251,6 +254,10 @@ static void uvm_migrate_pageable_exit(void)
{
}
static inline NV_STATUS uvm_test_skip_migrate_vma(UVM_TEST_SKIP_MIGRATE_VMA_PARAMS *params, struct file *filp)
{
return NV_OK;
}
#endif // UVM_MIGRATE_VMA_SUPPORTED
#endif

View File

@ -323,37 +323,156 @@ static void uvm_mmu_page_table_cpu_memset_16(uvm_gpu_t *gpu,
uvm_mmu_page_table_cpu_unmap(gpu, phys_alloc);
}
static void pde_fill_cpu(uvm_page_tree_t *tree,
uvm_page_directory_t *directory,
NvU32 start_index,
NvU32 pde_count,
uvm_mmu_page_table_alloc_t **phys_addr)
{
NvU64 pde_data[2], entry_size;
NvU32 i;
UVM_ASSERT(uvm_mmu_use_cpu(tree));
entry_size = tree->hal->entry_size(directory->depth);
UVM_ASSERT(sizeof(pde_data) >= entry_size);
for (i = 0; i < pde_count; i++) {
tree->hal->make_pde(pde_data, phys_addr, directory, start_index + i);
if (entry_size == sizeof(pde_data[0]))
uvm_mmu_page_table_cpu_memset_8(tree->gpu, &directory->phys_alloc, start_index + i, pde_data[0], 1);
else
uvm_mmu_page_table_cpu_memset_16(tree->gpu, &directory->phys_alloc, start_index + i, pde_data, 1);
}
}
static void pde_fill_gpu(uvm_page_tree_t *tree,
uvm_page_directory_t *directory,
NvU32 start_index,
NvU32 pde_count,
uvm_mmu_page_table_alloc_t **phys_addr,
uvm_push_t *push)
{
NvU64 pde_data[2], entry_size;
uvm_gpu_address_t pde_entry_addr = uvm_mmu_gpu_address(tree->gpu, directory->phys_alloc.addr);
NvU32 max_inline_entries;
uvm_push_flag_t push_membar_flag = UVM_PUSH_FLAG_COUNT;
uvm_gpu_address_t inline_data_addr;
uvm_push_inline_data_t inline_data;
NvU32 entry_count, i, j;
UVM_ASSERT(!uvm_mmu_use_cpu(tree));
entry_size = tree->hal->entry_size(directory->depth);
UVM_ASSERT(sizeof(pde_data) >= entry_size);
max_inline_entries = UVM_PUSH_INLINE_DATA_MAX_SIZE / entry_size;
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_NONE;
else if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_GPU;
pde_entry_addr.address += start_index * entry_size;
for (i = 0; i < pde_count;) {
// All but the first memory operation can be pipelined. We respect the
// caller's pipelining settings for the first push.
if (i != 0)
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
entry_count = min(pde_count - i, max_inline_entries);
// No membar is needed until the last memory operation. Otherwise,
// use caller's membar flag.
if ((i + entry_count) < pde_count)
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
else if (push_membar_flag != UVM_PUSH_FLAG_COUNT)
uvm_push_set_flag(push, push_membar_flag);
uvm_push_inline_data_begin(push, &inline_data);
for (j = 0; j < entry_count; j++) {
tree->hal->make_pde(pde_data, phys_addr, directory, start_index + i + j);
uvm_push_inline_data_add(&inline_data, pde_data, entry_size);
}
inline_data_addr = uvm_push_inline_data_end(&inline_data);
tree->gpu->parent->ce_hal->memcopy(push, pde_entry_addr, inline_data_addr, entry_count * entry_size);
i += entry_count;
pde_entry_addr.address += entry_size * entry_count;
}
}
// pde_fill() populates pde_count PDE entries (starting at start_index) with
// the same mapping, i.e., with the same physical address (phys_addr).
// pde_fill() is optimized for pde_count == 1, which is the common case.
static void pde_fill(uvm_page_tree_t *tree,
uvm_page_directory_t *directory,
NvU32 start_index,
NvU32 pde_count,
uvm_mmu_page_table_alloc_t **phys_addr,
uvm_push_t *push)
{
UVM_ASSERT(start_index + pde_count <= uvm_mmu_page_tree_entries(tree, directory->depth, UVM_PAGE_SIZE_AGNOSTIC));
if (push)
pde_fill_gpu(tree, directory, start_index, pde_count, phys_addr, push);
else
pde_fill_cpu(tree, directory, start_index, pde_count, phys_addr);
}
static void phys_mem_init(uvm_page_tree_t *tree, NvU32 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
{
NvU64 clear_bits[2];
uvm_mmu_mode_hal_t *hal = tree->hal;
NvU32 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
NvU8 max_pde_depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_AGNOSTIC) - 1;
if (dir->depth == tree->hal->page_table_depth(page_size)) {
*clear_bits = 0; // Invalid PTE
}
else {
// passing in NULL for the phys_allocs will mark the child entries as invalid
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
hal->make_pde(clear_bits, phys_allocs, dir->depth);
// Passing in NULL for the phys_allocs will mark the child entries as
// invalid.
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
// Make sure that using only clear_bits[0] will work
UVM_ASSERT(hal->entry_size(dir->depth) == sizeof(clear_bits[0]) || clear_bits[0] == clear_bits[1]);
}
// Init with an invalid PTE or clean PDE. Only Maxwell PDEs can have more
// than 512 entries. In this case, we initialize them all with the same
// clean PDE. ATS systems may require clean PDEs with
// ATS_ALLOWED/ATS_NOT_ALLOWED bit settings based on the mapping VA.
// We only clean_bits to 0 at the lowest page table level (PTE table), i.e.,
// when depth is greater than the max_pde_depth.
if ((dir->depth > max_pde_depth) || (entries_count > 512 && !g_uvm_global.ats.enabled)) {
NvU64 clear_bits[2];
// initialize the memory to a reasonable value
if (push) {
tree->gpu->parent->ce_hal->memset_8(push,
uvm_mmu_gpu_address(tree->gpu, dir->phys_alloc.addr),
// If it is not a PTE, make a clean PDE.
if (dir->depth != tree->hal->page_table_depth(page_size)) {
// make_pde() child index is zero/ignored, since it is only used in
// PDEs on ATS-enabled systems where pde_fill() is preferred.
tree->hal->make_pde(clear_bits, phys_allocs, dir, 0);
// Make sure that using only clear_bits[0] will work.
UVM_ASSERT(tree->hal->entry_size(dir->depth) == sizeof(clear_bits[0]) || clear_bits[0] == clear_bits[1]);
}
else {
*clear_bits = 0;
}
// Initialize the memory to a reasonable value.
if (push) {
tree->gpu->parent->ce_hal->memset_8(push,
uvm_mmu_gpu_address(tree->gpu, dir->phys_alloc.addr),
*clear_bits,
dir->phys_alloc.size);
}
else {
uvm_mmu_page_table_cpu_memset_8(tree->gpu,
&dir->phys_alloc,
0,
*clear_bits,
dir->phys_alloc.size);
dir->phys_alloc.size / sizeof(*clear_bits));
}
}
else {
uvm_mmu_page_table_cpu_memset_8(tree->gpu,
&dir->phys_alloc,
0,
*clear_bits,
dir->phys_alloc.size / sizeof(*clear_bits));
pde_fill(tree, dir, 0, entries_count, phys_allocs, push);
}
}
static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
@ -367,8 +486,10 @@ static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
NvLength phys_alloc_size = hal->allocation_size(depth, page_size);
uvm_page_directory_t *dir;
// The page tree doesn't cache PTEs so space is not allocated for entries that are always PTEs.
// 2M PTEs may later become PDEs so pass UVM_PAGE_SIZE_AGNOSTIC, not page_size.
// The page tree doesn't cache PTEs so space is not allocated for entries
// that are always PTEs.
// 2M PTEs may later become PDEs so pass UVM_PAGE_SIZE_AGNOSTIC, not
// page_size.
if (depth == hal->page_table_depth(UVM_PAGE_SIZE_AGNOSTIC))
entry_count = 0;
else
@ -409,108 +530,6 @@ static inline NvU32 index_to_entry(uvm_mmu_mode_hal_t *hal, NvU32 entry_index, N
return hal->entries_per_index(depth) * entry_index + hal->entry_offset(depth, page_size);
}
static void pde_fill_cpu(uvm_page_tree_t *tree,
NvU32 depth,
uvm_mmu_page_table_alloc_t *directory,
NvU32 start_index,
NvU32 pde_count,
uvm_mmu_page_table_alloc_t **phys_addr)
{
NvU64 pde_data[2], entry_size;
UVM_ASSERT(uvm_mmu_use_cpu(tree));
entry_size = tree->hal->entry_size(depth);
UVM_ASSERT(sizeof(pde_data) >= entry_size);
tree->hal->make_pde(pde_data, phys_addr, depth);
if (entry_size == sizeof(pde_data[0]))
uvm_mmu_page_table_cpu_memset_8(tree->gpu, directory, start_index, pde_data[0], pde_count);
else
uvm_mmu_page_table_cpu_memset_16(tree->gpu, directory, start_index, pde_data, pde_count);
}
static void pde_fill_gpu(uvm_page_tree_t *tree,
NvU32 depth,
uvm_mmu_page_table_alloc_t *directory,
NvU32 start_index,
NvU32 pde_count,
uvm_mmu_page_table_alloc_t **phys_addr,
uvm_push_t *push)
{
NvU64 pde_data[2], entry_size;
uvm_gpu_address_t pde_entry_addr = uvm_mmu_gpu_address(tree->gpu, directory->addr);
UVM_ASSERT(!uvm_mmu_use_cpu(tree));
entry_size = tree->hal->entry_size(depth);
UVM_ASSERT(sizeof(pde_data) >= entry_size);
tree->hal->make_pde(pde_data, phys_addr, depth);
pde_entry_addr.address += start_index * entry_size;
if (entry_size == sizeof(pde_data[0])) {
tree->gpu->parent->ce_hal->memset_8(push, pde_entry_addr, pde_data[0], sizeof(pde_data[0]) * pde_count);
}
else {
NvU32 max_inline_entries = UVM_PUSH_INLINE_DATA_MAX_SIZE / sizeof(pde_data);
uvm_gpu_address_t inline_data_addr;
uvm_push_inline_data_t inline_data;
NvU32 membar_flag = 0;
NvU32 i;
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_NONE;
else if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_GPU;
for (i = 0; i < pde_count;) {
NvU32 j;
NvU32 entry_count = min(pde_count - i, max_inline_entries);
uvm_push_inline_data_begin(push, &inline_data);
for (j = 0; j < entry_count; j++)
uvm_push_inline_data_add(&inline_data, pde_data, sizeof(pde_data));
inline_data_addr = uvm_push_inline_data_end(&inline_data);
// All but the first memcopy can be pipelined. We respect the
// caller's pipelining settings for the first push.
if (i != 0)
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
// No membar is needed until the last copy. Otherwise, use
// caller's membar flag.
if (i + entry_count < pde_count)
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
else if (membar_flag)
uvm_push_set_flag(push, membar_flag);
tree->gpu->parent->ce_hal->memcopy(push, pde_entry_addr, inline_data_addr, entry_count * sizeof(pde_data));
i += entry_count;
pde_entry_addr.address += sizeof(pde_data) * entry_count;
}
}
}
// pde_fill() populates pde_count PDE entries (starting at start_index) with
// the same mapping, i.e., with the same physical address (phys_addr).
static void pde_fill(uvm_page_tree_t *tree,
NvU32 depth,
uvm_mmu_page_table_alloc_t *directory,
NvU32 start_index,
NvU32 pde_count,
uvm_mmu_page_table_alloc_t **phys_addr,
uvm_push_t *push)
{
UVM_ASSERT(start_index + pde_count <= uvm_mmu_page_tree_entries(tree, depth, UVM_PAGE_SIZE_AGNOSTIC));
if (push)
pde_fill_gpu(tree, depth, directory, start_index, pde_count, phys_addr, push);
else
pde_fill_cpu(tree, depth, directory, start_index, pde_count, phys_addr);
}
static uvm_page_directory_t *host_pde_write(uvm_page_directory_t *dir,
uvm_page_directory_t *parent,
NvU32 index_in_parent)
@ -540,7 +559,7 @@ static void pde_write(uvm_page_tree_t *tree,
phys_allocs[i] = &entry->phys_alloc;
}
pde_fill(tree, dir->depth, &dir->phys_alloc, entry_index, 1, phys_allocs, push);
pde_fill(tree, dir, entry_index, 1, phys_allocs, push);
}
static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU32 entry_index, NvU32 page_size)
@ -800,7 +819,6 @@ static void free_unused_directories(uvm_page_tree_t *tree,
}
}
}
}
static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU32 page_size, uvm_mmu_page_table_alloc_t *out)
@ -811,10 +829,93 @@ static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU32 page_size, uvm
return phys_mem_allocate(tree, alloc_size, tree->location, UVM_PMM_ALLOC_FLAGS_EVICT, out);
}
static bool page_tree_ats_init_required(uvm_page_tree_t *tree)
{
// We have full control of the kernel page tables mappings, no ATS address
// aliases is expected.
if (tree->type == UVM_PAGE_TREE_TYPE_KERNEL)
return false;
// Enable uvm_page_tree_init() from the page_tree test.
if (uvm_enable_builtin_tests && tree->gpu_va_space == NULL)
return false;
if (!tree->gpu_va_space->ats.enabled)
return false;
return tree->gpu->parent->no_ats_range_required;
}
static NV_STATUS page_tree_ats_init(uvm_page_tree_t *tree)
{
NV_STATUS status;
NvU64 min_va_upper, max_va_lower;
NvU32 page_size;
if (!page_tree_ats_init_required(tree))
return NV_OK;
page_size = uvm_mmu_biggest_page_size(tree);
uvm_cpu_get_unaddressable_range(&max_va_lower, &min_va_upper);
// Potential violation of the UVM internal get/put_ptes contract. get_ptes()
// creates and initializes enough PTEs to populate all PDEs covering the
// no_ats_ranges. We store the no_ats_ranges in the tree, so they can be
// put_ptes()'ed on deinit(). It doesn't preclude the range to be used by a
// future get_ptes(), since we don't write to the PTEs (range->table) from
// the tree->no_ats_ranges.
//
// Lower half
status = uvm_page_tree_get_ptes(tree,
page_size,
max_va_lower,
page_size,
UVM_PMM_ALLOC_FLAGS_EVICT,
&tree->no_ats_ranges[0]);
if (status != NV_OK)
return status;
UVM_ASSERT(tree->no_ats_ranges[0].entry_count == 1);
if (uvm_platform_uses_canonical_form_address()) {
// Upper half
status = uvm_page_tree_get_ptes(tree,
page_size,
min_va_upper - page_size,
page_size,
UVM_PMM_ALLOC_FLAGS_EVICT,
&tree->no_ats_ranges[1]);
if (status != NV_OK)
return status;
UVM_ASSERT(tree->no_ats_ranges[1].entry_count == 1);
}
return NV_OK;
}
static void page_tree_ats_deinit(uvm_page_tree_t *tree)
{
size_t i;
if (page_tree_ats_init_required(tree)) {
for (i = 0; i < ARRAY_SIZE(tree->no_ats_ranges); i++) {
if (tree->no_ats_ranges[i].entry_count)
uvm_page_tree_put_ptes(tree, &tree->no_ats_ranges[i]);
}
memset(tree->no_ats_ranges, 0, sizeof(tree->no_ats_ranges));
}
}
static void map_remap_deinit(uvm_page_tree_t *tree)
{
if (tree->map_remap.pde0.size)
phys_mem_deallocate(tree, &tree->map_remap.pde0);
if (tree->map_remap.pde0) {
phys_mem_deallocate(tree, &tree->map_remap.pde0->phys_alloc);
uvm_kvfree(tree->map_remap.pde0);
tree->map_remap.pde0 = NULL;
}
if (tree->map_remap.ptes_invalid_4k.size)
phys_mem_deallocate(tree, &tree->map_remap.ptes_invalid_4k);
@ -839,10 +940,16 @@ static NV_STATUS map_remap_init(uvm_page_tree_t *tree)
// PDE1-depth(512M) PTE. We first map it to the pde0 directory, then we
// return the PTE for the get_ptes()'s caller.
if (tree->hal->page_sizes() & UVM_PAGE_SIZE_512M) {
status = allocate_page_table(tree, UVM_PAGE_SIZE_2M, &tree->map_remap.pde0);
if (status != NV_OK)
tree->map_remap.pde0 = allocate_directory(tree,
UVM_PAGE_SIZE_2M,
tree->hal->page_table_depth(UVM_PAGE_SIZE_2M),
UVM_PMM_ALLOC_FLAGS_EVICT);
if (tree->map_remap.pde0 == NULL) {
status = NV_ERR_NO_MEMORY;
goto error;
}
}
status = page_tree_begin_acquire(tree, &tree->tracker, &push, "map remap init");
if (status != NV_OK)
goto error;
@ -864,22 +971,23 @@ static NV_STATUS map_remap_init(uvm_page_tree_t *tree)
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
NvU32 depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_4K) - 1;
size_t index_4k = tree->hal->entry_offset(depth, UVM_PAGE_SIZE_4K);
// pde0 depth equals UVM_PAGE_SIZE_2M.
NvU32 pde0_depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_2M);
NvU32 pde0_entries = tree->map_remap.pde0.size / tree->hal->entry_size(pde0_depth);
NvU32 pde0_entries = tree->map_remap.pde0->phys_alloc.size / tree->hal->entry_size(tree->map_remap.pde0->depth);
// The big-page entry is NULL which makes it an invalid entry.
phys_allocs[index_4k] = &tree->map_remap.ptes_invalid_4k;
// By default CE operations include a MEMBAR_SYS. MEMBAR_GPU is
// sufficient when pde0 is allocated in VIDMEM.
if (tree->map_remap.pde0.addr.aperture == UVM_APERTURE_VID)
if (tree->map_remap.pde0->phys_alloc.addr.aperture == UVM_APERTURE_VID)
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
// This is an orphan directory, make_pde() requires a directory to
// compute the VA. The UVM depth map_remap() operates on is not in the
// range make_pde() must operate. We only need to supply the fields used
// by make_pde() to not access invalid memory addresses.
pde_fill(tree,
pde0_depth,
&tree->map_remap.pde0,
tree->map_remap.pde0,
0,
pde0_entries,
(uvm_mmu_page_table_alloc_t **)&phys_allocs,
@ -1006,11 +1114,22 @@ NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
return status;
phys_mem_init(tree, UVM_PAGE_SIZE_AGNOSTIC, tree->root, &push);
return page_tree_end_and_wait(tree, &push);
status = page_tree_end_and_wait(tree, &push);
if (status != NV_OK)
return status;
status = page_tree_ats_init(tree);
if (status != NV_OK)
return status;
return NV_OK;
}
void uvm_page_tree_deinit(uvm_page_tree_t *tree)
{
page_tree_ats_deinit(tree);
UVM_ASSERT(tree->root->ref_count == 0);
// Take the tree lock only to avoid assertions. It is not required for
@ -1249,7 +1368,6 @@ static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
UVM_ASSERT(uvm_gpu_can_address_kernel(tree->gpu, start, size));
while (true) {
// index of the entry, for the first byte of the range, within its
// containing directory
NvU32 start_index;
@ -1281,7 +1399,8 @@ static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
if (dir_cache[dir->depth] == NULL) {
*cur_depth = dir->depth;
// Undo the changes to the tree so that the dir cache remains private to the thread
// Undo the changes to the tree so that the dir cache
// remains private to the thread.
for (i = 0; i < used_count; i++)
host_pde_clear(tree, dirs_used[i]->host_parent, dirs_used[i]->index_in_parent, page_size);
@ -1332,10 +1451,9 @@ static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvLength size, uv
if (uvm_page_table_range_aperture(range) == UVM_APERTURE_VID)
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
phys_alloc[0] = &tree->map_remap.pde0;
phys_alloc[0] = &tree->map_remap.pde0->phys_alloc;
pde_fill(tree,
range->table->depth,
&range->table->phys_alloc,
range->table,
range->start_index,
range->entry_count,
(uvm_mmu_page_table_alloc_t **)&phys_alloc,
@ -1380,7 +1498,8 @@ NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
dir_cache)) == NV_ERR_MORE_PROCESSING_REQUIRED) {
uvm_mutex_unlock(&tree->lock);
// try_get_ptes never needs depth 0, so store a directory at its parent's depth
// try_get_ptes never needs depth 0, so store a directory at its
// parent's depth.
// TODO: Bug 1766655: Allocate everything below cur_depth instead of
// retrying for every level.
dir_cache[cur_depth] = allocate_directory(tree, page_size, cur_depth + 1, pmm_flags);
@ -1663,8 +1782,12 @@ NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
range);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to get PTEs for subrange %zd [0x%llx, 0x%llx) size 0x%llx, part of [0x%llx, 0x%llx)\n",
i, range_start, range_start + range_size, range_size,
start, size);
i,
range_start,
range_start + range_size,
range_size,
start,
size);
goto out;
}
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -215,11 +215,14 @@ struct uvm_mmu_mode_hal_struct
// memory out-of-range error so we can immediately identify bad PTE usage.
NvU64 (*poisoned_pte)(void);
// write a PDE bit-pattern to entry based on the data in entries (which may
// Write a PDE bit-pattern to entry based on the data in allocs (which may
// point to two items for dual PDEs).
// any of allocs are allowed to be NULL, in which case they are to be
// treated as empty.
void (*make_pde)(void *entry, uvm_mmu_page_table_alloc_t **allocs, NvU32 depth);
// Any of allocs are allowed to be NULL, in which case they are to be
// treated as empty. make_pde() uses dir and child_index to compute the
// mapping PDE VA. On ATS-enabled systems, we may set PDE's PCF as
// ATS_ALLOWED or ATS_NOT_ALLOWED based on the mapping PDE VA, even for
// invalid/clean PDE entries.
void (*make_pde)(void *entry, uvm_mmu_page_table_alloc_t **allocs, uvm_page_directory_t *dir, NvU32 child_index);
// size of an entry in a directory/table. Generally either 8 or 16 bytes.
// (in the case of Pascal dual PDEs)
@ -229,7 +232,7 @@ struct uvm_mmu_mode_hal_struct
NvU32 (*entries_per_index)(NvU32 depth);
// For dual PDEs, this is ether 1 or 0, depending on the page size.
// This is used to index the host copy only. GPU PDEs are always entirely
// This is used to index the host copy only. GPU PDEs are always entirely
// re-written using make_pde.
NvLength (*entry_offset)(NvU32 depth, NvU32 page_size);
@ -295,11 +298,16 @@ struct uvm_page_tree_struct
// PDE0 where all big-page entries are invalid, and small-page entries
// point to ptes_invalid_4k.
// pde0 is only used on Pascal-Ampere, i.e., they have the same PDE
// format.
uvm_mmu_page_table_alloc_t pde0;
// pde0 is used on Pascal+ GPUs, i.e., they have the same PDE format.
uvm_page_directory_t *pde0;
} map_remap;
// On ATS-enabled systems where the CPU VA width is smaller than the GPU VA
// width, the excess address range is set with ATS_NOT_ALLOWED on all leaf
// PDEs covering that range. We have at most 2 no_ats_ranges, due to
// canonical form address systems.
uvm_page_table_range_t no_ats_ranges[2];
// Tracker for all GPU operations on the tree
uvm_tracker_t tracker;
};
@ -365,21 +373,32 @@ void uvm_page_tree_deinit(uvm_page_tree_t *tree);
// the same page size without an intervening put_ptes. To duplicate a subset of
// an existing range or change the size of an existing range, use
// uvm_page_table_range_get_upper() and/or uvm_page_table_range_shrink().
NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start, NvLength size,
uvm_pmm_alloc_flags_t pmm_flags, uvm_page_table_range_t *range);
NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 start,
NvLength size,
uvm_pmm_alloc_flags_t pmm_flags,
uvm_page_table_range_t *range);
// Same as uvm_page_tree_get_ptes(), but doesn't synchronize the GPU work.
//
// All pending operations can be waited on with uvm_page_tree_wait().
NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start, NvLength size,
uvm_pmm_alloc_flags_t pmm_flags, uvm_page_table_range_t *range);
NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 start,
NvLength size,
uvm_pmm_alloc_flags_t pmm_flags,
uvm_page_table_range_t *range);
// Returns a single-entry page table range for the addresses passed.
// The size parameter must be a page size supported by this tree.
// This is equivalent to calling uvm_page_tree_get_ptes() with size equal to
// page_size.
NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start,
uvm_pmm_alloc_flags_t pmm_flags, uvm_page_table_range_t *single);
NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 start,
uvm_pmm_alloc_flags_t pmm_flags,
uvm_page_table_range_t *single);
// For a single-entry page table range, write the PDE (which could be a dual
// PDE) to the GPU.
@ -478,8 +497,8 @@ NV_STATUS uvm_page_table_range_vec_create(uvm_page_tree_t *tree,
// new_range_vec will contain the upper portion of range_vec, starting at
// new_end + 1.
//
// new_end + 1 is required to be within the address range of range_vec and be aligned to
// range_vec's page_size.
// new_end + 1 is required to be within the address range of range_vec and be
// aligned to range_vec's page_size.
//
// On failure, the original range vector is left unmodified.
NV_STATUS uvm_page_table_range_vec_split_upper(uvm_page_table_range_vec_t *range_vec,
@ -501,18 +520,22 @@ void uvm_page_table_range_vec_destroy(uvm_page_table_range_vec_t *range_vec);
// for each offset.
// The caller_data pointer is what the caller passed in as caller_data to
// uvm_page_table_range_vec_write_ptes().
typedef NvU64 (*uvm_page_table_range_pte_maker_t)(uvm_page_table_range_vec_t *range_vec, NvU64 offset,
void *caller_data);
typedef NvU64 (*uvm_page_table_range_pte_maker_t)(uvm_page_table_range_vec_t *range_vec,
NvU64 offset,
void *caller_data);
// Write all PTEs covered by the range vector using the given PTE making function.
// Write all PTEs covered by the range vector using the given PTE making
// function.
//
// After writing all the PTEs a TLB invalidate operation is performed including
// the passed in tlb_membar.
//
// See comments about uvm_page_table_range_pte_maker_t for details about the
// PTE making callback.
NV_STATUS uvm_page_table_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec, uvm_membar_t tlb_membar,
uvm_page_table_range_pte_maker_t pte_maker, void *caller_data);
NV_STATUS uvm_page_table_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec,
uvm_membar_t tlb_membar,
uvm_page_table_range_pte_maker_t pte_maker,
void *caller_data);
// Set all PTEs covered by the range vector to an empty PTE
//
@ -636,8 +659,9 @@ static NvU64 uvm_page_table_range_size(uvm_page_table_range_t *range)
// Get the physical address of the entry at entry_index within the range
// (counted from range->start_index).
static uvm_gpu_phys_address_t uvm_page_table_range_entry_address(uvm_page_tree_t *tree, uvm_page_table_range_t *range,
size_t entry_index)
static uvm_gpu_phys_address_t uvm_page_table_range_entry_address(uvm_page_tree_t *tree,
uvm_page_table_range_t *range,
size_t entry_index)
{
NvU32 entry_size = uvm_mmu_pte_size(tree, range->page_size);
uvm_gpu_phys_address_t entry = range->table->phys_alloc.addr;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -146,9 +146,15 @@ static void fake_tlb_invals_disable(void)
g_fake_tlb_invals_tracking_enabled = false;
}
// Fake TLB invalidate VA that just saves off the parameters so that they can be verified later
static void fake_tlb_invalidate_va(uvm_push_t *push, uvm_gpu_phys_address_t pdb,
NvU32 depth, NvU64 base, NvU64 size, NvU32 page_size, uvm_membar_t membar)
// Fake TLB invalidate VA that just saves off the parameters so that they can be
// verified later.
static void fake_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,
NvU64 base,
NvU64 size,
NvU32 page_size,
uvm_membar_t membar)
{
if (!g_fake_tlb_invals_tracking_enabled)
return;
@ -210,8 +216,8 @@ static bool assert_and_reset_last_invalidate(NvU32 expected_depth, bool expected
}
if ((g_last_fake_inval->membar == UVM_MEMBAR_NONE) == expected_membar) {
UVM_TEST_PRINT("Expected %s membar, got %s instead\n",
expected_membar ? "a" : "no",
uvm_membar_string(g_last_fake_inval->membar));
expected_membar ? "a" : "no",
uvm_membar_string(g_last_fake_inval->membar));
result = false;
}
@ -230,7 +236,8 @@ static bool assert_last_invalidate_all(NvU32 expected_depth, bool expected_memba
}
if (g_last_fake_inval->base != 0 || g_last_fake_inval->size != -1) {
UVM_TEST_PRINT("Expected invalidate all but got range [0x%llx, 0x%llx) instead\n",
g_last_fake_inval->base, g_last_fake_inval->base + g_last_fake_inval->size);
g_last_fake_inval->base,
g_last_fake_inval->base + g_last_fake_inval->size);
return false;
}
if (g_last_fake_inval->depth != expected_depth) {
@ -247,15 +254,16 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
UVM_ASSERT(g_fake_tlb_invals_tracking_enabled);
if (g_fake_invals_count == 0) {
UVM_TEST_PRINT("Expected an invalidate for range [0x%llx, 0x%llx), but got none\n",
base, base + size);
UVM_TEST_PRINT("Expected an invalidate for range [0x%llx, 0x%llx), but got none\n", base, base + size);
return false;
}
if ((inval->base != base || inval->size != size) && inval->base != 0 && inval->size != -1) {
UVM_TEST_PRINT("Expected invalidate range [0x%llx, 0x%llx), but got range [0x%llx, 0x%llx) instead\n",
base, base + size,
inval->base, inval->base + inval->size);
base,
base + size,
inval->base,
inval->base + inval->size);
return false;
}
if (inval->depth != expected_depth) {
@ -270,7 +278,13 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
return true;
}
static bool assert_invalidate_range(NvU64 base, NvU64 size, NvU32 page_size, bool allow_inval_all, NvU32 range_depth, NvU32 all_depth, bool expected_membar)
static bool assert_invalidate_range(NvU64 base,
NvU64 size,
NvU32 page_size,
bool allow_inval_all,
NvU32 range_depth,
NvU32 all_depth,
bool expected_membar)
{
NvU32 i;
@ -488,7 +502,6 @@ static NV_STATUS alloc_adjacent_pde_64k_memory(uvm_gpu_t *gpu)
return NV_OK;
}
static NV_STATUS alloc_nearby_pde_64k_memory(uvm_gpu_t *gpu)
{
uvm_page_tree_t tree;
@ -842,6 +855,7 @@ static NV_STATUS get_two_free_apart(uvm_gpu_t *gpu)
TEST_CHECK_RET(range2.entry_count == 256);
TEST_CHECK_RET(range2.table->ref_count == 512);
TEST_CHECK_RET(range1.table == range2.table);
// 4k page is second entry in a dual PDE
TEST_CHECK_RET(range1.table == tree.root->entries[0]->entries[0]->entries[0]->entries[1]);
TEST_CHECK_RET(range1.start_index == 256);
@ -871,6 +885,7 @@ static NV_STATUS get_overlapping_dual_pdes(uvm_gpu_t *gpu)
MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_64K, size, size, &range64k), NV_OK);
TEST_CHECK_RET(range64k.entry_count == 16);
TEST_CHECK_RET(range64k.table->ref_count == 16);
// 4k page is second entry in a dual PDE
TEST_CHECK_RET(range64k.table == tree.root->entries[0]->entries[0]->entries[0]->entries[0]);
TEST_CHECK_RET(range64k.start_index == 16);
@ -1030,10 +1045,13 @@ static NV_STATUS test_tlb_invalidates(uvm_gpu_t *gpu)
// Depth 4
NvU64 extent_pte = UVM_PAGE_SIZE_2M;
// Depth 3
NvU64 extent_pde0 = extent_pte * (1ull << 8);
// Depth 2
NvU64 extent_pde1 = extent_pde0 * (1ull << 9);
// Depth 1
NvU64 extent_pde2 = extent_pde1 * (1ull << 9);
@ -1081,7 +1099,11 @@ static NV_STATUS test_tlb_invalidates(uvm_gpu_t *gpu)
return status;
}
static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree, NvU64 base, NvU64 size, NvU32 min_page_size, NvU32 max_page_size)
static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree,
NvU64 base,
NvU64 size,
NvU32 min_page_size,
NvU32 max_page_size)
{
NV_STATUS status = NV_OK;
uvm_push_t push;
@ -1205,7 +1227,11 @@ static bool assert_range_vec_ptes(uvm_page_table_range_vec_t *range_vec, bool ex
NvU64 expected_pte = expecting_cleared ? 0 : range_vec->size + offset;
if (*pte != expected_pte) {
UVM_TEST_PRINT("PTE is 0x%llx instead of 0x%llx for offset 0x%llx within range [0x%llx, 0x%llx)\n",
*pte, expected_pte, offset, range_vec->start, range_vec->size);
*pte,
expected_pte,
offset,
range_vec->start,
range_vec->size);
return false;
}
offset += range_vec->page_size;
@ -1226,7 +1252,11 @@ static NV_STATUS test_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec
TEST_CHECK_RET(data.status == NV_OK);
TEST_CHECK_RET(data.count == range_vec->size / range_vec->page_size);
TEST_CHECK_RET(assert_invalidate_range_specific(g_last_fake_inval,
range_vec->start, range_vec->size, range_vec->page_size, page_table_depth, membar != UVM_MEMBAR_NONE));
range_vec->start,
range_vec->size,
range_vec->page_size,
page_table_depth,
membar != UVM_MEMBAR_NONE));
TEST_CHECK_RET(assert_range_vec_ptes(range_vec, false));
fake_tlb_invals_disable();
@ -1249,7 +1279,11 @@ static NV_STATUS test_range_vec_clear_ptes(uvm_page_table_range_vec_t *range_vec
return NV_OK;
}
static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree, NvU64 start, NvU64 size, NvU32 page_size, uvm_page_table_range_vec_t **range_vec_out)
static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree,
NvU64 start,
NvU64 size,
NvU32 page_size,
uvm_page_table_range_vec_t **range_vec_out)
{
uvm_page_table_range_vec_t *range_vec;
uvm_pmm_alloc_flags_t pmm_flags = UVM_PMM_ALLOC_FLAGS_EVICT;
@ -1544,25 +1578,28 @@ static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999000LL);
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
uvm_mmu_mode_hal_t *hal;
uvm_page_directory_t dir;
NvU32 i, j, big_page_size, page_size;
dir.depth = 0;
for (i = 0; i < ARRAY_SIZE(big_page_sizes); i++) {
big_page_size = big_page_sizes[i];
hal = gpu->parent->arch_hal->mmu_mode_hal(big_page_size);
memset(phys_allocs, 0, sizeof(phys_allocs));
hal->make_pde(&pde_bits, phys_allocs, 0);
hal->make_pde(&pde_bits, phys_allocs, &dir, 0);
TEST_CHECK_RET(pde_bits == 0x0L);
phys_allocs[0] = &alloc_sys;
phys_allocs[1] = &alloc_vid;
hal->make_pde(&pde_bits, phys_allocs, 0);
hal->make_pde(&pde_bits, phys_allocs, &dir, 0);
TEST_CHECK_RET(pde_bits == 0x1BBBBBBD99999992LL);
phys_allocs[0] = &alloc_vid;
phys_allocs[1] = &alloc_sys;
hal->make_pde(&pde_bits, phys_allocs, 0);
hal->make_pde(&pde_bits, phys_allocs, &dir, 0);
TEST_CHECK_RET(pde_bits == 0x9999999E1BBBBBB1LL);
for (j = 0; j <= 2; j++) {
@ -1632,38 +1669,47 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999000LL);
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
uvm_page_directory_t dir;
// big versions have [11:8] set as well to test the page table merging
uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999900LL);
uvm_mmu_page_table_alloc_t alloc_big_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBBB00LL);
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
dir.index_in_parent = 0;
dir.host_parent = NULL;
dir.depth = 0;
// Make sure cleared PDEs work as expected
hal->make_pde(pde_bits, phys_allocs, 0);
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
TEST_CHECK_RET(pde_bits[0] == 0);
memset(pde_bits, 0xFF, sizeof(pde_bits));
hal->make_pde(pde_bits, phys_allocs, 3);
dir.depth = 3;
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
// Sys and vidmem PDEs
phys_allocs[0] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 0);
dir.depth = 0;
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
TEST_CHECK_RET(pde_bits[0] == 0x3999999999990C);
phys_allocs[0] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 0);
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB0A);
// Dual PDEs
phys_allocs[0] = &alloc_big_sys;
phys_allocs[1] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 3);
dir.depth = 3;
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
TEST_CHECK_RET(pde_bits[0] == 0x3999999999999C && pde_bits[1] == 0x1BBBBBB0A);
phys_allocs[0] = &alloc_big_vid;
phys_allocs[1] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 3);
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBBBA && pde_bits[1] == 0x3999999999990C);
// uncached, i.e., the sysmem data is not cached in GPU's L2 cache. Clear
@ -1719,6 +1765,7 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999000LL);
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
uvm_page_directory_t dir;
// big versions have [11:8] set as well to test the page table merging
uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999900LL);
@ -1726,37 +1773,45 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
dir.index_in_parent = 0;
dir.host_parent = NULL;
dir.depth = 0;
// Make sure cleared PDEs work as expected
hal->make_pde(pde_bits, phys_allocs, 0);
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
TEST_CHECK_RET(pde_bits[0] == 0);
memset(pde_bits, 0xFF, sizeof(pde_bits));
hal->make_pde(pde_bits, phys_allocs, 3);
dir.depth = 3;
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
// Sys and vidmem PDEs
phys_allocs[0] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 0);
dir.depth = 0;
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
TEST_CHECK_RET(pde_bits[0] == 0x3999999999990C);
phys_allocs[0] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 0);
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB0A);
// Dual PDEs
phys_allocs[0] = &alloc_big_sys;
phys_allocs[1] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 3);
dir.depth = 3;
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
TEST_CHECK_RET(pde_bits[0] == 0x3999999999999C && pde_bits[1] == 0x1BBBBBB0A);
phys_allocs[0] = &alloc_big_vid;
phys_allocs[1] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 3);
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBBBA && pde_bits[1] == 0x3999999999990C);
// NO_ATS PDE1 (depth 2)
phys_allocs[0] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 2);
dir.depth = 2;
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
if (g_uvm_global.ats.enabled)
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB2A);
else
@ -1791,104 +1846,203 @@ static NV_STATUS entry_test_ampere(uvm_gpu_t *gpu, entry_test_page_size_func ent
static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
{
NV_STATUS status = NV_OK;
NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
NvU64 pde_bits[2];
uvm_page_directory_t *dirs[5];
size_t i, num_page_sizes;
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999999000LL);
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0xBBBBBBB000LL);
// big versions have [11:8] set as well to test the page table merging
// Big versions have [11:8] set as well to test the page table merging
uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999999900LL);
uvm_mmu_page_table_alloc_t alloc_big_vid = fake_table_alloc(UVM_APERTURE_VID, 0xBBBBBBBB00LL);
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
// Make sure cleared PDEs work as expected
hal->make_pde(pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits[0] == 0);
memset(dirs, 0, sizeof(dirs));
// Fake directory tree.
for (i = 0; i < ARRAY_SIZE(dirs); i++) {
dirs[i] = uvm_kvmalloc_zero(sizeof(uvm_page_directory_t) + sizeof(dirs[i]->entries[0]) * 512);
TEST_CHECK_GOTO(dirs[i] != NULL, cleanup);
dirs[i]->depth = i;
dirs[i]->index_in_parent = 0;
if (i == 0)
dirs[i]->host_parent = NULL;
else
dirs[i]->host_parent = dirs[i - 1];
}
// Make sure cleared PDEs work as expected.
hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
TEST_CHECK_GOTO(pde_bits[0] == 0, cleanup);
// Cleared PDEs work as expected for big and small PDEs.
memset(pde_bits, 0xFF, sizeof(pde_bits));
hal->make_pde(pde_bits, phys_allocs, 4);
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
hal->make_pde(pde_bits, phys_allocs, dirs[4], 0);
TEST_CHECK_GOTO(pde_bits[0] == 0 && pde_bits[1] == 0, cleanup);
// Sys and vidmem PDEs, uncached ATS allowed.
phys_allocs[0] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits[0] == 0x999999999900C);
hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999900C, cleanup);
phys_allocs[0] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 0);
TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBB00A);
hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
TEST_CHECK_GOTO(pde_bits[0] == 0xBBBBBBB00A, cleanup);
// Dual PDEs, uncached.
// Dual PDEs, uncached. We don't use child_dir in the depth 4 checks because
// our policy decides the PDE's PCF without using it.
phys_allocs[0] = &alloc_big_sys;
phys_allocs[1] = &alloc_vid;
hal->make_pde(pde_bits, phys_allocs, 4);
TEST_CHECK_RET(pde_bits[0] == 0x999999999991C && pde_bits[1] == 0xBBBBBBB01A);
hal->make_pde(pde_bits, phys_allocs, dirs[4], 0);
if (g_uvm_global.ats.enabled)
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999991C && pde_bits[1] == 0xBBBBBBB01A, cleanup);
else
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999990C && pde_bits[1] == 0xBBBBBBB00A, cleanup);
phys_allocs[0] = &alloc_big_vid;
phys_allocs[1] = &alloc_sys;
hal->make_pde(pde_bits, phys_allocs, 4);
TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBBB1A && pde_bits[1] == 0x999999999901C);
hal->make_pde(pde_bits, phys_allocs, dirs[4], 0);
if (g_uvm_global.ats.enabled)
TEST_CHECK_GOTO(pde_bits[0] == 0xBBBBBBBB1A && pde_bits[1] == 0x999999999901C, cleanup);
else
TEST_CHECK_GOTO(pde_bits[0] == 0xBBBBBBBB0A && pde_bits[1] == 0x999999999900C, cleanup);
// We only need to test make_pde() on ATS when the CPU VA width < GPU's.
if (g_uvm_global.ats.enabled && uvm_cpu_num_va_bits() < hal->num_va_bits()) {
phys_allocs[0] = &alloc_sys;
dirs[1]->index_in_parent = 0;
hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999900C, cleanup);
dirs[2]->index_in_parent = 0;
hal->make_pde(pde_bits, phys_allocs, dirs[1], 0);
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
dirs[2]->index_in_parent = 1;
hal->make_pde(pde_bits, phys_allocs, dirs[1], 1);
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
dirs[2]->index_in_parent = 2;
hal->make_pde(pde_bits, phys_allocs, dirs[1], 2);
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
dirs[2]->index_in_parent = 511;
hal->make_pde(pde_bits, phys_allocs, dirs[1], 511);
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
dirs[1]->index_in_parent = 1;
hal->make_pde(pde_bits, phys_allocs, dirs[0], 1);
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999900C, cleanup);
dirs[2]->index_in_parent = 0;
hal->make_pde(pde_bits, phys_allocs, dirs[1], 0);
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
dirs[2]->index_in_parent = 509;
hal->make_pde(pde_bits, phys_allocs, dirs[1], 509);
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
dirs[2]->index_in_parent = 510;
hal->make_pde(pde_bits, phys_allocs, dirs[1], 510);
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
phys_allocs[0] = NULL;
dirs[1]->index_in_parent = 0;
hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
TEST_CHECK_GOTO(pde_bits[0] == 0x0, cleanup);
dirs[2]->index_in_parent = 0;
hal->make_pde(pde_bits, phys_allocs, dirs[1], 0);
TEST_CHECK_GOTO(pde_bits[0] == 0x0, cleanup);
dirs[2]->index_in_parent = 2;
hal->make_pde(pde_bits, phys_allocs, dirs[1], 2);
TEST_CHECK_GOTO(pde_bits[0] == 0x10, cleanup);
dirs[1]->index_in_parent = 1;
dirs[2]->index_in_parent = 509;
hal->make_pde(pde_bits, phys_allocs, dirs[1], 509);
TEST_CHECK_GOTO(pde_bits[0] == 0x10, cleanup);
dirs[2]->index_in_parent = 510;
hal->make_pde(pde_bits, phys_allocs, dirs[1], 510);
TEST_CHECK_GOTO(pde_bits[0] == 0x0, cleanup);
}
// uncached, i.e., the sysmem data is not cached in GPU's L2 cache, and
// access counters disabled.
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
0x9999999999000LL,
UVM_PROT_READ_WRITE_ATOMIC,
UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) == 0x999999999968D);
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
0x9999999999000LL,
UVM_PROT_READ_WRITE_ATOMIC,
UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) == 0x999999999968D,
cleanup);
// change to cached.
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
0x9999999999000LL,
UVM_PROT_READ_WRITE_ATOMIC,
UVM_MMU_PTE_FLAGS_CACHED | UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) ==
0x9999999999685);
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
0x9999999999000LL,
UVM_PROT_READ_WRITE_ATOMIC,
UVM_MMU_PTE_FLAGS_CACHED | UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) ==
0x9999999999685,
cleanup);
// enable access counters.
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
0x9999999999000LL,
UVM_PROT_READ_WRITE_ATOMIC,
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999605);
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
0x9999999999000LL,
UVM_PROT_READ_WRITE_ATOMIC,
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999605,
cleanup);
// remove atomic
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
0x9999999999000LL,
UVM_PROT_READ_WRITE,
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999645);
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
0x9999999999000LL,
UVM_PROT_READ_WRITE,
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999645,
cleanup);
// read only
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
0x9999999999000LL,
UVM_PROT_READ_ONLY,
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999665);
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
0x9999999999000LL,
UVM_PROT_READ_ONLY,
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999665,
cleanup);
// local video
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_VID,
0xBBBBBBB000LL,
UVM_PROT_READ_ONLY,
UVM_MMU_PTE_FLAGS_CACHED) == 0xBBBBBBB661);
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_VID,
0xBBBBBBB000LL,
UVM_PROT_READ_ONLY,
UVM_MMU_PTE_FLAGS_CACHED) == 0xBBBBBBB661,
cleanup);
// peer 1
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_PEER_1,
0xBBBBBBB000LL,
UVM_PROT_READ_ONLY,
UVM_MMU_PTE_FLAGS_CACHED) == 0x200000BBBBBBB663);
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_PEER_1,
0xBBBBBBB000LL,
UVM_PROT_READ_ONLY,
UVM_MMU_PTE_FLAGS_CACHED) == 0x200000BBBBBBB663,
cleanup);
// sparse
TEST_CHECK_RET(hal->make_sparse_pte() == 0x8);
TEST_CHECK_GOTO(hal->make_sparse_pte() == 0x8, cleanup);
// sked reflected
TEST_CHECK_RET(hal->make_sked_reflected_pte() == 0xF09);
TEST_CHECK_GOTO(hal->make_sked_reflected_pte() == 0xF09, cleanup);
num_page_sizes = get_page_sizes(gpu, page_sizes);
for (i = 0; i < num_page_sizes; i++)
TEST_NV_CHECK_RET(entry_test_page_size(gpu, page_sizes[i]));
TEST_NV_CHECK_GOTO(entry_test_page_size(gpu, page_sizes[i]), cleanup);
return NV_OK;
cleanup:
for (i = 0; i < ARRAY_SIZE(dirs); i++)
uvm_kvfree(dirs[i]);
return status;
}
static NV_STATUS alloc_4k_maxwell(uvm_gpu_t *gpu)
@ -2303,7 +2457,8 @@ NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *fil
gpu->parent = parent_gpu;
// At least test_tlb_invalidates() relies on global state
// (g_tlb_invalidate_*) so make sure only one test instance can run at a time.
// (g_tlb_invalidate_*) so make sure only one test instance can run at a
// time.
uvm_mutex_lock(&g_uvm_global.global_lock);
// Allocate the fake TLB tracking state. Notably tests still need to enable
@ -2311,7 +2466,13 @@ NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *fil
// calls.
TEST_NV_CHECK_GOTO(fake_tlb_invals_alloc(), done);
TEST_NV_CHECK_GOTO(maxwell_test_page_tree(gpu), done);
// We prevent the maxwell_test_page_tree test from running on ATS-enabled
// systems. On "fake" Maxwell-based ATS systems pde_fill() may push more
// methods than what we support in UVM. Specifically, on
// uvm_page_tree_init() which eventually calls phys_mem_init(). On Maxwell,
// upper PDE levels have more than 512 entries.
if (!g_uvm_global.ats.enabled)
TEST_NV_CHECK_GOTO(maxwell_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(pascal_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(volta_test_page_tree(gpu), done);
TEST_NV_CHECK_GOTO(ampere_test_page_tree(gpu), done);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2020 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -100,4 +100,6 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->smc.supported = false;
parent_gpu->plc_supported = false;
parent_gpu->no_ats_range_required = false;
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2020 NVIDIA Corporation
Copyright (c) 2015-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -140,11 +140,18 @@ static NvU64 small_half_pde_pascal(uvm_mmu_page_table_alloc_t *phys_alloc)
return pde_bits;
}
static void make_pde_pascal(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
static void make_pde_pascal(void *entry,
uvm_mmu_page_table_alloc_t **phys_allocs,
uvm_page_directory_t *dir,
NvU32 child_index)
{
NvU32 entry_count = entries_per_index_pascal(depth);
NvU32 entry_count;
NvU64 *entry_bits = (NvU64 *)entry;
UVM_ASSERT(dir);
entry_count = entries_per_index_pascal(dir->depth);
if (entry_count == 1) {
*entry_bits = single_pde_pascal(*phys_allocs);
}
@ -152,7 +159,8 @@ static void make_pde_pascal(void *entry, uvm_mmu_page_table_alloc_t **phys_alloc
entry_bits[MMU_BIG] = big_half_pde_pascal(phys_allocs[MMU_BIG]);
entry_bits[MMU_SMALL] = small_half_pde_pascal(phys_allocs[MMU_SMALL]);
// This entry applies to the whole dual PDE but is stored in the lower bits
// This entry applies to the whole dual PDE but is stored in the lower
// bits.
entry_bits[MMU_BIG] |= HWCONST64(_MMU_VER2, DUAL_PDE, IS_PDE, TRUE);
}
else {

View File

@ -36,6 +36,7 @@
#include "uvm_mmu.h"
#include "uvm_gpu_access_counters.h"
#include "uvm_pmm_sysmem.h"
#include "uvm_migrate_pageable.h"
static NV_STATUS uvm_test_get_gpu_ref_count(UVM_TEST_GET_GPU_REF_COUNT_PARAMS *params, struct file *filp)
{
@ -331,6 +332,7 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TEST_CGROUP_ACCOUNTING_SUPPORTED, uvm_test_cgroup_accounting_supported);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SPLIT_INVALIDATE_DELAY, uvm_test_split_invalidate_delay);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_CPU_CHUNK_API, uvm_test_cpu_chunk_api);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SKIP_MIGRATE_VMA, uvm_test_skip_migrate_vma);
}
return -EINVAL;

View File

@ -28,6 +28,13 @@
#include "uvm_ioctl.h"
#include "nv_uvm_types.h"
#define UVM_TEST_SKIP_MIGRATE_VMA UVM_TEST_IOCTL_BASE(103)
typedef struct
{
NvBool skip; // In
NV_STATUS rmStatus; // Out
} UVM_TEST_SKIP_MIGRATE_VMA_PARAMS;
#ifdef __cplusplus
extern "C" {
#endif

View File

@ -1082,25 +1082,19 @@ void uvm_tools_broadcast_replay(uvm_gpu_t *gpu,
}
void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu,
NvU32 batch_id,
uvm_fault_client_type_t client_type)
void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_client_type_t client_type)
{
UVM_ASSERT(!gpu->parent->has_clear_faulted_channel_method);
if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay))
return;
record_replay_event_helper(gpu->id,
batch_id,
client_type,
NV_GETTIME(),
gpu->parent->host_hal->get_time(gpu));
record_replay_event_helper(gpu->id, batch_id, client_type, NV_GETTIME(), gpu->parent->host_hal->get_time(gpu));
}
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *buffer_entry,
bool on_managed)
bool on_managed_phys)
{
UvmEventEntry entry;
UvmEventTestAccessCounterInfo *info = &entry.testEventData.accessCounter;
@ -1119,6 +1113,7 @@ void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
info->srcIndex = uvm_id_value(gpu->id);
info->address = buffer_entry->address.address;
info->isVirtual = buffer_entry->address.is_virtual? 1: 0;
if (buffer_entry->address.is_virtual) {
info->instancePtr = buffer_entry->virtual_info.instance_ptr.address;
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
@ -1126,9 +1121,10 @@ void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
}
else {
info->aperture = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
info->physOnManaged = on_managed_phys? 1 : 0;
}
info->isFromCpu = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0;
info->onManaged = on_managed? 1 : 0;
info->value = buffer_entry->counter_value;
info->subGranularity = buffer_entry->sub_granularity;
info->bank = buffer_entry->bank;

View File

@ -102,18 +102,13 @@ void uvm_tools_record_read_duplicate_invalidate(uvm_va_block_t *va_block,
uvm_va_block_region_t region,
const uvm_page_mask_t *page_mask);
void uvm_tools_broadcast_replay(uvm_gpu_t *gpu,
uvm_push_t *push,
NvU32 batch_id,
uvm_fault_client_type_t client_type);
void uvm_tools_broadcast_replay(uvm_gpu_t *gpu, uvm_push_t *push, NvU32 batch_id, uvm_fault_client_type_t client_type);
void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu,
NvU32 batch_id,
uvm_fault_client_type_t client_type);
void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_client_type_t client_type);
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *buffer_entry,
bool on_managed);
bool on_managed_phys);
void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2021 NVIDIA Corporation
Copyright (c) 2017-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -93,4 +93,6 @@ void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->smc.supported = false;
parent_gpu->plc_supported = true;
parent_gpu->no_ats_range_required = false;
}

View File

@ -967,8 +967,10 @@ typedef struct
NvU8 isFromCpu;
NvU8 veId;
NvU8 onManaged; // The access counter notification was triggered on
// a managed memory region
// The physical access counter notification was triggered on a managed
// memory region. This is not set for virtual access counter notifications.
NvU8 physOnManaged;
NvU32 value;
NvU32 subGranularity;

View File

@ -1760,6 +1760,21 @@ static NvU32 block_phys_page_size(uvm_va_block_t *block, block_phys_page_t page)
return (NvU32)chunk_size;
}
NvU32 uvm_va_block_get_physical_size(uvm_va_block_t *block,
uvm_processor_id_t processor,
uvm_page_index_t page_index)
{
block_phys_page_t page;
UVM_ASSERT(block);
uvm_assert_mutex_locked(&block->lock);
page = block_phys_page(processor, page_index);
return block_phys_page_size(block, page);
}
static uvm_pte_bits_cpu_t get_cpu_pte_bit_index(uvm_prot_t prot)
{
uvm_pte_bits_cpu_t pte_bit_index = UVM_PTE_BITS_CPU_MAX;
@ -8248,14 +8263,6 @@ void uvm_va_block_munmap_region(uvm_va_block_t *va_block,
event_data.block_munmap.region = region;
uvm_perf_event_notify(&va_space->perf_events, UVM_PERF_EVENT_BLOCK_MUNMAP, &event_data);
// Set a flag so that GPU fault events are flushed since they might refer
// to the region being unmapped.
// Note that holding the va_block lock prevents GPU VA spaces from
// being removed so the registered_gpu_va_spaces mask is stable.
for_each_gpu_id_in_mask(gpu_id, &va_space->registered_gpu_va_spaces) {
uvm_processor_mask_set_atomic(&va_space->needs_fault_buffer_flush, gpu_id);
}
// Release any remaining vidmem chunks in the given region.
for_each_gpu_id(gpu_id) {
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(va_block, gpu_id);
@ -10155,6 +10162,34 @@ static uvm_processor_id_t block_select_residency(uvm_va_block_t *va_block,
uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(preferred_location)], processor_id))
return preferred_location;
// Check if we should map the closest resident processor remotely on remote CPU fault
//
// When faulting on CPU, there's a linux process on behalf of it, which is associated
// with a unique VM pointed by current->mm. A block of memory residing on GPU is also
// associated with VM, pointed by va_block_context->mm. If they match, it's a regular
// (local) fault, and we may want to migrate a page from GPU to CPU.
// If it's a 'remote' fault, i.e. linux process differs from one associated with block
// VM, we might preserve residence.
//
// Establishing a remote fault without access counters means the memory could stay in
// the wrong spot for a long time, which is why we prefer to avoid creating remote
// mappings. However when NIC accesses a memory residing on GPU, it's worth to keep it
// in place for NIC accesses.
//
// The logic that's used to detect remote faulting also keeps memory in place for
// ptrace accesses. We would prefer to control those policies separately, but the
// NIC case takes priority.
// If the accessing processor is CPU, we're either handling a fault
// from other than owning process, or we're handling an MOMC
// notification. Only prevent migration for the former.
if (UVM_ID_IS_CPU(processor_id) &&
operation != UVM_SERVICE_OPERATION_ACCESS_COUNTERS &&
uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(closest_resident_processor)], processor_id) &&
va_block_context->mm != current->mm) {
UVM_ASSERT(va_block_context->mm != NULL);
return closest_resident_processor;
}
// If the page is resident on a processor other than the preferred location,
// or the faulting processor can't access the preferred location, we select
// the faulting processor as the new residency.
@ -10713,7 +10748,7 @@ NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t processor_id,
uvm_page_index_t page_index,
uvm_fault_type_t access_type,
uvm_fault_access_type_t access_type,
bool allow_migration)
{
uvm_va_range_t *va_range = va_block->va_range;

View File

@ -1000,7 +1000,7 @@ NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_processor_id_t processor_id,
uvm_page_index_t page_index,
uvm_fault_type_t access_type,
uvm_fault_access_type_t access_type,
bool allow_migration);
// API for access privilege revocation
@ -2072,6 +2072,14 @@ void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
// Locking: The va_block lock must be held.
void uvm_va_block_remove_cpu_chunks(uvm_va_block_t *va_block, uvm_va_block_region_t region);
// Get the size of the physical allocation backing the page at page_index on the
// specified processor in the block. Returns 0 if the address is not resident on
// the specified processor.
// Locking: The va_block lock must be held.
NvU32 uvm_va_block_get_physical_size(uvm_va_block_t *block,
uvm_processor_id_t processor,
uvm_page_index_t page_index);
// Get CPU page size or 0 if it is not mapped
NvU32 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block,
uvm_page_index_t page_index);

View File

@ -193,7 +193,8 @@ uvm_va_policy_node_t *uvm_va_policy_node_iter_next(uvm_va_block_t *va_block, uvm
for ((node) = uvm_va_policy_node_iter_first((va_block), (start), (end)), \
(next) = uvm_va_policy_node_iter_next((va_block), (node), (end)); \
(node); \
(node) = (next))
(node) = (next), \
(next) = uvm_va_policy_node_iter_next((va_block), (node), (end)))
// Returns the first policy in the range [start, end], if any.
// Locking: The va_block lock must be held.

View File

@ -1540,7 +1540,6 @@ static void remove_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space,
atomic_inc(&va_space->gpu_va_space_deferred_free.num_pending);
uvm_processor_mask_clear(&va_space->registered_gpu_va_spaces, gpu_va_space->gpu->id);
uvm_processor_mask_clear_atomic(&va_space->needs_fault_buffer_flush, gpu_va_space->gpu->id);
va_space->gpu_va_spaces[uvm_id_gpu_index(gpu_va_space->gpu->id)] = NULL;
gpu_va_space->state = UVM_GPU_VA_SPACE_STATE_DEAD;
}

View File

@ -253,17 +253,6 @@ struct uvm_va_space_struct
// corrupting state.
uvm_processor_mask_t gpu_unregister_in_progress;
// On VMA destruction, the fault buffer needs to be flushed for all the GPUs
// registered in the VA space to avoid leaving stale entries of the VA range
// that is going to be destroyed. Otherwise, these fault entries can be
// attributed to new VA ranges reallocated at the same addresses. However,
// uvm_vm_close is called with mm->mmap_lock taken and we cannot take the
// ISR lock. Therefore, we use a flag to notify the GPU fault handler that
// the fault buffer needs to be flushed, before servicing the faults that
// belong to the va_space. The bits are set and cleared atomically so no
// va_space lock is required.
uvm_processor_mask_t needs_fault_buffer_flush;
// Mask of processors that are participating in system-wide atomics
uvm_processor_mask_t system_wide_atomics_enabled_processors;
@ -353,6 +342,7 @@ struct uvm_va_space_struct
struct
{
bool page_prefetch_enabled;
bool skip_migrate_vma;
atomic_t migrate_vma_allocation_fail_nth;

View File

@ -215,7 +215,13 @@ bool uvm_va_space_mm_enabled(uvm_va_space_t *va_space)
static struct mmu_notifier_ops uvm_mmu_notifier_ops_ats =
{
#if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE)
.invalidate_range = uvm_mmu_notifier_invalidate_range_ats,
#elif defined(NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS)
.arch_invalidate_secondary_tlbs = uvm_mmu_notifier_invalidate_range_ats,
#else
#error One of invalidate_range/arch_invalid_secondary must be present
#endif
};
static int uvm_mmu_notifier_register(uvm_va_space_mm_t *va_space_mm)

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -98,4 +98,6 @@ void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->smc.supported = false;
parent_gpu->plc_supported = false;
parent_gpu->no_ats_range_required = false;
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2021 NVIDIA Corporation
Copyright (c) 2017-2023 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -145,13 +145,20 @@ static NvU64 small_half_pde_volta(uvm_mmu_page_table_alloc_t *phys_alloc)
return pde_bits;
}
static void make_pde_volta(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
static void make_pde_volta(void *entry,
uvm_mmu_page_table_alloc_t **phys_allocs,
uvm_page_directory_t *dir,
NvU32 child_index)
{
NvU32 entry_count = entries_per_index_volta(depth);
NvU32 entry_count;
NvU64 *entry_bits = (NvU64 *)entry;
UVM_ASSERT(dir);
entry_count = entries_per_index_volta(dir->depth);
if (entry_count == 1) {
*entry_bits = single_pde_volta(*phys_allocs, depth);
*entry_bits = single_pde_volta(*phys_allocs, dir->depth);
}
else if (entry_count == 2) {
entry_bits[MMU_BIG] = big_half_pde_volta(phys_allocs[MMU_BIG]);

View File

@ -23,10 +23,16 @@
#include "internal_crypt_lib.h"
#ifdef USE_LKCA
#ifndef NV_CRYPTO_TFM_CTX_ALIGNED_PRESENT
#include <crypto/internal/hash.h>
#endif
#endif
void *lkca_hash_new(const char* alg_name)
{
#ifndef USE_LKCA
return false;
return NULL;
#else
//XXX: can we reuse crypto_shash part and just allocate desc
struct crypto_shash *alg;
@ -87,9 +93,24 @@ bool lkca_hmac_duplicate(struct shash_desc *dst, struct shash_desc const *src)
struct crypto_shash *src_tfm = src->tfm;
struct crypto_shash *dst_tfm = dst->tfm;
int ss = crypto_shash_statesize(dst_tfm);
#ifdef NV_CRYPTO_TFM_CTX_ALIGNED_PRESENT
char *src_ipad = crypto_tfm_ctx_aligned(&src_tfm->base);
char *dst_ipad = crypto_tfm_ctx_aligned(&dst_tfm->base);
int ss = crypto_shash_statesize(dst_tfm);
#else
int ctx_size = crypto_shash_alg(dst_tfm)->base.cra_ctxsize;
char *src_ipad = crypto_shash_ctx(src_tfm);
char *dst_ipad = crypto_shash_ctx(dst_tfm);
/*
* Actual struct definition is hidden, so I assume data we need is at
* the end. In 6.0 the struct has a pointer to crpyto_shash followed by:
* 'u8 ipad[statesize];', then 'u8 opad[statesize];'
*/
src_ipad += ctx_size - 2 * ss;
dst_ipad += ctx_size - 2 * ss;
#endif
memcpy(dst_ipad, src_ipad, crypto_shash_blocksize(src->tfm));
memcpy(dst_ipad + ss, src_ipad + ss, crypto_shash_blocksize(src->tfm));
crypto_shash_clear_flags(dst->tfm, CRYPTO_TFM_NEED_KEY);

View File

@ -156,7 +156,7 @@ NvS32 NV_API_CALL nv_request_msix_irq(nv_linux_state_t *nvl)
{
for( j = 0; j < i; j++)
{
free_irq(nvl->msix_entries[i].vector, (void *)nvl);
free_irq(nvl->msix_entries[j].vector, (void *)nvl);
}
break;
}

View File

@ -316,14 +316,14 @@ int nvidia_p2p_init_mapping(
return -ENOTSUPP;
}
EXPORT_SYMBOL(nvidia_p2p_init_mapping);
NV_EXPORT_SYMBOL(nvidia_p2p_init_mapping);
int nvidia_p2p_destroy_mapping(uint64_t p2p_token)
{
return -ENOTSUPP;
}
EXPORT_SYMBOL(nvidia_p2p_destroy_mapping);
NV_EXPORT_SYMBOL(nvidia_p2p_destroy_mapping);
static void nv_p2p_mem_info_free_callback(void *data)
{
@ -506,8 +506,13 @@ static int nv_p2p_get_pages(
(*page_table)->page_size = page_size_index;
os_free_mem(physical_addresses);
physical_addresses = NULL;
os_free_mem(wreqmb_h);
wreqmb_h = NULL;
os_free_mem(rreqmb_h);
rreqmb_h = NULL;
if (free_callback != NULL)
{
@ -582,7 +587,7 @@ int nvidia_p2p_get_pages(
p2p_token, va_space, virtual_address,
length, page_table, free_callback, data);
}
EXPORT_SYMBOL(nvidia_p2p_get_pages);
NV_EXPORT_SYMBOL(nvidia_p2p_get_pages);
int nvidia_p2p_get_pages_persistent(
uint64_t virtual_address,
@ -600,7 +605,7 @@ int nvidia_p2p_get_pages_persistent(
virtual_address, length, page_table,
NULL, NULL);
}
EXPORT_SYMBOL(nvidia_p2p_get_pages_persistent);
NV_EXPORT_SYMBOL(nvidia_p2p_get_pages_persistent);
/*
* This function is a no-op, but is left in place (for now), in order to allow
@ -613,7 +618,7 @@ int nvidia_p2p_free_page_table(struct nvidia_p2p_page_table *page_table)
return 0;
}
EXPORT_SYMBOL(nvidia_p2p_free_page_table);
NV_EXPORT_SYMBOL(nvidia_p2p_free_page_table);
int nvidia_p2p_put_pages(
uint64_t p2p_token,
@ -645,7 +650,7 @@ int nvidia_p2p_put_pages(
return nvidia_p2p_map_status(status);
}
EXPORT_SYMBOL(nvidia_p2p_put_pages);
NV_EXPORT_SYMBOL(nvidia_p2p_put_pages);
int nvidia_p2p_put_pages_persistent(
uint64_t virtual_address,
@ -685,7 +690,7 @@ int nvidia_p2p_put_pages_persistent(
return nvidia_p2p_map_status(status);
}
EXPORT_SYMBOL(nvidia_p2p_put_pages_persistent);
NV_EXPORT_SYMBOL(nvidia_p2p_put_pages_persistent);
int nvidia_p2p_dma_map_pages(
struct pci_dev *peer,
@ -800,7 +805,7 @@ failed:
return nvidia_p2p_map_status(status);
}
EXPORT_SYMBOL(nvidia_p2p_dma_map_pages);
NV_EXPORT_SYMBOL(nvidia_p2p_dma_map_pages);
int nvidia_p2p_dma_unmap_pages(
struct pci_dev *peer,
@ -840,7 +845,7 @@ int nvidia_p2p_dma_unmap_pages(
return 0;
}
EXPORT_SYMBOL(nvidia_p2p_dma_unmap_pages);
NV_EXPORT_SYMBOL(nvidia_p2p_dma_unmap_pages);
/*
* This function is a no-op, but is left in place (for now), in order to allow
@ -855,7 +860,7 @@ int nvidia_p2p_free_dma_mapping(
return 0;
}
EXPORT_SYMBOL(nvidia_p2p_free_dma_mapping);
NV_EXPORT_SYMBOL(nvidia_p2p_free_dma_mapping);
int nvidia_p2p_register_rsync_driver(
nvidia_p2p_rsync_driver_t *driver,
@ -884,7 +889,7 @@ int nvidia_p2p_register_rsync_driver(
driver->wait_for_rsync, data);
}
EXPORT_SYMBOL(nvidia_p2p_register_rsync_driver);
NV_EXPORT_SYMBOL(nvidia_p2p_register_rsync_driver);
void nvidia_p2p_unregister_rsync_driver(
nvidia_p2p_rsync_driver_t *driver,
@ -916,7 +921,7 @@ void nvidia_p2p_unregister_rsync_driver(
driver->wait_for_rsync, data);
}
EXPORT_SYMBOL(nvidia_p2p_unregister_rsync_driver);
NV_EXPORT_SYMBOL(nvidia_p2p_unregister_rsync_driver);
int nvidia_p2p_get_rsync_registers(
nvidia_p2p_rsync_reg_info_t **reg_info
@ -1009,7 +1014,7 @@ int nvidia_p2p_get_rsync_registers(
return 0;
}
EXPORT_SYMBOL(nvidia_p2p_get_rsync_registers);
NV_EXPORT_SYMBOL(nvidia_p2p_get_rsync_registers);
void nvidia_p2p_put_rsync_registers(
nvidia_p2p_rsync_reg_info_t *reg_info
@ -1041,4 +1046,4 @@ void nvidia_p2p_put_rsync_registers(
os_free_mem(reg_info);
}
EXPORT_SYMBOL(nvidia_p2p_put_rsync_registers);
NV_EXPORT_SYMBOL(nvidia_p2p_put_rsync_registers);

View File

@ -1224,12 +1224,11 @@ static int nv_start_device(nv_state_t *nv, nvidia_stack_t *sp)
rm_read_registry_dword(sp, nv, NV_REG_ENABLE_MSI, &msi_config);
if (msi_config == 1)
{
if (pci_find_capability(nvl->pci_dev, PCI_CAP_ID_MSIX))
if (nvl->pci_dev->msix_cap && rm_is_msix_allowed(sp, nv))
{
nv_init_msix(nv);
}
if (pci_find_capability(nvl->pci_dev, PCI_CAP_ID_MSI) &&
!(nv->flags & NV_FLAG_USES_MSIX))
if (nvl->pci_dev->msi_cap && !(nv->flags & NV_FLAG_USES_MSIX))
{
nv_init_msi(nv);
}

View File

@ -195,6 +195,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += devm_clk_bulk_get_all
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_task_ioprio
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mdev_set_iommu_device
NV_CONFTEST_FUNCTION_COMPILE_TESTS += offline_and_remove_memory
NV_CONFTEST_FUNCTION_COMPILE_TESTS += crypto_tfm_ctx_aligned
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_of_node_to_nid
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_sme_active
@ -215,6 +216,7 @@ NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_get_dram_num_channe
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tegra_dram_types
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_pxm_to_node
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_screen_info
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_screen_info
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_i2c_bus_status
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tegra_fuse_control_read
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tegra_get_platform

View File

@ -46,6 +46,11 @@ NvlStatus nvlink_lib_unload(void);
*/
NvlStatus nvlink_lib_ioctl_ctrl(nvlink_ioctrl_params *ctrl_params);
/*
* Gets number of devices with type deviceType
*/
NvlStatus nvlink_lib_return_device_count_by_type(NvU32 deviceType, NvU32 *numDevices);
#ifdef __cplusplus
}
#endif

View File

@ -28,6 +28,11 @@
#include "nv-time.h"
#include <linux/mmzone.h>
#include <linux/numa.h>
#include <linux/pid.h>
extern char *NVreg_TemporaryFilePath;
#define MAX_ERROR_STRING 512
@ -1242,9 +1247,12 @@ void NV_API_CALL os_get_screen_info(
* SYSFB_SIMPLEFB registers a dummy framebuffer which does not contain the
* information required by os_get_screen_info(), therefore you need to
* fall back onto the screen_info structure.
*
* After commit b8466fe82b79 ("efi: move screen_info into efi init code")
* in v6.7, 'screen_info' is exported as GPL licensed symbol for ARM64.
*/
#if NV_IS_EXPORT_SYMBOL_PRESENT_screen_info
#if NV_CHECK_EXPORT_SYMBOL(screen_info)
/*
* If there is not a framebuffer console, return 0 size.
*
@ -2122,6 +2130,43 @@ void NV_API_CALL os_nv_cap_close_fd
nv_cap_close_fd(fd);
}
/*
* Reads the total memory and free memory of a NUMA node from the kernel.
*/
NV_STATUS NV_API_CALL os_get_numa_node_memory_usage
(
NvS32 node_id,
NvU64 *free_memory_bytes,
NvU64 *total_memory_bytes
)
{
struct pglist_data *pgdat;
struct zone *zone;
NvU32 zone_id;
if (node_id >= MAX_NUMNODES)
{
nv_printf(NV_DBG_ERRORS, "Invalid NUMA node ID\n");
return NV_ERR_INVALID_ARGUMENT;
}
pgdat = NODE_DATA(node_id);
*free_memory_bytes = 0;
*total_memory_bytes = 0;
for (zone_id = 0; zone_id < MAX_NR_ZONES; zone_id++)
{
zone = &(pgdat->node_zones[zone_id]);
if (!populated_zone(zone))
continue;
*free_memory_bytes += (zone_page_state_snapshot(zone, NR_FREE_PAGES) * PAGE_SIZE);
*total_memory_bytes += (zone->present_pages * PAGE_SIZE);
}
return NV_OK;
}
typedef struct os_numa_gpu_mem_hotplug_notifier_s
{
NvU64 start_pa;
@ -2373,3 +2418,28 @@ NV_STATUS NV_API_CALL os_offline_page_at_address
#endif
}
void* NV_API_CALL os_get_pid_info(void)
{
return get_task_pid(current, PIDTYPE_PID);
}
void NV_API_CALL os_put_pid_info(void *pid_info)
{
if (pid_info != NULL)
put_pid(pid_info);
}
NV_STATUS NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid)
{
if ((pid_info == NULL) || (ns_pid == NULL))
return NV_ERR_INVALID_ARGUMENT;
*ns_pid = pid_vnr((struct pid *)pid_info);
// The call returns 0 if the PID is not found in the current ns
if (*ns_pid == 0)
return NV_ERR_OBJECT_NOT_FOUND;
return NV_OK;
}

View File

@ -1360,7 +1360,7 @@ bool ConnectorImpl::compoundQueryAttach(Group * target,
if (dev->pconCaps.maxHdmiLinkBandwidthGbps != 0)
{
NvU64 requiredBW = (NvU64)(modesetParams.modesetInfo.pixelClockHz * modesetParams.modesetInfo.depth);
NvU64 availableBw = (NvU64)(dev->pconCaps.maxHdmiLinkBandwidthGbps * 1000000000);
NvU64 availableBw = (NvU64)(dev->pconCaps.maxHdmiLinkBandwidthGbps * (NvU64)1000000000);
if (requiredBW > availableBw)
{
compoundQueryResult = false;
@ -1375,10 +1375,10 @@ bool ConnectorImpl::compoundQueryAttach(Group * target,
else if (dev->pconCaps.maxTmdsClkRate != 0)
{
NvU64 maxTmdsClkRateU64 = (NvU64)(dev->pconCaps.maxTmdsClkRate);
NvU64 requireBw = (NvU64)(modesetParams.modesetInfo.pixelClockHz * modesetParams.modesetInfo.depth);
NvU64 requiredBw = (NvU64)(modesetParams.modesetInfo.pixelClockHz * modesetParams.modesetInfo.depth);
if (modesetParams.colorFormat == dpColorFormat_YCbCr420)
{
if (maxTmdsClkRateU64 < ((requireBw/24)/2))
if (maxTmdsClkRateU64 < ((requiredBw/24)/2))
{
compoundQueryResult = false;
return false;
@ -1386,7 +1386,7 @@ bool ConnectorImpl::compoundQueryAttach(Group * target,
}
else
{
if (maxTmdsClkRateU64 < (requireBw/24))
if (maxTmdsClkRateU64 < (requiredBw/24))
{
compoundQueryResult = false;
return false;
@ -4740,7 +4740,7 @@ bool ConnectorImpl::train(const LinkConfiguration & lConfig, bool force,
{
LinkTrainingType preferredTrainingType = trainType;
bool result;
bool bEnableFecOnSor;
//
// Validate link config against caps
//
@ -4832,16 +4832,7 @@ bool ConnectorImpl::train(const LinkConfiguration & lConfig, bool force,
result = postLTAdjustment(activeLinkConfig, force);
}
bEnableFecOnSor = lConfig.bEnableFEC;
if (main->isEDP())
{
DeviceImpl * nativeDev = findDeviceInList(Address());
if (nativeDev && nativeDev->bIsPreviouslyFakedMuxDevice)
bEnableFecOnSor = activeLinkConfig.bEnableFEC;
}
if((lConfig.lanes != 0) && result && bEnableFecOnSor)
if((lConfig.lanes != 0) && result && activeLinkConfig.bEnableFEC)
{
//
// Extended latency from link-train end to FEC enable pattern
@ -6057,7 +6048,7 @@ void ConnectorImpl::notifyLongPulseInternal(bool statusConnected)
if (this->bReassessMaxLink)
{
//
// If the highest assessed LC is not equal to
// If the highest assessed LC is not equal to
// max possible link config, re-assess link
//
NvU8 retries = 0U;

View File

@ -43,18 +43,18 @@
#endif
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/VK535_87-147"
#define NV_BUILD_CHANGELIST_NUM (33800935)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/VK535_87-148"
#define NV_BUILD_CHANGELIST_NUM (33833102)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "rel/gpu_drv/r535/VK535_87-147"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33800935)
#define NV_BUILD_NAME "rel/gpu_drv/r535/VK535_87-148"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33833102)
#else /* Windows builds */
#define NV_BUILD_BRANCH_VERSION "VK535_87-24"
#define NV_BUILD_CHANGELIST_NUM (33800935)
#define NV_BUILD_BRANCH_VERSION "VK535_87-25"
#define NV_BUILD_CHANGELIST_NUM (33833102)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "538.31"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33800935)
#define NV_BUILD_NAME "538.35"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33833102)
#define NV_BUILD_BRANCH_BASE_VERSION R535
#endif
// End buildmeister python edited section

View File

@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
#define NV_VERSION_STRING "535.43.23"
#define NV_VERSION_STRING "535.43.24"
#else

View File

@ -3,7 +3,7 @@
#define NV_COMPANY_NAME_STRING_SHORT "NVIDIA"
#define NV_COMPANY_NAME_STRING_FULL "NVIDIA Corporation"
#define NV_COMPANY_NAME_STRING NV_COMPANY_NAME_STRING_FULL
#define NV_COPYRIGHT_YEAR "2023"
#define NV_COPYRIGHT_YEAR "2024"
#define NV_COPYRIGHT "(C) " NV_COPYRIGHT_YEAR " NVIDIA Corporation. All rights reserved." // Please do not use the non-ascii copyright symbol for (C).
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \

View File

@ -39,48 +39,63 @@ extern "C" {
#endif //NV_UNIX
#endif //!__cplusplus
// Surprise removal capable TB3 and TB2 BUS Device ID
#define BUS_DEVICE_ID_TB3_ALPINE_RIDGE_01 0x1578
#define BUS_DEVICE_ID_TB3_02 0x1576
#define BUS_DEVICE_ID_TB3_03 0x15C0
#define BUS_DEVICE_ID_TB3_04 0x15D3
#define BUS_DEVICE_ID_TB3_05 0x15DA
#define BUS_DEVICE_ID_TB3_06 0x15EA
#define BUS_DEVICE_ID_TB3_07 0x15E7
#define BUS_DEVICE_ID_TB3_08 0x15EF
#define BUS_DEVICE_ID_TB3_09 0x1133
#define BUS_DEVICE_ID_TB3_10 0x1136
#define PARENT_EGPU_BUS_DEVICE_43 0x57A4
#define PARENT_EGPU_BUS_DEVICE_42 0x5786
#define PARENT_EGPU_BUS_DEVICE_41 0x1578
#define PARENT_EGPU_BUS_DEVICE_40 0x1576
#define PARENT_EGPU_BUS_DEVICE_39 0x15C0
#define PARENT_EGPU_BUS_DEVICE_38 0x15D3
#define PARENT_EGPU_BUS_DEVICE_37 0x15DA
#define PARENT_EGPU_BUS_DEVICE_36 0x15EA
#define PARENT_EGPU_BUS_DEVICE_35 0x15E7
#define PARENT_EGPU_BUS_DEVICE_34 0x15EF
#define PARENT_EGPU_BUS_DEVICE_33 0x1133
#define PARENT_EGPU_BUS_DEVICE_32 0x1136
// IceLake-U TB3 device ids. Below TB3 would be integrated to CPU.
#define BUS_DEVICE_ID_ICELAKE_TB3_01 0x8A1D
#define BUS_DEVICE_ID_ICELAKE_TB3_02 0x8A1F
#define BUS_DEVICE_ID_ICELAKE_TB3_03 0x8A21
#define BUS_DEVICE_ID_ICELAKE_TB3_04 0x8A23
#define BUS_DEVICE_ID_ICELAKE_TB3_05 0x8A0D
#define BUS_DEVICE_ID_ICELAKE_TB3_06 0x8A17
#define PARENT_EGPU_BUS_DEVICE_31 0x8A1D
#define PARENT_EGPU_BUS_DEVICE_30 0x8A1F
#define PARENT_EGPU_BUS_DEVICE_29 0x8A21
#define PARENT_EGPU_BUS_DEVICE_28 0x8A23
#define PARENT_EGPU_BUS_DEVICE_27 0x8A0D
#define PARENT_EGPU_BUS_DEVICE_26 0x8A17
// TigerLake Thunderbolt device ids.
#define BUS_DEVICE_ID_TIGERLAKE_TB3_01 0x9A1B
#define BUS_DEVICE_ID_TIGERLAKE_TB3_02 0x9A1D
#define BUS_DEVICE_ID_TIGERLAKE_TB3_03 0x9A1F
#define BUS_DEVICE_ID_TIGERLAKE_TB3_04 0x9A21
#define BUS_DEVICE_ID_TIGERLAKE_TB3_05 0x9A23
#define BUS_DEVICE_ID_TIGERLAKE_TB3_06 0x9A25
#define BUS_DEVICE_ID_TIGERLAKE_TB3_07 0x9A27
#define BUS_DEVICE_ID_TIGERLAKE_TB3_08 0x9A29
#define BUS_DEVICE_ID_TIGERLAKE_TB3_09 0x9A2B
#define BUS_DEVICE_ID_TIGERLAKE_TB3_10 0x9A2D
//#define BUS_DEVICE_ID_TB2_FALCON_RIDGE_DSL5520_01 0X156C // obsolete
#define BUS_DEVICE_ID_TB2_FALCON_RIDGE_DSL5520_02 0X156D
#define BUS_DEVICE_ID_TB2_03 0x157E
#define BUS_DEVICE_ID_TB2_04 0x156B
#define BUS_DEVICE_ID_TB2_05 0x1567
#define BUS_DEVICE_ID_TB2_06 0x1569
//#define BUS_DEVICE_ID_TB2_07 0x1548 // obsolete
#define BUS_DEVICE_ID_TB2_08 0x151B
#define BUS_DEVICE_ID_TB2_09 0x1549
#define BUS_DEVICE_ID_TB2_10 0x1513
#define PARENT_EGPU_BUS_DEVICE_25 0x9A1B
#define PARENT_EGPU_BUS_DEVICE_24 0x9A1D
#define PARENT_EGPU_BUS_DEVICE_23 0x9A1F
#define PARENT_EGPU_BUS_DEVICE_22 0x9A21
#define PARENT_EGPU_BUS_DEVICE_21 0x9A23
#define PARENT_EGPU_BUS_DEVICE_20 0x9A25
#define PARENT_EGPU_BUS_DEVICE_19 0x9A27
#define PARENT_EGPU_BUS_DEVICE_18 0x9A29
#define PARENT_EGPU_BUS_DEVICE_17 0x9A2B
#define PARENT_EGPU_BUS_DEVICE_16 0x9A2D
#define PARENT_EGPU_BUS_DEVICE_15 0x7EB2
#define PARENT_EGPU_BUS_DEVICE_14 0x7EC2
#define PARENT_EGPU_BUS_DEVICE_13 0x7EC3
#define PARENT_EGPU_BUS_DEVICE_12 0x7EB4
#define PARENT_EGPU_BUS_DEVICE_11 0x7EC4
#define PARENT_EGPU_BUS_DEVICE_10 0x7EB5
#define PARENT_EGPU_BUS_DEVICE_09 0x7EC5
#define PARENT_EGPU_BUS_DEVICE_08 0x7EC6
#define PARENT_EGPU_BUS_DEVICE_07 0x7EC7
#define PARENT_EGPU_BUS_DEVICE_06 0xA73E
#define PARENT_EGPU_BUS_DEVICE_05 0xA76D
#define PARENT_EGPU_BUS_DEVICE_04 0x466E
#define PARENT_EGPU_BUS_DEVICE_03 0x463F
#define PARENT_EGPU_BUS_DEVICE_02 0x462F
#define PARENT_EGPU_BUS_DEVICE_01 0x461F
#define PARENT_EGPU_BUS_DEVICE_02_08 0X156D
#define PARENT_EGPU_BUS_DEVICE_02_07 0x157E
#define PARENT_EGPU_BUS_DEVICE_02_06 0x156B
#define PARENT_EGPU_BUS_DEVICE_02_05 0x1567
#define PARENT_EGPU_BUS_DEVICE_02_04 0x1569
#define PARENT_EGPU_BUS_DEVICE_02_03 0x151B
#define PARENT_EGPU_BUS_DEVICE_02_02 0x1549
#define PARENT_EGPU_BUS_DEVICE_02_01 0x1513
//*****************************************************************************
// Function: isTB3DeviceID
@ -103,33 +118,51 @@ extern "C" {
EGPU_INLINE NvBool isTB3DeviceID(NvU16 deviceID)
{
NvU32 index;
NvU16 tb3DeviceIDList[]={ BUS_DEVICE_ID_TB3_ALPINE_RIDGE_01,
BUS_DEVICE_ID_TB3_02,
BUS_DEVICE_ID_TB3_03,
BUS_DEVICE_ID_TB3_04,
BUS_DEVICE_ID_TB3_05,
BUS_DEVICE_ID_TB3_06,
BUS_DEVICE_ID_TB3_07,
BUS_DEVICE_ID_TB3_08,
BUS_DEVICE_ID_TB3_09,
BUS_DEVICE_ID_TB3_10,
BUS_DEVICE_ID_ICELAKE_TB3_01,
BUS_DEVICE_ID_ICELAKE_TB3_02,
BUS_DEVICE_ID_ICELAKE_TB3_03,
BUS_DEVICE_ID_ICELAKE_TB3_04,
BUS_DEVICE_ID_ICELAKE_TB3_05,
BUS_DEVICE_ID_ICELAKE_TB3_06,
BUS_DEVICE_ID_TIGERLAKE_TB3_01,
BUS_DEVICE_ID_TIGERLAKE_TB3_02,
BUS_DEVICE_ID_TIGERLAKE_TB3_03,
BUS_DEVICE_ID_TIGERLAKE_TB3_04,
BUS_DEVICE_ID_TIGERLAKE_TB3_05,
BUS_DEVICE_ID_TIGERLAKE_TB3_06,
BUS_DEVICE_ID_TIGERLAKE_TB3_07,
BUS_DEVICE_ID_TIGERLAKE_TB3_08,
BUS_DEVICE_ID_TIGERLAKE_TB3_09,
BUS_DEVICE_ID_TIGERLAKE_TB3_10
};
NvU16 tb3DeviceIDList[]={ PARENT_EGPU_BUS_DEVICE_01,
PARENT_EGPU_BUS_DEVICE_02,
PARENT_EGPU_BUS_DEVICE_03,
PARENT_EGPU_BUS_DEVICE_04,
PARENT_EGPU_BUS_DEVICE_05,
PARENT_EGPU_BUS_DEVICE_06,
PARENT_EGPU_BUS_DEVICE_07,
PARENT_EGPU_BUS_DEVICE_08,
PARENT_EGPU_BUS_DEVICE_09,
PARENT_EGPU_BUS_DEVICE_10,
PARENT_EGPU_BUS_DEVICE_11,
PARENT_EGPU_BUS_DEVICE_12,
PARENT_EGPU_BUS_DEVICE_13,
PARENT_EGPU_BUS_DEVICE_14,
PARENT_EGPU_BUS_DEVICE_15,
PARENT_EGPU_BUS_DEVICE_16,
PARENT_EGPU_BUS_DEVICE_17,
PARENT_EGPU_BUS_DEVICE_18,
PARENT_EGPU_BUS_DEVICE_19,
PARENT_EGPU_BUS_DEVICE_20,
PARENT_EGPU_BUS_DEVICE_21,
PARENT_EGPU_BUS_DEVICE_22,
PARENT_EGPU_BUS_DEVICE_23,
PARENT_EGPU_BUS_DEVICE_24,
PARENT_EGPU_BUS_DEVICE_25,
PARENT_EGPU_BUS_DEVICE_26,
PARENT_EGPU_BUS_DEVICE_27,
PARENT_EGPU_BUS_DEVICE_28,
PARENT_EGPU_BUS_DEVICE_29,
PARENT_EGPU_BUS_DEVICE_30,
PARENT_EGPU_BUS_DEVICE_31,
PARENT_EGPU_BUS_DEVICE_32,
PARENT_EGPU_BUS_DEVICE_33,
PARENT_EGPU_BUS_DEVICE_34,
PARENT_EGPU_BUS_DEVICE_35,
PARENT_EGPU_BUS_DEVICE_36,
PARENT_EGPU_BUS_DEVICE_37,
PARENT_EGPU_BUS_DEVICE_38,
PARENT_EGPU_BUS_DEVICE_39,
PARENT_EGPU_BUS_DEVICE_40,
PARENT_EGPU_BUS_DEVICE_41,
PARENT_EGPU_BUS_DEVICE_42,
PARENT_EGPU_BUS_DEVICE_43
};
for (index = 0; index < (sizeof(tb3DeviceIDList)/sizeof(NvU16)); index++)
{
if (deviceID == tb3DeviceIDList[index])
@ -161,11 +194,14 @@ EGPU_INLINE NvBool isTB3DeviceID(NvU16 deviceID)
EGPU_INLINE NvBool isTB2DeviceID(NvU16 deviceID)
{
NvU32 index;
NvU16 tb2DeviceIDList[]={ BUS_DEVICE_ID_TB2_FALCON_RIDGE_DSL5520_02,
BUS_DEVICE_ID_TB2_03, BUS_DEVICE_ID_TB2_04,
BUS_DEVICE_ID_TB2_05, BUS_DEVICE_ID_TB2_06,
BUS_DEVICE_ID_TB2_08, BUS_DEVICE_ID_TB2_09,
BUS_DEVICE_ID_TB2_10
NvU16 tb2DeviceIDList[]={ PARENT_EGPU_BUS_DEVICE_02_01,
PARENT_EGPU_BUS_DEVICE_02_02,
PARENT_EGPU_BUS_DEVICE_02_03,
PARENT_EGPU_BUS_DEVICE_02_04,
PARENT_EGPU_BUS_DEVICE_02_05,
PARENT_EGPU_BUS_DEVICE_02_06,
PARENT_EGPU_BUS_DEVICE_02_07,
PARENT_EGPU_BUS_DEVICE_02_08
};
for (index = 0; index < (sizeof(tb2DeviceIDList)/sizeof(NvU16)); index++)
{

View File

@ -0,0 +1,28 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __ga100_hwproject_h__
#define __ga100_hwproject_h__
#define NV_SCAL_LITTER_NUM_FBPAS 24
#endif // __ga100_hwproject_h__

View File

@ -20,7 +20,7 @@
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __gh100_dev_fb_h_
#define __gh100_dev_fb_h_
#define NV_PFB_NISO_FLUSH_SYSMEM_ADDR_SHIFT 8 /* */
@ -29,4 +29,5 @@
#define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI 0x00100A38 /* RW-4R */
#define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI_ADR 31:0 /* RWIVF */
#define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI_ADR_MASK 0x000FFFFF /* ----V */
#endif // __gh100_dev_fb_h_

View File

@ -0,0 +1,29 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __gh100_dev_fbpa_h_
#define __gh100_dev_fbpa_h_
#define NV_PFB_FBPA_0_ECC_DED_COUNT__SIZE_1 4 /* */
#define NV_PFB_FBPA_0_ECC_DED_COUNT(i) (0x009025A0+(i)*4) /* RW-4A */
#endif // __gh100_dev_fbpa_h_

View File

@ -31,4 +31,22 @@
#define NV_PGSP_FALCON_ENGINE_RESET_STATUS_ASSERTED 0x00000000 /* R-E-V */
#define NV_PGSP_FALCON_ENGINE_RESET_STATUS_DEASSERTED 0x00000002 /* R---V */
#define NV_PGSP_MAILBOX(i) (0x110804+(i)*4) /* RW-4A */
#define NV_PGSP_EMEMC(i) (0x110ac0+(i)*8) /* RW-4A */
#define NV_PGSP_EMEMC__SIZE_1 8 /* */
#define NV_PGSP_EMEMC_OFFS 7:2 /* RWIVF */
#define NV_PGSP_EMEMC_OFFS_INIT 0x00000000 /* RWI-V */
#define NV_PGSP_EMEMC_BLK 15:8 /* RWIVF */
#define NV_PGSP_EMEMC_BLK_INIT 0x00000000 /* RWI-V */
#define NV_PGSP_EMEMC_AINCW 24:24 /* RWIVF */
#define NV_PGSP_EMEMC_AINCW_INIT 0x00000000 /* RWI-V */
#define NV_PGSP_EMEMC_AINCW_TRUE 0x00000001 /* RW--V */
#define NV_PGSP_EMEMC_AINCW_FALSE 0x00000000 /* RW--V */
#define NV_PGSP_EMEMC_AINCR 25:25 /* RWIVF */
#define NV_PGSP_EMEMC_AINCR_INIT 0x00000000 /* RWI-V */
#define NV_PGSP_EMEMC_AINCR_TRUE 0x00000001 /* RW--V */
#define NV_PGSP_EMEMC_AINCR_FALSE 0x00000000 /* RW--V */
#define NV_PGSP_EMEMD(i) (0x110ac4+(i)*8) /* RW-4A */
#define NV_PGSP_EMEMD__SIZE_1 8 /* */
#define NV_PGSP_EMEMD_DATA 31:0 /* RWXVF */
#endif // __gh100_dev_gsp_h__

View File

@ -0,0 +1,52 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __gh100_dev_nv_xpl_h_
#define __gh100_dev_nv_xpl_h_
#define NV_XPL_DL_ERR_COUNT_RBUF 0x00000a54 /* R--4R */
#define NV_XPL_DL_ERR_COUNT_RBUF__PRIV_LEVEL_MASK 0x00000b08 /* */
#define NV_XPL_DL_ERR_COUNT_RBUF_CORR_ERR 15:0 /* R-EVF */
#define NV_XPL_DL_ERR_COUNT_RBUF_CORR_ERR_INIT 0x0000 /* R-E-V */
#define NV_XPL_DL_ERR_COUNT_RBUF_UNCORR_ERR 31:16 /* R-EVF */
#define NV_XPL_DL_ERR_COUNT_RBUF_UNCORR_ERR_INIT 0x0000 /* R-E-V */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT 0x00000a58 /* R--4R */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT__PRIV_LEVEL_MASK 0x00000b08 /* */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_CORR_ERR 15:0 /* R-EVF */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_CORR_ERR_INIT 0x0000 /* R-E-V */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_UNCORR_ERR 31:16 /* R-EVF */
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_UNCORR_ERR_INIT 0x0000 /* R-E-V */
#define NV_XPL_DL_ERR_RESET 0x00000a5c /* RW-4R */
#define NV_XPL_DL_ERR_RESET_RBUF_CORR_ERR_COUNT 0:0 /* RWCVF */
#define NV_XPL_DL_ERR_RESET_RBUF_CORR_ERR_COUNT_DONE 0x0 /* RWC-V */
#define NV_XPL_DL_ERR_RESET_RBUF_CORR_ERR_COUNT_PENDING 0x1 /* -W--T */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_CORR_ERR_COUNT 1:1 /* RWCVF */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_CORR_ERR_COUNT_DONE 0x0 /* RWC-V */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_CORR_ERR_COUNT_PENDING 0x1 /* -W--T */
#define NV_XPL_DL_ERR_RESET_RBUF_UNCORR_ERR_COUNT 16:16 /* RWCVF */
#define NV_XPL_DL_ERR_RESET_RBUF_UNCORR_ERR_COUNT_DONE 0x0 /* RWC-V */
#define NV_XPL_DL_ERR_RESET_RBUF_UNCORR_ERR_COUNT_PENDING 0x1 /* -W--T */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_UNCORR_ERR_COUNT 17:17 /* RWCVF */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_UNCORR_ERR_COUNT_DONE 0x0 /* RWC-V */
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_UNCORR_ERR_COUNT_PENDING 0x1 /* -W--T */
#endif // __gh100_dev_nv_xpl_h__

View File

@ -24,4 +24,7 @@
#ifndef __gh100_dev_xtl_ep_pri_h__
#define __gh100_dev_xtl_ep_pri_h__
#define NV_EP_PCFGM 0x92FFF:0x92000 /* RW--D */
#define NV_XTL_EP_PRI_DED_ERROR_STATUS 0x0000043C /* RW-4R */
#define NV_XTL_EP_PRI_RAM_ERROR_INTR_STATUS 0x000003C8 /* RW-4R */
#endif // __gh100_dev_xtl_ep_pri_h__

View File

@ -21,3 +21,6 @@
* DEALINGS IN THE SOFTWARE.
*/
#define NV_CHIP_EXTENDED_SYSTEM_PHYSICAL_ADDRESS_BITS 52
#define NV_XPL_BASE_ADDRESS 540672
#define NV_XTL_BASE_ADDRESS 593920
#define NV_FBPA_PRI_STRIDE 16384

View File

@ -47,5 +47,17 @@
#define NV_XAL_EP_INTR_0_PRI_RSP_TIMEOUT 3:3
#define NV_XAL_EP_INTR_0_PRI_RSP_TIMEOUT_PENDING 0x1
#define NV_XAL_EP_SCPM_PRI_DUMMY_DATA_PATTERN_INIT 0xbadf0200
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT 0x0010f364 /* RW-4R */
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWIUF */
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0x0000 /* RWI-V */
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWIUF */
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0x0000 /* RWI-V */
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT 0x0010f37c /* RW-4R */
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWIUF */
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0x0000 /* RWI-V */
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWIUF */
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0x0000 /* RWI-V */
#endif // __gh100_pri_nv_xal_ep_h__

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2003-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -635,4 +635,7 @@
#define NV_NVLIPT_LNK_CTRL_CAP_LOCAL_LINK_CHANNEL_ALI_SUPPORT 28:28 /* RWIVF */
#define NV_NVLIPT_LNK_CTRL_CAP_LOCAL_LINK_CHANNEL_ALI_SUPPORT_SUPPORTED 0x00000001 /* RWI-V */
#define NV_NVLIPT_LNK_CTRL_CAP_LOCAL_LINK_CHANNEL_ALI_SUPPORT_NOT_SUPPORTED 0x00000000 /* RW--V */
#define NV_NVLIPT_LNK_SCRATCH_WARM 0x000007c0 /* RW-4R */
#define NV_NVLIPT_LNK_SCRATCH_WARM_DATA 31:0 /* RWEVF */
#define NV_NVLIPT_LNK_SCRATCH_WARM_DATA_INIT 0xdeadbaad /* RWE-V */
#endif // __ls10_dev_nvlipt_lnk_ip_h__

View File

@ -0,0 +1,28 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the Software),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __ls10_ptop_discovery_ip_h__
#define __ls10_ptop_discovery_ip_h__
/* This file is autogenerated. Do not edit */
#define NV_PTOP_UNICAST_SW_DEVICE_BASE_SAW_0 0x00028000 /* */
#endif // __ls10_ptop_discovery_ip_h__

View File

@ -38,4 +38,25 @@
#define NV_PFB_PRI_MMU_WPR2_ADDR_HI_VAL 31:4 /* RWEVF */
#define NV_PFB_PRI_MMU_WPR2_ADDR_HI_ALIGNMENT 0x0000000c /* */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E78 /* RW-4R */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E78 /* RW-4R */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWEVF */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0 /* RWE-V */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWEVF */
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0 /* RWE-V */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E8C /* RW-4R */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E8C /* RW-4R */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWEVF */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0 /* RWE-V */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWEVF */
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0 /* RWE-V */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT 0x00100EA0 /* RW-4R */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT 0x00100EA0 /* RW-4R */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWEVF */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0 /* RWE-V */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWEVF */
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0 /* RWE-V */
#endif // __tu102_dev_fb_h__

View File

@ -0,0 +1,29 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __tu102_dev_fbpa_h_
#define __tu102_dev_fbpa_h_
#define NV_PFB_FBPA_0_ECC_DED_COUNT__SIZE_1 2 /* */
#define NV_PFB_FBPA_0_ECC_DED_COUNT(i) (0x00900488+(i)*4) /* RW-4A */
#endif // __tu102_dev_fbpa_h_

View File

@ -24,6 +24,7 @@
#ifndef __tu102_dev_gc6_island_h__
#define __tu102_dev_gc6_island_h__
#define NV_PGC6 0x118fff:0x118000 /* RW--D */
#define NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK 0x00118128 /* RW-4R */
#define NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK_READ_PROTECTION 3:0 /* RWIVF */
#define NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0 0:0 /* */

View File

@ -38,5 +38,22 @@
#define NV_PGSP_QUEUE_HEAD(i) (0x110c00+(i)*8) /* RW-4A */
#define NV_PGSP_QUEUE_HEAD__SIZE_1 8 /* */
#define NV_PGSP_QUEUE_HEAD_ADDRESS 31:0 /* RWIVF */
#define NV_PGSP_EMEMC(i) (0x110ac0+(i)*8) /* RW-4A */
#define NV_PGSP_EMEMC__SIZE_1 4 /* */
#define NV_PGSP_EMEMC_OFFS 7:2 /* RWIVF */
#define NV_PGSP_EMEMC_OFFS_INIT 0x00000000 /* RWI-V */
#define NV_PGSP_EMEMC_BLK 15:8 /* RWIVF */
#define NV_PGSP_EMEMC_BLK_INIT 0x00000000 /* RWI-V */
#define NV_PGSP_EMEMC_AINCW 24:24 /* RWIVF */
#define NV_PGSP_EMEMC_AINCW_INIT 0x00000000 /* RWI-V */
#define NV_PGSP_EMEMC_AINCW_TRUE 0x00000001 /* RW--V */
#define NV_PGSP_EMEMC_AINCW_FALSE 0x00000000 /* RW--V */
#define NV_PGSP_EMEMC_AINCR 25:25 /* RWIVF */
#define NV_PGSP_EMEMC_AINCR_INIT 0x00000000 /* RWI-V */
#define NV_PGSP_EMEMC_AINCR_TRUE 0x00000001 /* RW--V */
#define NV_PGSP_EMEMC_AINCR_FALSE 0x00000000 /* RW--V */
#define NV_PGSP_EMEMD(i) (0x110ac4+(i)*8) /* RW-4A */
#define NV_PGSP_EMEMD__SIZE_1 4 /* */
#define NV_PGSP_EMEMD_DATA 31:0 /* RW-VF */
#endif // __tu102_dev_gsp_h__

View File

@ -0,0 +1,33 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef __tu102_dev_ltc_h_
#define __tu102_dev_ltc_h_
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT 0x001404f8 /* RW-4R */
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWIVF */
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0x0000 /* RWI-V */
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWIVF */
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0x0000 /* RWI-V */
#endif // __tu102_dev_ltc_h_

View File

@ -28,6 +28,10 @@
#define NV_XVE_MSIX_CAP_HDR_ENABLE 31:31 /* RWIVF */
#define NV_XVE_MSIX_CAP_HDR_ENABLE_ENABLED 0x00000001 /* RW--V */
#define NV_XVE_MSIX_CAP_HDR_ENABLE_DISABLED 0x00000000 /* RWI-V */
#define NV_XVE_PRIV_MISC_1 0x0000041C /* RW-4R */
#define NV_XVE_PRIV_MISC_1_CYA_HIDE_MSIX_CAP 29:29 /* RWCVF */
#define NV_XVE_PRIV_MISC_1_CYA_HIDE_MSIX_CAP_TRUE 0x00000001 /* RW--V */
#define NV_XVE_PRIV_MISC_1_CYA_HIDE_MSIX_CAP_FALSE 0x00000000 /* RWC-V */
#define NV_XVE_SRIOV_CAP_HDR3 0x00000BD8 /* R--4R */
#define NV_XVE_SRIOV_CAP_HDR3_TOTAL_VFS 31:16 /* R-EVF */
#define NV_XVE_SRIOV_CAP_HDR5 0x00000BE0 /* R--4R */

View File

@ -25,5 +25,9 @@
#define __tu102_hwproject_h__
#define NV_CHIP_EXTENDED_SYSTEM_PHYSICAL_ADDRESS_BITS 47
#define NV_SCAL_LITTER_NUM_FBPAS 16
#define NV_FBPA_PRI_STRIDE 16384
#define NV_LTC_PRI_STRIDE 8192
#define NV_LTS_PRI_STRIDE 512
#endif // __tu102_hwproject_h__

View File

@ -439,6 +439,11 @@ NvlStatus nvlink_lib_register_link(nvlink_device *dev, nvlink_link *link);
*/
NvlStatus nvlink_lib_unregister_link(nvlink_link *link);
/*
* Gets number of devices with type deviceType
*/
NvlStatus nvlink_lib_return_device_count_by_type(NvU32 deviceType, NvU32 *numDevices);
/************************************************************************************************/
/******************************* NVLink link management functions *******************************/

View File

@ -46,6 +46,11 @@ NvlStatus nvlink_lib_unload(void);
*/
NvlStatus nvlink_lib_ioctl_ctrl(nvlink_ioctrl_params *ctrl_params);
/*
* Gets number of devices with type deviceType
*/
NvlStatus nvlink_lib_return_device_count_by_type(NvU32 deviceType, NvU32 *numDevices);
#ifdef __cplusplus
}
#endif

View File

@ -198,3 +198,48 @@ nvlink_lib_is_registerd_device_with_reduced_config(void)
return bIsReducedConfg;
}
/*
* Get the number of devices that have the device type deviceType
*/
NvlStatus
nvlink_lib_return_device_count_by_type
(
NvU32 deviceType,
NvU32 *numDevices
)
{
NvlStatus lock_status = NVL_SUCCESS;
nvlink_device *dev = NULL;
NvU32 device_count = 0;
if (nvlink_lib_is_initialized())
{
// Acquire top-level lock
lock_status = nvlink_lib_top_lock_acquire();
if (lock_status != NVL_SUCCESS)
{
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
"%s: Failed to acquire top-level lock\n",
__FUNCTION__));
return lock_status;
}
// Top-level lock is now acquired
// Loop through device list
FOR_EACH_DEVICE_REGISTERED(dev, nvlinkLibCtx.nv_devicelist_head, node)
{
if (dev->type == deviceType)
{
device_count++;
}
}
// Release top-level lock
nvlink_lib_top_lock_release();
}
*numDevices = device_count;
return NVL_SUCCESS;
}

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -37,6 +37,17 @@ enum
RM_SOE_IFR_BBX_SHUTDOWN,
RM_SOE_IFR_BBX_SXID_ADD,
RM_SOE_IFR_BBX_SXID_GET,
RM_SOE_IFR_BBX_DATA_GET,
};
enum
{
RM_SOE_IFR_BBX_GET_NONE,
RM_SOE_IFR_BBX_GET_SXID,
RM_SOE_IFR_BBX_GET_SYS_INFO,
RM_SOE_IFR_BBX_GET_TIME_INFO,
RM_SOE_IFR_BBX_GET_TEMP_DATA,
RM_SOE_IFR_BBX_GET_TEMP_SAMPLES,
};
typedef struct
@ -75,6 +86,14 @@ typedef struct
RM_FLCN_U64 dmaHandle;
} RM_SOE_IFR_CMD_BBX_SXID_GET_PARAMS;
typedef struct
{
NvU8 cmdType;
NvU32 sizeInBytes;
RM_FLCN_U64 dmaHandle;
NvU8 dataType;
} RM_SOE_IFR_CMD_BBX_GET_DATA_PARAMS;
typedef union
{
NvU8 cmdType;
@ -82,6 +101,7 @@ typedef union
RM_SOE_IFR_CMD_BBX_INIT_PARAMS bbxInit;
RM_SOE_IFR_CMD_BBX_SXID_ADD_PARAMS bbxSxidAdd;
RM_SOE_IFR_CMD_BBX_SXID_GET_PARAMS bbxSxidGet;
RM_SOE_IFR_CMD_BBX_GET_DATA_PARAMS bbxDataGet;
} RM_SOE_IFR_CMD;
// entry of getSxid
@ -99,4 +119,81 @@ typedef struct
RM_SOE_BBX_SXID_ENTRY sxidLast[INFOROM_BBX_OBJ_XID_ENTRIES];
} RM_SOE_BBX_GET_SXID_DATA;
// NVSwitch system version information returning with the command GET_SYS_INFO
typedef struct
{
NvU32 driverLo; //Driver Version Low 32 bits
NvU16 driverHi; //Driver Version High 16 bits
NvU32 vbiosVersion; //VBIOS Version
NvU8 vbiosVersionOem; //VBIOS OEM Version byte
NvU8 osType; //OS Type (UNIX/WIN/WIN2K/WIN9x/OTHER)
NvU32 osVersion; //OS Version (Build|MINOR|MAJOR)
} RM_SOE_BBX_GET_SYS_INFO_DATA;
// NVSwitch time information returning with the command GET_TIME_INFO
typedef struct
{
NvU32 timeStart; //Timestamp (EPOCH) when the driver was loaded on the GPU for the first time
NvU32 timeEnd; //Timestamp (EPOCH) when the data was last flushed
NvU32 timeRun; //Amount of time (in seconds) driver was loaded, and GPU has run
NvU32 time24Hours; //Timestamp (EPOCH) of when the first 24 operational hours is hit
NvU32 time100Hours; //Timestamp (EPOCH) of when the first 100 operational hours is hit
} RM_SOE_BBX_GET_TIME_INFO_DATA;
#define RM_SOE_BBX_TEMP_DAY_ENTRIES 5
#define RM_SOE_BBX_TEMP_WEEK_ENTRIES 5
#define RM_SOE_BBX_TEMP_MNT_ENTRIES 5
#define RM_SOE_BBX_TEMP_ALL_ENTRIES 5
#define RM_SOE_BBX_TEMP_SUM_HOUR_ENTRIES 23
#define RM_SOE_BBX_TEMP_SUM_DAY_ENTRIES 5
#define RM_SOE_BBX_TEMP_SUM_MNT_ENTRIES 3
#define RM_SOE_BBX_TEMP_HISTOGRAM_THLD_ENTRIES 20
#define RM_SOE_BBX_TEMP_HISTOGRAM_TIME_ENTRIES 21
#define RM_SOE_BBX_TEMP_HOURLY_MAX_ENTRIES 168
#define RM_SOE_BBX_TEMP_COMPRESS_BUFFER_ENTRIES 1096
#define RM_SOE_BBX_NUM_COMPRESSION_PERIODS 8
// NVSwitch Temperature Entry
typedef struct
{
NvU16 value; //Temperature (SFXP 9.7 format in Celsius)
NvU32 timestamp; //Timestamp (EPOCH) of when the entry is recorded
} RM_SOE_BBX_TEMP_ENTRY;
// NVSwitch Temperature Data returning with the command GET_TEMP_DATA
typedef struct
{
NvU32 tempMaxDayIdx;
RM_SOE_BBX_TEMP_ENTRY tempMaxDay[RM_SOE_BBX_TEMP_DAY_ENTRIES];
NvU32 tempMaxWeekIdx;
RM_SOE_BBX_TEMP_ENTRY tempMaxWeek[RM_SOE_BBX_TEMP_WEEK_ENTRIES];
NvU32 tempMaxMntIdx;
RM_SOE_BBX_TEMP_ENTRY tempMaxMnt[RM_SOE_BBX_TEMP_MNT_ENTRIES];
NvU32 tempMaxAllIdx;
RM_SOE_BBX_TEMP_ENTRY tempMaxAll[RM_SOE_BBX_TEMP_ALL_ENTRIES];
NvU32 tempMinDayIdx;
RM_SOE_BBX_TEMP_ENTRY tempMinDay[RM_SOE_BBX_TEMP_DAY_ENTRIES];
NvU32 tempMinWeekIdx;
RM_SOE_BBX_TEMP_ENTRY tempMinWeek[RM_SOE_BBX_TEMP_WEEK_ENTRIES];
NvU32 tempMinMntIdx;
RM_SOE_BBX_TEMP_ENTRY tempMinMnt[RM_SOE_BBX_TEMP_MNT_ENTRIES];
NvU32 tempMinAllIdx;
RM_SOE_BBX_TEMP_ENTRY tempMinAll[RM_SOE_BBX_TEMP_ALL_ENTRIES];
NvU32 tempSumDelta;
NvU32 tempSumHour[RM_SOE_BBX_TEMP_SUM_HOUR_ENTRIES];
NvU32 tempSumDay[RM_SOE_BBX_TEMP_SUM_DAY_ENTRIES];
NvU32 tempSumMnt[RM_SOE_BBX_TEMP_SUM_MNT_ENTRIES];
NvU32 tempHistogramThld[RM_SOE_BBX_TEMP_HISTOGRAM_THLD_ENTRIES];
NvU32 tempHistogramTime[RM_SOE_BBX_TEMP_HISTOGRAM_TIME_ENTRIES];
RM_SOE_BBX_TEMP_ENTRY tempHourlyMaxSample[RM_SOE_BBX_TEMP_HOURLY_MAX_ENTRIES];
} RM_SOE_BBX_GET_TEMP_DATA;
// NVSwitch Temperature Compressed Samples returning with the command GET_TEMP_SAMPLES
typedef struct
{
NvU32 compressionPeriodIdx;
NvU32 compressionPeriod[RM_SOE_BBX_NUM_COMPRESSION_PERIODS];
RM_SOE_BBX_TEMP_ENTRY tempCompressionBuffer[RM_SOE_BBX_TEMP_COMPRESS_BUFFER_ENTRIES];
} RM_SOE_BBX_GET_TEMP_SAMPLES;
#endif // _SOEIFIFR_H_

View File

@ -830,6 +830,7 @@ typedef enum nvswitch_err_type
NVSWITCH_ERR_HW_HOST_THERMAL_SHUTDOWN = 10006,
NVSWITCH_ERR_HW_HOST_IO_FAILURE = 10007,
NVSWITCH_ERR_HW_HOST_FIRMWARE_INITIALIZATION_FAILURE = 10008,
NVSWITCH_ERR_HW_HOST_FIRMWARE_RECOVERY_MODE = 10009,
NVSWITCH_ERR_HW_HOST_LAST,
@ -2973,6 +2974,197 @@ typedef struct
NVSWITCH_SXID_ENTRY sxidLast[NVSWITCH_SXID_ENTRIES_NUM];
} NVSWITCH_GET_SXIDS_PARAMS;
/*
* CTRL_NVSWITCH_GET_SYS_INFO
*
* Control to get the NVSwitch system version information from inforom cache
*
* Parameters:
* driverLo [OUT]
* The driver version low 32 bits. Example: driverLo = 54531 (Driver 545.31)
* driverHi [OUT]
* The driver version high 16 bits
* vbiosVersion [OUT]
* The vbios version number. Example: vbiosVersion=0x96104100 (release 96.10.41.00)
* vbiosVersionOem [OUT]
* The vbios OEM version byte.
* osType [OUT]
* The OS type. Example: osType=0x05 (UNIX)
* osVersion [OUT]
* The OS version number. [BUILD[31:16]|MINOR[15:8]|MAJOR[7:0]]
*/
typedef struct
{
NvU32 driverLo;
NvU16 driverHi;
NvU32 vbiosVersion;
NvU8 vbiosVersionOem;
NvU8 osType;
NvU32 osVersion;
} NVSWITCH_GET_SYS_INFO_PARAMS;
/*
* CTRL_NVSWITCH_GET_TIME_INFO
*
* Control to get the NVSwitch time information from inforom cache
*
* Parameters:
* timeStart [OUT]
* The timestamp (EPOCH) when driver load onto the NVSwitch for the 1st time
* timeEnd [OUT]
* The timestamp (EPOCH) when the data was last flushed
* timeRun [OUT]
* The amount of time (in seconds) driver was loaded/running
* time24Hours [OUT]
* The timestamp (EPOCH) when the first 24 operational hours is hit
* time100Hours [OUT]
* The timestamp (EPOCH) when the first 100 operational hours is hit
*/
typedef struct
{
NvU32 timeStart;
NvU32 timeEnd;
NvU32 timeRun;
NvU32 time24Hours;
NvU32 time100Hours;
} NVSWITCH_GET_TIME_INFO_PARAMS;
#define NVSWITCH_TEMP_DAY_ENTRIES 5
#define NVSWITCH_TEMP_WEEK_ENTRIES 5
#define NVSWITCH_TEMP_MNT_ENTRIES 5
#define NVSWITCH_TEMP_ALL_ENTRIES 5
#define NVSWITCH_TEMP_SUM_HOUR_ENTRIES 23
#define NVSWITCH_TEMP_SUM_DAY_ENTRIES 5
#define NVSWITCH_TEMP_SUM_MNT_ENTRIES 3
#define NVSWITCH_TEMP_HISTOGRAM_THLD_ENTRIES 20
#define NVSWITCH_TEMP_HISTOGRAM_TIME_ENTRIES 21
#define NVSWITCH_TEMP_HOURLY_MAX_ENTRIES 168
/*
* NVSWITCH_TEMP_ENTRY
*
* This structure represents the NVSwitch TEMP with its timestamp.
*
* value
* This parameter specifies the NVSwitch Temperature
* (SFXP 9.7 format in Celsius).
*
* timestamp
* This parameter specifies the timestamp (EPOCH) of the entry.
*/
typedef struct
{
NvU16 value;
NvU32 timestamp;
} NVSWITCH_TEMP_ENTRY;
/*
* CTRL_NVSWITCH_GET_TEMP_DATA
*
* Control to get the NVSwitch device historical temperature information from inforom cache
*
* Parameters:
* tempMaxDayIdx [OUT]
* The current index to the maximum day temperature array
* tempMaxDay[] [OUT]
* The maximum temperature array for last NVSWITCH_TEMP_DAY_ENTRIES days
* tempMaxWeekIdx [OUT]
* The current index to the maximum week temperature array
* tempMaxWeek[] [OUT]
* The maximum temperature array for last NVSWITCH_TEMP_WEEK_ENTRIES weeks
* tempMaxMntIdx [OUT]
* The current index to the maximum month temperature array
* tempMaxMnt[] [OUT]
* The maximum temperature array for last NVSWITCH_TEMP_MNT_ENTRIES months
* tempMaxAllIdx [OUT]
* The current index to the maximum temperature array
* tempMaxAll[] [OUT]
* The maximum temperature array for the device
* tempMinDayIdx [OUT]
* The current index to the minimum day temperature array
* tempMinDay[] [OUT]
* The minimum temperature array for last NVSWITCH_TEMP_DAY_ENTRIES days
* tempMinWeekIdx [OUT]
* The current index to the minimum week temperature array
* tempMinWeek[] [OUT]
* The minimum temperature array for last NVSWITCH_TEMP_WEEK_ENTRIES weeks
* tempMinMntIdx [OUT]
* The current index to the minimum month temperature array
* tempMinMnt[] [OUT]
* The minimum temperature array for last NVSWITCH_TEMP_MNT_ENTRIES months
* tempMinAllIdx [OUT]
* The current index to the minimum temperature array
* tempMinAll[] [OUT]
* The minimum temperature array for the device
* tempSumDelta [OUT]
* The total sum of temperature change in 0.1C granularity
* tempSumHour[] [OUT]
* The moving average of temperature per hour, for last NVSWITCH_TEMP_SUM_HOUR_ENTRIES hours
* tempSumDay[] [OUT]
* The moving average of temperature per day, for last NVSWITCH_TEMP_SUM_DAY_ENTRIES days
* tempSumMnt[] [OUT]
* The moving average of temperature per month, for last NVSWITCH_TEMP_SUM_MNT_ENTRIES months
* tempHistogramThld[] [OUT]
* The histogram of temperature crossing various thresholds (5/10/15/.../95/100)
* tempHistogramTime[] [OUT]
* The histogram of time was in various temperature ranges (0..5/5..10/.../100..)
* tempHourlyMaxSample[] [OUT]
* The maximum hourly temperature array for the device
*/
typedef struct
{
NvU32 tempMaxDayIdx;
NVSWITCH_TEMP_ENTRY tempMaxDay[NVSWITCH_TEMP_DAY_ENTRIES];
NvU32 tempMaxWeekIdx;
NVSWITCH_TEMP_ENTRY tempMaxWeek[NVSWITCH_TEMP_WEEK_ENTRIES];
NvU32 tempMaxMntIdx;
NVSWITCH_TEMP_ENTRY tempMaxMnt[NVSWITCH_TEMP_MNT_ENTRIES];
NvU32 tempMaxAllIdx;
NVSWITCH_TEMP_ENTRY tempMaxAll[NVSWITCH_TEMP_ALL_ENTRIES];
NvU32 tempMinDayIdx;
NVSWITCH_TEMP_ENTRY tempMinDay[NVSWITCH_TEMP_DAY_ENTRIES];
NvU32 tempMinWeekIdx;
NVSWITCH_TEMP_ENTRY tempMinWeek[NVSWITCH_TEMP_WEEK_ENTRIES];
NvU32 tempMinMntIdx;
NVSWITCH_TEMP_ENTRY tempMinMnt[NVSWITCH_TEMP_MNT_ENTRIES];
NvU32 tempMinAllIdx;
NVSWITCH_TEMP_ENTRY tempMinAll[NVSWITCH_TEMP_ALL_ENTRIES];
NvU32 tempSumDelta;
NvU32 tempSumHour[NVSWITCH_TEMP_SUM_HOUR_ENTRIES];
NvU32 tempSumDay[NVSWITCH_TEMP_SUM_DAY_ENTRIES];
NvU32 tempSumMnt[NVSWITCH_TEMP_SUM_MNT_ENTRIES];
NvU32 tempHistogramThld[NVSWITCH_TEMP_HISTOGRAM_THLD_ENTRIES];
NvU32 tempHistogramTime[NVSWITCH_TEMP_HISTOGRAM_TIME_ENTRIES];
NVSWITCH_TEMP_ENTRY tempHourlyMaxSample[NVSWITCH_TEMP_HOURLY_MAX_ENTRIES];
} NVSWITCH_GET_TEMP_DATA_PARAMS;
#define NVSWITCH_TEMP_COMPRESS_BUFFER_ENTRIES 1096
#define NVSWITCH_NUM_COMPRESSION_PERIODS 8
/*
* CTRL_NVSWITCH_GET_TEMP_DATA
*
* Control to get the NVSwitch device temperature information from inforom cache
*
* Parameters:
* compressionPeriodIdx [OUT]
* The current index to the sample period array
* compressionPeriod[] [OUT]
* The samples period array (seconds)
* tempCompressionBuffer[] [OUT]
* The temperature array sampling at a specific period in compressionPeriod[]
*/
typedef struct
{
NvU32 compressionPeriodIdx;
NvU32 compressionPeriod[NVSWITCH_NUM_COMPRESSION_PERIODS];
NVSWITCH_TEMP_ENTRY tempCompressionBuffer[NVSWITCH_TEMP_COMPRESS_BUFFER_ENTRIES];
} NVSWITCH_GET_TEMP_SAMPLES_PARAMS;
/*
* CTRL_NVSWITCH_GET_FOM_VALUES
* This command gives the FOM values to MODS
@ -3848,6 +4040,10 @@ typedef struct
#define CTRL_NVSWITCH_RESERVED_11 0x55
#define CTRL_NVSWITCH_GET_BOARD_PART_NUMBER 0x56
#define CTRL_NVSWITCH_GET_POWER 0x57
#define CTRL_NVSWITCH_GET_SYS_INFO 0x58
#define CTRL_NVSWITCH_GET_TIME_INFO 0x59
#define CTRL_NVSWITCH_GET_TEMP_DATA 0x60
#define CTRL_NVSWITCH_GET_TEMP_SAMPLES 0x61
#ifdef __cplusplus
}

View File

@ -158,6 +158,7 @@
_op(NvlStatus, nvswitch_bbx_unload, (nvswitch_device *device), _arch) \
_op(NvlStatus, nvswitch_bbx_load, (nvswitch_device *device, NvU64 time_ns, NvU8 osType, NvU32 osVersion), _arch) \
_op(NvlStatus, nvswitch_bbx_get_sxid, (nvswitch_device *device, NVSWITCH_GET_SXIDS_PARAMS * params), _arch) \
_op(NvlStatus, nvswitch_bbx_get_data, (nvswitch_device *device, NvU8 dataType, void * params), _arch) \
_op(NvlStatus, nvswitch_smbpbi_alloc, (nvswitch_device *device), _arch) \
_op(NvlStatus, nvswitch_smbpbi_post_init_hal, (nvswitch_device *device), _arch) \
_op(void, nvswitch_smbpbi_destroy_hal, (nvswitch_device *device), _arch) \
@ -213,6 +214,7 @@
_op(void, nvswitch_reset_persistent_link_hw_state, (nvswitch_device *device, NvU32 linkNumber), _arch)\
_op(void, nvswitch_store_topology_information, (nvswitch_device *device, nvlink_link *link), _arch) \
_op(void, nvswitch_init_lpwr_regs, (nvlink_link *link), _arch) \
_op(void, nvswitch_program_l1_scratch_reg, (nvswitch_device *device, NvU32 linkNumber), _arch) \
_op(NvlStatus, nvswitch_set_training_mode, (nvswitch_device *device), _arch) \
_op(NvU32, nvswitch_get_sublink_width, (nvswitch_device *device, NvU32 linkNumber), _arch) \
_op(NvBool, nvswitch_i2c_is_device_access_allowed, (nvswitch_device *device, NvU32 port, NvU8 addr, NvBool bIsRead), _arch) \
@ -234,6 +236,7 @@
_op(NvlStatus, nvswitch_ctrl_therm_read_power, (nvswitch_device *device, NVSWITCH_GET_POWER_PARAMS *info), _arch) \
_op(NvBool, nvswitch_does_link_need_termination_enabled, (nvswitch_device *device, nvlink_link *link), _arch) \
_op(NvlStatus, nvswitch_link_termination_setup, (nvswitch_device *device, nvlink_link *link), _arch) \
_op(NvlStatus, nvswitch_check_io_sanity, (nvswitch_device *device), _arch) \
#define NVSWITCH_HAL_FUNCTION_LIST_LS10(_op, _arch) \
_op(NvlStatus, nvswitch_launch_ALI, (nvswitch_device *device), _arch) \

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -184,6 +184,7 @@ NvlStatus nvswitch_inforom_bbx_add_sxid(nvswitch_device *device,
NvU32 data1, NvU32 data2);
NvlStatus nvswitch_inforom_bbx_get_sxid(nvswitch_device *device,
NVSWITCH_GET_SXIDS_PARAMS *params);
NvlStatus nvswitch_inforom_bbx_get_data(nvswitch_device *device, NvU8 dataType, void *params);
// InfoROM DEM APIs
NvlStatus nvswitch_inforom_dem_load(nvswitch_device *device);

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -169,4 +169,12 @@ nvswitch_bbx_get_sxid_lr10
NVSWITCH_GET_SXIDS_PARAMS * params
);
NvlStatus
nvswitch_bbx_get_data_lr10
(
nvswitch_device *device,
NvU8 dataType,
void *params
);
#endif //_INFOROM_LR10_H_

View File

@ -583,9 +583,12 @@ typedef struct
NvBool bDisabledRemoteEndLinkMaskCached;
} lr10_device;
#define NVSWITCH_NUM_DEVICES_PER_DELTA_LR10 6
typedef struct {
NvU32 switchPhysicalId;
NvU64 linkMask;
NvU64 accessLinkMask;
NvU64 trunkLinkMask;
} lr10_links_connected_to_disabled_remote_end;
#define NVSWITCH_GET_CHIP_DEVICE_LR10(_device) \
@ -649,6 +652,7 @@ void nvswitch_setup_link_loopback_mode_lr10(nvswitch_device *device, NvU32
void nvswitch_reset_persistent_link_hw_state_lr10(nvswitch_device *device, NvU32 linkNumber);
void nvswitch_store_topology_information_lr10(nvswitch_device *device, nvlink_link *link);
void nvswitch_init_lpwr_regs_lr10(nvlink_link *link);
void nvswitch_program_l1_scratch_reg_lr10(nvswitch_device *device, NvU32 linkNumber);
NvlStatus nvswitch_set_training_mode_lr10(nvswitch_device *device);
NvBool nvswitch_i2c_is_device_access_allowed_lr10(nvswitch_device *device, NvU32 port, NvU8 addr, NvBool bIsRead);
NvU32 nvswitch_get_sublink_width_lr10(nvswitch_device *device,NvU32 linkNumber);

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -154,4 +154,11 @@ nvswitch_bbx_get_sxid_ls10
NVSWITCH_GET_SXIDS_PARAMS * params
);
NvlStatus
nvswitch_bbx_get_data_ls10
(
nvswitch_device *device,
NvU8 dataType,
void *params
);
#endif //_INFOROM_LS10_H_

View File

@ -529,10 +529,20 @@ typedef struct
{
NvBool bLinkErrorsCallBackEnabled;
NvBool bLinkStateCallBackEnabled;
NvBool bResetAndDrainRetry;
NvU64 lastRetrainTime;
NvU64 lastLinkUpTime;
} NVLINK_LINK_ERROR_REPORTING_STATE;
typedef struct
{
NVLINK_LINK_ERROR_INFO_ERR_MASKS fatalIntrMask;
NVLINK_LINK_ERROR_INFO_ERR_MASKS nonFatalIntrMask;
} NVLINK_LINK_ERROR_REPORTING_DATA;
typedef struct
{
NVLINK_LINK_ERROR_REPORTING_STATE state;
NVLINK_LINK_ERROR_REPORTING_DATA data;
} NVLINK_LINK_ERROR_REPORTING;
typedef struct
@ -834,7 +844,6 @@ typedef const struct
#define nvswitch_setup_link_loopback_mode_ls10 nvswitch_setup_link_loopback_mode_lr10
#define nvswitch_link_lane_reversed_ls10 nvswitch_link_lane_reversed_lr10
#define nvswitch_request_tl_link_state_ls10 nvswitch_request_tl_link_state_lr10
#define nvswitch_i2c_get_port_info_ls10 nvswitch_i2c_get_port_info_lr10
#define nvswitch_i2c_set_hw_speed_mode_ls10 nvswitch_i2c_set_hw_speed_mode_lr10
@ -929,6 +938,7 @@ void nvswitch_corelib_clear_link_state_lr10(nvlink_link *link);
NvlStatus nvswitch_corelib_set_dl_link_mode_ls10(nvlink_link *link, NvU64 mode, NvU32 flags);
NvlStatus nvswitch_corelib_set_tx_mode_ls10(nvlink_link *link, NvU64 mode, NvU32 flags);
void nvswitch_init_lpwr_regs_ls10(nvlink_link *link);
void nvswitch_program_l1_scratch_reg_ls10(nvswitch_device *device, NvU32 linkNumber);
NvlStatus nvswitch_minion_service_falcon_interrupts_ls10(nvswitch_device *device, NvU32 instance);
@ -986,6 +996,7 @@ NvlStatus nvswitch_reset_and_drain_links_ls10(nvswitch_device *device, NvU64 lin
void nvswitch_service_minion_all_links_ls10(nvswitch_device *device);
NvlStatus nvswitch_ctrl_get_board_part_number_ls10(nvswitch_device *device, NVSWITCH_GET_BOARD_PART_NUMBER_VECTOR *p);
void nvswitch_create_deferred_link_state_check_task_ls10(nvswitch_device *device, NvU32 nvlipt_instance, NvU32 link);
NvlStatus nvswitch_request_tl_link_state_ls10(nvlink_link *link, NvU32 tlLinkState, NvBool bSync);
//
// SU generated functions

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -46,6 +46,9 @@ typedef enum _MINION_STATUS
MINION_ALARM_BUSY = 80,
} MINION_STATUS;
#define LINKSTATUS_RESET 0x0
#define LINKSTATUS_UNINIT 0x1
#define LINKSTATUS_LANESHUTDOWN 0x13
#define LINKSTATUS_EMERGENCY_SHUTDOWN 0x29
#define LINKSTATUS_INITPHASE1 0x24
#define LINKSTATUS_ACTIVE_PENDING 0x25
#endif // _MINION_NVLINK_DEFINES_PUBLIC_H_

View File

@ -751,7 +751,7 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
0x00f0b305, 0x0a09584a, 0x90014afe, 0xafb508aa, 0x010f9801, 0xb60093f0, 0xa9b50294, 0x02afb503,
0xb2100918, 0x18a9351b, 0xb5020f98, 0x099804af, 0x05a9b503, 0xa0a000bf, 0x005b0b7e, 0xf001a6b0,
0x9a120b9c, 0x59ab3e01, 0xfb020a00, 0xe27e1c15, 0x943d0059, 0xf001a6b0, 0xa6f00bac, 0xa29a3c01,
0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a09f8,
0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a02f8,
0x12f900f8, 0x000f8c89, 0xf20a99bf, 0x380090b3, 0x000fa881, 0xf10a10bf, 0x2c0004b3, 0x000a747e,
0x19a00109, 0x000f9889, 0x948990a0, 0xff0f0010, 0x90899fa0, 0x90a0000f, 0x000f9489, 0x587e9fa0,
0x10a00037, 0x12f911fb, 0x000f8c89, 0xb4bd04bd, 0xb44c90a0, 0x0fac8a00, 0x0b947e00, 0x0cb4bd00,
@ -1370,7 +1370,7 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
0xb232f900, 0xbdb2b2a1, 0x3ef00304, 0xbf00a6f0, 0x01009019, 0x93a61ab2, 0x0a090df4, 0xa6f73e03,
0xf493a600, 0x020a091b, 0x00a6f73e, 0x00a6aa7e, 0x08f402a6, 0xfba4bddd, 0xf830f431, 0x0005dcdf,
0xbf82f900, 0x0149feff, 0xb2289990, 0xb29fa0a3, 0x00a9b3b8, 0xb0b30084, 0x47fe7f00, 0x05a49801,
0x54bd24bd, 0x779014bd, 0xa7613e24, 0x0c3a9800, 0x02bc94bd, 0xb279a0b0, 0xb65f7e7c, 0x0f79bf00,
0x14bd54bd, 0x779024bd, 0xa7613e24, 0x0c3a9800, 0x02bc94bd, 0xb279a0b0, 0xb65f7e7c, 0x0f79bf00,
0xf49fa6ff, 0x643d090b, 0x00a74f3e, 0x90015590, 0x04a60100, 0x33d908f4, 0x90070060, 0x24bc0111,
0x03399820, 0x18f429a6, 0xbd01060b, 0xa7523e04, 0xb24bb200, 0x16fc7e1a, 0xf45aa600, 0x1190060d,
0x06399801, 0x19a6f43d, 0x0f050cf4, 0xbd8f2001, 0xa7973ea4, 0xfe020a00, 0x99900149, 0xd99fbf28,
@ -1420,7 +1420,7 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
0x070b943a, 0xb200804c, 0xb7797e2d, 0x0ca1b000, 0xb600adb3, 0x05291801, 0x76042f18, 0xf4f00894,
0xe59fffff, 0xe966ff09, 0x01980bf5, 0xffffe9e4, 0x08f589a6, 0xf4bd018e, 0x18902fbc, 0x9d330999,
0x90018200, 0xf4b301ff, 0xfc3ef207, 0x8e3c00ae, 0xf59f26f2, 0xc4016d08, 0x94f0fffd, 0x529dbcff,
0x0df456a6, 0x9065b205, 0xe4bd10d9, 0x3db029bc, 0x3ec43da4, 0xb100ada7, 0xf5006fd6, 0xb401450c,
0x0df456a6, 0x9065b205, 0xa43d10d9, 0x3db029bc, 0x3ee4bdc4, 0xb100ada7, 0xf5006fd6, 0xb401450c,
0xbe3c0b10, 0xf81e3c98, 0x0bf4f926, 0xff94f017, 0xfd009939, 0x9033049f, 0x010a0600, 0x0ce9bf3c,
0x01ee9001, 0xa601dd90, 0xce08f4e5, 0xed00c933, 0xf0293f00, 0x0bf40894, 0x00a93308, 0x94bd00d0,
0x91b03ab2, 0x1391b014, 0x301291b0, 0x4bfe5b91, 0x5bbb9001, 0x00a6f97e, 0xadb3a0b2, 0x3400ef00,
@ -2269,8 +2269,8 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x979b9cb7, 0x7359186e, 0x8b211603, 0x878da8fe,
0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x269562e0, 0x626d8a06, 0xc3df044b, 0x11ecee8e,
0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x6073f3d9, 0x573ea3ef, 0xf0764322, 0xf8dacef7,
0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0xe0830635, 0xb9c7326b, 0x27f96395, 0x7078f754,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,

View File

@ -751,7 +751,7 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
0x00f0b305, 0x0a09584a, 0x90014afe, 0xafb508aa, 0x010f9801, 0xb60093f0, 0xa9b50294, 0x02afb503,
0xb2100918, 0x18a9351b, 0xb5020f98, 0x099804af, 0x05a9b503, 0xa0a000bf, 0x005b0b7e, 0xf001a6b0,
0x9a120b9c, 0x59ab3e01, 0xfb020a00, 0xe27e1c15, 0x943d0059, 0xf001a6b0, 0xa6f00bac, 0xa29a3c01,
0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a09f8,
0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a02f8,
0x12f900f8, 0x000f8c89, 0xf20a99bf, 0x380090b3, 0x000fa881, 0xf10a10bf, 0x2c0004b3, 0x000a747e,
0x19a00109, 0x000f9889, 0x948990a0, 0xff0f0010, 0x90899fa0, 0x90a0000f, 0x000f9489, 0x587e9fa0,
0x10a00037, 0x12f911fb, 0x000f8c89, 0xb4bd04bd, 0xb44c90a0, 0x0fac8a00, 0x0b947e00, 0x0cb4bd00,
@ -1370,7 +1370,7 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
0xb232f900, 0xbdb2b2a1, 0x3ef00304, 0xbf00a6f0, 0x01009019, 0x93a61ab2, 0x0a090df4, 0xa6f73e03,
0xf493a600, 0x020a091b, 0x00a6f73e, 0x00a6aa7e, 0x08f402a6, 0xfba4bddd, 0xf830f431, 0x0005dcdf,
0xbf82f900, 0x0149feff, 0xb2289990, 0xb29fa0a3, 0x00a9b3b8, 0xb0b30084, 0x47fe7f00, 0x05a49801,
0x54bd24bd, 0x779014bd, 0xa7613e24, 0x0c3a9800, 0x02bc94bd, 0xb279a0b0, 0xb65f7e7c, 0x0f79bf00,
0x14bd54bd, 0x779024bd, 0xa7613e24, 0x0c3a9800, 0x02bc94bd, 0xb279a0b0, 0xb65f7e7c, 0x0f79bf00,
0xf49fa6ff, 0x643d090b, 0x00a74f3e, 0x90015590, 0x04a60100, 0x33d908f4, 0x90070060, 0x24bc0111,
0x03399820, 0x18f429a6, 0xbd01060b, 0xa7523e04, 0xb24bb200, 0x16fc7e1a, 0xf45aa600, 0x1190060d,
0x06399801, 0x19a6f43d, 0x0f050cf4, 0xbd8f2001, 0xa7973ea4, 0xfe020a00, 0x99900149, 0xd99fbf28,
@ -1420,7 +1420,7 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
0x070b943a, 0xb200804c, 0xb7797e2d, 0x0ca1b000, 0xb600adb3, 0x05291801, 0x76042f18, 0xf4f00894,
0xe59fffff, 0xe966ff09, 0x01980bf5, 0xffffe9e4, 0x08f589a6, 0xf4bd018e, 0x18902fbc, 0x9d330999,
0x90018200, 0xf4b301ff, 0xfc3ef207, 0x8e3c00ae, 0xf59f26f2, 0xc4016d08, 0x94f0fffd, 0x529dbcff,
0x0df456a6, 0x9065b205, 0xe4bd10d9, 0x3db029bc, 0x3ec43da4, 0xb100ada7, 0xf5006fd6, 0xb401450c,
0x0df456a6, 0x9065b205, 0xa43d10d9, 0x3db029bc, 0x3ee4bdc4, 0xb100ada7, 0xf5006fd6, 0xb401450c,
0xbe3c0b10, 0xf81e3c98, 0x0bf4f926, 0xff94f017, 0xfd009939, 0x9033049f, 0x010a0600, 0x0ce9bf3c,
0x01ee9001, 0xa601dd90, 0xce08f4e5, 0xed00c933, 0xf0293f00, 0x0bf40894, 0x00a93308, 0x94bd00d0,
0x91b03ab2, 0x1391b014, 0x301291b0, 0x4bfe5b91, 0x5bbb9001, 0x00a6f97e, 0xadb3a0b2, 0x3400ef00,
@ -2269,8 +2269,8 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x979b9cb7, 0x7359186e, 0x8b211603, 0x878da8fe,
0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x269562e0, 0x626d8a06, 0xc3df044b, 0x11ecee8e,
0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x6073f3d9, 0x573ea3ef, 0xf0764322, 0xf8dacef7,
0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0xe0830635, 0xb9c7326b, 0x27f96395, 0x7078f754,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -130,3 +130,21 @@ nvswitch_inforom_bbx_get_sxid
return status;
}
NvlStatus
nvswitch_inforom_bbx_get_data
(
nvswitch_device *device,
NvU8 dataType,
void *params
)
{
NvlStatus status;
status = device->hal.nvswitch_bbx_get_data(device, dataType, params);
if (status != NVL_SUCCESS)
{
NVSWITCH_PRINT(device, ERROR, "%s: (type=%d) failed, status=%d\n", __FUNCTION__, dataType, status);
}
return status;
}

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -32,6 +32,7 @@
#include "nvVer.h"
#include "regkey_nvswitch.h"
#include "inforom/inforom_nvl_v3_nvswitch.h"
#include "soe/soeififr.h"
//
// TODO: Split individual object hals to their own respective files
@ -1280,3 +1281,14 @@ nvswitch_bbx_get_sxid_lr10
return -NVL_ERR_NOT_SUPPORTED;
}
NvlStatus
nvswitch_bbx_get_data_lr10
(
nvswitch_device *device,
NvU8 dataType,
void *params
)
{
return -NVL_ERR_NOT_SUPPORTED;
}

Some files were not shown because too many files have changed in this diff Show More