mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2025-02-27 09:54:14 +01:00

commit e558660fc2 (535.43.24)
parent 2a3b58b8c8

CHANGELOG.md: 11 lines changed
@@ -2,7 +2,7 @@
## Release 535 Entries

### [535.54.03] 2023-06-14
### [535.43.24] 2024-01-31

### [535.43.23] 2024-01-24

@@ -30,6 +30,7 @@
#### Fixed

- Fixed building main against current centos stream 8 fails, [#550](https://github.com/NVIDIA/open-gpu-kernel-modules/issues/550) by @airlied
- Fixed console restore with traditional VGA consoles.

#### Added

@@ -58,6 +59,14 @@
## Release 525 Entries

### [525.147.05] 2023-10-31

#### Fixed

- Fix nvidia_p2p_get_pages(): Fix double-free in register-callback error path, [#557](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/557) by @BrendanCunningham

### [525.125.06] 2023-06-26

### [525.116.04] 2023-05-09

### [525.116.03] 2023-04-25
@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source

This is the source release of the NVIDIA Linux open GPU kernel modules,
version 535.43.23.
version 535.43.24.

## How to Build

@@ -17,7 +17,7 @@ as root:

Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
535.43.23 driver release. This can be achieved by installing
535.43.24 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,

@@ -180,7 +180,7 @@ software applications.
## Compatible GPUs

The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 535.43.23 release,
(see the table below). However, in the 535.43.24 release,
GeForce and Workstation support is still considered alpha-quality.

To enable use of the open kernel modules on GeForce and Workstation GPUs,

@@ -188,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:

https://us.download.nvidia.com/XFree86/Linux-x86_64/535.43.23/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.43.24/README/kernel_open.html

In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.43.23\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.43.24\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

@@ -123,6 +123,9 @@ ifneq ($(wildcard /proc/sgi_uv),)
EXTRA_CFLAGS += -DNV_CONFIG_X86_UV
endif

ifdef VGX_FORCE_VFIO_PCI_CORE
EXTRA_CFLAGS += -DNV_VGPU_FORCE_VFIO_PCI_CORE
endif

#
# The conftest.sh script tests various aspects of the target kernel.
@@ -2067,4 +2067,7 @@ typedef enum
#include <linux/clk-provider.h>
#endif

#define NV_EXPORT_SYMBOL(symbol) EXPORT_SYMBOL_GPL(symbol)
#define NV_CHECK_EXPORT_SYMBOL(symbol) NV_IS_EXPORT_SYMBOL_PRESENT_##symbol

#endif /* _NV_LINUX_H_ */
@@ -924,6 +924,7 @@ NV_STATUS NV_API_CALL rm_ioctl (nvidia_stack_t *, nv_state_t *
NvBool NV_API_CALL rm_isr (nvidia_stack_t *, nv_state_t *, NvU32 *);
void NV_API_CALL rm_isr_bh (nvidia_stack_t *, nv_state_t *);
void NV_API_CALL rm_isr_bh_unlocked (nvidia_stack_t *, nv_state_t *);
NvBool NV_API_CALL rm_is_msix_allowed (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_power_management (nvidia_stack_t *, nv_state_t *, nv_pm_action_t);
NV_STATUS NV_API_CALL rm_stop_user_channels (nvidia_stack_t *, nv_state_t *);
NV_STATUS NV_API_CALL rm_restart_user_channels (nvidia_stack_t *, nv_state_t *);
@@ -207,9 +207,13 @@ enum os_pci_req_atomics_type {
OS_INTF_PCIE_REQ_ATOMICS_128BIT
};
NV_STATUS NV_API_CALL os_enable_pci_req_atomics (void *, enum os_pci_req_atomics_type);
NV_STATUS NV_API_CALL os_get_numa_node_memory_usage (NvS32, NvU64 *, NvU64 *);
NV_STATUS NV_API_CALL os_numa_add_gpu_memory (void *, NvU64, NvU64, NvU32 *);
NV_STATUS NV_API_CALL os_numa_remove_gpu_memory (void *, NvU64, NvU64, NvU32);
NV_STATUS NV_API_CALL os_offline_page_at_address(NvU64 address);
void* NV_API_CALL os_get_pid_info(void);
void NV_API_CALL os_put_pid_info(void *pid_info);
NV_STATUS NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid);

extern NvU32 os_page_size;
extern NvU64 os_page_mask;
@@ -316,7 +316,7 @@ export_symbol_present_conftest() {
SYMBOL="$1"
TAB=' '

if grep -e "${TAB}${SYMBOL}${TAB}.*${TAB}EXPORT_SYMBOL.*\$" \
if grep -e "${TAB}${SYMBOL}${TAB}.*${TAB}EXPORT_SYMBOL\(_GPL\)\?\s*\$" \
"$OUTPUT/Module.symvers" >/dev/null 2>&1; then
echo "#define NV_IS_EXPORT_SYMBOL_PRESENT_$SYMBOL 1" |
append_conftest "symbols"

@@ -337,7 +337,7 @@ export_symbol_gpl_conftest() {
SYMBOL="$1"
TAB=' '

if grep -e "${TAB}${SYMBOL}${TAB}.*${TAB}EXPORT_\(UNUSED_\)*SYMBOL_GPL\$" \
if grep -e "${TAB}${SYMBOL}${TAB}.*${TAB}EXPORT_\(UNUSED_\)*SYMBOL_GPL\s*\$" \
"$OUTPUT/Module.symvers" >/dev/null 2>&1; then
echo "#define NV_IS_EXPORT_SYMBOL_GPL_$SYMBOL 1" |
append_conftest "symbols"
@@ -4468,6 +4468,24 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE" "" "types"
;;

mmu_notifier_ops_arch_invalidate_secondary_tlbs)
#
# Determine if the mmu_notifier_ops struct has the
# 'arch_invalidate_secondary_tlbs' member.
#
# struct mmu_notifier_ops.invalidate_range was renamed to
# arch_invalidate_secondary_tlbs by commit 1af5a8109904
# ("mmu_notifiers: rename invalidate_range notifier") due to be
# added in v6.6
CODE="
#include <linux/mmu_notifier.h>
int conftest_mmu_notifier_ops_arch_invalidate_secondary_tlbs(void) {
return offsetof(struct mmu_notifier_ops, arch_invalidate_secondary_tlbs);
}"

compile_check_conftest "$CODE" "NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS" "" "types"
;;

drm_format_num_planes)
#
# Determine if drm_format_num_planes() function is present.
@@ -5636,23 +5654,6 @@ compile_test() {
compile_check_conftest "$CODE" "NV_GPIO_TO_IRQ_PRESENT" "" "functions"
;;

migrate_vma_setup)
#
# Determine if migrate_vma_setup() function is present
#
# migrate_vma_setup() function was added by commit
# a7d1f22bb74f32cf3cd93f52776007e161f1a738 ("mm: turn migrate_vma
# upside down) in v5.4.
# (2019-08-20).
CODE="
#include <linux/migrate.h>
int conftest_migrate_vma_setup(void) {
migrate_vma_setup();
}"

compile_check_conftest "$CODE" "NV_MIGRATE_VMA_SETUP_PRESENT" "" "functions"
;;

migrate_vma_added_flags)
#
# Determine if migrate_vma structure has flags
@@ -5743,23 +5744,25 @@ compile_test() {
compile_check_conftest "$CODE" "NV_IOASID_GET_PRESENT" "" "functions"
;;

mm_pasid_set)
mm_pasid_drop)
#
# Determine if mm_pasid_set() function is present
# Determine if mm_pasid_drop() function is present
#
# Added by commit 701fac40384f ("iommu/sva: Assign a PASID to mm
# on PASID allocation and free it on mm exit") in v5.18.
# Moved to linux/iommu.h in commit cd3891158a77 ("iommu/sva: Move
# PASID helpers to sva code") in v6.4.
#
# mm_pasid_set() function was added by commit
# 701fac40384f07197b106136012804c3cae0b3de (iommu/sva: Assign a
# PASID to mm on PASID allocation and free it on mm exit) in v5.18.
# (2022-02-15).
CODE="
#if defined(NV_LINUX_SCHED_MM_H_PRESENT)
#include <linux/sched/mm.h>
#endif
void conftest_mm_pasid_set(void) {
mm_pasid_set();
#include <linux/iommu.h>
void conftest_mm_pasid_drop(void) {
mm_pasid_drop();
}"

compile_check_conftest "$CODE" "NV_MM_PASID_SET_PRESENT" "" "functions"
compile_check_conftest "$CODE" "NV_MM_PASID_DROP_PRESENT" "" "functions"
;;

drm_crtc_state_has_no_vblank)
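
For context on how these probes are consumed: compile_check_conftest compiles the CODE fragment above against the target kernel and, when it builds, emits a preprocessor define that the rest of the driver can test. A minimal sketch of that pattern, with an illustrative consumer (in this release the define is only used as a presence marker for arm_smmu_sva_bind(); see uvm_ats_sva.h further below):

    /* Sketch only: effect of the conftest probe above and a hedged example
     * of how such a generated define is typically consumed. */

    /* Emitted into the generated conftest header when the probe compiles: */
    #define NV_MM_PASID_DROP_PRESENT 1

    /* Consumer, mirroring the check used by the UVM ATS/SVA code: */
    #if defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_DROP_PRESENT)
        /* assume arm_smmu_sva_bind() is available, so ATS/SVA can be enabled */
    #endif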
@@ -6279,6 +6282,21 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MEMORY_FAILURE_MF_SW_SIMULATED_DEFINED" "" "types"
;;

crypto_tfm_ctx_aligned)
# Determine if 'crypto_tfm_ctx_aligned' is defined.
#
# Removed by commit 25c74a39e0f6 ("crypto: hmac - remove unnecessary
# alignment logic") in v6.7.
#
CODE="
#include <crypto/algapi.h>
void conftest_crypto_tfm_ctx_aligned(void) {
(void)crypto_tfm_ctx_aligned();
}"

compile_check_conftest "$CODE" "NV_CRYPTO_TFM_CTX_ALIGNED_PRESENT" "" "functions"
;;

crypto)
#
# Determine if we support various crypto functions.

@@ -6341,6 +6359,22 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MEMPOLICY_HAS_HOME_NODE" "" "types"
;;

mpol_preferred_many_present)
#
# Determine if MPOL_PREFERRED_MANY enum is present or not
#
# Added by commit b27abaccf8e8b ("mm/mempolicy: add
# MPOL_PREFERRED_MANY for multiple preferred nodes") in
# v5.15
#
CODE="
#include <linux/mempolicy.h>
int mpol_preferred_many = MPOL_PREFERRED_MANY;
"

compile_check_conftest "$CODE" "NV_MPOL_PREFERRED_MANY_PRESENT" "" "types"
;;

mmu_interval_notifier)
#
# Determine if mmu_interval_notifier struct is present or not

@@ -6356,6 +6390,21 @@ compile_test() {
compile_check_conftest "$CODE" "NV_MMU_INTERVAL_NOTIFIER" "" "types"
;;

drm_unlocked_ioctl_flag_present)
# Determine if DRM_UNLOCKED IOCTL flag is present.
#
# DRM_UNLOCKED was removed by commit 2798ffcc1d6a ("drm: Remove
# locking for legacy ioctls and DRM_UNLOCKED") in Linux
# next-20231208.
CODE="
#if defined(NV_DRM_DRM_IOCTL_H_PRESENT)
#include <drm/drm_ioctl.h>
#endif
int flags = DRM_UNLOCKED;"

compile_check_conftest "$CODE" "NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT" "" "types"
;;

# When adding a new conftest entry, please use the correct format for
# specifying the relevant upstream Linux kernel commit.
#
@@ -6680,18 +6729,9 @@ case "$5" in
VFIO_PCI_CORE_PRESENT=1
fi

# When this sanity check is run via nvidia-installer, it sets ARCH as aarch64.
# But, when it is run via Kbuild, ARCH is set as arm64
if [ "$ARCH" = "aarch64" ]; then
ARCH="arm64"
fi

if [ "$VFIO_IOMMU_PRESENT" != "0" ] && [ "$KVM_PRESENT" != "0" ] ; then

# On x86_64, vGPU requires MDEV framework to be present.
# On aarch64, vGPU requires MDEV or vfio-pci-core framework to be present.
if ([ "$ARCH" = "arm64" ] && ([ "$VFIO_MDEV_PRESENT" != "0" ] || [ "$VFIO_PCI_CORE_PRESENT" != "0" ])) ||
([ "$ARCH" = "x86_64" ] && [ "$VFIO_MDEV_PRESENT" != "0" ];) then
# vGPU requires either MDEV or vfio-pci-core framework to be present.
if [ "$VFIO_MDEV_PRESENT" != "0" ] || [ "$VFIO_PCI_CORE_PRESENT" != "0" ]; then
exit 0
fi
fi

@@ -6702,14 +6742,10 @@ case "$5" in
echo "CONFIG_VFIO_IOMMU_TYPE1";
fi

if [ "$ARCH" = "arm64" ] && [ "$VFIO_MDEV_PRESENT" = "0" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
if [ "$VFIO_MDEV_PRESENT" = "0" ] && [ "$VFIO_PCI_CORE_PRESENT" = "0" ]; then
echo "either CONFIG_VFIO_MDEV or CONFIG_VFIO_PCI_CORE";
fi

if [ "$ARCH" = "x86_64" ] && [ "$VFIO_MDEV_PRESENT" = "0" ]; then
echo "CONFIG_VFIO_MDEV";
fi

if [ "$KVM_PRESENT" = "0" ]; then
echo "CONFIG_KVM";
fi
@@ -1312,9 +1312,21 @@ static const struct drm_ioctl_desc nv_drm_ioctls[] = {
DRM_RENDER_ALLOW|DRM_UNLOCKED),
#endif

/*
* DRM_UNLOCKED is implicit for all non-legacy DRM driver IOCTLs since Linux
* v4.10 commit fa5386459f06 "drm: Used DRM_LEGACY for all legacy functions"
* (Linux v4.4 commit ea487835e887 "drm: Enforce unlocked ioctl operation
* for kms driver ioctls" previously did it only for drivers that set the
* DRM_MODESET flag), so this will race with SET_CLIENT_CAP. Linux v4.11
* commit dcf727ab5d17 "drm: setclientcap doesn't need the drm BKL" also
* removed locking from SET_CLIENT_CAP so there is no use attempting to lock
* manually. The latter commit acknowledges that this can expose userspace
* to inconsistent behavior when racing with itself, but accepts that risk.
*/
DRM_IOCTL_DEF_DRV(NVIDIA_GET_CLIENT_CAPABILITY,
nv_drm_get_client_capability_ioctl,
0),

#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
DRM_IOCTL_DEF_DRV(NVIDIA_GET_CRTC_CRC32,
nv_drm_get_crtc_crc32_ioctl,
@@ -243,6 +243,15 @@ static int __nv_drm_nvkms_gem_obj_init(
NvU64 *pages = NULL;
NvU32 numPages = 0;

if ((size % PAGE_SIZE) != 0) {
NV_DRM_DEV_LOG_ERR(
nv_dev,
"NvKmsKapiMemory 0x%p size should be in a multiple of page size to "
"create a gem object",
pMemory);
return -EINVAL;
}

nv_nvkms_memory->pPhysicalAddress = NULL;
nv_nvkms_memory->pWriteCombinedIORemapAddress = NULL;
nv_nvkms_memory->physically_mapped = false;
@@ -582,6 +582,19 @@ static inline int nv_drm_format_num_planes(uint32_t format)

#endif /* defined(NV_DRM_FORMAT_MODIFIERS_PRESENT) */

/*
* DRM_UNLOCKED was removed with linux-next commit 2798ffcc1d6a ("drm: Remove
* locking for legacy ioctls and DRM_UNLOCKED"), but it was previously made
* implicit for all non-legacy DRM driver IOCTLs since Linux v4.10 commit
* fa5386459f06 "drm: Used DRM_LEGACY for all legacy functions" (Linux v4.4
* commit ea487835e887 "drm: Enforce unlocked ioctl operation for kms driver
* ioctls" previously did it only for drivers that set the DRM_MODESET flag), so
* it was effectively a no-op anyway.
*/
#if !defined(NV_DRM_UNLOCKED_IOCTL_FLAG_PRESENT)
#define DRM_UNLOCKED 0
#endif

/*
* drm_vma_offset_exact_lookup_locked() were added
* by kernel commit 2225cfe46bcc which was Signed-off-by:
@@ -133,3 +133,4 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_lookup
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_connector_put
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_driver_has_dumb_destroy
NV_CONFTEST_TYPE_COMPILE_TESTS += drm_unlocked_ioctl_flag_present
@@ -68,6 +68,9 @@ module_param_named(output_rounding_fix, output_rounding_fix, bool, 0400);
static bool disable_vrr_memclk_switch = false;
module_param_named(disable_vrr_memclk_switch, disable_vrr_memclk_switch, bool, 0400);

static bool opportunistic_display_sync = true;
module_param_named(opportunistic_display_sync, opportunistic_display_sync, bool, 0400);

/* These parameters are used for fault injection tests. Normally the defaults
* should be used. */
MODULE_PARM_DESC(fail_malloc, "Fail the Nth call to nvkms_alloc");

@@ -99,6 +102,11 @@ NvBool nvkms_disable_vrr_memclk_switch(void)
return disable_vrr_memclk_switch;
}

NvBool nvkms_opportunistic_display_sync(void)
{
return opportunistic_display_sync;
}

#define NVKMS_SYNCPT_STUBS_NEEDED

/*************************************************************************

@@ -200,9 +208,23 @@ static inline int nvkms_read_trylock_pm_lock(void)

static inline void nvkms_read_lock_pm_lock(void)
{
while (!down_read_trylock(&nvkms_pm_lock)) {
try_to_freeze();
cond_resched();
if ((current->flags & PF_NOFREEZE)) {
/*
* Non-freezable tasks (i.e. kthreads in this case) don't have to worry
* about being frozen during system suspend, but do need to block so
* that the CPU can go idle during s2idle. Do a normal uninterruptible
* blocking wait for the PM lock.
*/
down_read(&nvkms_pm_lock);
} else {
/*
* For freezable tasks, make sure we give the kernel an opportunity to
* freeze if taking the PM lock fails.
*/
while (!down_read_trylock(&nvkms_pm_lock)) {
try_to_freeze();
cond_resched();
}
}
}
@@ -99,6 +99,7 @@ typedef struct {
NvBool nvkms_output_rounding_fix(void);

NvBool nvkms_disable_vrr_memclk_switch(void);
NvBool nvkms_opportunistic_display_sync(void);

void nvkms_call_rm (void *ops);
void* nvkms_alloc (size_t size,
@@ -1,20 +1,25 @@
/* SPDX-License-Identifier: Linux-OpenIB */
/*
* Copyright (c) 2006, 2007 Cisco Systems, Inc. All rights reserved.
* Copyright (c) 2007, 2008 Mellanox Technologies. All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
* This software is available to you under a choice of one of two
* licenses. You may choose to be licensed under the terms of the GNU
* General Public License (GPL) Version 2, available from the file
* COPYING in the main directory of this source tree, or the
* OpenIB.org BSD license below:
*
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
* - Redistributions of source code must retain the above
* copyright notice, this list of conditions and the following
* disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF

@@ -43,7 +48,9 @@

MODULE_AUTHOR("Yishai Hadas");
MODULE_DESCRIPTION("NVIDIA GPU memory plug-in");
MODULE_LICENSE("Linux-OpenIB");
MODULE_LICENSE("Dual BSD/GPL");

MODULE_VERSION(DRV_VERSION);
enum {
NV_MEM_PEERDIRECT_SUPPORT_DEFAULT = 0,

@@ -53,7 +60,13 @@ static int peerdirect_support = NV_MEM_PEERDIRECT_SUPPORT_DEFAULT;
module_param(peerdirect_support, int, S_IRUGO);
MODULE_PARM_DESC(peerdirect_support, "Set level of support for Peer-direct, 0 [default] or 1 [legacy, for example MLNX_OFED 4.9 LTS]");

#define peer_err(FMT, ARGS...) printk(KERN_ERR "nvidia-peermem" " %s:%d " FMT, __FUNCTION__, __LINE__, ## ARGS)
#define peer_err(FMT, ARGS...) printk(KERN_ERR "nvidia-peermem" " %s:%d ERROR " FMT, __FUNCTION__, __LINE__, ## ARGS)
#ifdef NV_MEM_DEBUG
#define peer_trace(FMT, ARGS...) printk(KERN_DEBUG "nvidia-peermem" " %s:%d TRACE " FMT, __FUNCTION__, __LINE__, ## ARGS)
#else
#define peer_trace(FMT, ARGS...) do {} while (0)
#endif
#if defined(NV_MLNX_IB_PEER_MEM_SYMBOLS_PRESENT)

@@ -74,7 +87,10 @@ invalidate_peer_memory mem_invalidate_callback;
static void *reg_handle = NULL;
static void *reg_handle_nc = NULL;

#define NV_MEM_CONTEXT_MAGIC ((u64)0xF1F4F1D0FEF0DAD0ULL)

struct nv_mem_context {
u64 pad1;
struct nvidia_p2p_page_table *page_table;
struct nvidia_p2p_dma_mapping *dma_mapping;
u64 core_context;

@@ -86,8 +102,22 @@ struct nv_mem_context {
struct task_struct *callback_task;
int sg_allocated;
struct sg_table sg_head;
u64 pad2;
};

#define NV_MEM_CONTEXT_CHECK_OK(MC) ({ \
struct nv_mem_context *mc = (MC); \
int rc = ((0 != mc) && \
(READ_ONCE(mc->pad1) == NV_MEM_CONTEXT_MAGIC) && \
(READ_ONCE(mc->pad2) == NV_MEM_CONTEXT_MAGIC)); \
if (!rc) { \
peer_trace("invalid nv_mem_context=%px pad1=%016llx pad2=%016llx\n", \
mc, \
mc?mc->pad1:0, \
mc?mc->pad2:0); \
} \
rc; \
})

static void nv_get_p2p_free_callback(void *data)
{

@@ -97,8 +127,9 @@ static void nv_get_p2p_free_callback(void *data)
struct nvidia_p2p_dma_mapping *dma_mapping = NULL;

__module_get(THIS_MODULE);
if (!nv_mem_context) {
peer_err("nv_get_p2p_free_callback -- invalid nv_mem_context\n");

if (!NV_MEM_CONTEXT_CHECK_OK(nv_mem_context)) {
peer_err("detected invalid context, skipping further processing\n");
goto out;
}

@@ -169,9 +200,11 @@ static int nv_mem_acquire(unsigned long addr, size_t size, void *peer_mem_privat
/* Error case handled as not mine */
return 0;

nv_mem_context->pad1 = NV_MEM_CONTEXT_MAGIC;
nv_mem_context->page_virt_start = addr & GPU_PAGE_MASK;
nv_mem_context->page_virt_end = (addr + size + GPU_PAGE_SIZE - 1) & GPU_PAGE_MASK;
nv_mem_context->mapped_size = nv_mem_context->page_virt_end - nv_mem_context->page_virt_start;
nv_mem_context->pad2 = NV_MEM_CONTEXT_MAGIC;

ret = nvidia_p2p_get_pages(0, 0, nv_mem_context->page_virt_start, nv_mem_context->mapped_size,
&nv_mem_context->page_table, nv_mem_dummy_callback, nv_mem_context);

@@ -195,6 +228,7 @@ static int nv_mem_acquire(unsigned long addr, size_t size, void *peer_mem_privat
return 1;

err:
memset(nv_mem_context, 0, sizeof(*nv_mem_context));
kfree(nv_mem_context);

/* Error case handled as not mine */

@@ -342,6 +376,7 @@ static void nv_mem_release(void *context)
sg_free_table(&nv_mem_context->sg_head);
nv_mem_context->sg_allocated = 0;
}
memset(nv_mem_context, 0, sizeof(*nv_mem_context));
kfree(nv_mem_context);
module_put(THIS_MODULE);
return;
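
The pad1/pad2 fields and NV_MEM_CONTEXT_CHECK_OK added above are a canary check: known magic values are written at both ends of the context when it is set up and re-verified in the free callback, so a stale or corrupted context pointer is detected and skipped instead of being dereferenced. A stripped-down sketch of the same pattern, with hypothetical names, assuming nothing beyond standard kernel helpers:

    #include <linux/slab.h>
    #include <linux/types.h>

    #define EXAMPLE_MAGIC 0xF1F4F1D0FEF0DAD0ULL  /* same idea as NV_MEM_CONTEXT_MAGIC */

    struct example_ctx {
        u64 pad1;       /* canary at the start of the struct */
        void *payload;
        u64 pad2;       /* canary at the end of the struct */
    };

    static struct example_ctx *example_ctx_alloc(void)
    {
        struct example_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

        if (ctx) {
            ctx->pad1 = EXAMPLE_MAGIC;
            ctx->pad2 = EXAMPLE_MAGIC;
        }
        return ctx;
    }

    static bool example_ctx_valid(struct example_ctx *ctx)
    {
        /* Both canaries must still hold the magic value. */
        return ctx && READ_ONCE(ctx->pad1) == EXAMPLE_MAGIC &&
               READ_ONCE(ctx->pad2) == EXAMPLE_MAGIC;
    }

Zeroing the context before kfree(), as the new error paths do, clears the canaries as well, so a later use of the freed pointer fails this check.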
@@ -81,8 +81,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_memory_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += set_pages_uc
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ktime_get_raw_ts64
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ioasid_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_set
NV_CONFTEST_FUNCTION_COMPILE_TESTS += migrate_vma_setup
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mm_pasid_drop
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmget_not_zero
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mmgrab
NV_CONFTEST_FUNCTION_COMPILE_TESTS += iommu_sva_bind_device_has_drvdata_arg

@@ -100,6 +99,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += kmem_cache_has_kobj_remove_work
NV_CONFTEST_TYPE_COMPILE_TESTS += sysfs_slab_unlink
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_fault_t
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_invalidate_range
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_notifier_ops_arch_invalidate_secondary_tlbs
NV_CONFTEST_TYPE_COMPILE_TESTS += proc_ops
NV_CONFTEST_TYPE_COMPILE_TESTS += timespec64
NV_CONFTEST_TYPE_COMPILE_TESTS += mm_has_mmap_lock

@@ -110,6 +110,8 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_mm_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += handle_mm_fault_has_pt_regs_arg
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_unified_nodes
NV_CONFTEST_TYPE_COMPILE_TESTS += mempolicy_has_home_node
NV_CONFTEST_TYPE_COMPILE_TESTS += mpol_preferred_many_present
NV_CONFTEST_TYPE_COMPILE_TESTS += mmu_interval_notifier

NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_int_active_memcg
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_migrate_vma_setup
@@ -571,7 +571,6 @@ static void uvm_vm_open_managed_entry(struct vm_area_struct *vma)
static void uvm_vm_close_managed(struct vm_area_struct *vma)
{
uvm_va_space_t *va_space = uvm_va_space_get(vma->vm_file);
uvm_processor_id_t gpu_id;
bool make_zombie = false;

if (current->mm != NULL)

@@ -606,12 +605,6 @@ static void uvm_vm_close_managed(struct vm_area_struct *vma)

uvm_destroy_vma_managed(vma, make_zombie);

// Notify GPU address spaces that the fault buffer needs to be flushed to
// avoid finding stale entries that can be attributed to new VA ranges
// reallocated at the same address.
for_each_gpu_id_in_mask(gpu_id, &va_space->registered_gpu_va_spaces) {
uvm_processor_mask_set_atomic(&va_space->needs_fault_buffer_flush, gpu_id);
}
uvm_va_space_up_write(va_space);

if (current->mm != NULL)
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Copyright (c) 2021-2023 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to

@@ -94,4 +94,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->map_remap_larger_page_promotion = false;

parent_gpu->plc_supported = true;

parent_gpu->no_ats_range_required = false;
}

@@ -101,4 +101,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->map_remap_larger_page_promotion = false;

parent_gpu->plc_supported = true;

parent_gpu->no_ats_range_required = false;
}
@@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018 NVIDIA Corporation
Copyright (c) 2023 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to

@@ -107,10 +107,10 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
return status;
}

static void flush_tlb_write_faults(uvm_gpu_va_space_t *gpu_va_space,
NvU64 addr,
size_t size,
uvm_fault_client_type_t client_type)
static void flush_tlb_va_region(uvm_gpu_va_space_t *gpu_va_space,
NvU64 addr,
size_t size,
uvm_fault_client_type_t client_type)
{
uvm_ats_fault_invalidate_t *ats_invalidate;

@@ -119,12 +119,12 @@ static void flush_tlb_write_faults(uvm_gpu_va_space_t *gpu_va_space,
else
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;

if (!ats_invalidate->write_faults_in_batch) {
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->write_faults_tlb_batch);
ats_invalidate->write_faults_in_batch = true;
if (!ats_invalidate->tlb_batch_pending) {
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->tlb_batch);
ats_invalidate->tlb_batch_pending = true;
}

uvm_tlb_batch_invalidate(&ats_invalidate->write_faults_tlb_batch, addr, size, PAGE_SIZE, UVM_MEMBAR_NONE);
uvm_tlb_batch_invalidate(&ats_invalidate->tlb_batch, addr, size, PAGE_SIZE, UVM_MEMBAR_NONE);
}

static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,

@@ -149,7 +149,11 @@ static void ats_batch_select_residency(uvm_gpu_va_space_t *gpu_va_space,

mode = vma_policy->mode;

if ((mode == MPOL_BIND) || (mode == MPOL_PREFERRED_MANY) || (mode == MPOL_PREFERRED)) {
if ((mode == MPOL_BIND)
#if defined(NV_MPOL_PREFERRED_MANY_PRESENT)
|| (mode == MPOL_PREFERRED_MANY)
#endif
|| (mode == MPOL_PREFERRED)) {
int home_node = NUMA_NO_NODE;

#if defined(NV_MEMPOLICY_HAS_HOME_NODE)

@@ -467,6 +471,10 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
uvm_page_mask_and(write_fault_mask, write_fault_mask, read_fault_mask);
else
uvm_page_mask_zero(write_fault_mask);

// There are no pending faults beyond write faults to RO region.
if (uvm_page_mask_empty(read_fault_mask))
return status;
}

ats_batch_select_residency(gpu_va_space, vma, ats_context);

@@ -489,6 +497,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,

if (vma->vm_flags & VM_WRITE) {
uvm_page_mask_region_fill(faults_serviced_mask, subregion);
uvm_ats_smmu_invalidate_tlbs(gpu_va_space, start, length);

// The Linux kernel never invalidates TLB entries on mapping
// permission upgrade. This is a problem if the GPU has cached

@@ -499,7 +508,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
// infinite loop because we just forward the fault to the Linux
// kernel and it will see that the permissions in the page table are
// correct. Therefore, we flush TLB entries on ATS write faults.
flush_tlb_write_faults(gpu_va_space, start, length, client_type);
flush_tlb_va_region(gpu_va_space, start, length, client_type);
}
else {
uvm_page_mask_region_fill(reads_serviced_mask, subregion);

@@ -522,6 +531,15 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
return status;

uvm_page_mask_region_fill(faults_serviced_mask, subregion);

// Similarly to permission upgrade scenario, discussed above, GPU
// will not re-fetch the entry if the PTE is invalid and page size
// is 4K. To avoid infinite faulting loop, invalidate TLB for every
// new translation written explicitly like in the case of permission
// upgrade.
if (PAGE_SIZE == UVM_PAGE_SIZE_4K)
flush_tlb_va_region(gpu_va_space, start, length, client_type);

}

return status;

@@ -556,7 +574,7 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
NV_STATUS status;
uvm_push_t push;

if (!ats_invalidate->write_faults_in_batch)
if (!ats_invalidate->tlb_batch_pending)
return NV_OK;

UVM_ASSERT(gpu_va_space);

@@ -568,7 +586,7 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
"Invalidate ATS entries");

if (status == NV_OK) {
uvm_tlb_batch_end(&ats_invalidate->write_faults_tlb_batch, &push, UVM_MEMBAR_NONE);
uvm_tlb_batch_end(&ats_invalidate->tlb_batch, &push, UVM_MEMBAR_NONE);
uvm_push_end(&push);

// Add this push to the GPU's tracker so that fault replays/clears can

@@ -576,8 +594,7 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
status = uvm_tracker_add_push_safe(out_tracker, &push);
}

ats_invalidate->write_faults_in_batch = false;
ats_invalidate->tlb_batch_pending = false;

return status;
}
@@ -52,7 +52,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next);

// This function performs pending TLB invalidations for ATS and clears the
// ats_invalidate->write_faults_in_batch flag
// ats_invalidate->tlb_batch_pending flag
NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_invalidate_t *ats_invalidate,
uvm_tracker_t *out_tracker);
@@ -29,8 +29,13 @@
#include "uvm_va_space.h"
#include "uvm_va_space_mm.h"

#include <asm/io.h>
#include <linux/log2.h>
#include <linux/iommu.h>
#include <linux/mm_types.h>
#include <linux/acpi.h>
#include <linux/device.h>
#include <linux/mmu_context.h>

// linux/sched/mm.h is needed for mmget_not_zero and mmput to get the mm
// reference required for the iommu_sva_bind_device() call. This header is not

@@ -46,17 +51,276 @@
#define UVM_IOMMU_SVA_BIND_DEVICE(dev, mm) iommu_sva_bind_device(dev, mm)
#endif

// Type to represent a 128-bit SMMU command queue command.
struct smmu_cmd {
NvU64 low;
NvU64 high;
};

// Base address of SMMU CMDQ-V for GSMMU0.
#define SMMU_CMDQV_BASE_ADDR(smmu_base) (smmu_base + 0x200000)
#define SMMU_CMDQV_BASE_LEN 0x00830000

// CMDQV configuration is done by firmware but we check status here.
#define SMMU_CMDQV_CONFIG 0x0
#define SMMU_CMDQV_CONFIG_CMDQV_EN BIT(0)

// Used to map a particular VCMDQ to a VINTF.
#define SMMU_CMDQV_CMDQ_ALLOC_MAP(vcmdq_id) (0x200 + 0x4 * (vcmdq_id))
#define SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC BIT(0)

// Shift for the field containing the index of the virtual interface
// owning the VCMDQ.
#define SMMU_CMDQV_CMDQ_ALLOC_MAP_VIRT_INTF_INDX_SHIFT 15

// Base address for the VINTF registers.
#define SMMU_VINTF_BASE_ADDR(cmdqv_base_addr, vintf_id) (cmdqv_base_addr + 0x1000 + 0x100 * (vintf_id))

// Virtual interface (VINTF) configuration registers. The WAR only
// works on baremetal so we need to configure ourselves as the
// hypervisor owner.
#define SMMU_VINTF_CONFIG 0x0
#define SMMU_VINTF_CONFIG_ENABLE BIT(0)
#define SMMU_VINTF_CONFIG_HYP_OWN BIT(17)

#define SMMU_VINTF_STATUS 0x0
#define SMMU_VINTF_STATUS_ENABLED BIT(0)

// Caclulates the base address for a particular VCMDQ instance.
#define SMMU_VCMDQ_BASE_ADDR(cmdqv_base_addr, vcmdq_id) (cmdqv_base_addr + 0x10000 + 0x80 * (vcmdq_id))

// SMMU command queue consumer index register. Updated by SMMU
// when commands are consumed.
#define SMMU_VCMDQ_CONS 0x0

// SMMU command queue producer index register. Updated by UVM when
// commands are added to the queue.
#define SMMU_VCMDQ_PROD 0x4

// Configuration register used to enable a VCMDQ.
#define SMMU_VCMDQ_CONFIG 0x8
#define SMMU_VCMDQ_CONFIG_ENABLE BIT(0)

// Status register used to check the VCMDQ is enabled.
#define SMMU_VCMDQ_STATUS 0xc
#define SMMU_VCMDQ_STATUS_ENABLED BIT(0)

// Base address offset for the VCMDQ registers.
#define SMMU_VCMDQ_CMDQ_BASE 0x10000

// Size of the command queue. Each command is 16 bytes and we can't
// have a command queue greater than one page in size.
#define SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE (PAGE_SHIFT - ilog2(sizeof(struct smmu_cmd)))
#define SMMU_VCMDQ_CMDQ_ENTRIES (1UL << SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE)

// We always use VINTF63 for the WAR
#define VINTF 63
static void smmu_vintf_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val)
{
iowrite32(val, SMMU_VINTF_BASE_ADDR(smmu_cmdqv_base, VINTF) + reg);
}

static NvU32 smmu_vintf_read32(void __iomem *smmu_cmdqv_base, int reg)
{
return ioread32(SMMU_VINTF_BASE_ADDR(smmu_cmdqv_base, VINTF) + reg);
}

// We always use VCMDQ127 for the WAR
#define VCMDQ 127
void smmu_vcmdq_write32(void __iomem *smmu_cmdqv_base, int reg, NvU32 val)
{
iowrite32(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
}

NvU32 smmu_vcmdq_read32(void __iomem *smmu_cmdqv_base, int reg)
{
return ioread32(SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
}

static void smmu_vcmdq_write64(void __iomem *smmu_cmdqv_base, int reg, NvU64 val)
{
iowrite64(val, SMMU_VCMDQ_BASE_ADDR(smmu_cmdqv_base, VCMDQ) + reg);
}
// Fix for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU
// TLB invalidates on read-only to read-write upgrades
static NV_STATUS uvm_ats_smmu_war_init(uvm_parent_gpu_t *parent_gpu)
{
uvm_spin_loop_t spin;
NV_STATUS status;
unsigned long cmdqv_config;
void __iomem *smmu_cmdqv_base;
struct acpi_iort_node *node;
struct acpi_iort_smmu_v3 *iort_smmu;

node = *(struct acpi_iort_node **) dev_get_platdata(parent_gpu->pci_dev->dev.iommu->iommu_dev->dev->parent);
iort_smmu = (struct acpi_iort_smmu_v3 *) node->node_data;

smmu_cmdqv_base = ioremap(SMMU_CMDQV_BASE_ADDR(iort_smmu->base_address), SMMU_CMDQV_BASE_LEN);
if (!smmu_cmdqv_base)
return NV_ERR_NO_MEMORY;

parent_gpu->smmu_war.smmu_cmdqv_base = smmu_cmdqv_base;
cmdqv_config = ioread32(smmu_cmdqv_base + SMMU_CMDQV_CONFIG);
if (!(cmdqv_config & SMMU_CMDQV_CONFIG_CMDQV_EN)) {
status = NV_ERR_OBJECT_NOT_FOUND;
goto out;
}

// Allocate SMMU CMDQ pages for WAR
parent_gpu->smmu_war.smmu_cmdq = alloc_page(NV_UVM_GFP_FLAGS | __GFP_ZERO);
if (!parent_gpu->smmu_war.smmu_cmdq) {
status = NV_ERR_NO_MEMORY;
goto out;
}

// Initialise VINTF for the WAR
smmu_vintf_write32(smmu_cmdqv_base, SMMU_VINTF_CONFIG, SMMU_VINTF_CONFIG_ENABLE | SMMU_VINTF_CONFIG_HYP_OWN);
UVM_SPIN_WHILE(!(smmu_vintf_read32(smmu_cmdqv_base, SMMU_VINTF_STATUS) & SMMU_VINTF_STATUS_ENABLED), &spin);

// Allocate VCMDQ to VINTF
iowrite32((VINTF << SMMU_CMDQV_CMDQ_ALLOC_MAP_VIRT_INTF_INDX_SHIFT) | SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC,
smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));

smmu_vcmdq_write64(smmu_cmdqv_base, SMMU_VCMDQ_CMDQ_BASE,
page_to_phys(parent_gpu->smmu_war.smmu_cmdq) | SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE);
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONS, 0);
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_PROD, 0);
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONFIG, SMMU_VCMDQ_CONFIG_ENABLE);
UVM_SPIN_WHILE(!(smmu_vcmdq_read32(smmu_cmdqv_base, SMMU_VCMDQ_STATUS) & SMMU_VCMDQ_STATUS_ENABLED), &spin);

uvm_mutex_init(&parent_gpu->smmu_war.smmu_lock, UVM_LOCK_ORDER_LEAF);
parent_gpu->smmu_war.smmu_prod = 0;
parent_gpu->smmu_war.smmu_cons = 0;

return NV_OK;

out:
iounmap(parent_gpu->smmu_war.smmu_cmdqv_base);
parent_gpu->smmu_war.smmu_cmdqv_base = NULL;

return status;
}

static void uvm_ats_smmu_war_deinit(uvm_parent_gpu_t *parent_gpu)
{
void __iomem *smmu_cmdqv_base = parent_gpu->smmu_war.smmu_cmdqv_base;
NvU32 cmdq_alloc_map;

if (parent_gpu->smmu_war.smmu_cmdqv_base) {
smmu_vcmdq_write32(smmu_cmdqv_base, SMMU_VCMDQ_CONFIG, 0);
cmdq_alloc_map = ioread32(smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
iowrite32(cmdq_alloc_map & SMMU_CMDQV_CMDQ_ALLOC_MAP_ALLOC, smmu_cmdqv_base + SMMU_CMDQV_CMDQ_ALLOC_MAP(VCMDQ));
smmu_vintf_write32(smmu_cmdqv_base, SMMU_VINTF_CONFIG, 0);
}

if (parent_gpu->smmu_war.smmu_cmdq)
__free_page(parent_gpu->smmu_war.smmu_cmdq);

if (parent_gpu->smmu_war.smmu_cmdqv_base)
iounmap(parent_gpu->smmu_war.smmu_cmdqv_base);
}
// The SMMU on ARM64 can run under different translation regimes depending on
// what features the OS and CPU variant support. The CPU for GH180 supports
// virtualisation extensions and starts the kernel at EL2 meaning SMMU operates
// under the NS-EL2-E2H translation regime. Therefore we need to use the
// TLBI_EL2_* commands which invalidate TLB entries created under this
// translation regime.
#define CMDQ_OP_TLBI_EL2_ASID 0x21;
#define CMDQ_OP_TLBI_EL2_VA 0x22;
#define CMDQ_OP_CMD_SYNC 0x46

// Use the same maximum as used for MAX_TLBI_OPS in the upstream
// kernel.
#define UVM_MAX_TLBI_OPS (1UL << (PAGE_SHIFT - 3))

#if UVM_ATS_SMMU_WAR_REQUIRED()
void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size)
{
struct mm_struct *mm = gpu_va_space->va_space->va_space_mm.mm;
uvm_parent_gpu_t *parent_gpu = gpu_va_space->gpu->parent;
struct {
NvU64 low;
NvU64 high;
} *vcmdq;
unsigned long vcmdq_prod;
NvU64 end;
uvm_spin_loop_t spin;
NvU16 asid;

if (!parent_gpu->smmu_war.smmu_cmdqv_base)
return;

asid = arm64_mm_context_get(mm);
vcmdq = kmap(parent_gpu->smmu_war.smmu_cmdq);
uvm_mutex_lock(&parent_gpu->smmu_war.smmu_lock);
vcmdq_prod = parent_gpu->smmu_war.smmu_prod;

// Our queue management is very simple. The mutex prevents multiple
// producers writing to the queue and all our commands require waiting for
// the queue to drain so we know it's empty. If we can't fit enough commands
// in the queue we just invalidate the whole ASID.
//
// The command queue is a cirular buffer with the MSB representing a wrap
// bit that must toggle on each wrap. See the SMMU architecture
// specification for more details.
//
// SMMU_VCMDQ_CMDQ_ENTRIES - 1 because we need to leave space for the
// CMD_SYNC.
if ((size >> PAGE_SHIFT) > min(UVM_MAX_TLBI_OPS, SMMU_VCMDQ_CMDQ_ENTRIES - 1)) {
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low = CMDQ_OP_TLBI_EL2_ASID;
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low |= (NvU64) asid << 48;
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].high = 0;
vcmdq_prod++;
}
else {
for (end = addr + size; addr < end; addr += PAGE_SIZE) {
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low = CMDQ_OP_TLBI_EL2_VA;
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low |= (NvU64) asid << 48;
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].high = addr & ~((1UL << 12) - 1);
vcmdq_prod++;
}
}

vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].low = CMDQ_OP_CMD_SYNC;
vcmdq[vcmdq_prod % SMMU_VCMDQ_CMDQ_ENTRIES].high = 0x0;
vcmdq_prod++;

// MSB is the wrap bit
vcmdq_prod &= (1UL << (SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE + 1)) - 1;
parent_gpu->smmu_war.smmu_prod = vcmdq_prod;
smmu_vcmdq_write32(parent_gpu->smmu_war.smmu_cmdqv_base, SMMU_VCMDQ_PROD, parent_gpu->smmu_war.smmu_prod);

UVM_SPIN_WHILE(
(smmu_vcmdq_read32(parent_gpu->smmu_war.smmu_cmdqv_base, SMMU_VCMDQ_CONS) & GENMASK(19, 0)) != vcmdq_prod,
&spin);

uvm_mutex_unlock(&parent_gpu->smmu_war.smmu_lock);
kunmap(parent_gpu->smmu_war.smmu_cmdq);
arm64_mm_context_put(mm);
}
#endif

NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
{
int ret;

ret = iommu_dev_enable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
if (ret)
return errno_to_nv_status(ret);

return errno_to_nv_status(ret);
if (UVM_ATS_SMMU_WAR_REQUIRED())
return uvm_ats_smmu_war_init(parent_gpu);
else
return NV_OK;
}

void uvm_ats_sva_remove_gpu(uvm_parent_gpu_t *parent_gpu)
{
if (UVM_ATS_SMMU_WAR_REQUIRED())
uvm_ats_smmu_war_deinit(parent_gpu);

iommu_dev_disable_feature(&parent_gpu->pci_dev->dev, IOMMU_DEV_FEAT_SVA);
}
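
The producer-index bookkeeping above is the subtle part: the low SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE bits of vcmdq_prod select the queue slot, while the next bit is the wrap flag that must toggle each time the producer wraps, which is why the index is masked to LOG2SIZE + 1 bits rather than LOG2SIZE. A small self-contained sketch of that arithmetic, using an illustrative queue size rather than the real PAGE_SHIFT-derived one:

    #include <stdint.h>
    #include <stdio.h>

    #define LOG2SIZE 8u                   /* illustrative; the WAR uses PAGE_SHIFT - 4 */
    #define ENTRIES  (1u << LOG2SIZE)     /* number of queue slots */

    int main(void)
    {
        uint32_t prod = 0;

        for (int cmd = 0; cmd < 600; cmd++) {
            uint32_t slot = prod % ENTRIES;          /* where this command is written */
            uint32_t wrap = (prod >> LOG2SIZE) & 1;  /* toggles on every wrap */

            if (cmd >= 250 && cmd <= 258)
                printf("cmd %d -> slot %u wrap %u\n", cmd, slot, wrap);

            /* Advance and keep LOG2SIZE + 1 bits, exactly like
             * vcmdq_prod &= (1UL << (SMMU_VCMDQ_CMDQ_BASE_LOG2SIZE + 1)) - 1 above. */
            prod = (prod + 1) & ((1u << (LOG2SIZE + 1)) - 1);
        }
        return 0;
    }

Around command 256 the slot index returns to 0 while the wrap bit flips from 0 to 1, which is the transition the SMMU uses to distinguish a full queue from an empty one.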
@@ -32,23 +32,38 @@
// For ATS support on aarch64, arm_smmu_sva_bind() is needed for
// iommu_sva_bind_device() calls. Unfortunately, arm_smmu_sva_bind() is not
// conftest-able. We instead look for the presence of ioasid_get() or
// mm_pasid_set(). ioasid_get() was added in the same patch series as
// arm_smmu_sva_bind() and removed in v6.0. mm_pasid_set() was added in the
// mm_pasid_drop(). ioasid_get() was added in the same patch series as
// arm_smmu_sva_bind() and removed in v6.0. mm_pasid_drop() was added in the
// same patch as the removal of ioasid_get(). We assume the presence of
// arm_smmu_sva_bind() if ioasid_get(v5.11 - v5.17) or mm_pasid_set(v5.18+) is
// arm_smmu_sva_bind() if ioasid_get(v5.11 - v5.17) or mm_pasid_drop(v5.18+) is
// present.
//
// arm_smmu_sva_bind() was added with commit
// 32784a9562fb0518b12e9797ee2aec52214adf6f and ioasid_get() was added with
// commit cb4789b0d19ff231ce9f73376a023341300aed96 (11/23/2020). Commit
// 701fac40384f07197b106136012804c3cae0b3de (02/15/2022) removed ioasid_get()
// and added mm_pasid_set().
#if UVM_CAN_USE_MMU_NOTIFIERS() && (defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_SET_PRESENT))
#define UVM_ATS_SVA_SUPPORTED() 1
// and added mm_pasid_drop().
#if UVM_CAN_USE_MMU_NOTIFIERS() && (defined(NV_IOASID_GET_PRESENT) || defined(NV_MM_PASID_DROP_PRESENT))
#if defined(CONFIG_IOMMU_SVA)
#define UVM_ATS_SVA_SUPPORTED() 1
#else
#define UVM_ATS_SVA_SUPPORTED() 0
#endif
#else
#define UVM_ATS_SVA_SUPPORTED() 0
#endif

// If NV_ARCH_INVALIDATE_SECONDARY_TLBS is defined it means the upstream fix is
// in place so no need for the WAR from Bug 4130089: [GH180][r535] WAR for
// kernel not issuing SMMU TLB invalidates on read-only
#if defined(NV_ARCH_INVALIDATE_SECONDARY_TLBS)
#define UVM_ATS_SMMU_WAR_REQUIRED() 0
#elif NVCPU_IS_AARCH64
#define UVM_ATS_SMMU_WAR_REQUIRED() 1
#else
#define UVM_ATS_SMMU_WAR_REQUIRED() 0
#endif

typedef struct
{
int placeholder;

@@ -77,6 +92,17 @@ typedef struct

// LOCKING: None
void uvm_ats_sva_unregister_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space);

// Fix for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU
// TLB invalidates on read-only to read-write upgrades
#if UVM_ATS_SMMU_WAR_REQUIRED()
void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size);
#else
static void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size)
{

}
#endif
#else
static NV_STATUS uvm_ats_sva_add_gpu(uvm_parent_gpu_t *parent_gpu)
{

@@ -107,6 +133,11 @@ typedef struct
{

}

static void uvm_ats_smmu_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space, NvU64 addr, size_t size)
{

}
#endif // UVM_ATS_SVA_SUPPORTED

#endif // __UVM_ATS_SVA_H__
@@ -191,7 +191,7 @@ static NV_STATUS test_membar(uvm_gpu_t *gpu)

for (i = 0; i < REDUCTIONS; ++i) {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, host_mem_gpu_va, REDUCTIONS + 1);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, host_mem_gpu_va, REDUCTIONS);
}

// Without a sys membar the channel tracking semaphore can and does complete

@@ -577,7 +577,7 @@ static NV_STATUS test_semaphore_reduction_inc(uvm_gpu_t *gpu)

for (i = 0; i < REDUCTIONS; i++) {
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va, i+1);
gpu->parent->ce_hal->semaphore_reduction_inc(&push, gpu_va, REDUCTIONS);
}

status = uvm_push_end_and_wait(&push);
@@ -21,8 +21,8 @@

*******************************************************************************/

#ifndef _UVM_COMMON_H
#define _UVM_COMMON_H
#ifndef __UVM_COMMON_H__
#define __UVM_COMMON_H__

#ifdef DEBUG
#define UVM_IS_DEBUG() 1

@@ -413,4 +413,40 @@ static inline void uvm_touch_page(struct page *page)
// Return true if the VMA is one used by UVM managed allocations.
bool uvm_vma_is_managed(struct vm_area_struct *vma);

#endif /* _UVM_COMMON_H */
static bool uvm_platform_uses_canonical_form_address(void)
{
if (NVCPU_IS_PPC64LE)
return false;

return true;
}

// Similar to the GPU MMU HAL num_va_bits(), it returns the CPU's num_va_bits().
static NvU32 uvm_cpu_num_va_bits(void)
{
return fls64(TASK_SIZE - 1) + 1;
}

// Return the unaddressable range in a num_va_bits-wide VA space, [first, outer)
static void uvm_get_unaddressable_range(NvU32 num_va_bits, NvU64 *first, NvU64 *outer)
{
UVM_ASSERT(num_va_bits < 64);
UVM_ASSERT(first);
UVM_ASSERT(outer);

if (uvm_platform_uses_canonical_form_address()) {
*first = 1ULL << (num_va_bits - 1);
*outer = (NvU64)((NvS64)(1ULL << 63) >> (64 - num_va_bits));
}
else {
*first = 1ULL << num_va_bits;
*outer = ~0Ull;
}
}

static void uvm_cpu_get_unaddressable_range(NvU64 *first, NvU64 *outer)
{
return uvm_get_unaddressable_range(uvm_cpu_num_va_bits(), first, outer);
}

#endif /* __UVM_COMMON_H__ */
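
As a concrete check of uvm_get_unaddressable_range(): on a canonical-form platform with num_va_bits = 48, the unaddressable hole is [0x0000800000000000, 0xFFFF800000000000), i.e. everything that is not a valid sign-extended 48-bit address. A tiny standalone sketch reproducing that arithmetic outside the kernel:

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the canonical-form branch of uvm_get_unaddressable_range(). */
    static void unaddressable_range(unsigned num_va_bits, uint64_t *first, uint64_t *outer)
    {
        *first = 1ULL << (num_va_bits - 1);
        *outer = (uint64_t)((int64_t)(1ULL << 63) >> (64 - num_va_bits));
    }

    int main(void)
    {
        uint64_t first, outer;

        unaddressable_range(48, &first, &outer);
        /* Prints: first=0x0000800000000000 outer=0xffff800000000000 */
        printf("first=0x%016llx outer=0x%016llx\n",
               (unsigned long long)first, (unsigned long long)outer);
        return 0;
    }

The arithmetic right shift of a negative value relies on the same implementation behavior the kernel code itself assumes; on the compilers used for the kernel it sign-extends, which is exactly what produces the upper half of the canonical hole.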
@@ -218,19 +218,12 @@ static bool gpu_supports_uvm(uvm_parent_gpu_t *parent_gpu)
return parent_gpu->rm_info.subdeviceCount == 1;
}

static bool platform_uses_canonical_form_address(void)
{
if (NVCPU_IS_PPC64LE)
return false;

return true;
}

bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
{
// Lower and upper address spaces are typically found in platforms that use
// the canonical address form.
NvU64 max_va_lower;
NvU64 min_va_upper;
NvU64 addr_end = addr + size - 1;
NvU8 gpu_addr_shift;
NvU8 cpu_addr_shift;

@@ -243,7 +236,7 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
UVM_ASSERT(size > 0);

gpu_addr_shift = gpu->address_space_tree.hal->num_va_bits();
cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
cpu_addr_shift = uvm_cpu_num_va_bits();
addr_shift = gpu_addr_shift;

// Pascal+ GPUs are capable of accessing kernel pointers in various modes

@@ -279,9 +272,7 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
// 0 +----------------+ 0 +----------------+

// On canonical form address platforms and Pascal+ GPUs.
if (platform_uses_canonical_form_address() && gpu_addr_shift > 40) {
NvU64 min_va_upper;

if (uvm_platform_uses_canonical_form_address() && gpu_addr_shift > 40) {
// On x86, when cpu_addr_shift > gpu_addr_shift, it means the CPU uses
// 5-level paging and the GPU is pre-Hopper. On Pascal-Ada GPUs (49b
// wide VA) we set addr_shift to match a 4-level paging x86 (48b wide).

@@ -292,15 +283,11 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size)
addr_shift = gpu_addr_shift;
else
addr_shift = cpu_addr_shift;
}

min_va_upper = (NvU64)((NvS64)(1ULL << 63) >> (64 - addr_shift));
max_va_lower = 1ULL << (addr_shift - 1);
return (addr_end < max_va_lower) || (addr >= min_va_upper);
}
else {
max_va_lower = 1ULL << addr_shift;
return addr_end < max_va_lower;
}
uvm_get_unaddressable_range(addr_shift, &max_va_lower, &min_va_upper);

return (addr_end < max_va_lower) || (addr >= min_va_upper);
}

// The internal UVM VAS does not use canonical form addresses.

@@ -326,14 +313,14 @@ NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr)
NvU8 addr_shift;
NvU64 input_addr = addr;

if (platform_uses_canonical_form_address()) {
if (uvm_platform_uses_canonical_form_address()) {
// When the CPU VA width is larger than GPU's, it means that:
// On ARM: the CPU is on LVA mode and the GPU is pre-Hopper.
// On x86: the CPU uses 5-level paging and the GPU is pre-Hopper.
// We sign-extend on the 48b on ARM and on the 47b on x86 to mirror the
// behavior of CPUs with smaller (than GPU) VA widths.
gpu_addr_shift = parent_gpu->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K)->num_va_bits();
cpu_addr_shift = fls64(TASK_SIZE - 1) + 1;
cpu_addr_shift = uvm_cpu_num_va_bits();

if (cpu_addr_shift > gpu_addr_shift)
addr_shift = NVCPU_IS_X86_64 ? 48 : 49;
@@ -57,14 +57,16 @@

typedef struct
{
// Number of faults from this uTLB that have been fetched but have not been serviced yet
// Number of faults from this uTLB that have been fetched but have not been
// serviced yet.
NvU32 num_pending_faults;

// Whether the uTLB contains fatal faults
bool has_fatal_faults;

// We have issued a replay of type START_ACK_ALL while containing fatal faults. This puts
// the uTLB in lockdown mode and no new translations are accepted
// We have issued a replay of type START_ACK_ALL while containing fatal
// faults. This puts the uTLB in lockdown mode and no new translations are
// accepted.
bool in_lockdown;

// We have issued a cancel on this uTLB

@@ -126,8 +128,8 @@ struct uvm_service_block_context_struct
struct list_head service_context_list;

// A mask of GPUs that need to be checked for ECC errors before the CPU
// fault handler returns, but after the VA space lock has been unlocked to
// avoid the RM/UVM VA space lock deadlocks.
// fault handler returns, but after the VA space lock has been unlocked
// to avoid the RM/UVM VA space lock deadlocks.
uvm_processor_mask_t gpus_to_check_for_ecc;

// This is set to throttle page fault thrashing.

@@ -160,9 +162,9 @@ struct uvm_service_block_context_struct

struct
{
// Per-processor mask with the pages that will be resident after servicing.
// We need one mask per processor because we may coalesce faults that
// trigger migrations to different processors.
// Per-processor mask with the pages that will be resident after
// servicing. We need one mask per processor because we may coalesce
// faults that trigger migrations to different processors.
uvm_page_mask_t new_residency;
} per_processor_masks[UVM_ID_MAX_PROCESSORS];

@@ -263,7 +265,10 @@ struct uvm_fault_service_batch_context_struct

NvU32 num_coalesced_faults;

bool has_fatal_faults;
// One of the VA spaces in this batch which had fatal faults. If NULL, no
// faults were fatal. More than one VA space could have fatal faults, but we
// pick one to be the target of the cancel sequence.
uvm_va_space_t *fatal_va_space;

bool has_throttled_faults;

@@ -291,11 +296,8 @@ struct uvm_fault_service_batch_context_struct

struct uvm_ats_fault_invalidate_struct
{
// Whether the TLB batch contains any information
bool write_faults_in_batch;

// Batch of TLB entries to be invalidated
uvm_tlb_batch_t write_faults_tlb_batch;
bool tlb_batch_pending;
uvm_tlb_batch_t tlb_batch;
};

typedef struct

@@ -440,20 +442,9 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 num_notifications;

// Boolean used to avoid sorting the fault batch by instance_ptr if we
// determine at fetch time that all the access counter notifications in the
// batch report the same instance_ptr
// determine at fetch time that all the access counter notifications in
// the batch report the same instance_ptr
bool is_single_instance_ptr;

// Scratch space, used to generate artificial physically addressed notifications.
// Virtual address notifications are always aligned to 64k. This means up to 16
// different physical locations could have been accessed to trigger one notification.
// The sub-granularity mask can correspond to any of them.
struct
{
uvm_processor_id_t resident_processors[16];
uvm_gpu_phys_address_t phys_addresses[16];
uvm_access_counter_buffer_entry_t phys_entry;
} scratch;
} virt;

struct

@@ -464,8 +455,8 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 num_notifications;

// Boolean used to avoid sorting the fault batch by aperture if we
// determine at fetch time that all the access counter notifications in the
// batch report the same aperture
// determine at fetch time that all the access counter notifications in
// the batch report the same aperture
bool is_single_aperture;
} phys;

@@ -661,8 +652,8 @@ struct uvm_gpu_struct
struct
{
// Big page size used by the internal UVM VA space
// Notably it may be different than the big page size used by a user's VA
// space in general.
// Notably it may be different than the big page size used by a user's
// VA space in general.
NvU32 internal_size;
} big_page;

@@ -688,8 +679,8 @@ struct uvm_gpu_struct
// lazily-populated array of peer GPUs, indexed by the peer's GPU index
uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];

// Leaf spinlock used to synchronize access to the peer_gpus table so that
// it can be safely accessed from the access counters bottom half
// Leaf spinlock used to synchronize access to the peer_gpus table so
// that it can be safely accessed from the access counters bottom half
uvm_spinlock_t peer_gpus_lock;
} peer_info;

@@ -980,6 +971,10 @@ struct uvm_parent_gpu_struct
|
||||
|
||||
bool plc_supported;
|
||||
|
||||
// If true, page_tree initialization pre-populates no_ats_ranges. It only
|
||||
// affects ATS systems.
|
||||
bool no_ats_range_required;
|
||||
|
||||
// Parameters used by the TLB batching API
|
||||
struct
|
||||
{
|
||||
@ -1051,14 +1046,16 @@ struct uvm_parent_gpu_struct
|
||||
// Interrupt handling state and locks
|
||||
uvm_isr_info_t isr;
|
||||
|
||||
// Fault buffer info. This is only valid if supports_replayable_faults is set to true
|
||||
// Fault buffer info. This is only valid if supports_replayable_faults is
|
||||
// set to true.
|
||||
uvm_fault_buffer_info_t fault_buffer_info;
|
||||
|
||||
// PMM lazy free processing queue.
|
||||
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
|
||||
nv_kthread_q_t lazy_free_q;
|
||||
|
||||
// Access counter buffer info. This is only valid if supports_access_counters is set to true
|
||||
// Access counter buffer info. This is only valid if
|
||||
// supports_access_counters is set to true.
|
||||
uvm_access_counter_buffer_info_t access_counter_buffer_info;
|
||||
|
||||
// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
|
||||
@ -1108,7 +1105,7 @@ struct uvm_parent_gpu_struct
|
||||
uvm_rb_tree_t instance_ptr_table;
|
||||
uvm_spinlock_t instance_ptr_table_lock;
|
||||
|
||||
// This is set to true if the GPU belongs to an SLI group. Else, set to false.
|
||||
// This is set to true if the GPU belongs to an SLI group.
|
||||
bool sli_enabled;
|
||||
|
||||
struct
|
||||
@ -1135,8 +1132,8 @@ struct uvm_parent_gpu_struct
|
||||
// environment, rather than using the peer-id field of the PTE (which can
|
||||
// only address 8 gpus), all gpus are assigned a 47-bit physical address
|
||||
// space by the fabric manager. Any physical address access to these
|
||||
// physical address spaces are routed through the switch to the corresponding
|
||||
// peer.
|
||||
// physical address spaces are routed through the switch to the
|
||||
// corresponding peer.
|
||||
struct
|
||||
{
|
||||
bool is_nvswitch_connected;
|
||||
@ -1162,6 +1159,16 @@ struct uvm_parent_gpu_struct
|
||||
NvU64 memory_window_start;
|
||||
NvU64 memory_window_end;
|
||||
} system_bus;
|
||||
|
||||
// WAR to issue ATS TLB invalidation commands ourselves.
|
||||
struct
|
||||
{
|
||||
uvm_mutex_t smmu_lock;
|
||||
struct page *smmu_cmdq;
|
||||
void __iomem *smmu_cmdqv_base;
|
||||
unsigned long smmu_prod;
|
||||
unsigned long smmu_cons;
|
||||
} smmu_war;
|
||||
};
|
||||
|
||||
static const char *uvm_gpu_name(uvm_gpu_t *gpu)
|
||||
@ -1351,7 +1358,8 @@ void uvm_gpu_release_pcie_peer_access(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1);
|
||||
// They must not be the same gpu.
|
||||
uvm_aperture_t uvm_gpu_peer_aperture(uvm_gpu_t *local_gpu, uvm_gpu_t *remote_gpu);
|
||||
|
||||
// Get the processor id accessible by the given GPU for the given physical address
|
||||
// Get the processor id accessible by the given GPU for the given physical
|
||||
// address.
|
||||
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);
|
||||
|
||||
// Get the P2P capabilities between the gpus with the given indexes
|
||||
@ -1448,9 +1456,9 @@ NV_STATUS uvm_gpu_check_ecc_error(uvm_gpu_t *gpu);
|
||||
|
||||
// Check for ECC errors without calling into RM
|
||||
//
|
||||
// Calling into RM is problematic in many places, this check is always safe to do.
|
||||
// Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an ECC error and
|
||||
// it's required to call uvm_gpu_check_ecc_error() to be sure.
|
||||
// Calling into RM is problematic in many places, this check is always safe to
|
||||
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an ECC error
|
||||
// and it's required to call uvm_gpu_check_ecc_error() to be sure.
|
||||
NV_STATUS uvm_gpu_check_ecc_error_no_rm(uvm_gpu_t *gpu);
|
||||
|
||||
// Map size bytes of contiguous sysmem on the GPU for physical access
|
||||
@ -1507,6 +1515,8 @@ bool uvm_gpu_can_address(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
|
||||
// The GPU must be initialized before calling this function.
|
||||
bool uvm_gpu_can_address_kernel(uvm_gpu_t *gpu, NvU64 addr, NvU64 size);
|
||||
|
||||
bool uvm_platform_uses_canonical_form_address(void);
|
||||
|
||||
// Returns addr's canonical form for host systems that use canonical form
|
||||
// addresses.
|
||||
NvU64 uvm_parent_gpu_canonical_address(uvm_parent_gpu_t *parent_gpu, NvU64 addr);
|
||||
@ -1553,8 +1563,9 @@ uvm_aperture_t uvm_gpu_page_tree_init_location(const uvm_gpu_t *gpu);
|
||||
// Debug print of GPU properties
|
||||
void uvm_gpu_print(uvm_gpu_t *gpu);
|
||||
|
||||
// Add the given instance pointer -> user_channel mapping to this GPU. The bottom
|
||||
// half GPU page fault handler uses this to look up the VA space for GPU faults.
|
||||
// Add the given instance pointer -> user_channel mapping to this GPU. The
|
||||
// bottom half GPU page fault handler uses this to look up the VA space for GPU
|
||||
// faults.
|
||||
NV_STATUS uvm_gpu_add_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
|
||||
void uvm_gpu_remove_user_channel(uvm_gpu_t *gpu, uvm_user_channel_t *user_channel);
|
||||
|
||||
|
@ -33,17 +33,17 @@
|
||||
#include "uvm_va_space_mm.h"
|
||||
#include "uvm_pmm_sysmem.h"
|
||||
#include "uvm_perf_module.h"
|
||||
#include "uvm_ats_ibm.h"
|
||||
|
||||
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN 1
|
||||
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_DEFAULT 256
|
||||
#define UVM_PERF_ACCESS_COUNTER_GRANULARITY_DEFAULT "2m"
|
||||
#define UVM_PERF_ACCESS_COUNTER_GRANULARITY UVM_ACCESS_COUNTER_GRANULARITY_2M
|
||||
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN 1
|
||||
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX ((1 << 16) - 1)
|
||||
#define UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT 256
|
||||
|
||||
#define UVM_ACCESS_COUNTER_ACTION_NOTIFY 0x1
|
||||
#define UVM_ACCESS_COUNTER_ACTION_CLEAR 0x2
|
||||
#define UVM_ACCESS_COUNTER_ON_MANAGED 0x4
|
||||
#define UVM_ACCESS_COUNTER_ACTION_CLEAR 0x1
|
||||
#define UVM_ACCESS_COUNTER_PHYS_ON_MANAGED 0x2
|
||||
|
||||
// Each page in a tracked physical range may belong to a different VA Block. We
|
||||
// preallocate an array of reverse map translations. However, access counter
|
||||
@ -54,12 +54,6 @@
|
||||
#define UVM_MAX_TRANSLATION_SIZE (2 * 1024 * 1024ULL)
|
||||
#define UVM_SUB_GRANULARITY_REGIONS 32
|
||||
|
||||
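As a rough aid to the constants above: with the 2MB default granularity selected elsewhere in this change, each tracked region fits in a single reverse-map translation and is divided into 32 sub-regions of 64KB. The arithmetic sketch below is illustrative only; the relationships between the constants are assumptions stated in the comments, and the driver's actual config fields may be derived differently.

#include <stdio.h>
#include <stdint.h>

#define MAX_TRANSLATION_SIZE    (2 * 1024 * 1024ULL)  /* UVM_MAX_TRANSLATION_SIZE    */
#define SUB_GRANULARITY_REGIONS 32                    /* UVM_SUB_GRANULARITY_REGIONS */

static void describe(uint64_t granularity_bytes)
{
    /* Assumed relationships, for illustration only. */
    uint64_t translation_size = granularity_bytes < MAX_TRANSLATION_SIZE ?
                                granularity_bytes : MAX_TRANSLATION_SIZE;
    uint64_t translations     = granularity_bytes / translation_size;
    uint64_t sub_region_size  = granularity_bytes / SUB_GRANULARITY_REGIONS;

    printf("%8lluK granularity: %4llu translation(s), 32 sub-regions of %lluK\n",
           (unsigned long long)(granularity_bytes >> 10),
           (unsigned long long)translations,
           (unsigned long long)(sub_region_size >> 10));
}

int main(void)
{
    describe(2 * 1024 * 1024ULL);  /* default: 1 translation, 64K sub-regions   */
    describe(16 * 1024 * 1024ULL); /* 16M:     8 translations, 512K sub-regions */
    return 0;
}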
// The GPU offers the following tracking granularities: 64K, 2M, 16M, 16G
|
||||
//
|
||||
// Use the largest granularity to minimize the number of access counter
|
||||
// notifications. This is fine because we simply drop the notifications during
|
||||
// normal operation, and tests override these values.
|
||||
static UVM_ACCESS_COUNTER_GRANULARITY g_uvm_access_counter_granularity;
|
||||
static unsigned g_uvm_access_counter_threshold;
|
||||
|
||||
// Per-VA space access counters information
|
||||
@ -87,7 +81,6 @@ static int uvm_perf_access_counter_momc_migration_enable = -1;
|
||||
static unsigned uvm_perf_access_counter_batch_count = UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_DEFAULT;
|
||||
|
||||
// See module param documentation below
|
||||
static char *uvm_perf_access_counter_granularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY_DEFAULT;
|
||||
static unsigned uvm_perf_access_counter_threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT;
|
||||
|
||||
// Module parameters for the tunables
|
||||
@ -100,10 +93,6 @@ MODULE_PARM_DESC(uvm_perf_access_counter_momc_migration_enable,
|
||||
"Whether MOMC access counters will trigger migrations."
|
||||
"Valid values: <= -1 (default policy), 0 (off), >= 1 (on)");
|
||||
module_param(uvm_perf_access_counter_batch_count, uint, S_IRUGO);
|
||||
module_param(uvm_perf_access_counter_granularity, charp, S_IRUGO);
|
||||
MODULE_PARM_DESC(uvm_perf_access_counter_granularity,
|
||||
"Size of the physical memory region tracked by each counter. Valid values as"
|
||||
"of Volta: 64k, 2m, 16m, 16g");
|
||||
module_param(uvm_perf_access_counter_threshold, uint, S_IRUGO);
|
||||
MODULE_PARM_DESC(uvm_perf_access_counter_threshold,
|
||||
"Number of remote accesses on a region required to trigger a notification."
|
||||
@ -136,7 +125,7 @@ static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va
|
||||
|
||||
// Whether access counter migrations are enabled or not. The policy is as
|
||||
// follows:
|
||||
// - MIMC migrations are enabled by default on P9 systems with ATS support
|
||||
// - MIMC migrations are disabled by default on all systems except P9.
|
||||
// - MOMC migrations are disabled by default on all systems
|
||||
// - Users can override this policy by specifying on/off
|
||||
static bool is_migration_enabled(uvm_access_counter_type_t type)
|
||||
@ -159,7 +148,10 @@ static bool is_migration_enabled(uvm_access_counter_type_t type)
|
||||
if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
|
||||
return false;
|
||||
|
||||
return g_uvm_global.ats.supported;
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
return g_uvm_global.ats.supported;
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
// Create the access counters tracking struct for the given VA space
|
||||
@ -225,30 +217,18 @@ static NV_STATUS config_granularity_to_bytes(UVM_ACCESS_COUNTER_GRANULARITY gran
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// Clear the given access counter and add it to the per-GPU clear tracker
|
||||
static NV_STATUS access_counter_clear_targeted(uvm_gpu_t *gpu,
|
||||
const uvm_access_counter_buffer_entry_t *entry)
|
||||
// Clear the access counter notifications and add them to the per-GPU clear
|
||||
// tracker.
|
||||
static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_buffer_entry_t **notification_start,
|
||||
NvU32 num_notifications)
|
||||
{
|
||||
NvU32 i;
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
|
||||
if (entry->address.is_virtual) {
|
||||
status = uvm_push_begin(gpu->channel_manager,
|
||||
UVM_CHANNEL_TYPE_MEMOPS,
|
||||
&push,
|
||||
"Clear access counter with virtual address: 0x%llx",
|
||||
entry->address.address);
|
||||
}
|
||||
else {
|
||||
status = uvm_push_begin(gpu->channel_manager,
|
||||
UVM_CHANNEL_TYPE_MEMOPS,
|
||||
&push,
|
||||
"Clear access counter with physical address: 0x%llx:%s",
|
||||
entry->address.address,
|
||||
uvm_aperture_string(entry->address.aperture));
|
||||
}
|
||||
|
||||
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch");
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
@ -256,7 +236,8 @@ static NV_STATUS access_counter_clear_targeted(uvm_gpu_t *gpu,
|
||||
return status;
|
||||
}
|
||||
|
||||
gpu->parent->host_hal->access_counter_clear_targeted(&push, entry);
|
||||
for (i = 0; i < num_notifications; i++)
|
||||
gpu->parent->host_hal->access_counter_clear_targeted(&push, notification_start[i]);
|
||||
|
||||
uvm_push_end(&push);
|
||||
|
||||
@ -381,25 +362,6 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
g_uvm_access_counter_threshold = uvm_perf_access_counter_threshold;
|
||||
}
|
||||
|
||||
if (strcmp(uvm_perf_access_counter_granularity, "64k") == 0) {
|
||||
g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_64K;
|
||||
}
|
||||
else if (strcmp(uvm_perf_access_counter_granularity, "2m") == 0) {
|
||||
g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_2M;
|
||||
}
|
||||
else if (strcmp(uvm_perf_access_counter_granularity, "16m") == 0) {
|
||||
g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_16M;
|
||||
}
|
||||
else if (strcmp(uvm_perf_access_counter_granularity, "16g") == 0) {
|
||||
g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_16G;
|
||||
}
|
||||
else {
|
||||
g_uvm_access_counter_granularity = UVM_ACCESS_COUNTER_GRANULARITY_2M;
|
||||
pr_info("Invalid value '%s' for uvm_perf_access_counter_granularity, using '%s' instead",
|
||||
uvm_perf_access_counter_granularity,
|
||||
UVM_PERF_ACCESS_COUNTER_GRANULARITY_DEFAULT);
|
||||
}
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
UVM_ASSERT(parent_gpu->access_counter_buffer_hal != NULL);
|
||||
|
||||
@ -422,7 +384,7 @@ NV_STATUS uvm_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
UVM_ASSERT(access_counters->rm_info.bufferSize %
|
||||
parent_gpu->access_counter_buffer_hal->entry_size(parent_gpu) == 0);
|
||||
|
||||
status = config_granularity_to_bytes(g_uvm_access_counter_granularity, &granularity_bytes);
|
||||
status = config_granularity_to_bytes(UVM_PERF_ACCESS_COUNTER_GRANULARITY, &granularity_bytes);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
if (granularity_bytes > UVM_MAX_TRANSLATION_SIZE)
|
||||
UVM_ASSERT(granularity_bytes % UVM_MAX_TRANSLATION_SIZE == 0);
|
||||
@ -641,8 +603,8 @@ NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_spac
|
||||
else {
|
||||
UvmGpuAccessCntrConfig default_config =
|
||||
{
|
||||
.mimcGranularity = g_uvm_access_counter_granularity,
|
||||
.momcGranularity = g_uvm_access_counter_granularity,
|
||||
.mimcGranularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
|
||||
.momcGranularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
|
||||
.mimcUseLimit = UVM_ACCESS_COUNTER_USE_LIMIT_FULL,
|
||||
.momcUseLimit = UVM_ACCESS_COUNTER_USE_LIMIT_FULL,
|
||||
.threshold = g_uvm_access_counter_threshold,
|
||||
@ -767,6 +729,22 @@ static int cmp_sort_virt_notifications_by_instance_ptr(const void *_a, const voi
|
||||
return cmp_access_counter_instance_ptr(a, b);
|
||||
}
|
||||
|
||||
// Sort comparator for pointers to GVA access counter notification buffer
|
||||
// entries that sorts by va_space, and fault address.
|
||||
static int cmp_sort_virt_notifications_by_va_space_address(const void *_a, const void *_b)
|
||||
{
|
||||
const uvm_access_counter_buffer_entry_t **a = (const uvm_access_counter_buffer_entry_t **)_a;
|
||||
const uvm_access_counter_buffer_entry_t **b = (const uvm_access_counter_buffer_entry_t **)_b;
|
||||
|
||||
int result;
|
||||
|
||||
result = UVM_CMP_DEFAULT((*a)->virtual_info.va_space, (*b)->virtual_info.va_space);
|
||||
if (result != 0)
|
||||
return result;
|
||||
|
||||
return UVM_CMP_DEFAULT((*a)->address.address, (*b)->address.address);
|
||||
}
|
||||
|
||||
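For readers unfamiliar with the comparator-chaining style used above, here is a minimal standalone sketch of the same sort-by-va_space-then-address pattern. It is illustrative only: it uses plain qsort() and an explicit three-way compare helper in place of the driver's sort() and UVM_CMP_DEFAULT, and it sorts the structs directly rather than an array of pointers to entries.

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>

typedef struct
{
    const void *va_space;  /* primary key: compared by pointer value, as above */
    uint64_t address;      /* secondary key */
} notification_t;

/* Three-way compare, the contract UVM_CMP_DEFAULT follows. */
static int cmp_u64(uint64_t a, uint64_t b)
{
    return (a > b) - (a < b);
}

static int cmp_by_va_space_then_address(const void *_a, const void *_b)
{
    const notification_t *a = _a;
    const notification_t *b = _b;
    int result = cmp_u64((uintptr_t)a->va_space, (uintptr_t)b->va_space);

    if (result != 0)
        return result;

    return cmp_u64(a->address, b->address);
}

int main(void)
{
    int s1, s2; /* stand-ins for two VA spaces */
    notification_t n[] = {
        { &s2, 0x20000 }, { &s1, 0x30000 }, { &s2, 0x10000 }, { &s1, 0x10000 },
    };
    int i;

    qsort(n, 4, sizeof(n[0]), cmp_by_va_space_then_address);

    for (i = 0; i < 4; i++)
        printf("space %c address 0x%llx\n",
               n[i].va_space == &s1 ? '1' : '2',
               (unsigned long long)n[i].address);
    return 0;
}

Which of the two VA spaces sorts first depends on their pointer values; what the later servicing loop relies on is only that notifications for the same VA space end up contiguous and address-ordered.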
// Sort comparator for pointers to GPA access counter notification buffer
|
||||
// entries that sorts by physical address' aperture
|
||||
static int cmp_sort_phys_notifications_by_processor_id(const void *_a, const void *_b)
|
||||
@ -924,12 +902,11 @@ static void translate_virt_notifications_instance_ptrs(uvm_gpu_t *gpu,
|
||||
|
||||
// GVA notifications provide an instance_ptr and ve_id that can be directly
|
||||
// translated to a VA space. In order to minimize translations, we sort the
|
||||
// entries by instance_ptr.
|
||||
// entries by instance_ptr, va_space and notification address in that order.
|
||||
static void preprocess_virt_notifications(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
if (!batch_context->virt.is_single_instance_ptr) {
|
||||
// Sort by instance_ptr
|
||||
sort(batch_context->virt.notifications,
|
||||
batch_context->virt.num_notifications,
|
||||
sizeof(*batch_context->virt.notifications),
|
||||
@ -938,6 +915,12 @@ static void preprocess_virt_notifications(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
translate_virt_notifications_instance_ptrs(gpu, batch_context);
|
||||
|
||||
sort(batch_context->virt.notifications,
|
||||
batch_context->virt.num_notifications,
|
||||
sizeof(*batch_context->virt.notifications),
|
||||
cmp_sort_virt_notifications_by_va_space_address,
|
||||
NULL);
|
||||
}
|
||||
|
||||
// GPA notifications provide a physical address and an aperture. Sort
|
||||
@ -946,7 +929,6 @@ static void preprocess_virt_notifications(uvm_gpu_t *gpu,
|
||||
static void preprocess_phys_notifications(uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
if (!batch_context->phys.is_single_aperture) {
|
||||
// Sort by instance_ptr
|
||||
sort(batch_context->phys.notifications,
|
||||
batch_context->phys.num_notifications,
|
||||
sizeof(*batch_context->phys.notifications),
|
||||
@ -955,6 +937,28 @@ static void preprocess_phys_notifications(uvm_access_counter_service_batch_conte
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS notify_tools_and_process_flags(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_buffer_entry_t **notification_start,
|
||||
NvU32 num_entries,
|
||||
NvU32 flags)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
if (uvm_enable_builtin_tests) {
|
||||
// TODO: Bug 4310744: [UVM][TOOLS] Attribute access counter tools events
|
||||
// to va_space instead of broadcasting.
|
||||
NvU32 i;
|
||||
|
||||
for (i = 0; i < num_entries; i++)
|
||||
uvm_tools_broadcast_access_counter(gpu, notification_start[i], flags & UVM_ACCESS_COUNTER_PHYS_ON_MANAGED);
|
||||
}
|
||||
|
||||
if (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR)
|
||||
status = access_counter_clear_notifications(gpu, notification_start, num_entries);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_va_block_retry_t *va_block_retry,
|
||||
@ -1163,7 +1167,7 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
const uvm_reverse_map_t *reverse_mappings,
|
||||
size_t num_reverse_mappings,
|
||||
unsigned *out_flags)
|
||||
NvU32 *out_flags)
|
||||
{
|
||||
size_t index;
|
||||
uvm_va_block_t *va_block = reverse_mappings[0].va_block;
|
||||
@ -1190,7 +1194,6 @@ static NV_STATUS service_phys_single_va_block(uvm_gpu_t *gpu,
|
||||
// If an mm is registered with the VA space, we have to retain it
|
||||
// in order to lock it before locking the VA space.
|
||||
mm = uvm_va_space_mm_retain_lock(va_space);
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
// Re-check that the VA block is valid after taking the VA block lock.
|
||||
@ -1251,7 +1254,7 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
const uvm_reverse_map_t *reverse_mappings,
|
||||
size_t num_reverse_mappings,
|
||||
unsigned *out_flags)
|
||||
NvU32 *out_flags)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
size_t index;
|
||||
@ -1259,7 +1262,7 @@ static NV_STATUS service_phys_va_blocks(uvm_gpu_t *gpu,
|
||||
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
for (index = 0; index < num_reverse_mappings; ++index) {
|
||||
unsigned out_flags_local = 0;
|
||||
NvU32 out_flags_local = 0;
|
||||
status = service_phys_single_va_block(gpu,
|
||||
batch_context,
|
||||
current_entry,
|
||||
@ -1318,7 +1321,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
NvU64 address,
|
||||
unsigned long sub_granularity,
|
||||
size_t *num_reverse_mappings,
|
||||
unsigned *out_flags)
|
||||
NvU32 *out_flags)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU32 region_start, region_end;
|
||||
@ -1327,7 +1330,10 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
|
||||
// Get the reverse_map translations for all the regions set in the
|
||||
// sub_granularity field of the counter.
|
||||
for_each_sub_granularity_region(region_start, region_end, sub_granularity, config->sub_granularity_regions_per_translation) {
|
||||
for_each_sub_granularity_region(region_start,
|
||||
region_end,
|
||||
sub_granularity,
|
||||
config->sub_granularity_regions_per_translation) {
|
||||
NvU64 local_address = address + region_start * config->sub_granularity_region_size;
|
||||
NvU32 local_translation_size = (region_end - region_start) * config->sub_granularity_region_size;
|
||||
uvm_reverse_map_t *local_reverse_mappings = batch_context->phys.translations + *num_reverse_mappings;
|
||||
@ -1376,7 +1382,7 @@ static NV_STATUS service_phys_notification_translation(uvm_gpu_t *gpu,
|
||||
static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
unsigned *out_flags)
|
||||
NvU32 *out_flags)
|
||||
{
|
||||
NvU64 address;
|
||||
NvU64 translation_index;
|
||||
@ -1387,7 +1393,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
size_t total_reverse_mappings = 0;
|
||||
uvm_gpu_t *resident_gpu = NULL;
|
||||
NV_STATUS status = NV_OK;
|
||||
unsigned flags = 0;
|
||||
NvU32 flags = 0;
|
||||
|
||||
address = current_entry->address.address;
|
||||
UVM_ASSERT(address % config->translation_size == 0);
|
||||
@ -1415,7 +1421,7 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
|
||||
for (translation_index = 0; translation_index < config->translations_per_counter; ++translation_index) {
|
||||
size_t num_reverse_mappings;
|
||||
unsigned out_flags_local = 0;
|
||||
NvU32 out_flags_local = 0;
|
||||
status = service_phys_notification_translation(gpu,
|
||||
resident_gpu,
|
||||
batch_context,
|
||||
@ -1437,11 +1443,8 @@ static NV_STATUS service_phys_notification(uvm_gpu_t *gpu,
|
||||
sub_granularity = sub_granularity >> config->sub_granularity_regions_per_translation;
|
||||
}
|
||||
|
||||
// Currently we only report events for our tests, not for tools
|
||||
if (uvm_enable_builtin_tests) {
|
||||
*out_flags |= UVM_ACCESS_COUNTER_ACTION_NOTIFY;
|
||||
*out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_ON_MANAGED : 0);
|
||||
}
|
||||
if (uvm_enable_builtin_tests)
|
||||
*out_flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_PHYS_ON_MANAGED : 0);
|
||||
|
||||
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
|
||||
*out_flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
@ -1454,22 +1457,21 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
NvU32 i;
|
||||
uvm_access_counter_buffer_entry_t **notifications = batch_context->phys.notifications;
|
||||
|
||||
preprocess_phys_notifications(batch_context);
|
||||
|
||||
for (i = 0; i < batch_context->phys.num_notifications; ++i) {
|
||||
NV_STATUS status;
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->phys.notifications[i];
|
||||
unsigned flags = 0;
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
NvU32 flags = 0;
|
||||
|
||||
if (!UVM_ID_IS_VALID(current_entry->physical_info.resident_id))
|
||||
continue;
|
||||
|
||||
status = service_phys_notification(gpu, batch_context, current_entry, &flags);
|
||||
if (flags & UVM_ACCESS_COUNTER_ACTION_NOTIFY)
|
||||
uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);
|
||||
|
||||
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
|
||||
status = access_counter_clear_targeted(gpu, current_entry);
|
||||
notify_tools_and_process_flags(gpu, ¬ifications[i], 1, flags);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@ -1478,152 +1480,218 @@ static NV_STATUS service_phys_notifications(uvm_gpu_t *gpu,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static int cmp_sort_gpu_phys_addr(const void *_a, const void *_b)
|
||||
static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_processor_id_t processor,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
return uvm_gpu_phys_addr_cmp(*(uvm_gpu_phys_address_t*)_a,
|
||||
*(uvm_gpu_phys_address_t*)_b);
|
||||
}
|
||||
uvm_va_block_retry_t va_block_retry;
|
||||
uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
|
||||
uvm_service_block_context_t *service_context = &batch_context->block_service_context;
|
||||
|
||||
static bool gpu_phys_same_region(uvm_gpu_phys_address_t a, uvm_gpu_phys_address_t b, NvU64 granularity)
|
||||
{
|
||||
if (a.aperture != b.aperture)
|
||||
return false;
|
||||
|
||||
UVM_ASSERT(is_power_of_2(granularity));
|
||||
|
||||
return UVM_ALIGN_DOWN(a.address, granularity) == UVM_ALIGN_DOWN(b.address, granularity);
|
||||
}
|
||||
|
||||
static bool phys_address_in_accessed_sub_region(uvm_gpu_phys_address_t address,
|
||||
NvU64 region_size,
|
||||
NvU64 sub_region_size,
|
||||
NvU32 accessed_mask)
|
||||
{
|
||||
const unsigned accessed_index = (address.address % region_size) / sub_region_size;
|
||||
|
||||
// accessed_mask is only filled for tracking granularities larger than 64K
|
||||
if (region_size == UVM_PAGE_SIZE_64K)
|
||||
return true;
|
||||
|
||||
UVM_ASSERT(accessed_index < 32);
|
||||
return ((1 << accessed_index) & accessed_mask) != 0;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notification(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
unsigned *out_flags)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU64 notification_size;
|
||||
NvU64 address;
|
||||
uvm_processor_id_t *resident_processors = batch_context->virt.scratch.resident_processors;
|
||||
uvm_gpu_phys_address_t *phys_addresses = batch_context->virt.scratch.phys_addresses;
|
||||
int num_addresses = 0;
|
||||
int i;
|
||||
|
||||
// Virtual address notifications are always 64K aligned
|
||||
NvU64 region_start = current_entry->address.address;
|
||||
NvU64 region_end = current_entry->address.address + UVM_PAGE_SIZE_64K;
|
||||
|
||||
|
||||
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
uvm_access_counter_type_t counter_type = current_entry->counter_type;
|
||||
|
||||
const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters, counter_type);
|
||||
|
||||
uvm_va_space_t *va_space = current_entry->virtual_info.va_space;
|
||||
|
||||
UVM_ASSERT(counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC);
|
||||
|
||||
// Entries with NULL va_space are simply dropped.
|
||||
if (!va_space)
|
||||
if (uvm_page_mask_empty(accessed_pages))
|
||||
return NV_OK;
|
||||
|
||||
status = config_granularity_to_bytes(config->rm.granularity, ¬ification_size);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
// Collect physical locations that could have been touched
|
||||
// in the reported 64K VA region. The notification mask can
|
||||
// correspond to any of them.
|
||||
uvm_va_space_down_read(va_space);
|
||||
for (address = region_start; address < region_end;) {
|
||||
uvm_va_block_t *va_block;
|
||||
service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
|
||||
service_context->num_retries = 0;
|
||||
service_context->block_context.mm = mm;
|
||||
|
||||
NV_STATUS local_status = uvm_va_block_find(va_space, address, &va_block);
|
||||
if (local_status == NV_ERR_INVALID_ADDRESS || local_status == NV_ERR_OBJECT_NOT_FOUND) {
|
||||
address += PAGE_SIZE;
|
||||
continue;
|
||||
}
|
||||
return UVM_VA_BLOCK_RETRY_LOCKED(va_block,
|
||||
&va_block_retry,
|
||||
service_va_block_locked(processor,
|
||||
va_block,
|
||||
&va_block_retry,
|
||||
service_context,
|
||||
accessed_pages));
|
||||
}
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
while (address < va_block->end && address < region_end) {
|
||||
const unsigned page_index = uvm_va_block_cpu_page_index(va_block, address);
|
||||
static void expand_notification_block(struct mm_struct *mm,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_page_mask_t *accessed_pages,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry)
|
||||
{
|
||||
NvU64 addr;
|
||||
NvU64 granularity = 0;
|
||||
uvm_gpu_t *resident_gpu = NULL;
|
||||
uvm_processor_id_t resident_id;
|
||||
uvm_page_index_t page_index;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
const uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters,
|
||||
UVM_ACCESS_COUNTER_TYPE_MIMC);
|
||||
|
||||
// UVM va_block always maps the closest resident location to processor
|
||||
const uvm_processor_id_t res_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);
|
||||
config_granularity_to_bytes(config->rm.granularity, &granularity);
|
||||
|
||||
// Add physical location if it's valid and not local vidmem
|
||||
if (UVM_ID_IS_VALID(res_id) && !uvm_id_equal(res_id, gpu->id)) {
|
||||
uvm_gpu_phys_address_t phys_address = uvm_va_block_res_phys_page_address(va_block, page_index, res_id, gpu);
|
||||
if (phys_address_in_accessed_sub_region(phys_address,
|
||||
notification_size,
|
||||
config->sub_granularity_region_size,
|
||||
current_entry->sub_granularity)) {
|
||||
resident_processors[num_addresses] = res_id;
|
||||
phys_addresses[num_addresses] = phys_address;
|
||||
++num_addresses;
|
||||
}
|
||||
else {
|
||||
UVM_DBG_PRINT_RL("Skipping phys address %llx:%s, because it couldn't have been accessed in mask %x",
|
||||
phys_address.address,
|
||||
uvm_aperture_string(phys_address.aperture),
|
||||
current_entry->sub_granularity);
|
||||
}
|
||||
}
|
||||
// Granularities other than 2MB can only be enabled by UVM tests. Do nothing
|
||||
// in that case.
|
||||
if (granularity != UVM_PAGE_SIZE_2M)
|
||||
return;
|
||||
|
||||
address += PAGE_SIZE;
|
||||
}
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
addr = current_entry->address.address;
|
||||
|
||||
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
page_index = uvm_va_block_cpu_page_index(va_block, addr);
|
||||
|
||||
resident_id = uvm_va_block_page_get_closest_resident(va_block, page_index, gpu->id);
|
||||
|
||||
// resident_id might be invalid or might already be the same as the GPU
|
||||
// which received the notification if the memory was already migrated before
|
||||
// acquiring the locks either during the servicing of previous notifications
|
||||
// or during faults or because of explicit migrations or if the VA range was
|
||||
// freed after receiving the notification. Return NV_OK in such cases.
|
||||
if (!UVM_ID_IS_VALID(resident_id) || uvm_id_equal(resident_id, gpu->id))
|
||||
return;
|
||||
|
||||
if (UVM_ID_IS_GPU(resident_id))
|
||||
resident_gpu = uvm_va_space_get_gpu(gpu_va_space->va_space, resident_id);
|
||||
|
||||
if (uvm_va_block_get_physical_size(va_block, resident_id, page_index) != granularity) {
|
||||
uvm_page_mask_set(accessed_pages, page_index);
|
||||
}
|
||||
uvm_va_space_up_read(va_space);
|
||||
else {
|
||||
NvU32 region_start;
|
||||
NvU32 region_end;
|
||||
unsigned long sub_granularity = current_entry->sub_granularity;
|
||||
NvU32 num_regions = config->sub_granularity_regions_per_translation;
|
||||
NvU32 num_sub_pages = config->sub_granularity_region_size / PAGE_SIZE;
|
||||
uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, resident_id);
|
||||
|
||||
// The addresses need to be sorted to aid coalescing.
|
||||
sort(phys_addresses,
|
||||
num_addresses,
|
||||
sizeof(*phys_addresses),
|
||||
cmp_sort_gpu_phys_addr,
|
||||
NULL);
|
||||
UVM_ASSERT(num_sub_pages >= 1);
|
||||
|
||||
for (i = 0; i < num_addresses; ++i) {
|
||||
uvm_access_counter_buffer_entry_t *fake_entry = &batch_context->virt.scratch.phys_entry;
|
||||
|
||||
// Skip the current pointer if the physical region was already handled
|
||||
if (i > 0 && gpu_phys_same_region(phys_addresses[i - 1], phys_addresses[i], notification_size)) {
|
||||
UVM_ASSERT(uvm_id_equal(resident_processors[i - 1], resident_processors[i]));
|
||||
continue;
|
||||
// region_start and region_end refer to sub_granularity indices, not
|
||||
// page_indices.
|
||||
for_each_sub_granularity_region(region_start, region_end, sub_granularity, num_regions) {
|
||||
uvm_page_mask_region_fill(accessed_pages,
|
||||
uvm_va_block_region(region_start * num_sub_pages,
|
||||
region_end * num_sub_pages));
|
||||
}
|
||||
UVM_DBG_PRINT_RL("Faking MIMC address[%i/%i]: %llx (granularity mask: %llx) in aperture %s on device %s\n",
|
||||
i,
|
||||
num_addresses,
|
||||
phys_addresses[i].address,
|
||||
notification_size - 1,
|
||||
uvm_aperture_string(phys_addresses[i].aperture),
|
||||
uvm_gpu_name(gpu));
|
||||
|
||||
// Construct a fake phys addr AC entry
|
||||
fake_entry->counter_type = current_entry->counter_type;
|
||||
fake_entry->address.address = UVM_ALIGN_DOWN(phys_addresses[i].address, notification_size);
|
||||
fake_entry->address.aperture = phys_addresses[i].aperture;
|
||||
fake_entry->address.is_virtual = false;
|
||||
fake_entry->physical_info.resident_id = resident_processors[i];
|
||||
fake_entry->counter_value = current_entry->counter_value;
|
||||
fake_entry->sub_granularity = current_entry->sub_granularity;
|
||||
// Remove pages in the va_block which are not resident on resident_id.
|
||||
// If the GPU is heavily accessing those pages, future access counter
|
||||
// migrations will migrate them to the GPU.
|
||||
uvm_page_mask_and(accessed_pages, accessed_pages, resident_mask);
|
||||
}
|
||||
}
|
||||
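The for_each_sub_granularity_region() expansion above converts runs of set bits in the notification's sub_granularity mask into page ranges. As a standalone illustration (not the driver's macro), the loop below walks the set-bit runs of a 32-bit mask and prints the small-page range each run covers, assuming the default 2MB tracking granularity with 4KB pages, i.e. 32 sub-regions of 64KB and 16 pages per sub-region.

#include <stdio.h>
#include <stdint.h>

#define NUM_REGIONS   32   /* UVM_SUB_GRANULARITY_REGIONS           */
#define PAGES_PER_SUB 16   /* 64KB sub-region / 4KB pages (assumed) */

int main(void)
{
    uint32_t sub_granularity = 0x0000F00Du; /* example notification mask */
    unsigned region = 0;

    while (region < NUM_REGIONS) {
        unsigned start, end;

        /* Skip cleared bits, then accumulate the run of set bits. */
        while (region < NUM_REGIONS && !(sub_granularity & (1u << region)))
            region++;
        if (region == NUM_REGIONS)
            break;
        start = region;
        while (region < NUM_REGIONS && (sub_granularity & (1u << region)))
            region++;
        end = region;

        printf("sub-regions [%u, %u) -> pages [%u, %u)\n",
               start, end, start * PAGES_PER_SUB, end * PAGES_PER_SUB);
    }
    return 0;
}

In the driver the equivalent page ranges are filled into accessed_pages with uvm_page_mask_region_fill() and then intersected with the resident mask, as the hunk above shows.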
|
||||
status = service_phys_notification(gpu, batch_context, fake_entry, out_flags);
|
||||
if (status != NV_OK)
|
||||
static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
{
|
||||
NvU32 i = index;
|
||||
NvU32 flags = 0;
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS flags_status;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
|
||||
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
|
||||
|
||||
UVM_ASSERT(va_block);
|
||||
UVM_ASSERT(i < batch_context->virt.num_notifications);
|
||||
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
uvm_page_mask_zero(accessed_pages);
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
while (i < batch_context->virt.num_notifications) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
NvU64 address = current_entry->address.address;
|
||||
|
||||
if ((current_entry->virtual_info.va_space != va_space) || (address > va_block->end)) {
|
||||
*out_index = i;
|
||||
break;
|
||||
}
|
||||
|
||||
expand_notification_block(mm, gpu_va_space, va_block, accessed_pages, current_entry);
|
||||
|
||||
i++;
|
||||
*out_index = i;
|
||||
}
|
||||
|
||||
status = service_notification_va_block_helper(mm, va_block, gpu->id, batch_context);
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
// At least one notification should have been processed.
|
||||
UVM_ASSERT(index < *out_index);
|
||||
|
||||
if (status == NV_OK)
|
||||
flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
flags_status = notify_tools_and_process_flags(gpu, ¬ifications[index], *out_index - index, flags);
|
||||
|
||||
if ((status == NV_OK) && (flags_status != NV_OK))
|
||||
status = flags_status;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications_batch(struct mm_struct *mm,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_va_block_t *va_block;
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[index];
|
||||
NvU64 address = current_entry->address.address;
|
||||
|
||||
UVM_ASSERT(va_space);
|
||||
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
// Virtual address notifications are always 64K aligned
|
||||
UVM_ASSERT(IS_ALIGNED(address, UVM_PAGE_SIZE_64K));
|
||||
|
||||
// TODO: Bug 4309292: [UVM][HMM] Re-enable access counter HMM block
|
||||
// migrations for virtual notifications on configs with
|
||||
// 4KB page size
|
||||
status = uvm_va_block_find(va_space, address, &va_block);
|
||||
if ((status == NV_OK) && !uvm_va_block_is_hmm(va_block)) {
|
||||
|
||||
UVM_ASSERT(va_block);
|
||||
|
||||
status = service_virt_notifications_in_block(mm, gpu_va_space, va_block, batch_context, index, out_index);
|
||||
}
|
||||
else {
|
||||
NvU32 flags = 0;
|
||||
|
||||
UVM_ASSERT((status == NV_ERR_OBJECT_NOT_FOUND) ||
|
||||
(status == NV_ERR_INVALID_ADDRESS) ||
|
||||
uvm_va_block_is_hmm(va_block));
|
||||
|
||||
// NV_ERR_OBJECT_NOT_FOUND is returned if the VA range is valid but no
|
||||
// VA block has been allocated yet. This can happen if there are stale
|
||||
// notifications in the batch. A new VA range may have been allocated in
|
||||
// that range. So, clear the notification entry to continue getting
|
||||
// notifications for the new VA range.
|
||||
if (status == NV_ERR_OBJECT_NOT_FOUND)
|
||||
flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
// NV_ERR_INVALID_ADDRESS is returned if the corresponding VA range
|
||||
// doesn't exist or it's not a managed range. Access counter migrations
|
||||
// are not currently supported on such ranges.
|
||||
//
|
||||
// TODO: Bug 1990466: [uvm] Use access counters to trigger migrations
|
||||
// When support for SAM migrations is added, clear the notification
|
||||
// entry if the VA range doesn't exist in order to receive notifications
|
||||
// when a new VA range is allocated in that region.
|
||||
status = notify_tools_and_process_flags(gpu_va_space->gpu, &batch_context->virt.notifications[index], 1, flags);
|
||||
*out_index = index + 1;
|
||||
|
||||
status = NV_OK;
|
||||
}
|
||||
|
||||
return status;
|
||||
@ -1632,33 +1700,67 @@ static NV_STATUS service_virt_notification(uvm_gpu_t *gpu,
|
||||
static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
NvU32 i;
|
||||
NvU32 i = 0;
|
||||
NV_STATUS status = NV_OK;
|
||||
struct mm_struct *mm = NULL;
|
||||
uvm_va_space_t *va_space = NULL;
|
||||
uvm_va_space_t *prev_va_space = NULL;
|
||||
uvm_gpu_va_space_t *gpu_va_space = NULL;
|
||||
|
||||
// TODO: Bug 4299018 : Add support for virtual access counter migrations on
|
||||
// 4K page sizes.
|
||||
if (PAGE_SIZE == UVM_PAGE_SIZE_4K) {
|
||||
return notify_tools_and_process_flags(gpu,
|
||||
batch_context->virt.notifications,
|
||||
batch_context->virt.num_notifications,
|
||||
0);
|
||||
}
|
||||
|
||||
preprocess_virt_notifications(gpu, batch_context);
|
||||
|
||||
for (i = 0; i < batch_context->virt.num_notifications; ++i) {
|
||||
unsigned flags = 0;
|
||||
while (i < batch_context->virt.num_notifications) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];
|
||||
va_space = current_entry->virtual_info.va_space;
|
||||
|
||||
status = service_virt_notification(gpu, batch_context, current_entry, &flags);
|
||||
if (va_space != prev_va_space) {
|
||||
|
||||
UVM_DBG_PRINT_RL("Processed virt access counter (%d/%d): %sMANAGED (status: %d) clear: %s\n",
|
||||
i + 1,
|
||||
batch_context->virt.num_notifications,
|
||||
(flags & UVM_ACCESS_COUNTER_ON_MANAGED) ? "" : "NOT ",
|
||||
status,
|
||||
(flags & UVM_ACCESS_COUNTER_ACTION_CLEAR) ? "YES" : "NO");
|
||||
// New va_space detected, drop locks of the old va_space.
|
||||
if (prev_va_space) {
|
||||
uvm_va_space_up_read(prev_va_space);
|
||||
uvm_va_space_mm_release_unlock(prev_va_space, mm);
|
||||
|
||||
if (uvm_enable_builtin_tests)
|
||||
uvm_tools_broadcast_access_counter(gpu, current_entry, flags & UVM_ACCESS_COUNTER_ON_MANAGED);
|
||||
mm = NULL;
|
||||
gpu_va_space = NULL;
|
||||
}
|
||||
|
||||
if (status == NV_OK && (flags & UVM_ACCESS_COUNTER_ACTION_CLEAR))
|
||||
status = access_counter_clear_targeted(gpu, current_entry);
|
||||
// Acquire locks for the new va_space.
|
||||
if (va_space) {
|
||||
mm = uvm_va_space_mm_retain_lock(va_space);
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
|
||||
}
|
||||
|
||||
prev_va_space = va_space;
|
||||
}
|
||||
|
||||
if (va_space && gpu_va_space && uvm_va_space_has_access_counter_migrations(va_space)) {
|
||||
status = service_virt_notifications_batch(mm, gpu_va_space, batch_context, i, &i);
|
||||
}
|
||||
else {
|
||||
status = notify_tools_and_process_flags(gpu, &batch_context->virt.notifications[i], 1, 0);
|
||||
i++;
|
||||
}
|
||||
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
if (va_space) {
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_release_unlock(va_space, mm);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
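The rewritten service_virt_notifications() above relies on the earlier sort by (va_space, address): it walks the notification array once, taking the VA space locks only when the va_space key changes, and hands each run to the per-block servicing path. Below is a minimal standalone sketch of that run-grouping pattern, using plain integers and printf in place of VA space pointers and the real lock calls; it is an illustration of the control flow, not driver code.

#include <stdio.h>

/* Stand-in for a sorted notification array: each entry tagged with its
 * "VA space" key. In the driver the array is sorted by (va_space, address). */
struct item { int va_space; unsigned long long address; };

int main(void)
{
    struct item items[] = {
        { 1, 0x10000 }, { 1, 0x20000 }, { 2, 0x10000 }, { 2, 0x50000 }, { 3, 0x30000 },
    };
    const int n = sizeof(items) / sizeof(items[0]);
    int i = 0;
    int prev_va_space = 0; /* 0 == "no VA space locked yet" */

    while (i < n) {
        if (items[i].va_space != prev_va_space) {
            if (prev_va_space != 0)
                printf("unlock VA space %d\n", prev_va_space); /* drop old locks */
            printf("lock VA space %d\n", items[i].va_space);   /* acquire new locks */
            prev_va_space = items[i].va_space;
        }

        printf("  service notification at 0x%llx\n", items[i].address);
        i++;
    }

    if (prev_va_space != 0)
        printf("unlock VA space %d\n", prev_va_space); /* final unlock, as in the hunk */
    return 0;
}

The driver additionally dispatches a whole run at a time through service_virt_notifications_batch(), which advances the index past every notification that fell into the same VA block before the loop continues.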
@ -1941,6 +2043,7 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
|
||||
}
|
||||
else {
|
||||
uvm_access_counter_buffer_entry_t entry = { 0 };
|
||||
uvm_access_counter_buffer_entry_t *notification = &entry;
|
||||
|
||||
if (params->counter_type == UVM_TEST_ACCESS_COUNTER_TYPE_MIMC)
|
||||
entry.counter_type = UVM_ACCESS_COUNTER_TYPE_MIMC;
|
||||
@ -1950,7 +2053,7 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
|
||||
entry.bank = params->bank;
|
||||
entry.tag = params->tag;
|
||||
|
||||
status = access_counter_clear_targeted(gpu, &entry);
|
||||
status = access_counter_clear_notifications(gpu, ¬ification, 1);
|
||||
}
|
||||
|
||||
if (status == NV_OK)
|
||||
|
@ -235,17 +235,27 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// In SRIOV, the UVM (guest) driver does not have access to the privileged
|
||||
// registers used to clear the faulted bit. Instead, UVM requests host RM to do
|
||||
// the clearing on its behalf, using a SW method.
|
||||
static bool use_clear_faulted_channel_sw_method(uvm_gpu_t *gpu)
|
||||
{
|
||||
if (uvm_gpu_is_virt_mode_sriov(gpu)) {
|
||||
UVM_ASSERT(gpu->parent->has_clear_faulted_channel_sw_method);
|
||||
return true;
|
||||
}
|
||||
// If true, UVM uses a SW method to request RM to do the clearing on its
|
||||
// behalf.
|
||||
bool use_sw_method = false;
|
||||
|
||||
return false;
|
||||
// In SRIOV, the UVM (guest) driver does not have access to the privileged
|
||||
// registers used to clear the faulted bit.
|
||||
if (uvm_gpu_is_virt_mode_sriov(gpu))
|
||||
use_sw_method = true;
|
||||
|
||||
// In Confidential Computing access to the privileged registers is blocked,
|
||||
// in order to prevent interference between guests, or between the
|
||||
// (untrusted) host and the guests.
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
use_sw_method = true;
|
||||
|
||||
if (use_sw_method)
|
||||
UVM_ASSERT(gpu->parent->has_clear_faulted_channel_sw_method);
|
||||
|
||||
return use_sw_method;
|
||||
}
|
||||
|
||||
static NV_STATUS clear_faulted_method_on_gpu(uvm_gpu_t *gpu,
|
||||
@ -570,7 +580,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
ats_context->client_type = UVM_FAULT_CLIENT_TYPE_HUB;
|
||||
|
||||
ats_invalidate->write_faults_in_batch = false;
|
||||
ats_invalidate->tlb_batch_pending = false;
|
||||
|
||||
va_range_next = uvm_va_space_iter_first(gpu_va_space->va_space, fault_entry->fault_address, ~0ULL);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -362,7 +362,8 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
|
||||
"Cancel targeting instance_ptr {0x%llx:%s}\n",
|
||||
instance_ptr.address,
|
||||
uvm_aperture_string(instance_ptr.aperture));
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
status = uvm_push_begin_acquire(gpu->channel_manager,
|
||||
UVM_CHANNEL_TYPE_MEMOPS,
|
||||
&replayable_faults->replay_tracker,
|
||||
@ -697,9 +698,6 @@ static inline int cmp_access_type(uvm_fault_access_type_t a, uvm_fault_access_ty
|
||||
|
||||
typedef enum
|
||||
{
|
||||
// Fetch a batch of faults from the buffer.
|
||||
FAULT_FETCH_MODE_BATCH_ALL,
|
||||
|
||||
// Fetch a batch of faults from the buffer. Stop at the first entry that is
|
||||
// not ready yet
|
||||
FAULT_FETCH_MODE_BATCH_READY,
|
||||
@ -857,9 +855,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_gpu_t *gpu,
|
||||
// written out of order
|
||||
UVM_SPIN_WHILE(!gpu->parent->fault_buffer_hal->entry_is_valid(gpu->parent, get), &spin) {
|
||||
// We have some entry to work on. Let's do the rest later.
|
||||
if (fetch_mode != FAULT_FETCH_MODE_ALL &&
|
||||
fetch_mode != FAULT_FETCH_MODE_BATCH_ALL &&
|
||||
fault_index > 0)
|
||||
if (fetch_mode == FAULT_FETCH_MODE_BATCH_READY && fault_index > 0)
|
||||
goto done;
|
||||
}
|
||||
|
||||
@ -888,6 +884,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_gpu_t *gpu,
|
||||
|
||||
current_entry->va_space = NULL;
|
||||
current_entry->filtered = false;
|
||||
current_entry->replayable.cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
|
||||
|
||||
if (current_entry->fault_source.utlb_id > batch_context->max_utlb_id) {
|
||||
UVM_ASSERT(current_entry->fault_source.utlb_id < replayable_faults->utlb_count);
|
||||
@ -1184,7 +1181,11 @@ static void mark_fault_fatal(uvm_fault_service_batch_context_t *batch_context,
|
||||
fault_entry->replayable.cancel_va_mode = cancel_va_mode;
|
||||
|
||||
utlb->has_fatal_faults = true;
|
||||
batch_context->has_fatal_faults = true;
|
||||
|
||||
if (!batch_context->fatal_va_space) {
|
||||
UVM_ASSERT(fault_entry->va_space);
|
||||
batch_context->fatal_va_space = fault_entry->va_space;
|
||||
}
|
||||
}
|
||||
|
||||
static void fault_entry_duplicate_flags(uvm_fault_service_batch_context_t *batch_context,
|
||||
@ -1378,7 +1379,10 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
|
||||
UVM_ASSERT(current_entry->fault_access_type ==
|
||||
uvm_fault_access_type_mask_highest(current_entry->access_type_mask));
|
||||
|
||||
current_entry->is_fatal = false;
|
||||
// Unserviceable faults were already skipped by the caller. There are no
|
||||
// unserviceable fault types that could be in the same VA block as a
|
||||
// serviceable fault.
|
||||
UVM_ASSERT(!current_entry->is_fatal);
|
||||
current_entry->is_throttled = false;
|
||||
current_entry->is_invalid_prefetch = false;
|
||||
|
||||
@ -1512,7 +1516,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
|
||||
|
||||
++block_context->num_retries;
|
||||
|
||||
if (status == NV_OK && batch_context->has_fatal_faults)
|
||||
if (status == NV_OK && batch_context->fatal_va_space)
|
||||
status = uvm_va_block_set_cancel(va_block, &block_context->block_context, gpu);
|
||||
|
||||
return status;
|
||||
@ -1676,7 +1680,8 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
|
||||
if (access_type <= UVM_FAULT_ACCESS_TYPE_READ) {
|
||||
cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
|
||||
}
|
||||
else if (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE) {
|
||||
else {
|
||||
UVM_ASSERT(access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
|
||||
if (uvm_fault_access_type_mask_test(current_entry->access_type_mask, UVM_FAULT_ACCESS_TYPE_READ) &&
|
||||
!uvm_page_mask_test(reads_serviced_mask, page_index))
|
||||
cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
|
||||
@ -1735,6 +1740,10 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_fault_access_type_t access_type = current_entry->fault_access_type;
|
||||
bool is_duplicate = check_fault_entry_duplicate(current_entry, previous_entry);
|
||||
|
||||
// ATS faults can't be unserviceable, since unserviceable faults require
|
||||
// GMMU PTEs.
|
||||
UVM_ASSERT(!current_entry->is_fatal);
|
||||
|
||||
i++;
|
||||
|
||||
update_batch_and_notify_fault(gpu_va_space->gpu,
|
||||
@ -1934,14 +1943,198 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
|
||||
return status;
|
||||
}
|
||||
|
||||
// Called when a fault in the batch has been marked fatal. Flush the buffer
|
||||
// under the VA and mmap locks to remove any potential stale fatal faults, then
|
||||
// service all new faults for just that VA space and cancel those which are
|
||||
// fatal. Faults in other VA spaces are replayed when done and will be processed
|
||||
// when normal fault servicing resumes.
|
||||
static NV_STATUS service_fault_batch_for_cancel(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU32 i;
|
||||
uvm_va_space_t *va_space = batch_context->fatal_va_space;
|
||||
uvm_gpu_va_space_t *gpu_va_space = NULL;
|
||||
struct mm_struct *mm;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
|
||||
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.replayable.block_service_context;
|
||||
uvm_va_block_context_t *va_block_context = &service_context->block_context;
|
||||
|
||||
UVM_ASSERT(gpu->parent->replayable_faults_supported);
|
||||
UVM_ASSERT(va_space);
|
||||
|
||||
// Perform the flush and re-fetch while holding the mmap_lock and the
|
||||
// VA space lock. This avoids stale faults because it prevents any vma
|
||||
// modifications (mmap, munmap, mprotect) from happening between the time HW
|
||||
// takes the fault and we cancel it.
|
||||
mm = uvm_va_space_mm_retain_lock(va_space);
|
||||
va_block_context->mm = mm;
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
// We saw fatal faults in this VA space before. Flush while holding
|
||||
// mmap_lock to make sure those faults come back (aren't stale).
|
||||
//
|
||||
// We need to wait until all old fault messages have arrived before
|
||||
// flushing, hence UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT.
|
||||
status = fault_buffer_flush_locked(gpu,
|
||||
UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
|
||||
UVM_FAULT_REPLAY_TYPE_START,
|
||||
batch_context);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
// Wait for the flush's replay to finish to give the legitimate faults a
|
||||
// chance to show up in the buffer again.
|
||||
status = uvm_tracker_wait(&replayable_faults->replay_tracker);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
// We expect all replayed faults to have arrived in the buffer so we can re-
|
||||
// service them. The replay-and-wait sequence above will ensure they're all
|
||||
// in the HW buffer. When GSP owns the HW buffer, we also have to wait for
|
||||
// GSP to copy all available faults from the HW buffer into the shadow
|
||||
// buffer.
|
||||
//
|
||||
// TODO: Bug 2533557: This flush does not actually guarantee that GSP will
|
||||
// copy over all faults.
|
||||
status = hw_fault_buffer_flush_locked(gpu->parent);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
// If there is no GPU VA space for the GPU, ignore all faults in the VA
|
||||
// space. This can happen if the GPU VA space has been destroyed since we
|
||||
// unlocked the VA space in service_fault_batch. That means the fatal faults
|
||||
// are stale, because unregistering the GPU VA space requires preempting the
|
||||
// context and detaching all channels in that VA space. Restart fault
|
||||
// servicing from the top.
|
||||
gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
|
||||
if (!gpu_va_space)
|
||||
goto done;
|
||||
|
||||
// Re-parse the new faults
|
||||
batch_context->num_invalid_prefetch_faults = 0;
|
||||
batch_context->num_duplicate_faults = 0;
|
||||
batch_context->num_replays = 0;
|
||||
batch_context->fatal_va_space = NULL;
|
||||
batch_context->has_throttled_faults = false;
|
||||
|
||||
status = fetch_fault_buffer_entries(gpu, batch_context, FAULT_FETCH_MODE_ALL);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
|
||||
// No more faults left. Either the previously-seen fatal entry was stale, or
|
||||
// RM killed the context underneath us.
|
||||
if (batch_context->num_cached_faults == 0)
|
||||
goto done;
|
||||
|
||||
++batch_context->batch_id;
|
||||
|
||||
status = preprocess_fault_batch(gpu, batch_context);
|
||||
if (status != NV_OK) {
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED) {
|
||||
// Another flush happened due to stale faults or a context-fatal
|
||||
// error. The previously-seen fatal fault might not exist anymore,
|
||||
// so restart fault servicing from the top.
|
||||
status = NV_OK;
|
||||
}
|
||||
|
||||
goto done;
|
||||
}
|
||||
|
||||
// Search for the target VA space
|
||||
for (i = 0; i < batch_context->num_coalesced_faults; i++) {
|
||||
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
|
||||
UVM_ASSERT(current_entry->va_space);
|
||||
if (current_entry->va_space == va_space)
|
||||
break;
|
||||
}
|
||||
|
||||
while (i < batch_context->num_coalesced_faults) {
|
||||
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
|
||||
|
||||
if (current_entry->va_space != va_space)
|
||||
break;
|
||||
|
||||
// service_fault_batch_dispatch() doesn't expect unserviceable faults.
|
||||
// Just cancel them directly.
|
||||
if (current_entry->is_fatal) {
|
||||
status = cancel_fault_precise_va(gpu, current_entry, UVM_FAULT_CANCEL_VA_MODE_ALL);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
|
||||
++i;
|
||||
}
|
||||
else {
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
|
||||
NvU32 block_faults;
|
||||
|
||||
ats_invalidate->tlb_batch_pending = false;
|
||||
uvm_hmm_service_context_init(service_context);
|
||||
|
||||
// Service all the faults that we can. We only really need to search
|
||||
// for fatal faults, but attempting to service all is the easiest
|
||||
// way to do that.
|
||||
status = service_fault_batch_dispatch(va_space, gpu_va_space, batch_context, i, &block_faults, false);
|
||||
if (status != NV_OK) {
|
||||
// TODO: Bug 3900733: clean up locking in service_fault_batch().
|
||||
// We need to drop lock and retry. That means flushing and
|
||||
// starting over.
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
|
||||
status = NV_OK;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// Invalidate TLBs before cancel to ensure that fatal faults don't
|
||||
// get stuck in HW behind non-fatal faults to the same line.
|
||||
status = uvm_ats_invalidate_tlbs(gpu_va_space, ats_invalidate, &batch_context->tracker);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
|
||||
while (block_faults-- > 0) {
|
||||
current_entry = batch_context->ordered_fault_cache[i];
|
||||
if (current_entry->is_fatal) {
|
||||
status = cancel_fault_precise_va(gpu, current_entry, current_entry->replayable.cancel_va_mode);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
++i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
done:
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_release_unlock(va_space, mm);
|
||||
|
||||
if (status == NV_OK) {
|
||||
// There are two reasons to flush the fault buffer here.
|
||||
//
|
||||
// 1) Functional. We need to replay both the serviced non-fatal faults
|
||||
// and the skipped faults in other VA spaces. The former need to be
|
||||
// restarted and the latter need to be replayed so the normal fault
|
||||
// service mechanism can fetch and process them.
|
||||
//
|
||||
// 2) Performance. After cancelling the fatal faults, a flush removes
|
||||
// any potential duplicated fault that may have been added while
|
||||
// processing the faults in this batch. This flush also avoids doing
|
||||
// unnecessary processing after the fatal faults have been cancelled,
|
||||
// so all the rest are unlikely to remain after a replay because the
|
||||
// context is probably in the process of dying.
|
||||
status = fault_buffer_flush_locked(gpu,
|
||||
UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
|
||||
UVM_FAULT_REPLAY_TYPE_START,
|
||||
batch_context);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
// Scan the ordered view of faults and group them by different va_blocks
|
||||
// (managed faults) and service faults for each va_block, in batch.
|
||||
// Service non-managed faults one at a time as they are encountered during the
|
||||
// scan.
|
||||
//
|
||||
// This function returns NV_WARN_MORE_PROCESSING_REQUIRED if the fault buffer
|
||||
// was flushed because the needs_fault_buffer_flush flag was set on some GPU VA
|
||||
// space.
//
// Fatal faults are marked for later processing by the caller.
|
||||
static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
fault_service_mode_t service_mode,
|
||||
uvm_fault_service_batch_context_t *batch_context)
|
||||
@ -1960,7 +2153,7 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
|
||||
UVM_ASSERT(gpu->parent->replayable_faults_supported);
|
||||
|
||||
ats_invalidate->write_faults_in_batch = false;
|
||||
ats_invalidate->tlb_batch_pending = false;
|
||||
uvm_hmm_service_context_init(service_context);
|
||||
|
||||
for (i = 0; i < batch_context->num_coalesced_faults;) {
|
||||
@ -1995,38 +2188,25 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
va_block_context->mm = mm;
|
||||
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
gpu_va_space = uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
|
||||
if (uvm_processor_mask_test_and_clear_atomic(&va_space->needs_fault_buffer_flush, gpu->id)) {
|
||||
status = fault_buffer_flush_locked(gpu,
|
||||
UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT,
|
||||
UVM_FAULT_REPLAY_TYPE_START,
|
||||
batch_context);
|
||||
if (status == NV_OK)
|
||||
status = NV_WARN_MORE_PROCESSING_REQUIRED;
|
||||
|
||||
break;
|
||||
}
|
||||
|
||||
// The case where there is no valid GPU VA space for the GPU in this
|
||||
// VA space is handled next
|
||||
}
|
||||
|
||||
// Some faults could be already fatal if they cannot be handled by
|
||||
// the UVM driver
|
||||
if (current_entry->is_fatal) {
|
||||
++i;
|
||||
batch_context->has_fatal_faults = true;
|
||||
if (!batch_context->fatal_va_space)
|
||||
batch_context->fatal_va_space = va_space;
|
||||
|
||||
utlb->has_fatal_faults = true;
|
||||
UVM_ASSERT(utlb->num_pending_faults > 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id)) {
|
||||
if (!gpu_va_space) {
|
||||
// If there is no GPU VA space for the GPU, ignore the fault. This
|
||||
// can happen if a GPU VA space is destroyed without explicitly
|
||||
// freeing all memory ranges (destroying the VA range triggers a
|
||||
// flush of the fault buffer) and there are stale entries in the
|
||||
// freeing all memory ranges and there are stale entries in the
|
||||
// buffer that got fixed by the servicing in a previous batch.
|
||||
++i;
|
||||
continue;
|
||||
@ -2044,15 +2224,17 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
uvm_va_space_mm_release_unlock(va_space, mm);
|
||||
mm = NULL;
|
||||
va_space = NULL;
|
||||
status = NV_OK;
|
||||
continue;
|
||||
}
|
||||
|
||||
if (status != NV_OK)
|
||||
goto fail;
|
||||
|
||||
i += block_faults;
|
||||
|
||||
// Don't issue replays in cancel mode
|
||||
if (replay_per_va_block && !batch_context->has_fatal_faults) {
|
||||
if (replay_per_va_block && !batch_context->fatal_va_space) {
|
||||
status = push_replay_on_gpu(gpu, UVM_FAULT_REPLAY_TYPE_START, batch_context);
|
||||
if (status != NV_OK)
|
||||
goto fail;
|
||||
@ -2064,8 +2246,6 @@ static NV_STATUS service_fault_batch(uvm_gpu_t *gpu,
|
||||
}
|
||||
}
|
||||
|
||||
// Only clobber status if invalidate_status != NV_OK, since status may also
|
||||
// contain NV_WARN_MORE_PROCESSING_REQUIRED.
|
||||
if (va_space != NULL) {
|
||||
NV_STATUS invalidate_status = uvm_ats_invalidate_tlbs(gpu_va_space, ats_invalidate, &batch_context->tracker);
|
||||
if (invalidate_status != NV_OK)
|
||||
@ -2273,77 +2453,48 @@ static NvU32 is_fatal_fault_in_buffer(uvm_fault_service_batch_context_t *batch_c
|
||||
return false;
|
||||
}
|
||||
|
||||
typedef enum
|
||||
{
|
||||
// Only cancel faults flagged as fatal
|
||||
FAULT_CANCEL_MODE_FATAL,
|
||||
|
||||
// Cancel all faults in the batch unconditionally
|
||||
FAULT_CANCEL_MODE_ALL,
|
||||
} fault_cancel_mode_t;
|
||||
|
||||
// Cancel faults in the given fault service batch context. The function provides
|
||||
// two different modes depending on the value of cancel_mode:
|
||||
// - If cancel_mode == FAULT_CANCEL_MODE_FATAL, only faults flagged as fatal
|
||||
// will be cancelled. In this case, the reason reported to tools is the one
|
||||
// contained in the fault entry itself.
|
||||
// - If cancel_mode == FAULT_CANCEL_MODE_ALL, all faults will be cancelled
|
||||
// unconditionally. In this case, the reason reported to tools for non-fatal
|
||||
// faults is the one passed to this function.
|
||||
static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
fault_cancel_mode_t cancel_mode,
|
||||
UvmEventFatalReason reason)
|
||||
// Cancel all faults in the given fault service batch context, even those not
|
||||
// marked as fatal.
|
||||
static NV_STATUS cancel_faults_all(uvm_gpu_t *gpu,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
UvmEventFatalReason reason)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS fault_status;
|
||||
uvm_va_space_t *va_space = NULL;
|
||||
NvU32 i;
|
||||
NvU32 i = 0;
|
||||
|
||||
UVM_ASSERT(gpu->parent->fault_cancel_va_supported);
|
||||
if (cancel_mode == FAULT_CANCEL_MODE_ALL)
|
||||
UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
|
||||
UVM_ASSERT(reason != UvmEventFatalReasonInvalid);
|
||||
|
||||
for (i = 0; i < batch_context->num_coalesced_faults; ++i) {
|
||||
while (i < batch_context->num_coalesced_faults && status == NV_OK) {
|
||||
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[i];
|
||||
uvm_va_space_t *va_space = current_entry->va_space;
|
||||
bool skip_va_space;
|
||||
|
||||
UVM_ASSERT(current_entry->va_space);
|
||||
UVM_ASSERT(va_space);
|
||||
|
||||
if (current_entry->va_space != va_space) {
|
||||
// Fault on a different va_space, drop the lock of the old one...
|
||||
if (va_space != NULL)
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
va_space = current_entry->va_space;
|
||||
// If there is no GPU VA space for the GPU, ignore all faults in
|
||||
// that VA space. This can happen if the GPU VA space has been
|
||||
// destroyed since we unlocked the VA space in service_fault_batch.
|
||||
// Ignoring the fault avoids targetting a PDB that might have been
|
||||
// reused by another process.
|
||||
skip_va_space = !uvm_gpu_va_space_get_by_parent_gpu(va_space, gpu->parent);
|
||||
|
||||
// ... and take the lock of the new one
|
||||
uvm_va_space_down_read(va_space);
|
||||
for (;
|
||||
i < batch_context->num_coalesced_faults && current_entry->va_space == va_space;
|
||||
current_entry = batch_context->ordered_fault_cache[++i]) {
|
||||
uvm_fault_cancel_va_mode_t cancel_va_mode;
|
||||
|
||||
// We don't need to check whether a buffer flush is required
|
||||
// (due to VA range destruction).
|
||||
// - For cancel_mode == FAULT_CANCEL_MODE_FATAL, once a fault is
|
||||
// flagged as fatal we need to cancel it, even if its VA range no
|
||||
// longer exists.
|
||||
// - For cancel_mode == FAULT_CANCEL_MODE_ALL we don't care about
|
||||
// any of this, we just want to trigger RC in RM.
|
||||
}
|
||||
if (skip_va_space)
|
||||
continue;
|
||||
|
||||
if (!uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->parent->id)) {
|
||||
// If there is no GPU VA space for the GPU, ignore the fault.
|
||||
// This can happen if the GPU VA did not exist in
|
||||
// service_fault_batch(), or it was destroyed since then.
|
||||
// This is to avoid targetting a PDB that might have been reused
|
||||
// by another process.
|
||||
continue;
|
||||
}
|
||||
|
||||
// Cancel the fault
|
||||
if (cancel_mode == FAULT_CANCEL_MODE_ALL || current_entry->is_fatal) {
|
||||
uvm_fault_cancel_va_mode_t cancel_va_mode = current_entry->replayable.cancel_va_mode;
|
||||
|
||||
// If cancelling unconditionally and the fault was not fatal,
|
||||
// set the cancel reason passed to this function
|
||||
if (!current_entry->is_fatal) {
|
||||
if (current_entry->is_fatal) {
|
||||
UVM_ASSERT(current_entry->fatal_reason != UvmEventFatalReasonInvalid);
|
||||
cancel_va_mode = current_entry->replayable.cancel_va_mode;
|
||||
}
|
||||
else {
|
||||
current_entry->fatal_reason = reason;
|
||||
cancel_va_mode = UVM_FAULT_CANCEL_VA_MODE_ALL;
|
||||
}
|
||||
@ -2352,17 +2503,13 @@ static NV_STATUS cancel_faults_precise_va(uvm_gpu_t *gpu,
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
uvm_va_space_up_read(va_space);
|
||||
}
|
||||
|
||||
if (va_space != NULL)
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
// After cancelling the fatal faults, the fault buffer is flushed to remove
|
||||
// any potential duplicated fault that may have been added while processing
|
||||
// the faults in this batch. This flush also avoids doing unnecessary
|
||||
// processing after the fatal faults have been cancelled, so all the rest
|
||||
// are unlikely to remain after a replay because the context is probably in
|
||||
// the process of dying.
|
||||
// Because each cancel itself triggers a replay, there may be a large number
|
||||
// of new duplicated faults in the buffer after cancelling all the known
|
||||
// ones. Flushing the buffer discards them to avoid unnecessary processing.
|
||||
fault_status = fault_buffer_flush_locked(gpu,
|
||||
UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT,
|
||||
UVM_FAULT_REPLAY_TYPE_START,
|
||||
@ -2410,12 +2557,12 @@ static void cancel_fault_batch(uvm_gpu_t *gpu,
|
||||
uvm_fault_service_batch_context_t *batch_context,
|
||||
UvmEventFatalReason reason)
|
||||
{
|
||||
if (gpu->parent->fault_cancel_va_supported) {
|
||||
cancel_faults_precise_va(gpu, batch_context, FAULT_CANCEL_MODE_ALL, reason);
|
||||
return;
|
||||
}
|
||||
|
||||
cancel_fault_batch_tlb(gpu, batch_context, reason);
|
||||
// Return code is ignored since we're on a global error path and wouldn't be
|
||||
// able to recover anyway.
|
||||
if (gpu->parent->fault_cancel_va_supported)
|
||||
cancel_faults_all(gpu, batch_context, reason);
|
||||
else
|
||||
cancel_fault_batch_tlb(gpu, batch_context, reason);
|
||||
}
|
||||
|
||||
|
||||
@ -2502,7 +2649,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
|
||||
|
||||
batch_context->num_invalid_prefetch_faults = 0;
|
||||
batch_context->num_replays = 0;
|
||||
batch_context->has_fatal_faults = false;
|
||||
batch_context->fatal_va_space = NULL;
|
||||
batch_context->has_throttled_faults = false;
|
||||
|
||||
// 5) Fetch all faults from buffer
|
||||
@ -2549,9 +2696,6 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
|
||||
// 8) Service all non-fatal faults and mark all non-serviceable faults
|
||||
// as fatal
|
||||
status = service_fault_batch(gpu, FAULT_SERVICE_MODE_CANCEL, batch_context);
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
|
||||
continue;
|
||||
|
||||
UVM_ASSERT(batch_context->num_replays == 0);
|
||||
if (status == NV_ERR_NO_MEMORY)
|
||||
continue;
|
||||
@ -2559,7 +2703,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
|
||||
break;
|
||||
|
||||
// No more fatal faults left, we are done
|
||||
if (!batch_context->has_fatal_faults)
|
||||
if (!batch_context->fatal_va_space)
|
||||
break;
|
||||
|
||||
// 9) Search for uTLBs that contain fatal faults and meet the
|
||||
@ -2581,13 +2725,9 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
|
||||
|
||||
static NV_STATUS cancel_faults_precise(uvm_gpu_t *gpu, uvm_fault_service_batch_context_t *batch_context)
|
||||
{
|
||||
UVM_ASSERT(batch_context->has_fatal_faults);
|
||||
if (gpu->parent->fault_cancel_va_supported) {
|
||||
return cancel_faults_precise_va(gpu,
|
||||
batch_context,
|
||||
FAULT_CANCEL_MODE_FATAL,
|
||||
UvmEventFatalReasonInvalid);
|
||||
}
|
||||
UVM_ASSERT(batch_context->fatal_va_space);
|
||||
if (gpu->parent->fault_cancel_va_supported)
|
||||
return service_fault_batch_for_cancel(gpu, batch_context);
|
||||
|
||||
return cancel_faults_precise_tlb(gpu, batch_context);
|
||||
}
|
||||
@ -2643,7 +2783,7 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
|
||||
batch_context->num_invalid_prefetch_faults = 0;
|
||||
batch_context->num_duplicate_faults = 0;
|
||||
batch_context->num_replays = 0;
|
||||
batch_context->has_fatal_faults = false;
|
||||
batch_context->fatal_va_space = NULL;
|
||||
batch_context->has_throttled_faults = false;
|
||||
|
||||
status = fetch_fault_buffer_entries(gpu, batch_context, FAULT_FETCH_MODE_BATCH_READY);
|
||||
@ -2671,9 +2811,6 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
|
||||
// was flushed
|
||||
num_replays += batch_context->num_replays;
|
||||
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
|
||||
continue;
|
||||
|
||||
enable_disable_prefetch_faults(gpu->parent, batch_context);
|
||||
|
||||
if (status != NV_OK) {
|
||||
@ -2687,10 +2824,17 @@ void uvm_gpu_service_replayable_faults(uvm_gpu_t *gpu)
|
||||
break;
|
||||
}
|
||||
|
||||
if (batch_context->has_fatal_faults) {
|
||||
if (batch_context->fatal_va_space) {
|
||||
status = uvm_tracker_wait(&batch_context->tracker);
|
||||
if (status == NV_OK)
|
||||
if (status == NV_OK) {
|
||||
status = cancel_faults_precise(gpu, batch_context);
|
||||
if (status == NV_OK) {
|
||||
// Cancel handling should've issued at least one replay
|
||||
UVM_ASSERT(batch_context->num_replays > 0);
|
||||
++num_batches;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
break;
|
||||
}
|
||||
|
@ -103,5 +103,7 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->map_remap_larger_page_promotion = false;
|
||||
|
||||
parent_gpu->plc_supported = true;
|
||||
|
||||
parent_gpu->no_ats_range_required = true;
|
||||
}
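// Editor's note: no_ats_range_required is the property checked by
// page_tree_ats_init_required() in uvm_mmu.c, so only GPUs that set it
// (Hopper here; Maxwell leaves it false) get the no_ats_ranges pinned at
// page tree init.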
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2020-2022 NVIDIA Corporation
|
||||
Copyright (c) 2020-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -33,6 +33,7 @@
|
||||
|
||||
#include "uvm_types.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_common.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "uvm_hal_types.h"
|
||||
#include "uvm_hopper_fault_buffer.h"
|
||||
@ -42,6 +43,10 @@
|
||||
#define MMU_BIG 0
|
||||
#define MMU_SMALL 1
|
||||
|
||||
// Used in pde_pcf().
|
||||
#define ATS_ALLOWED 0
|
||||
#define ATS_NOT_ALLOWED 1
|
||||
|
||||
uvm_mmu_engine_type_t uvm_hal_hopper_mmu_engine_id_to_type(NvU16 mmu_engine_id)
|
||||
{
|
||||
if (mmu_engine_id >= NV_PFAULT_MMU_ENG_ID_HOST0 && mmu_engine_id <= NV_PFAULT_MMU_ENG_ID_HOST44)
|
||||
@ -260,7 +265,108 @@ static NvU64 poisoned_pte_hopper(void)
|
||||
return WRITE_HWCONST64(pte_bits, _MMU_VER3, PTE, PCF, PRIVILEGE_RO_NO_ATOMIC_UNCACHED_ACD);
|
||||
}
|
||||
|
||||
static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, NvU32 depth)
|
||||
typedef enum
|
||||
{
|
||||
PDE_TYPE_SINGLE,
|
||||
PDE_TYPE_DUAL_BIG,
|
||||
PDE_TYPE_DUAL_SMALL,
|
||||
PDE_TYPE_COUNT,
|
||||
} pde_type_t;
|
||||
|
||||
static const NvU8 valid_pcf[][2] = { { NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_ALLOWED,
|
||||
NV_MMU_VER3_PDE_PCF_VALID_UNCACHED_ATS_NOT_ALLOWED },
|
||||
{ NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_ALLOWED,
|
||||
NV_MMU_VER3_DUAL_PDE_PCF_BIG_VALID_UNCACHED_ATS_NOT_ALLOWED },
|
||||
{ NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_ALLOWED,
|
||||
NV_MMU_VER3_DUAL_PDE_PCF_SMALL_VALID_UNCACHED_ATS_NOT_ALLOWED } };
|
||||
|
||||
static const NvU8 invalid_pcf[][2] = { { NV_MMU_VER3_PDE_PCF_INVALID_ATS_ALLOWED,
|
||||
NV_MMU_VER3_PDE_PCF_INVALID_ATS_NOT_ALLOWED },
|
||||
{ NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_ALLOWED,
|
||||
NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_NOT_ALLOWED },
|
||||
{ NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_ALLOWED,
|
||||
NV_MMU_VER3_DUAL_PDE_PCF_SMALL_INVALID_ATS_NOT_ALLOWED } };
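// Editor's note: both tables are indexed as table[pde_type][ats_setting],
// e.g. invalid_pcf[PDE_TYPE_DUAL_BIG][ATS_NOT_ALLOWED] yields
// NV_MMU_VER3_DUAL_PDE_PCF_BIG_INVALID_ATS_NOT_ALLOWED. pde_pcf() below
// selects valid_pcf or invalid_pcf depending on whether the child
// allocation is present.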
|
||||
|
||||
static const NvU8 va_base[] = { 56, 47, 38, 29, 21 };
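// Editor's note: va_base[depth] is the VA bit covered by one child entry at
// that depth, so each entry spans 1ULL << va_base[depth] bytes of VA; for
// example 1ULL << 29 = 512MB per entry at depth 3 (PDE1) and 1ULL << 21 =
// 2MB per entry at depth 4 (PDE0), which is the granularity the
// ATS_ALLOWED/ATS_NOT_ALLOWED policy below reasons about.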
|
||||
|
||||
static bool is_ats_range_valid(uvm_page_directory_t *dir, NvU32 child_index)
|
||||
{
|
||||
NvU64 pde_base_va;
|
||||
NvU64 min_va_upper;
|
||||
NvU64 max_va_lower;
|
||||
NvU32 index_in_dir;
|
||||
|
||||
uvm_cpu_get_unaddressable_range(&max_va_lower, &min_va_upper);
|
||||
|
||||
UVM_ASSERT(dir->depth < ARRAY_SIZE(va_base));
|
||||
|
||||
// We can use UVM_PAGE_SIZE_AGNOSTIC because page_size is only used in
|
||||
// index_bits_hopper() for PTE table, i.e., depth 5+, which does not use a
|
||||
// PDE PCF or an ATS_ALLOWED/NOT_ALLOWED setting.
|
||||
UVM_ASSERT(child_index < (1ull << index_bits_hopper(dir->depth, UVM_PAGE_SIZE_AGNOSTIC)));
|
||||
|
||||
pde_base_va = 0;
|
||||
index_in_dir = child_index;
|
||||
while (dir) {
|
||||
pde_base_va += index_in_dir * (1ull << va_base[dir->depth]);
|
||||
index_in_dir = dir->index_in_parent;
|
||||
dir = dir->host_parent;
|
||||
}
|
||||
pde_base_va = (NvU64)((NvS64)(pde_base_va << (64 - num_va_bits_hopper())) >> (64 - num_va_bits_hopper()));
|
||||
|
||||
if (pde_base_va < max_va_lower || pde_base_va >= min_va_upper)
|
||||
return true;
|
||||
|
||||
return false;
|
||||
}
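// Editor's sketch (illustrative only, not driver code): the cast-and-shift
// above sign-extends the computed base VA into canonical form. Assuming a
// 57-bit GPU VA width, a standalone equivalent would be:
//
//   static inline unsigned long long canonical57(unsigned long long va)
//   {
//       // Arithmetic right shift replicates bit 56 into bits 63..57.
//       return (unsigned long long)(((long long)(va << 7)) >> 7);
//   }
//
//   // canonical57(0x0100000000000000ull) == 0xff00000000000000ull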
|
||||
|
||||
// PDE Permission Control Flags
|
||||
static NvU32 pde_pcf(bool valid, pde_type_t pde_type, uvm_page_directory_t *dir, NvU32 child_index)
|
||||
{
|
||||
const NvU8 (*pcf)[2] = valid ? valid_pcf : invalid_pcf;
|
||||
NvU8 depth = dir->depth;
|
||||
|
||||
UVM_ASSERT(pde_type < PDE_TYPE_COUNT);
|
||||
UVM_ASSERT(depth < 5);
|
||||
|
||||
// On non-ATS systems, PDE PCF only sets the valid and volatile/cache bits.
|
||||
if (!g_uvm_global.ats.enabled)
|
||||
return pcf[pde_type][ATS_ALLOWED];
|
||||
|
||||
// We assume all supported ATS platforms use canonical form addresses.
// See comments in uvm_gpu.c:uvm_gpu_can_address() and in
// uvm_mmu.c:page_tree_ats_init().
|
||||
UVM_ASSERT(uvm_platform_uses_canonical_form_address());
|
||||
|
||||
// Hopper GPUs on ATS-enabled systems perform a parallel lookup on both
|
||||
// ATS and GMMU page tables. For managed memory we need to prevent this
|
||||
// parallel lookup since we would not get any GPU fault if the CPU has
|
||||
// a valid mapping. Also, for external ranges that are known to be
|
||||
// mapped entirely on the GMMU page table we can skip the ATS lookup
|
||||
// for performance reasons. Parallel ATS lookup is disabled in PDE1
|
||||
// (depth 3) and, therefore, it applies to the underlying 512MB VA
|
||||
// range.
|
||||
//
|
||||
// UVM sets ATS_NOT_ALLOWED for all Hopper+ mappings on ATS systems.
|
||||
// This is fine because CUDA ensures that all managed and external
|
||||
// allocations are properly compartmentalized in 512MB-aligned VA
|
||||
// regions. For cudaHostRegister CUDA cannot control the VA range, but
|
||||
// we rely on ATS for those allocations so they can't choose the
|
||||
// ATS_NOT_ALLOWED mode.
|
||||
// TODO: Bug 3254055: Relax the NO_ATS setting from 512MB (pde1) range to
|
||||
// PTEs.
|
||||
// HW complies with the leaf PDE's ATS_ALLOWED/ATS_NOT_ALLOWED settings,
|
||||
// enabling us to treat any upper-level PDE as a don't care as long as there
|
||||
// are leaf PDEs for the entire upper-level PDE range. We assume PDE4
|
||||
// entries (depth == 0) are always ATS enabled, and the no_ats_range is in
|
||||
// PDE3 or lower.
|
||||
if (depth == 0 || (!valid && is_ats_range_valid(dir, child_index)))
|
||||
return pcf[pde_type][ATS_ALLOWED];
|
||||
|
||||
return pcf[pde_type][ATS_NOT_ALLOWED];
|
||||
}
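// Editor's note summarizing the policy above on an ATS-enabled system
// (sketch, not normative): for depth > 0, a valid PDE always gets
// ATS_NOT_ALLOWED; an invalid PDE gets ATS_ALLOWED when the VA it covers is
// CPU-addressable and ATS_NOT_ALLOWED when it falls in the non-canonical
// hole; depth 0 (PDE4) entries always get ATS_ALLOWED.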
|
||||
|
||||
static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_directory_t *dir, NvU32 child_index)
|
||||
{
|
||||
NvU64 pde_bits = 0;
|
||||
|
||||
@ -280,38 +386,17 @@ static NvU64 single_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, NvU32 dep
|
||||
break;
|
||||
}
|
||||
|
||||
// PCF (permission control flags) 5:3
|
||||
// Hopper GPUs on ATS-enabled systems, perform a parallel lookup on both
|
||||
// ATS and GMMU page tables. For managed memory we need to prevent this
|
||||
// parallel lookup since we would not get any GPU fault if the CPU has
|
||||
// a valid mapping. Also, for external ranges that are known to be
|
||||
// mapped entirely on the GMMU page table we can skip the ATS lookup
|
||||
// for performance reasons. Parallel ATS lookup is disabled in PDE1
|
||||
// (depth 3) and, therefore, it applies to the underlying 512MB VA
|
||||
// range.
|
||||
//
|
||||
// UVM sets ATS_NOT_ALLOWED for all Hopper+ mappings on ATS systems.
|
||||
// This is fine because CUDA ensures that all managed and external
|
||||
// allocations are properly compartmentalized in 512MB-aligned VA
|
||||
// regions. For cudaHostRegister CUDA cannot control the VA range, but
|
||||
// we rely on ATS for those allocations so they can't choose the
|
||||
// ATS_NOT_ALLOWED mode.
|
||||
//
|
||||
// TODO: Bug 3254055: Relax the NO_ATS setting from 512MB (pde1) range
|
||||
// to PTEs.
|
||||
if (depth == 3 && g_uvm_global.ats.enabled)
|
||||
pde_bits |= HWCONST64(_MMU_VER3, PDE, PCF, VALID_UNCACHED_ATS_NOT_ALLOWED);
|
||||
else
|
||||
pde_bits |= HWCONST64(_MMU_VER3, PDE, PCF, VALID_UNCACHED_ATS_ALLOWED);
|
||||
|
||||
// address 51:12
|
||||
pde_bits |= HWVALUE64(_MMU_VER3, PDE, ADDRESS, address);
|
||||
}
|
||||
|
||||
// PCF (permission control flags) 5:3
|
||||
pde_bits |= HWVALUE64(_MMU_VER3, PDE, PCF, pde_pcf(phys_alloc != NULL, PDE_TYPE_SINGLE, dir, child_index));
|
||||
|
||||
return pde_bits;
|
||||
}
|
||||
|
||||
static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
|
||||
static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_directory_t *dir, NvU32 child_index)
|
||||
{
|
||||
NvU64 pde_bits = 0;
|
||||
|
||||
@ -330,17 +415,20 @@ static NvU64 big_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
|
||||
break;
|
||||
}
|
||||
|
||||
// PCF (permission control flags) 5:3
|
||||
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, PCF_BIG, VALID_UNCACHED_ATS_NOT_ALLOWED);
|
||||
|
||||
// address 51:8
|
||||
pde_bits |= HWVALUE64(_MMU_VER3, DUAL_PDE, ADDRESS_BIG, address);
|
||||
}
|
||||
|
||||
// PCF (permission control flags) 5:3
|
||||
pde_bits |= HWVALUE64(_MMU_VER3,
|
||||
DUAL_PDE,
|
||||
PCF_BIG,
|
||||
pde_pcf(phys_alloc != NULL, PDE_TYPE_DUAL_BIG, dir, child_index));
|
||||
|
||||
return pde_bits;
|
||||
}
|
||||
|
||||
static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
|
||||
static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc, uvm_page_directory_t *dir, NvU32 child_index)
|
||||
{
|
||||
NvU64 pde_bits = 0;
|
||||
|
||||
@ -359,29 +447,40 @@ static NvU64 small_half_pde_hopper(uvm_mmu_page_table_alloc_t *phys_alloc)
|
||||
break;
|
||||
}
|
||||
|
||||
// PCF (permission control flags) 69:67 [5:3]
|
||||
pde_bits |= HWCONST64(_MMU_VER3, DUAL_PDE, PCF_SMALL, VALID_UNCACHED_ATS_NOT_ALLOWED);
|
||||
|
||||
// address 115:76 [51:12]
|
||||
pde_bits |= HWVALUE64(_MMU_VER3, DUAL_PDE, ADDRESS_SMALL, address);
|
||||
}
|
||||
|
||||
// PCF (permission control flags) 69:67 [5:3]
|
||||
pde_bits |= HWVALUE64(_MMU_VER3,
|
||||
DUAL_PDE,
|
||||
PCF_SMALL,
|
||||
pde_pcf(phys_alloc != NULL, PDE_TYPE_DUAL_SMALL, dir, child_index));
|
||||
|
||||
return pde_bits;
|
||||
}
|
||||
|
||||
static void make_pde_hopper(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
|
||||
static void make_pde_hopper(void *entry,
|
||||
uvm_mmu_page_table_alloc_t **phys_allocs,
|
||||
uvm_page_directory_t *dir,
|
||||
NvU32 child_index)
|
||||
{
|
||||
NvU32 entry_count = entries_per_index_hopper(depth);
|
||||
NvU32 entry_count;
|
||||
NvU64 *entry_bits = (NvU64 *)entry;
|
||||
|
||||
UVM_ASSERT(dir);
|
||||
|
||||
entry_count = entries_per_index_hopper(dir->depth);
|
||||
|
||||
if (entry_count == 1) {
|
||||
*entry_bits = single_pde_hopper(*phys_allocs, depth);
|
||||
*entry_bits = single_pde_hopper(*phys_allocs, dir, child_index);
|
||||
}
|
||||
else if (entry_count == 2) {
|
||||
entry_bits[MMU_BIG] = big_half_pde_hopper(phys_allocs[MMU_BIG]);
|
||||
entry_bits[MMU_SMALL] = small_half_pde_hopper(phys_allocs[MMU_SMALL]);
|
||||
entry_bits[MMU_BIG] = big_half_pde_hopper(phys_allocs[MMU_BIG], dir, child_index);
|
||||
entry_bits[MMU_SMALL] = small_half_pde_hopper(phys_allocs[MMU_SMALL], dir, child_index);
|
||||
|
||||
// This entry applies to the whole dual PDE but is stored in the lower
|
||||
// bits
|
||||
// bits.
|
||||
entry_bits[MMU_BIG] |= HWCONST64(_MMU_VER3, DUAL_PDE, IS_PTE, FALSE);
|
||||
}
|
||||
else {
|
||||
|
@ -128,8 +128,9 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
|
||||
// present if we see the callback.
|
||||
//
|
||||
// The callback was added in commit 0f0a327fa12cd55de5e7f8c05a70ac3d047f405e,
|
||||
// v3.19 (2014-11-13).
|
||||
#if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE)
|
||||
// v3.19 (2014-11-13) and renamed in commit 1af5a8109904.
|
||||
#if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE) || \
|
||||
defined(NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS)
|
||||
#define UVM_CAN_USE_MMU_NOTIFIERS() 1
|
||||
#else
|
||||
#define UVM_CAN_USE_MMU_NOTIFIERS() 0
|
||||
@ -153,10 +154,6 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
|
||||
#define VM_MIXEDMAP 0x00000000
|
||||
#endif
|
||||
|
||||
#if !defined(MPOL_PREFERRED_MANY)
|
||||
#define MPOL_PREFERRED_MANY 5
|
||||
#endif
|
||||
|
||||
//
|
||||
// printk.h already defined pr_fmt, so we have to redefine it so the pr_*
|
||||
// routines pick up our version
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2021 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -71,4 +71,6 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->smc.supported = false;
|
||||
|
||||
parent_gpu->plc_supported = false;
|
||||
|
||||
parent_gpu->no_ats_range_required = false;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2021 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -106,10 +106,16 @@ static NvU64 small_half_pde_maxwell(uvm_mmu_page_table_alloc_t *phys_alloc)
|
||||
return pde_bits;
|
||||
}
|
||||
|
||||
static void make_pde_maxwell(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
|
||||
static void make_pde_maxwell(void *entry,
|
||||
uvm_mmu_page_table_alloc_t **phys_allocs,
|
||||
uvm_page_directory_t *dir,
|
||||
NvU32 child_index)
|
||||
{
|
||||
NvU64 pde_bits = 0;
|
||||
UVM_ASSERT(depth == 0);
|
||||
|
||||
UVM_ASSERT(dir);
|
||||
UVM_ASSERT(dir->depth == 0);
|
||||
|
||||
pde_bits |= HWCONST64(_MMU, PDE, SIZE, FULL);
|
||||
pde_bits |= big_half_pde_maxwell(phys_allocs[MMU_BIG]) | small_half_pde_maxwell(phys_allocs[MMU_SMALL]);
|
||||
|
||||
|
@ -672,14 +672,6 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
|
||||
.finalize_and_map = uvm_migrate_vma_finalize_and_map_helper,
|
||||
};
|
||||
|
||||
// WAR for Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
|
||||
// invalidates on read-only to read-write upgrades
|
||||
//
|
||||
// This code path isn't used on GH180 but we need to maintain consistent
|
||||
// behaviour on systems that do.
|
||||
if (!vma_is_anonymous(args->vma))
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
|
||||
ret = migrate_vma(&uvm_migrate_vma_ops, args->vma, args->start, args->end, args->src, args->dst, state);
|
||||
if (ret < 0)
|
||||
return errno_to_nv_status(ret);
|
||||
@ -693,24 +685,6 @@ static NV_STATUS nv_migrate_vma(struct migrate_vma *args, migrate_vma_state_t *s
|
||||
if (ret < 0)
|
||||
return errno_to_nv_status(ret);
|
||||
|
||||
// TODO: Bug 2419180: support file-backed pages in migrate_vma, when
|
||||
// support for it is added to the Linux kernel
|
||||
//
|
||||
// A side-effect of migrate_vma_setup() is it calls mmu notifiers even if a
|
||||
// page can't be migrated (eg. because it's a non-anonymous mapping). We
|
||||
// need this side-effect for SMMU on GH180 to ensure any cached read-only
|
||||
// entries are flushed from SMMU on permission upgrade.
|
||||
//
|
||||
// TODO: Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
|
||||
// invalidates on read-only to read-write upgrades
|
||||
//
|
||||
// The above WAR doesn't work for HugeTLBfs mappings because
|
||||
// migrate_vma_setup() will fail in that case.
|
||||
if (!vma_is_anonymous(args->vma)) {
|
||||
migrate_vma_finalize(args);
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
}
|
||||
|
||||
uvm_migrate_vma_alloc_and_copy(args, state);
|
||||
if (state->status == NV_OK) {
|
||||
migrate_vma_pages(args);
|
||||
@ -862,6 +836,17 @@ static NV_STATUS migrate_pageable_vma_region(struct vm_area_struct *vma,
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_skip_migrate_vma(UVM_TEST_SKIP_MIGRATE_VMA_PARAMS *params, struct file *filp)
|
||||
{
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
|
||||
uvm_va_space_down_write(va_space);
|
||||
va_space->test.skip_migrate_vma = params->skip;
|
||||
uvm_va_space_up_write(va_space);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS migrate_pageable_vma(struct vm_area_struct *vma,
|
||||
unsigned long start,
|
||||
unsigned long outer,
|
||||
@ -884,13 +869,12 @@ static NV_STATUS migrate_pageable_vma(struct vm_area_struct *vma,
|
||||
start = max(start, vma->vm_start);
|
||||
outer = min(outer, vma->vm_end);
|
||||
|
||||
// migrate_vma only supports anonymous VMAs. We check for those after
|
||||
// calling migrate_vma_setup() to workaround Bug 4130089. We need to check
|
||||
// for HugeTLB VMAs here because migrate_vma_setup() will return a fatal
|
||||
// error for those.
|
||||
// TODO: Bug 4130089: [GH180][r535] WAR for kernel not issuing SMMU TLB
|
||||
// invalidates on read-only to read-write upgrades
|
||||
if (is_vm_hugetlb_page(vma))
|
||||
if (va_space->test.skip_migrate_vma)
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
|
||||
// TODO: Bug 2419180: support file-backed pages in migrate_vma, when
|
||||
// support for it is added to the Linux kernel
|
||||
if (!vma_is_anonymous(vma))
|
||||
return NV_WARN_NOTHING_TO_DO;
|
||||
|
||||
if (uvm_processor_mask_empty(&va_space->registered_gpus))
|
||||
@ -950,7 +934,9 @@ static NV_STATUS migrate_pageable(migrate_vma_state_t *state)
|
||||
bool touch = uvm_migrate_args->touch;
|
||||
uvm_populate_permissions_t populate_permissions = uvm_migrate_args->populate_permissions;
|
||||
|
||||
UVM_ASSERT(!vma_is_anonymous(vma) || uvm_processor_mask_empty(&va_space->registered_gpus));
|
||||
UVM_ASSERT(va_space->test.skip_migrate_vma ||
|
||||
!vma_is_anonymous(vma) ||
|
||||
uvm_processor_mask_empty(&va_space->registered_gpus));
|
||||
|
||||
// We can't use migrate_vma to move the pages as desired. Normally
|
||||
// this fallback path is supposed to populate the memory then inform
|
||||
|
@ -51,7 +51,7 @@ typedef struct
|
||||
#if defined(CONFIG_MIGRATE_VMA_HELPER)
|
||||
#define UVM_MIGRATE_VMA_SUPPORTED 1
|
||||
#else
|
||||
#if defined(CONFIG_DEVICE_PRIVATE) && defined(NV_MIGRATE_VMA_SETUP_PRESENT)
|
||||
#if NV_IS_EXPORT_SYMBOL_PRESENT_migrate_vma_setup
|
||||
#define UVM_MIGRATE_VMA_SUPPORTED 1
|
||||
#endif
|
||||
#endif
|
||||
@ -218,6 +218,9 @@ NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args);
|
||||
NV_STATUS uvm_migrate_pageable_init(void);
|
||||
|
||||
void uvm_migrate_pageable_exit(void);
|
||||
|
||||
NV_STATUS uvm_test_skip_migrate_vma(UVM_TEST_SKIP_MIGRATE_VMA_PARAMS *params, struct file *filp);
|
||||
|
||||
#else // UVM_MIGRATE_VMA_SUPPORTED
|
||||
|
||||
static NV_STATUS uvm_migrate_pageable(uvm_migrate_args_t *uvm_migrate_args)
|
||||
@ -251,6 +254,10 @@ static void uvm_migrate_pageable_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
static inline NV_STATUS uvm_test_skip_migrate_vma(UVM_TEST_SKIP_MIGRATE_VMA_PARAMS *params, struct file *filp)
|
||||
{
|
||||
return NV_OK;
|
||||
}
|
||||
#endif // UVM_MIGRATE_VMA_SUPPORTED
|
||||
|
||||
#endif
|
||||
|
@ -323,37 +323,156 @@ static void uvm_mmu_page_table_cpu_memset_16(uvm_gpu_t *gpu,
|
||||
uvm_mmu_page_table_cpu_unmap(gpu, phys_alloc);
|
||||
}
|
||||
|
||||
static void pde_fill_cpu(uvm_page_tree_t *tree,
|
||||
uvm_page_directory_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr)
|
||||
{
|
||||
NvU64 pde_data[2], entry_size;
|
||||
NvU32 i;
|
||||
|
||||
UVM_ASSERT(uvm_mmu_use_cpu(tree));
|
||||
|
||||
entry_size = tree->hal->entry_size(directory->depth);
|
||||
UVM_ASSERT(sizeof(pde_data) >= entry_size);
|
||||
|
||||
for (i = 0; i < pde_count; i++) {
|
||||
tree->hal->make_pde(pde_data, phys_addr, directory, start_index + i);
|
||||
|
||||
if (entry_size == sizeof(pde_data[0]))
|
||||
uvm_mmu_page_table_cpu_memset_8(tree->gpu, &directory->phys_alloc, start_index + i, pde_data[0], 1);
|
||||
else
|
||||
uvm_mmu_page_table_cpu_memset_16(tree->gpu, &directory->phys_alloc, start_index + i, pde_data, 1);
|
||||
}
|
||||
}
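// Editor's note: unlike the previous pde_fill_cpu() removed further down in
// this change (which built one PDE value and memset pde_count copies of it),
// this version calls make_pde() once per entry. On ATS-enabled systems the
// PDE PCF now depends on the VA covered by (directory, start_index + i), so
// entries can differ even though they share the same physical address.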
|
||||
|
||||
static void pde_fill_gpu(uvm_page_tree_t *tree,
|
||||
uvm_page_directory_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr,
|
||||
uvm_push_t *push)
|
||||
{
|
||||
NvU64 pde_data[2], entry_size;
|
||||
uvm_gpu_address_t pde_entry_addr = uvm_mmu_gpu_address(tree->gpu, directory->phys_alloc.addr);
|
||||
NvU32 max_inline_entries;
|
||||
uvm_push_flag_t push_membar_flag = UVM_PUSH_FLAG_COUNT;
|
||||
uvm_gpu_address_t inline_data_addr;
|
||||
uvm_push_inline_data_t inline_data;
|
||||
NvU32 entry_count, i, j;
|
||||
|
||||
UVM_ASSERT(!uvm_mmu_use_cpu(tree));
|
||||
|
||||
entry_size = tree->hal->entry_size(directory->depth);
|
||||
UVM_ASSERT(sizeof(pde_data) >= entry_size);
|
||||
|
||||
max_inline_entries = UVM_PUSH_INLINE_DATA_MAX_SIZE / entry_size;
|
||||
|
||||
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
|
||||
push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_NONE;
|
||||
else if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
|
||||
push_membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_GPU;
|
||||
|
||||
pde_entry_addr.address += start_index * entry_size;
|
||||
|
||||
for (i = 0; i < pde_count;) {
|
||||
// All but the first memory operation can be pipelined. We respect the
|
||||
// caller's pipelining settings for the first push.
|
||||
if (i != 0)
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
|
||||
|
||||
entry_count = min(pde_count - i, max_inline_entries);
|
||||
|
||||
// No membar is needed until the last memory operation. Otherwise,
|
||||
// use caller's membar flag.
|
||||
if ((i + entry_count) < pde_count)
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
else if (push_membar_flag != UVM_PUSH_FLAG_COUNT)
|
||||
uvm_push_set_flag(push, push_membar_flag);
|
||||
|
||||
uvm_push_inline_data_begin(push, &inline_data);
|
||||
for (j = 0; j < entry_count; j++) {
|
||||
tree->hal->make_pde(pde_data, phys_addr, directory, start_index + i + j);
|
||||
uvm_push_inline_data_add(&inline_data, pde_data, entry_size);
|
||||
}
|
||||
inline_data_addr = uvm_push_inline_data_end(&inline_data);
|
||||
|
||||
tree->gpu->parent->ce_hal->memcopy(push, pde_entry_addr, inline_data_addr, entry_count * entry_size);
|
||||
|
||||
i += entry_count;
|
||||
pde_entry_addr.address += entry_size * entry_count;
|
||||
}
|
||||
}
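// Editor's sketch of the chunking above (illustrative numbers only):
// assuming a hypothetical 4KB value for UVM_PUSH_INLINE_DATA_MAX_SIZE and
// 16-byte dual-PDE entries, max_inline_entries = 4096 / 16 = 256, so filling
// 1000 PDEs issues ceil(1000 / 256) = 4 inline-data memcopies; only the
// final memcopy carries the caller's membar flag, and every chunk after the
// first is marked pipelined.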
|
||||
|
||||
// pde_fill() populates pde_count PDE entries (starting at start_index) with
|
||||
// the same mapping, i.e., with the same physical address (phys_addr).
|
||||
// pde_fill() is optimized for pde_count == 1, which is the common case.
|
||||
static void pde_fill(uvm_page_tree_t *tree,
|
||||
uvm_page_directory_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr,
|
||||
uvm_push_t *push)
|
||||
{
|
||||
UVM_ASSERT(start_index + pde_count <= uvm_mmu_page_tree_entries(tree, directory->depth, UVM_PAGE_SIZE_AGNOSTIC));
|
||||
|
||||
if (push)
|
||||
pde_fill_gpu(tree, directory, start_index, pde_count, phys_addr, push);
|
||||
else
|
||||
pde_fill_cpu(tree, directory, start_index, pde_count, phys_addr);
|
||||
}
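// Editor's note: the common pde_count == 1 case is the pde_write() path
// below, which gathers the child phys_allocs for one entry and calls
// pde_fill(tree, dir, entry_index, 1, phys_allocs, push); map_remap_init()
// and map_remap() use the multi-entry path to point whole ranges at the
// shared pde0 / invalid-4k allocations.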
|
||||
|
||||
static void phys_mem_init(uvm_page_tree_t *tree, NvU32 page_size, uvm_page_directory_t *dir, uvm_push_t *push)
|
||||
{
|
||||
NvU64 clear_bits[2];
|
||||
uvm_mmu_mode_hal_t *hal = tree->hal;
|
||||
NvU32 entries_count = uvm_mmu_page_tree_entries(tree, dir->depth, page_size);
|
||||
NvU8 max_pde_depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_AGNOSTIC) - 1;
|
||||
|
||||
if (dir->depth == tree->hal->page_table_depth(page_size)) {
|
||||
*clear_bits = 0; // Invalid PTE
|
||||
}
|
||||
else {
|
||||
// passing in NULL for the phys_allocs will mark the child entries as invalid
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
hal->make_pde(clear_bits, phys_allocs, dir->depth);
|
||||
// Passing in NULL for the phys_allocs will mark the child entries as
|
||||
// invalid.
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
|
||||
// Make sure that using only clear_bits[0] will work
|
||||
UVM_ASSERT(hal->entry_size(dir->depth) == sizeof(clear_bits[0]) || clear_bits[0] == clear_bits[1]);
|
||||
}
|
||||
// Init with an invalid PTE or clean PDE. Only Maxwell PDEs can have more
|
||||
// than 512 entries. In this case, we initialize them all with the same
|
||||
// clean PDE. ATS systems may require clean PDEs with
|
||||
// ATS_ALLOWED/ATS_NOT_ALLOWED bit settings based on the mapping VA.
|
||||
// We only set clear_bits to 0 at the lowest page table level (PTE table), i.e.,
|
||||
// when depth is greater than the max_pde_depth.
|
||||
if ((dir->depth > max_pde_depth) || (entries_count > 512 && !g_uvm_global.ats.enabled)) {
|
||||
NvU64 clear_bits[2];
|
||||
|
||||
// initialize the memory to a reasonable value
|
||||
if (push) {
|
||||
tree->gpu->parent->ce_hal->memset_8(push,
|
||||
uvm_mmu_gpu_address(tree->gpu, dir->phys_alloc.addr),
|
||||
// If it is not a PTE, make a clean PDE.
|
||||
if (dir->depth != tree->hal->page_table_depth(page_size)) {
|
||||
// make_pde() child index is zero/ignored, since it is only used in
|
||||
// PDEs on ATS-enabled systems where pde_fill() is preferred.
|
||||
tree->hal->make_pde(clear_bits, phys_allocs, dir, 0);
|
||||
|
||||
// Make sure that using only clear_bits[0] will work.
|
||||
UVM_ASSERT(tree->hal->entry_size(dir->depth) == sizeof(clear_bits[0]) || clear_bits[0] == clear_bits[1]);
|
||||
}
|
||||
else {
|
||||
*clear_bits = 0;
|
||||
}
|
||||
|
||||
// Initialize the memory to a reasonable value.
|
||||
if (push) {
|
||||
tree->gpu->parent->ce_hal->memset_8(push,
|
||||
uvm_mmu_gpu_address(tree->gpu, dir->phys_alloc.addr),
|
||||
*clear_bits,
|
||||
dir->phys_alloc.size);
|
||||
}
|
||||
else {
|
||||
uvm_mmu_page_table_cpu_memset_8(tree->gpu,
|
||||
&dir->phys_alloc,
|
||||
0,
|
||||
*clear_bits,
|
||||
dir->phys_alloc.size);
|
||||
dir->phys_alloc.size / sizeof(*clear_bits));
|
||||
}
|
||||
}
|
||||
else {
|
||||
uvm_mmu_page_table_cpu_memset_8(tree->gpu,
|
||||
&dir->phys_alloc,
|
||||
0,
|
||||
*clear_bits,
|
||||
dir->phys_alloc.size / sizeof(*clear_bits));
|
||||
pde_fill(tree, dir, 0, entries_count, phys_allocs, push);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
|
||||
@ -367,8 +486,10 @@ static uvm_page_directory_t *allocate_directory(uvm_page_tree_t *tree,
|
||||
NvLength phys_alloc_size = hal->allocation_size(depth, page_size);
|
||||
uvm_page_directory_t *dir;
|
||||
|
||||
// The page tree doesn't cache PTEs so space is not allocated for entries that are always PTEs.
|
||||
// 2M PTEs may later become PDEs so pass UVM_PAGE_SIZE_AGNOSTIC, not page_size.
|
||||
// The page tree doesn't cache PTEs so space is not allocated for entries
|
||||
// that are always PTEs.
|
||||
// 2M PTEs may later become PDEs so pass UVM_PAGE_SIZE_AGNOSTIC, not
|
||||
// page_size.
|
||||
if (depth == hal->page_table_depth(UVM_PAGE_SIZE_AGNOSTIC))
|
||||
entry_count = 0;
|
||||
else
|
||||
@ -409,108 +530,6 @@ static inline NvU32 index_to_entry(uvm_mmu_mode_hal_t *hal, NvU32 entry_index, N
|
||||
return hal->entries_per_index(depth) * entry_index + hal->entry_offset(depth, page_size);
|
||||
}
|
||||
|
||||
static void pde_fill_cpu(uvm_page_tree_t *tree,
|
||||
NvU32 depth,
|
||||
uvm_mmu_page_table_alloc_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr)
|
||||
{
|
||||
NvU64 pde_data[2], entry_size;
|
||||
|
||||
UVM_ASSERT(uvm_mmu_use_cpu(tree));
|
||||
entry_size = tree->hal->entry_size(depth);
|
||||
UVM_ASSERT(sizeof(pde_data) >= entry_size);
|
||||
|
||||
tree->hal->make_pde(pde_data, phys_addr, depth);
|
||||
|
||||
if (entry_size == sizeof(pde_data[0]))
|
||||
uvm_mmu_page_table_cpu_memset_8(tree->gpu, directory, start_index, pde_data[0], pde_count);
|
||||
else
|
||||
uvm_mmu_page_table_cpu_memset_16(tree->gpu, directory, start_index, pde_data, pde_count);
|
||||
}
|
||||
|
||||
static void pde_fill_gpu(uvm_page_tree_t *tree,
|
||||
NvU32 depth,
|
||||
uvm_mmu_page_table_alloc_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr,
|
||||
uvm_push_t *push)
|
||||
{
|
||||
NvU64 pde_data[2], entry_size;
|
||||
uvm_gpu_address_t pde_entry_addr = uvm_mmu_gpu_address(tree->gpu, directory->addr);
|
||||
|
||||
UVM_ASSERT(!uvm_mmu_use_cpu(tree));
|
||||
|
||||
entry_size = tree->hal->entry_size(depth);
|
||||
UVM_ASSERT(sizeof(pde_data) >= entry_size);
|
||||
|
||||
tree->hal->make_pde(pde_data, phys_addr, depth);
|
||||
pde_entry_addr.address += start_index * entry_size;
|
||||
|
||||
if (entry_size == sizeof(pde_data[0])) {
|
||||
tree->gpu->parent->ce_hal->memset_8(push, pde_entry_addr, pde_data[0], sizeof(pde_data[0]) * pde_count);
|
||||
}
|
||||
else {
|
||||
NvU32 max_inline_entries = UVM_PUSH_INLINE_DATA_MAX_SIZE / sizeof(pde_data);
|
||||
uvm_gpu_address_t inline_data_addr;
|
||||
uvm_push_inline_data_t inline_data;
|
||||
NvU32 membar_flag = 0;
|
||||
NvU32 i;
|
||||
|
||||
if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE))
|
||||
membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_NONE;
|
||||
else if (uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU))
|
||||
membar_flag = UVM_PUSH_FLAG_NEXT_MEMBAR_GPU;
|
||||
|
||||
for (i = 0; i < pde_count;) {
|
||||
NvU32 j;
|
||||
NvU32 entry_count = min(pde_count - i, max_inline_entries);
|
||||
|
||||
uvm_push_inline_data_begin(push, &inline_data);
|
||||
for (j = 0; j < entry_count; j++)
|
||||
uvm_push_inline_data_add(&inline_data, pde_data, sizeof(pde_data));
|
||||
inline_data_addr = uvm_push_inline_data_end(&inline_data);
|
||||
|
||||
// All but the first memcopy can be pipelined. We respect the
|
||||
// caller's pipelining settings for the first push.
|
||||
if (i != 0)
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_CE_NEXT_PIPELINED);
|
||||
|
||||
// No membar is needed until the last copy. Otherwise, use
|
||||
// caller's membar flag.
|
||||
if (i + entry_count < pde_count)
|
||||
uvm_push_set_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);
|
||||
else if (membar_flag)
|
||||
uvm_push_set_flag(push, membar_flag);
|
||||
|
||||
tree->gpu->parent->ce_hal->memcopy(push, pde_entry_addr, inline_data_addr, entry_count * sizeof(pde_data));
|
||||
|
||||
i += entry_count;
|
||||
pde_entry_addr.address += sizeof(pde_data) * entry_count;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pde_fill() populates pde_count PDE entries (starting at start_index) with
|
||||
// the same mapping, i.e., with the same physical address (phys_addr).
|
||||
static void pde_fill(uvm_page_tree_t *tree,
|
||||
NvU32 depth,
|
||||
uvm_mmu_page_table_alloc_t *directory,
|
||||
NvU32 start_index,
|
||||
NvU32 pde_count,
|
||||
uvm_mmu_page_table_alloc_t **phys_addr,
|
||||
uvm_push_t *push)
|
||||
{
|
||||
UVM_ASSERT(start_index + pde_count <= uvm_mmu_page_tree_entries(tree, depth, UVM_PAGE_SIZE_AGNOSTIC));
|
||||
|
||||
if (push)
|
||||
pde_fill_gpu(tree, depth, directory, start_index, pde_count, phys_addr, push);
|
||||
else
|
||||
pde_fill_cpu(tree, depth, directory, start_index, pde_count, phys_addr);
|
||||
}
|
||||
|
||||
static uvm_page_directory_t *host_pde_write(uvm_page_directory_t *dir,
|
||||
uvm_page_directory_t *parent,
|
||||
NvU32 index_in_parent)
|
||||
@ -540,7 +559,7 @@ static void pde_write(uvm_page_tree_t *tree,
|
||||
phys_allocs[i] = &entry->phys_alloc;
|
||||
}
|
||||
|
||||
pde_fill(tree, dir->depth, &dir->phys_alloc, entry_index, 1, phys_allocs, push);
|
||||
pde_fill(tree, dir, entry_index, 1, phys_allocs, push);
|
||||
}
|
||||
|
||||
static void host_pde_clear(uvm_page_tree_t *tree, uvm_page_directory_t *dir, NvU32 entry_index, NvU32 page_size)
|
||||
@ -800,7 +819,6 @@ static void free_unused_directories(uvm_page_tree_t *tree,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU32 page_size, uvm_mmu_page_table_alloc_t *out)
|
||||
@ -811,10 +829,93 @@ static NV_STATUS allocate_page_table(uvm_page_tree_t *tree, NvU32 page_size, uvm
|
||||
return phys_mem_allocate(tree, alloc_size, tree->location, UVM_PMM_ALLOC_FLAGS_EVICT, out);
|
||||
}
|
||||
|
||||
static bool page_tree_ats_init_required(uvm_page_tree_t *tree)
|
||||
{
|
||||
// We have full control of the kernel page table mappings; no ATS address
// aliases are expected.
|
||||
if (tree->type == UVM_PAGE_TREE_TYPE_KERNEL)
|
||||
return false;
|
||||
|
||||
// Enable uvm_page_tree_init() from the page_tree test.
|
||||
if (uvm_enable_builtin_tests && tree->gpu_va_space == NULL)
|
||||
return false;
|
||||
|
||||
if (!tree->gpu_va_space->ats.enabled)
|
||||
return false;
|
||||
|
||||
return tree->gpu->parent->no_ats_range_required;
|
||||
}
|
||||
|
||||
static NV_STATUS page_tree_ats_init(uvm_page_tree_t *tree)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU64 min_va_upper, max_va_lower;
|
||||
NvU32 page_size;
|
||||
|
||||
if (!page_tree_ats_init_required(tree))
|
||||
return NV_OK;
|
||||
|
||||
page_size = uvm_mmu_biggest_page_size(tree);
|
||||
|
||||
uvm_cpu_get_unaddressable_range(&max_va_lower, &min_va_upper);
|
||||
|
||||
// Potential violation of the UVM internal get/put_ptes contract. get_ptes()
|
||||
// creates and initializes enough PTEs to populate all PDEs covering the
|
||||
// no_ats_ranges. We store the no_ats_ranges in the tree, so they can be
|
||||
// put_ptes()'ed on deinit(). This doesn't preclude the range from being
// used by a future get_ptes(), since we don't write to the PTEs
// (range->table) from the tree->no_ats_ranges.
|
||||
//
|
||||
// Lower half
|
||||
status = uvm_page_tree_get_ptes(tree,
|
||||
page_size,
|
||||
max_va_lower,
|
||||
page_size,
|
||||
UVM_PMM_ALLOC_FLAGS_EVICT,
|
||||
&tree->no_ats_ranges[0]);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
UVM_ASSERT(tree->no_ats_ranges[0].entry_count == 1);
|
||||
|
||||
if (uvm_platform_uses_canonical_form_address()) {
|
||||
// Upper half
|
||||
status = uvm_page_tree_get_ptes(tree,
|
||||
page_size,
|
||||
min_va_upper - page_size,
|
||||
page_size,
|
||||
UVM_PMM_ALLOC_FLAGS_EVICT,
|
||||
&tree->no_ats_ranges[1]);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
UVM_ASSERT(tree->no_ats_ranges[1].entry_count == 1);
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
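// Editor's worked example (assumed numbers): on a platform with 48 CPU VA
// bits, uvm_cpu_get_unaddressable_range() yields max_va_lower =
// 0x0000800000000000 and min_va_upper = 0xffff800000000000. With a 512MB
// biggest page size, the two get_ptes() calls above pin PTEs for
// [0x0000800000000000, +512MB) and [0xffff800000000000 - 512MB,
// 0xffff800000000000), i.e. one page just inside each end of the
// non-canonical hole.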
|
||||
|
||||
static void page_tree_ats_deinit(uvm_page_tree_t *tree)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
if (page_tree_ats_init_required(tree)) {
|
||||
for (i = 0; i < ARRAY_SIZE(tree->no_ats_ranges); i++) {
|
||||
if (tree->no_ats_ranges[i].entry_count)
|
||||
uvm_page_tree_put_ptes(tree, &tree->no_ats_ranges[i]);
|
||||
}
|
||||
|
||||
memset(tree->no_ats_ranges, 0, sizeof(tree->no_ats_ranges));
|
||||
}
|
||||
}
|
||||
|
||||
static void map_remap_deinit(uvm_page_tree_t *tree)
|
||||
{
|
||||
if (tree->map_remap.pde0.size)
|
||||
phys_mem_deallocate(tree, &tree->map_remap.pde0);
|
||||
if (tree->map_remap.pde0) {
|
||||
phys_mem_deallocate(tree, &tree->map_remap.pde0->phys_alloc);
|
||||
uvm_kvfree(tree->map_remap.pde0);
|
||||
tree->map_remap.pde0 = NULL;
|
||||
}
|
||||
|
||||
if (tree->map_remap.ptes_invalid_4k.size)
|
||||
phys_mem_deallocate(tree, &tree->map_remap.ptes_invalid_4k);
|
||||
@ -839,10 +940,16 @@ static NV_STATUS map_remap_init(uvm_page_tree_t *tree)
|
||||
// PDE1-depth(512M) PTE. We first map it to the pde0 directory, then we
|
||||
// return the PTE for the get_ptes()'s caller.
|
||||
if (tree->hal->page_sizes() & UVM_PAGE_SIZE_512M) {
|
||||
status = allocate_page_table(tree, UVM_PAGE_SIZE_2M, &tree->map_remap.pde0);
|
||||
if (status != NV_OK)
|
||||
tree->map_remap.pde0 = allocate_directory(tree,
|
||||
UVM_PAGE_SIZE_2M,
|
||||
tree->hal->page_table_depth(UVM_PAGE_SIZE_2M),
|
||||
UVM_PMM_ALLOC_FLAGS_EVICT);
|
||||
if (tree->map_remap.pde0 == NULL) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto error;
|
||||
}
|
||||
}
|
||||
|
||||
status = page_tree_begin_acquire(tree, &tree->tracker, &push, "map remap init");
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
@ -864,22 +971,23 @@ static NV_STATUS map_remap_init(uvm_page_tree_t *tree)
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
NvU32 depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_4K) - 1;
|
||||
size_t index_4k = tree->hal->entry_offset(depth, UVM_PAGE_SIZE_4K);
|
||||
|
||||
// pde0 depth equals UVM_PAGE_SIZE_2M.
|
||||
NvU32 pde0_depth = tree->hal->page_table_depth(UVM_PAGE_SIZE_2M);
|
||||
NvU32 pde0_entries = tree->map_remap.pde0.size / tree->hal->entry_size(pde0_depth);
|
||||
NvU32 pde0_entries = tree->map_remap.pde0->phys_alloc.size / tree->hal->entry_size(tree->map_remap.pde0->depth);
|
||||
|
||||
// The big-page entry is NULL which makes it an invalid entry.
|
||||
phys_allocs[index_4k] = &tree->map_remap.ptes_invalid_4k;
|
||||
|
||||
// By default CE operations include a MEMBAR_SYS. MEMBAR_GPU is
|
||||
// sufficient when pde0 is allocated in VIDMEM.
|
||||
if (tree->map_remap.pde0.addr.aperture == UVM_APERTURE_VID)
|
||||
if (tree->map_remap.pde0->phys_alloc.addr.aperture == UVM_APERTURE_VID)
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
|
||||
|
||||
// This is an orphan directory: make_pde() requires a directory to
// compute the VA. The depth map_remap() operates at is outside the
// range make_pde() has to handle, so we only need to supply the fields
// make_pde() reads in order to avoid accessing invalid memory.
|
||||
|
||||
pde_fill(tree,
|
||||
pde0_depth,
|
||||
&tree->map_remap.pde0,
|
||||
tree->map_remap.pde0,
|
||||
0,
|
||||
pde0_entries,
|
||||
(uvm_mmu_page_table_alloc_t **)&phys_allocs,
|
||||
@ -1006,11 +1114,22 @@ NV_STATUS uvm_page_tree_init(uvm_gpu_t *gpu,
|
||||
return status;
|
||||
|
||||
phys_mem_init(tree, UVM_PAGE_SIZE_AGNOSTIC, tree->root, &push);
|
||||
return page_tree_end_and_wait(tree, &push);
|
||||
|
||||
status = page_tree_end_and_wait(tree, &push);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
status = page_tree_ats_init(tree);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_page_tree_deinit(uvm_page_tree_t *tree)
|
||||
{
|
||||
page_tree_ats_deinit(tree);
|
||||
|
||||
UVM_ASSERT(tree->root->ref_count == 0);
|
||||
|
||||
// Take the tree lock only to avoid assertions. It is not required for
|
||||
@ -1249,7 +1368,6 @@ static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
|
||||
UVM_ASSERT(uvm_gpu_can_address_kernel(tree->gpu, start, size));
|
||||
|
||||
while (true) {
|
||||
|
||||
// index of the entry, for the first byte of the range, within its
|
||||
// containing directory
|
||||
NvU32 start_index;
|
||||
@ -1281,7 +1399,8 @@ static NV_STATUS try_get_ptes(uvm_page_tree_t *tree,
|
||||
if (dir_cache[dir->depth] == NULL) {
|
||||
*cur_depth = dir->depth;
|
||||
|
||||
// Undo the changes to the tree so that the dir cache remains private to the thread
|
||||
// Undo the changes to the tree so that the dir cache
|
||||
// remains private to the thread.
|
||||
for (i = 0; i < used_count; i++)
|
||||
host_pde_clear(tree, dirs_used[i]->host_parent, dirs_used[i]->index_in_parent, page_size);
|
||||
|
||||
@ -1332,10 +1451,9 @@ static NV_STATUS map_remap(uvm_page_tree_t *tree, NvU64 start, NvLength size, uv
|
||||
if (uvm_page_table_range_aperture(range) == UVM_APERTURE_VID)
|
||||
uvm_push_set_flag(&push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
|
||||
|
||||
phys_alloc[0] = &tree->map_remap.pde0;
|
||||
phys_alloc[0] = &tree->map_remap.pde0->phys_alloc;
|
||||
pde_fill(tree,
|
||||
range->table->depth,
|
||||
&range->table->phys_alloc,
|
||||
range->table,
|
||||
range->start_index,
|
||||
range->entry_count,
|
||||
(uvm_mmu_page_table_alloc_t **)&phys_alloc,
|
||||
@ -1380,7 +1498,8 @@ NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
|
||||
dir_cache)) == NV_ERR_MORE_PROCESSING_REQUIRED) {
|
||||
uvm_mutex_unlock(&tree->lock);
|
||||
|
||||
// try_get_ptes never needs depth 0, so store a directory at its parent's depth
|
||||
// try_get_ptes never needs depth 0, so store a directory at its
|
||||
// parent's depth.
|
||||
// TODO: Bug 1766655: Allocate everything below cur_depth instead of
|
||||
// retrying for every level.
|
||||
dir_cache[cur_depth] = allocate_directory(tree, page_size, cur_depth + 1, pmm_flags);
|
||||
@ -1663,8 +1782,12 @@ NV_STATUS uvm_page_table_range_vec_init(uvm_page_tree_t *tree,
|
||||
range);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to get PTEs for subrange %zd [0x%llx, 0x%llx) size 0x%llx, part of [0x%llx, 0x%llx)\n",
|
||||
i, range_start, range_start + range_size, range_size,
|
||||
start, size);
|
||||
i,
|
||||
range_start,
|
||||
range_start + range_size,
|
||||
range_size,
|
||||
start,
|
||||
size);
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -215,11 +215,14 @@ struct uvm_mmu_mode_hal_struct
// memory out-of-range error so we can immediately identify bad PTE usage.
NvU64 (*poisoned_pte)(void);

// write a PDE bit-pattern to entry based on the data in entries (which may
// Write a PDE bit-pattern to entry based on the data in allocs (which may
// point to two items for dual PDEs).
// any of allocs are allowed to be NULL, in which case they are to be
// treated as empty.
void (*make_pde)(void *entry, uvm_mmu_page_table_alloc_t **allocs, NvU32 depth);
// Any of allocs are allowed to be NULL, in which case they are to be
// treated as empty. make_pde() uses dir and child_index to compute the
// mapping PDE VA. On ATS-enabled systems, we may set PDE's PCF as
// ATS_ALLOWED or ATS_NOT_ALLOWED based on the mapping PDE VA, even for
// invalid/clean PDE entries.
void (*make_pde)(void *entry, uvm_mmu_page_table_alloc_t **allocs, uvm_page_directory_t *dir, NvU32 child_index);
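
For orientation, this is roughly the shape a HAL implementation takes against the new hook signature: the directory replaces the raw depth argument, and child_index is what an ATS-aware HAL can use to pick the PCF for the child's VA range. The helper names are stand-ins for the per-architecture functions changed later in this diff (make_pde_pascal()/make_pde_volta()), not new driver API:

static void make_pde_example(void *entry,
                             uvm_mmu_page_table_alloc_t **allocs,
                             uvm_page_directory_t *dir,
                             NvU32 child_index)
{
    NvU64 *entry_bits = (NvU64 *)entry;

    UVM_ASSERT(dir);

    if (entries_per_index_example(dir->depth) == 1) {
        // allocs[0] may be NULL, which must encode an empty/invalid PDE.
        *entry_bits = single_pde_example(allocs[0], dir->depth);
    }
    else {
        // Dual PDE: big-page half and 4K half are encoded separately; the
        // directory chain and child_index give the mapping VA an ATS-aware
        // HAL would consult when choosing ATS_ALLOWED/ATS_NOT_ALLOWED.
        entry_bits[MMU_BIG] = big_half_pde_example(allocs[MMU_BIG]);
        entry_bits[MMU_SMALL] = small_half_pde_example(allocs[MMU_SMALL]);
    }
}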

// size of an entry in a directory/table. Generally either 8 or 16 bytes.
// (in the case of Pascal dual PDEs)
@ -229,7 +232,7 @@ struct uvm_mmu_mode_hal_struct
NvU32 (*entries_per_index)(NvU32 depth);

// For dual PDEs, this is ether 1 or 0, depending on the page size.
// This is used to index the host copy only. GPU PDEs are always entirely
// This is used to index the host copy only. GPU PDEs are always entirely
// re-written using make_pde.
NvLength (*entry_offset)(NvU32 depth, NvU32 page_size);
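
A rough illustration of how these two hooks relate: entries_per_index() says how many host-copy slots one directory index occupies, and entry_offset() picks the slot within a dual PDE for a given page size. The indexing helper itself is not part of this hunk, so the formula below is an inference, not a quote of the driver:

static size_t host_entry_slot(uvm_mmu_mode_hal_t *hal,
                              uvm_page_directory_t *dir,
                              NvU32 entry_index,
                              NvU32 page_size)
{
    // 2 slots per index only at the dual-PDE depth (Pascal+); entry_offset()
    // then selects the big-page or 4K half. The GPU copy is never patched
    // per-half: make_pde() always rewrites the whole entry.
    return (size_t)entry_index * hal->entries_per_index(dir->depth) +
           hal->entry_offset(dir->depth, page_size);
}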

@ -295,11 +298,16 @@ struct uvm_page_tree_struct

// PDE0 where all big-page entries are invalid, and small-page entries
// point to ptes_invalid_4k.
// pde0 is only used on Pascal-Ampere, i.e., they have the same PDE
// format.
uvm_mmu_page_table_alloc_t pde0;
// pde0 is used on Pascal+ GPUs, i.e., they have the same PDE format.
uvm_page_directory_t *pde0;
} map_remap;

// On ATS-enabled systems where the CPU VA width is smaller than the GPU VA
// width, the excess address range is set with ATS_NOT_ALLOWED on all leaf
// PDEs covering that range. We have at most 2 no_ats_ranges, due to
// canonical form address systems.
uvm_page_table_range_t no_ats_ranges[2];

// Tracker for all GPU operations on the tree
uvm_tracker_t tracker;
};
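
Because map_remap.pde0 is now a directory pointer instead of an embedded allocation, callers reach the backing allocation through phys_alloc. Not a complete function, just the access pattern implied by the new field type, mirroring the map_remap_init()/map_remap() hunks above:

static bool pde0_is_vidmem(uvm_page_tree_t *tree)
{
    // Old field:  tree->map_remap.pde0.addr.aperture
    // New field:  the directory's backing allocation lives in phys_alloc.
    return tree->map_remap.pde0->phys_alloc.addr.aperture == UVM_APERTURE_VID;
}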
@ -365,21 +373,32 @@ void uvm_page_tree_deinit(uvm_page_tree_t *tree);
// the same page size without an intervening put_ptes. To duplicate a subset of
// an existing range or change the size of an existing range, use
// uvm_page_table_range_get_upper() and/or uvm_page_table_range_shrink().
NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start, NvLength size,
uvm_pmm_alloc_flags_t pmm_flags, uvm_page_table_range_t *range);
NV_STATUS uvm_page_tree_get_ptes(uvm_page_tree_t *tree,
NvU32 page_size,
NvU64 start,
NvLength size,
uvm_pmm_alloc_flags_t pmm_flags,
uvm_page_table_range_t *range);
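
A hedged usage sketch for the reflowed prototype, with error handling trimmed; uvm_page_tree_put_ptes() is the release counterpart the comment above refers to, but its exact prototype is not shown in this diff, so treat it as assumed:

static NV_STATUS map_one_2m_range(uvm_page_tree_t *tree, NvU64 start)
{
    uvm_page_table_range_t range;
    NV_STATUS status;

    status = uvm_page_tree_get_ptes(tree,
                                    UVM_PAGE_SIZE_2M,
                                    start,
                                    UVM_PAGE_SIZE_2M,          // size: one 2M entry
                                    UVM_PMM_ALLOC_FLAGS_EVICT,
                                    &range);
    if (status != NV_OK)
        return status;

    // ... write PTEs through the range ...

    uvm_page_tree_put_ptes(tree, &range);
    return NV_OK;
}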
|
||||
|
||||
// Same as uvm_page_tree_get_ptes(), but doesn't synchronize the GPU work.
|
||||
//
|
||||
// All pending operations can be waited on with uvm_page_tree_wait().
|
||||
NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start, NvLength size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags, uvm_page_table_range_t *range);
|
||||
NV_STATUS uvm_page_tree_get_ptes_async(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 start,
|
||||
NvLength size,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
uvm_page_table_range_t *range);
|
||||
|
||||
// Returns a single-entry page table range for the addresses passed.
|
||||
// The size parameter must be a page size supported by this tree.
|
||||
// This is equivalent to calling uvm_page_tree_get_ptes() with size equal to
|
||||
// page_size.
|
||||
NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree, NvU32 page_size, NvU64 start,
|
||||
uvm_pmm_alloc_flags_t pmm_flags, uvm_page_table_range_t *single);
|
||||
NV_STATUS uvm_page_tree_get_entry(uvm_page_tree_t *tree,
|
||||
NvU32 page_size,
|
||||
NvU64 start,
|
||||
uvm_pmm_alloc_flags_t pmm_flags,
|
||||
uvm_page_table_range_t *single);
|
||||
|
||||
// For a single-entry page table range, write the PDE (which could be a dual
|
||||
// PDE) to the GPU.
|
||||
@ -478,8 +497,8 @@ NV_STATUS uvm_page_table_range_vec_create(uvm_page_tree_t *tree,
|
||||
// new_range_vec will contain the upper portion of range_vec, starting at
|
||||
// new_end + 1.
|
||||
//
|
||||
// new_end + 1 is required to be within the address range of range_vec and be aligned to
|
||||
// range_vec's page_size.
|
||||
// new_end + 1 is required to be within the address range of range_vec and be
|
||||
// aligned to range_vec's page_size.
|
||||
//
|
||||
// On failure, the original range vector is left unmodified.
|
||||
NV_STATUS uvm_page_table_range_vec_split_upper(uvm_page_table_range_vec_t *range_vec,
|
||||
@ -501,18 +520,22 @@ void uvm_page_table_range_vec_destroy(uvm_page_table_range_vec_t *range_vec);
// for each offset.
// The caller_data pointer is what the caller passed in as caller_data to
// uvm_page_table_range_vec_write_ptes().
typedef NvU64 (*uvm_page_table_range_pte_maker_t)(uvm_page_table_range_vec_t *range_vec, NvU64 offset,
void *caller_data);
typedef NvU64 (*uvm_page_table_range_pte_maker_t)(uvm_page_table_range_vec_t *range_vec,
NvU64 offset,
void *caller_data);

// Write all PTEs covered by the range vector using the given PTE making function.
// Write all PTEs covered by the range vector using the given PTE making
// function.
//
// After writing all the PTEs a TLB invalidate operation is performed including
// the passed in tlb_membar.
//
// See comments about uvm_page_table_range_pte_maker_t for details about the
// PTE making callback.
NV_STATUS uvm_page_table_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec, uvm_membar_t tlb_membar,
uvm_page_table_range_pte_maker_t pte_maker, void *caller_data);
NV_STATUS uvm_page_table_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec,
uvm_membar_t tlb_membar,
uvm_page_table_range_pte_maker_t pte_maker,
void *caller_data);
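
To make the callback contract concrete, a small hedged example of driving uvm_page_table_range_vec_write_ptes(); the maker function and its identity-style PTE encoding are placeholders, not a real HAL encoding:

static NvU64 identity_pte_maker(uvm_page_table_range_vec_t *range_vec,
                                NvU64 offset,
                                void *caller_data)
{
    NvU64 base_phys = *(NvU64 *)caller_data;

    // Called once per page_size-aligned offset within the range vector.
    return base_phys + offset;
}

static NV_STATUS write_linear_mapping(uvm_page_table_range_vec_t *range_vec, NvU64 base_phys)
{
    // UVM_MEMBAR_NONE: assume the caller handles ordering itself; the call
    // still performs the TLB invalidate described above.
    return uvm_page_table_range_vec_write_ptes(range_vec,
                                               UVM_MEMBAR_NONE,
                                               identity_pte_maker,
                                               &base_phys);
}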
|
||||
|
||||
// Set all PTEs covered by the range vector to an empty PTE
|
||||
//
|
||||
@ -636,8 +659,9 @@ static NvU64 uvm_page_table_range_size(uvm_page_table_range_t *range)
|
||||
|
||||
// Get the physical address of the entry at entry_index within the range
|
||||
// (counted from range->start_index).
|
||||
static uvm_gpu_phys_address_t uvm_page_table_range_entry_address(uvm_page_tree_t *tree, uvm_page_table_range_t *range,
|
||||
size_t entry_index)
|
||||
static uvm_gpu_phys_address_t uvm_page_table_range_entry_address(uvm_page_tree_t *tree,
|
||||
uvm_page_table_range_t *range,
|
||||
size_t entry_index)
|
||||
{
|
||||
NvU32 entry_size = uvm_mmu_pte_size(tree, range->page_size);
|
||||
uvm_gpu_phys_address_t entry = range->table->phys_alloc.addr;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -146,9 +146,15 @@ static void fake_tlb_invals_disable(void)
|
||||
g_fake_tlb_invals_tracking_enabled = false;
|
||||
}
|
||||
|
||||
// Fake TLB invalidate VA that just saves off the parameters so that they can be verified later
|
||||
static void fake_tlb_invalidate_va(uvm_push_t *push, uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth, NvU64 base, NvU64 size, NvU32 page_size, uvm_membar_t membar)
|
||||
// Fake TLB invalidate VA that just saves off the parameters so that they can be
|
||||
// verified later.
|
||||
static void fake_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
uvm_membar_t membar)
|
||||
{
|
||||
if (!g_fake_tlb_invals_tracking_enabled)
|
||||
return;
|
||||
@ -210,8 +216,8 @@ static bool assert_and_reset_last_invalidate(NvU32 expected_depth, bool expected
|
||||
}
|
||||
if ((g_last_fake_inval->membar == UVM_MEMBAR_NONE) == expected_membar) {
|
||||
UVM_TEST_PRINT("Expected %s membar, got %s instead\n",
|
||||
expected_membar ? "a" : "no",
|
||||
uvm_membar_string(g_last_fake_inval->membar));
|
||||
expected_membar ? "a" : "no",
|
||||
uvm_membar_string(g_last_fake_inval->membar));
|
||||
result = false;
|
||||
}
|
||||
|
||||
@ -230,7 +236,8 @@ static bool assert_last_invalidate_all(NvU32 expected_depth, bool expected_memba
|
||||
}
|
||||
if (g_last_fake_inval->base != 0 || g_last_fake_inval->size != -1) {
|
||||
UVM_TEST_PRINT("Expected invalidate all but got range [0x%llx, 0x%llx) instead\n",
|
||||
g_last_fake_inval->base, g_last_fake_inval->base + g_last_fake_inval->size);
|
||||
g_last_fake_inval->base,
|
||||
g_last_fake_inval->base + g_last_fake_inval->size);
|
||||
return false;
|
||||
}
|
||||
if (g_last_fake_inval->depth != expected_depth) {
|
||||
@ -247,15 +254,16 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
|
||||
UVM_ASSERT(g_fake_tlb_invals_tracking_enabled);
|
||||
|
||||
if (g_fake_invals_count == 0) {
|
||||
UVM_TEST_PRINT("Expected an invalidate for range [0x%llx, 0x%llx), but got none\n",
|
||||
base, base + size);
|
||||
UVM_TEST_PRINT("Expected an invalidate for range [0x%llx, 0x%llx), but got none\n", base, base + size);
|
||||
return false;
|
||||
}
|
||||
|
||||
if ((inval->base != base || inval->size != size) && inval->base != 0 && inval->size != -1) {
|
||||
UVM_TEST_PRINT("Expected invalidate range [0x%llx, 0x%llx), but got range [0x%llx, 0x%llx) instead\n",
|
||||
base, base + size,
|
||||
inval->base, inval->base + inval->size);
|
||||
base,
|
||||
base + size,
|
||||
inval->base,
|
||||
inval->base + inval->size);
|
||||
return false;
|
||||
}
|
||||
if (inval->depth != expected_depth) {
|
||||
@ -270,7 +278,13 @@ static bool assert_invalidate_range_specific(fake_tlb_invalidate_t *inval,
|
||||
return true;
|
||||
}
|
||||
|
||||
static bool assert_invalidate_range(NvU64 base, NvU64 size, NvU32 page_size, bool allow_inval_all, NvU32 range_depth, NvU32 all_depth, bool expected_membar)
|
||||
static bool assert_invalidate_range(NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
bool allow_inval_all,
|
||||
NvU32 range_depth,
|
||||
NvU32 all_depth,
|
||||
bool expected_membar)
|
||||
{
|
||||
NvU32 i;
|
||||
|
||||
@ -488,7 +502,6 @@ static NV_STATUS alloc_adjacent_pde_64k_memory(uvm_gpu_t *gpu)
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
|
||||
static NV_STATUS alloc_nearby_pde_64k_memory(uvm_gpu_t *gpu)
|
||||
{
|
||||
uvm_page_tree_t tree;
|
||||
@ -842,6 +855,7 @@ static NV_STATUS get_two_free_apart(uvm_gpu_t *gpu)
|
||||
TEST_CHECK_RET(range2.entry_count == 256);
|
||||
TEST_CHECK_RET(range2.table->ref_count == 512);
|
||||
TEST_CHECK_RET(range1.table == range2.table);
|
||||
|
||||
// 4k page is second entry in a dual PDE
|
||||
TEST_CHECK_RET(range1.table == tree.root->entries[0]->entries[0]->entries[0]->entries[1]);
|
||||
TEST_CHECK_RET(range1.start_index == 256);
|
||||
@ -871,6 +885,7 @@ static NV_STATUS get_overlapping_dual_pdes(uvm_gpu_t *gpu)
|
||||
MEM_NV_CHECK_RET(test_page_tree_get_ptes(&tree, UVM_PAGE_SIZE_64K, size, size, &range64k), NV_OK);
|
||||
TEST_CHECK_RET(range64k.entry_count == 16);
|
||||
TEST_CHECK_RET(range64k.table->ref_count == 16);
|
||||
|
||||
// 4k page is second entry in a dual PDE
|
||||
TEST_CHECK_RET(range64k.table == tree.root->entries[0]->entries[0]->entries[0]->entries[0]);
|
||||
TEST_CHECK_RET(range64k.start_index == 16);
|
||||
@ -1030,10 +1045,13 @@ static NV_STATUS test_tlb_invalidates(uvm_gpu_t *gpu)

// Depth 4
NvU64 extent_pte = UVM_PAGE_SIZE_2M;

// Depth 3
NvU64 extent_pde0 = extent_pte * (1ull << 8);

// Depth 2
NvU64 extent_pde1 = extent_pde0 * (1ull << 9);

// Depth 1
NvU64 extent_pde2 = extent_pde1 * (1ull << 9);
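
A quick arithmetic check of the extents above, assuming UVM_PAGE_SIZE_2M is 2MB:

// extent_pte  = 2MB
// extent_pde0 = 2MB   * 2^8 = 512MB
// extent_pde1 = 512MB * 2^9 = 256GB
// extent_pde2 = 256GB * 2^9 = 128TB   (the VA span behind one depth-1 entry)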
|
||||
|
||||
@ -1081,7 +1099,11 @@ static NV_STATUS test_tlb_invalidates(uvm_gpu_t *gpu)
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree, NvU64 base, NvU64 size, NvU32 min_page_size, NvU32 max_page_size)
|
||||
static NV_STATUS test_tlb_batch_invalidates_case(uvm_page_tree_t *tree,
|
||||
NvU64 base,
|
||||
NvU64 size,
|
||||
NvU32 min_page_size,
|
||||
NvU32 max_page_size)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_push_t push;
|
||||
@ -1205,7 +1227,11 @@ static bool assert_range_vec_ptes(uvm_page_table_range_vec_t *range_vec, bool ex
|
||||
NvU64 expected_pte = expecting_cleared ? 0 : range_vec->size + offset;
|
||||
if (*pte != expected_pte) {
|
||||
UVM_TEST_PRINT("PTE is 0x%llx instead of 0x%llx for offset 0x%llx within range [0x%llx, 0x%llx)\n",
|
||||
*pte, expected_pte, offset, range_vec->start, range_vec->size);
|
||||
*pte,
|
||||
expected_pte,
|
||||
offset,
|
||||
range_vec->start,
|
||||
range_vec->size);
|
||||
return false;
|
||||
}
|
||||
offset += range_vec->page_size;
|
||||
@ -1226,7 +1252,11 @@ static NV_STATUS test_range_vec_write_ptes(uvm_page_table_range_vec_t *range_vec
|
||||
TEST_CHECK_RET(data.status == NV_OK);
|
||||
TEST_CHECK_RET(data.count == range_vec->size / range_vec->page_size);
|
||||
TEST_CHECK_RET(assert_invalidate_range_specific(g_last_fake_inval,
|
||||
range_vec->start, range_vec->size, range_vec->page_size, page_table_depth, membar != UVM_MEMBAR_NONE));
|
||||
range_vec->start,
|
||||
range_vec->size,
|
||||
range_vec->page_size,
|
||||
page_table_depth,
|
||||
membar != UVM_MEMBAR_NONE));
|
||||
TEST_CHECK_RET(assert_range_vec_ptes(range_vec, false));
|
||||
|
||||
fake_tlb_invals_disable();
|
||||
@ -1249,7 +1279,11 @@ static NV_STATUS test_range_vec_clear_ptes(uvm_page_table_range_vec_t *range_vec
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree, NvU64 start, NvU64 size, NvU32 page_size, uvm_page_table_range_vec_t **range_vec_out)
|
||||
static NV_STATUS test_range_vec_create(uvm_page_tree_t *tree,
|
||||
NvU64 start,
|
||||
NvU64 size,
|
||||
NvU32 page_size,
|
||||
uvm_page_table_range_vec_t **range_vec_out)
|
||||
{
|
||||
uvm_page_table_range_vec_t *range_vec;
|
||||
uvm_pmm_alloc_flags_t pmm_flags = UVM_PMM_ALLOC_FLAGS_EVICT;
|
||||
@ -1544,25 +1578,28 @@ static NV_STATUS entry_test_maxwell(uvm_gpu_t *gpu)
|
||||
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999000LL);
|
||||
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
|
||||
uvm_mmu_mode_hal_t *hal;
|
||||
uvm_page_directory_t dir;
|
||||
NvU32 i, j, big_page_size, page_size;
|
||||
|
||||
dir.depth = 0;
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(big_page_sizes); i++) {
|
||||
big_page_size = big_page_sizes[i];
|
||||
hal = gpu->parent->arch_hal->mmu_mode_hal(big_page_size);
|
||||
|
||||
memset(phys_allocs, 0, sizeof(phys_allocs));
|
||||
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(&pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits == 0x0L);
|
||||
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
phys_allocs[1] = &alloc_vid;
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(&pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits == 0x1BBBBBBD99999992LL);
|
||||
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
phys_allocs[1] = &alloc_sys;
|
||||
hal->make_pde(&pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(&pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits == 0x9999999E1BBBBBB1LL);
|
||||
|
||||
for (j = 0; j <= 2; j++) {
|
||||
@ -1632,38 +1669,47 @@ static NV_STATUS entry_test_pascal(uvm_gpu_t *gpu, entry_test_page_size_func ent
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999000LL);
|
||||
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
|
||||
uvm_page_directory_t dir;
|
||||
|
||||
// big versions have [11:8] set as well to test the page table merging
|
||||
uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999900LL);
|
||||
uvm_mmu_page_table_alloc_t alloc_big_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBBB00LL);
|
||||
|
||||
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
|
||||
|
||||
dir.index_in_parent = 0;
|
||||
dir.host_parent = NULL;
|
||||
dir.depth = 0;
|
||||
|
||||
// Make sure cleared PDEs work as expected
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0);
|
||||
|
||||
memset(pde_bits, 0xFF, sizeof(pde_bits));
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
dir.depth = 3;
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
|
||||
|
||||
// Sys and vidmem PDEs
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
dir.depth = 0;
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x3999999999990C);
|
||||
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB0A);
|
||||
|
||||
// Dual PDEs
|
||||
phys_allocs[0] = &alloc_big_sys;
|
||||
phys_allocs[1] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
dir.depth = 3;
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x3999999999999C && pde_bits[1] == 0x1BBBBBB0A);
|
||||
|
||||
phys_allocs[0] = &alloc_big_vid;
|
||||
phys_allocs[1] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBBBA && pde_bits[1] == 0x3999999999990C);
|
||||
|
||||
// uncached, i.e., the sysmem data is not cached in GPU's L2 cache. Clear
|
||||
@ -1719,6 +1765,7 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999000LL);
|
||||
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0x1BBBBBB000LL);
|
||||
uvm_page_directory_t dir;
|
||||
|
||||
// big versions have [11:8] set as well to test the page table merging
|
||||
uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x399999999999900LL);
|
||||
@ -1726,37 +1773,45 @@ static NV_STATUS entry_test_volta(uvm_gpu_t *gpu, entry_test_page_size_func entr
|
||||
|
||||
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
|
||||
|
||||
dir.index_in_parent = 0;
|
||||
dir.host_parent = NULL;
|
||||
dir.depth = 0;
|
||||
|
||||
// Make sure cleared PDEs work as expected
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0);
|
||||
|
||||
memset(pde_bits, 0xFF, sizeof(pde_bits));
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
dir.depth = 3;
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
|
||||
|
||||
// Sys and vidmem PDEs
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
dir.depth = 0;
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x3999999999990C);
|
||||
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB0A);
|
||||
|
||||
// Dual PDEs
|
||||
phys_allocs[0] = &alloc_big_sys;
|
||||
phys_allocs[1] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
dir.depth = 3;
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x3999999999999C && pde_bits[1] == 0x1BBBBBB0A);
|
||||
|
||||
phys_allocs[0] = &alloc_big_vid;
|
||||
phys_allocs[1] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 3);
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBBBA && pde_bits[1] == 0x3999999999990C);
|
||||
|
||||
// NO_ATS PDE1 (depth 2)
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 2);
|
||||
dir.depth = 2;
|
||||
hal->make_pde(pde_bits, phys_allocs, &dir, 0);
|
||||
if (g_uvm_global.ats.enabled)
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x1BBBBBB2A);
|
||||
else
|
||||
@ -1791,104 +1846,203 @@ static NV_STATUS entry_test_ampere(uvm_gpu_t *gpu, entry_test_page_size_func ent
|
||||
|
||||
static NV_STATUS entry_test_hopper(uvm_gpu_t *gpu, entry_test_page_size_func entry_test_page_size)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU32 page_sizes[MAX_NUM_PAGE_SIZES];
|
||||
NvU64 pde_bits[2];
|
||||
uvm_page_directory_t *dirs[5];
|
||||
size_t i, num_page_sizes;
|
||||
uvm_mmu_page_table_alloc_t *phys_allocs[2] = {NULL, NULL};
|
||||
uvm_mmu_page_table_alloc_t alloc_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999999000LL);
|
||||
uvm_mmu_page_table_alloc_t alloc_vid = fake_table_alloc(UVM_APERTURE_VID, 0xBBBBBBB000LL);
|
||||
|
||||
// big versions have [11:8] set as well to test the page table merging
|
||||
// Big versions have [11:8] set as well to test the page table merging
|
||||
uvm_mmu_page_table_alloc_t alloc_big_sys = fake_table_alloc(UVM_APERTURE_SYS, 0x9999999999900LL);
|
||||
uvm_mmu_page_table_alloc_t alloc_big_vid = fake_table_alloc(UVM_APERTURE_VID, 0xBBBBBBBB00LL);
|
||||
|
||||
uvm_mmu_mode_hal_t *hal = gpu->parent->arch_hal->mmu_mode_hal(UVM_PAGE_SIZE_64K);
|
||||
|
||||
// Make sure cleared PDEs work as expected
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0);
|
||||
memset(dirs, 0, sizeof(dirs));
|
||||
// Fake directory tree.
|
||||
for (i = 0; i < ARRAY_SIZE(dirs); i++) {
|
||||
dirs[i] = uvm_kvmalloc_zero(sizeof(uvm_page_directory_t) + sizeof(dirs[i]->entries[0]) * 512);
|
||||
TEST_CHECK_GOTO(dirs[i] != NULL, cleanup);
|
||||
|
||||
dirs[i]->depth = i;
|
||||
dirs[i]->index_in_parent = 0;
|
||||
|
||||
if (i == 0)
|
||||
dirs[i]->host_parent = NULL;
|
||||
else
|
||||
dirs[i]->host_parent = dirs[i - 1];
|
||||
}
|
||||
|
||||
// Make sure cleared PDEs work as expected.
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0, cleanup);
|
||||
|
||||
// Cleared PDEs work as expected for big and small PDEs.
|
||||
memset(pde_bits, 0xFF, sizeof(pde_bits));
|
||||
hal->make_pde(pde_bits, phys_allocs, 4);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0 && pde_bits[1] == 0);
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[4], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0 && pde_bits[1] == 0, cleanup);
|
||||
|
||||
// Sys and vidmem PDEs, uncached ATS allowed.
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x999999999900C);
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999900C, cleanup);
|
||||
|
||||
phys_allocs[0] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 0);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBB00A);
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0xBBBBBBB00A, cleanup);
|
||||
|
||||
// Dual PDEs, uncached.
|
||||
// Dual PDEs, uncached. We don't use child_dir in the depth 4 checks because
|
||||
// our policy decides the PDE's PCF without using it.
|
||||
phys_allocs[0] = &alloc_big_sys;
|
||||
phys_allocs[1] = &alloc_vid;
|
||||
hal->make_pde(pde_bits, phys_allocs, 4);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0x999999999991C && pde_bits[1] == 0xBBBBBBB01A);
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[4], 0);
|
||||
if (g_uvm_global.ats.enabled)
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999991C && pde_bits[1] == 0xBBBBBBB01A, cleanup);
|
||||
else
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999990C && pde_bits[1] == 0xBBBBBBB00A, cleanup);
|
||||
|
||||
phys_allocs[0] = &alloc_big_vid;
|
||||
phys_allocs[1] = &alloc_sys;
|
||||
hal->make_pde(pde_bits, phys_allocs, 4);
|
||||
TEST_CHECK_RET(pde_bits[0] == 0xBBBBBBBB1A && pde_bits[1] == 0x999999999901C);
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[4], 0);
|
||||
if (g_uvm_global.ats.enabled)
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0xBBBBBBBB1A && pde_bits[1] == 0x999999999901C, cleanup);
|
||||
else
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0xBBBBBBBB0A && pde_bits[1] == 0x999999999900C, cleanup);
|
||||
|
||||
// We only need to test make_pde() on ATS when the CPU VA width < GPU's.
|
||||
if (g_uvm_global.ats.enabled && uvm_cpu_num_va_bits() < hal->num_va_bits()) {
|
||||
phys_allocs[0] = &alloc_sys;
|
||||
|
||||
dirs[1]->index_in_parent = 0;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999900C, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 0;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 1;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 1);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 2;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 2);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 511;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 511);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
|
||||
|
||||
dirs[1]->index_in_parent = 1;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[0], 1);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999900C, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 0;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 509;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 509);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 510;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 510);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x999999999901C, cleanup);
|
||||
|
||||
phys_allocs[0] = NULL;
|
||||
|
||||
dirs[1]->index_in_parent = 0;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[0], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x0, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 0;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 0);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x0, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 2;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 2);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x10, cleanup);
|
||||
|
||||
dirs[1]->index_in_parent = 1;
|
||||
dirs[2]->index_in_parent = 509;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 509);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x10, cleanup);
|
||||
|
||||
dirs[2]->index_in_parent = 510;
|
||||
hal->make_pde(pde_bits, phys_allocs, dirs[1], 510);
|
||||
TEST_CHECK_GOTO(pde_bits[0] == 0x0, cleanup);
|
||||
}
|
||||
|
||||
// uncached, i.e., the sysmem data is not cached in GPU's L2 cache, and
|
||||
// access counters disabled.
|
||||
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_WRITE_ATOMIC,
|
||||
UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) == 0x999999999968D);
|
||||
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_WRITE_ATOMIC,
|
||||
UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) == 0x999999999968D,
|
||||
cleanup);
|
||||
|
||||
// change to cached.
|
||||
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_WRITE_ATOMIC,
|
||||
UVM_MMU_PTE_FLAGS_CACHED | UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) ==
|
||||
0x9999999999685);
|
||||
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_WRITE_ATOMIC,
|
||||
UVM_MMU_PTE_FLAGS_CACHED | UVM_MMU_PTE_FLAGS_ACCESS_COUNTERS_DISABLED) ==
|
||||
0x9999999999685,
|
||||
cleanup);
|
||||
|
||||
// enable access counters.
|
||||
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_WRITE_ATOMIC,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999605);
|
||||
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_WRITE_ATOMIC,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999605,
|
||||
cleanup);
|
||||
|
||||
// remove atomic
|
||||
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_WRITE,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999645);
|
||||
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_WRITE,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999645,
|
||||
cleanup);
|
||||
|
||||
// read only
|
||||
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_ONLY,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999665);
|
||||
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_SYS,
|
||||
0x9999999999000LL,
|
||||
UVM_PROT_READ_ONLY,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0x9999999999665,
|
||||
cleanup);
|
||||
|
||||
// local video
|
||||
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_VID,
|
||||
0xBBBBBBB000LL,
|
||||
UVM_PROT_READ_ONLY,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0xBBBBBBB661);
|
||||
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_VID,
|
||||
0xBBBBBBB000LL,
|
||||
UVM_PROT_READ_ONLY,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0xBBBBBBB661,
|
||||
cleanup);
|
||||
|
||||
// peer 1
|
||||
TEST_CHECK_RET(hal->make_pte(UVM_APERTURE_PEER_1,
|
||||
0xBBBBBBB000LL,
|
||||
UVM_PROT_READ_ONLY,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0x200000BBBBBBB663);
|
||||
TEST_CHECK_GOTO(hal->make_pte(UVM_APERTURE_PEER_1,
|
||||
0xBBBBBBB000LL,
|
||||
UVM_PROT_READ_ONLY,
|
||||
UVM_MMU_PTE_FLAGS_CACHED) == 0x200000BBBBBBB663,
|
||||
cleanup);
|
||||
|
||||
// sparse
|
||||
TEST_CHECK_RET(hal->make_sparse_pte() == 0x8);
|
||||
TEST_CHECK_GOTO(hal->make_sparse_pte() == 0x8, cleanup);
|
||||
|
||||
// sked reflected
|
||||
TEST_CHECK_RET(hal->make_sked_reflected_pte() == 0xF09);
|
||||
TEST_CHECK_GOTO(hal->make_sked_reflected_pte() == 0xF09, cleanup);
|
||||
|
||||
num_page_sizes = get_page_sizes(gpu, page_sizes);
|
||||
|
||||
for (i = 0; i < num_page_sizes; i++)
|
||||
TEST_NV_CHECK_RET(entry_test_page_size(gpu, page_sizes[i]));
|
||||
TEST_NV_CHECK_GOTO(entry_test_page_size(gpu, page_sizes[i]), cleanup);
|
||||
|
||||
return NV_OK;
|
||||
cleanup:
|
||||
for (i = 0; i < ARRAY_SIZE(dirs); i++)
|
||||
uvm_kvfree(dirs[i]);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS alloc_4k_maxwell(uvm_gpu_t *gpu)
|
||||
@ -2303,7 +2457,8 @@ NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *fil
|
||||
gpu->parent = parent_gpu;
|
||||
|
||||
// At least test_tlb_invalidates() relies on global state
|
||||
// (g_tlb_invalidate_*) so make sure only one test instance can run at a time.
|
||||
// (g_tlb_invalidate_*) so make sure only one test instance can run at a
|
||||
// time.
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
|
||||
// Allocate the fake TLB tracking state. Notably tests still need to enable
|
||||
@ -2311,7 +2466,13 @@ NV_STATUS uvm_test_page_tree(UVM_TEST_PAGE_TREE_PARAMS *params, struct file *fil
|
||||
// calls.
|
||||
TEST_NV_CHECK_GOTO(fake_tlb_invals_alloc(), done);
|
||||
|
||||
TEST_NV_CHECK_GOTO(maxwell_test_page_tree(gpu), done);
|
||||
// We prevent the maxwell_test_page_tree test from running on ATS-enabled
|
||||
// systems. On "fake" Maxwell-based ATS systems pde_fill() may push more
|
||||
// methods than what we support in UVM. Specifically, on
|
||||
// uvm_page_tree_init() which eventually calls phys_mem_init(). On Maxwell,
|
||||
// upper PDE levels have more than 512 entries.
|
||||
if (!g_uvm_global.ats.enabled)
|
||||
TEST_NV_CHECK_GOTO(maxwell_test_page_tree(gpu), done);
|
||||
TEST_NV_CHECK_GOTO(pascal_test_page_tree(gpu), done);
|
||||
TEST_NV_CHECK_GOTO(volta_test_page_tree(gpu), done);
|
||||
TEST_NV_CHECK_GOTO(ampere_test_page_tree(gpu), done);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2020 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -100,4 +100,6 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->smc.supported = false;
|
||||
|
||||
parent_gpu->plc_supported = false;
|
||||
|
||||
parent_gpu->no_ats_range_required = false;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2020 NVIDIA Corporation
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -140,11 +140,18 @@ static NvU64 small_half_pde_pascal(uvm_mmu_page_table_alloc_t *phys_alloc)
|
||||
return pde_bits;
|
||||
}
|
||||
|
||||
static void make_pde_pascal(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
|
||||
static void make_pde_pascal(void *entry,
|
||||
uvm_mmu_page_table_alloc_t **phys_allocs,
|
||||
uvm_page_directory_t *dir,
|
||||
NvU32 child_index)
|
||||
{
|
||||
NvU32 entry_count = entries_per_index_pascal(depth);
|
||||
NvU32 entry_count;
|
||||
NvU64 *entry_bits = (NvU64 *)entry;
|
||||
|
||||
UVM_ASSERT(dir);
|
||||
|
||||
entry_count = entries_per_index_pascal(dir->depth);
|
||||
|
||||
if (entry_count == 1) {
|
||||
*entry_bits = single_pde_pascal(*phys_allocs);
|
||||
}
|
||||
@ -152,7 +159,8 @@ static void make_pde_pascal(void *entry, uvm_mmu_page_table_alloc_t **phys_alloc
|
||||
entry_bits[MMU_BIG] = big_half_pde_pascal(phys_allocs[MMU_BIG]);
|
||||
entry_bits[MMU_SMALL] = small_half_pde_pascal(phys_allocs[MMU_SMALL]);
|
||||
|
||||
// This entry applies to the whole dual PDE but is stored in the lower bits
|
||||
// This entry applies to the whole dual PDE but is stored in the lower
|
||||
// bits.
|
||||
entry_bits[MMU_BIG] |= HWCONST64(_MMU_VER2, DUAL_PDE, IS_PDE, TRUE);
|
||||
}
|
||||
else {
|
||||
|
@ -36,6 +36,7 @@
|
||||
#include "uvm_mmu.h"
|
||||
#include "uvm_gpu_access_counters.h"
|
||||
#include "uvm_pmm_sysmem.h"
|
||||
#include "uvm_migrate_pageable.h"
|
||||
|
||||
static NV_STATUS uvm_test_get_gpu_ref_count(UVM_TEST_GET_GPU_REF_COUNT_PARAMS *params, struct file *filp)
|
||||
{
|
||||
@ -331,6 +332,7 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
UVM_ROUTE_CMD_STACK_NO_INIT_CHECK(UVM_TEST_CGROUP_ACCOUNTING_SUPPORTED, uvm_test_cgroup_accounting_supported);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SPLIT_INVALIDATE_DELAY, uvm_test_split_invalidate_delay);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_CPU_CHUNK_API, uvm_test_cpu_chunk_api);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SKIP_MIGRATE_VMA, uvm_test_skip_migrate_vma);
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
|
@ -28,6 +28,13 @@
|
||||
#include "uvm_ioctl.h"
|
||||
#include "nv_uvm_types.h"
|
||||
|
||||
#define UVM_TEST_SKIP_MIGRATE_VMA UVM_TEST_IOCTL_BASE(103)
|
||||
typedef struct
|
||||
{
|
||||
NvBool skip; // In
|
||||
NV_STATUS rmStatus; // Out
|
||||
} UVM_TEST_SKIP_MIGRATE_VMA_PARAMS;
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
@ -1082,25 +1082,19 @@ void uvm_tools_broadcast_replay(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
|
||||
void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu,
|
||||
NvU32 batch_id,
|
||||
uvm_fault_client_type_t client_type)
|
||||
void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_client_type_t client_type)
|
||||
{
|
||||
UVM_ASSERT(!gpu->parent->has_clear_faulted_channel_method);
|
||||
|
||||
if (!tools_is_event_enabled_in_any_va_space(UvmEventTypeGpuFaultReplay))
|
||||
return;
|
||||
|
||||
record_replay_event_helper(gpu->id,
|
||||
batch_id,
|
||||
client_type,
|
||||
NV_GETTIME(),
|
||||
gpu->parent->host_hal->get_time(gpu));
|
||||
record_replay_event_helper(gpu->id, batch_id, client_type, NV_GETTIME(), gpu->parent->host_hal->get_time(gpu));
|
||||
}
|
||||
|
||||
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
|
||||
const uvm_access_counter_buffer_entry_t *buffer_entry,
|
||||
bool on_managed)
|
||||
bool on_managed_phys)
|
||||
{
|
||||
UvmEventEntry entry;
|
||||
UvmEventTestAccessCounterInfo *info = &entry.testEventData.accessCounter;
|
||||
@ -1119,6 +1113,7 @@ void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
|
||||
info->srcIndex = uvm_id_value(gpu->id);
|
||||
info->address = buffer_entry->address.address;
|
||||
info->isVirtual = buffer_entry->address.is_virtual? 1: 0;
|
||||
|
||||
if (buffer_entry->address.is_virtual) {
|
||||
info->instancePtr = buffer_entry->virtual_info.instance_ptr.address;
|
||||
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
|
||||
@ -1126,9 +1121,10 @@ void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
|
||||
}
|
||||
else {
|
||||
info->aperture = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
|
||||
info->physOnManaged = on_managed_phys? 1 : 0;
|
||||
}
|
||||
|
||||
info->isFromCpu = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0;
|
||||
info->onManaged = on_managed? 1 : 0;
|
||||
info->value = buffer_entry->counter_value;
|
||||
info->subGranularity = buffer_entry->sub_granularity;
|
||||
info->bank = buffer_entry->bank;
|
||||
|
@ -102,18 +102,13 @@ void uvm_tools_record_read_duplicate_invalidate(uvm_va_block_t *va_block,
|
||||
uvm_va_block_region_t region,
|
||||
const uvm_page_mask_t *page_mask);
|
||||
|
||||
void uvm_tools_broadcast_replay(uvm_gpu_t *gpu,
|
||||
uvm_push_t *push,
|
||||
NvU32 batch_id,
|
||||
uvm_fault_client_type_t client_type);
|
||||
void uvm_tools_broadcast_replay(uvm_gpu_t *gpu, uvm_push_t *push, NvU32 batch_id, uvm_fault_client_type_t client_type);
|
||||
|
||||
void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu,
|
||||
NvU32 batch_id,
|
||||
uvm_fault_client_type_t client_type);
|
||||
void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_client_type_t client_type);
|
||||
|
||||
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
|
||||
const uvm_access_counter_buffer_entry_t *buffer_entry,
|
||||
bool on_managed);
|
||||
bool on_managed_phys);
|
||||
|
||||
void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2021 NVIDIA Corporation
|
||||
Copyright (c) 2017-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -93,4 +93,6 @@ void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->smc.supported = false;
|
||||
|
||||
parent_gpu->plc_supported = true;
|
||||
|
||||
parent_gpu->no_ats_range_required = false;
|
||||
}
|
||||
|
@ -967,8 +967,10 @@ typedef struct
NvU8 isFromCpu;

NvU8 veId;
NvU8 onManaged; // The access counter notification was triggered on
// a managed memory region

// The physical access counter notification was triggered on a managed
// memory region. This is not set for virtual access counter notifications.
NvU8 physOnManaged;

NvU32 value;
NvU32 subGranularity;
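
A hedged consumer-side sketch of how a tools client might interpret the new field alongside the existing ones; only the fields visible in this hunk are used, and the print formatting is illustrative:

#include <stdio.h>

static void dump_access_counter(const UvmEventTestAccessCounterInfo *info)
{
    if (info->isVirtual) {
        // Virtual notifications carry instance-pointer information;
        // physOnManaged is not set for these.
        printf("virtual access counter notification, value %u\n", info->value);
    }
    else {
        printf("physical access counter notification on %s memory, value %u\n",
               info->physOnManaged ? "managed" : "unmanaged",
               info->value);
    }
}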
|
||||
|
@ -1760,6 +1760,21 @@ static NvU32 block_phys_page_size(uvm_va_block_t *block, block_phys_page_t page)
return (NvU32)chunk_size;
}

NvU32 uvm_va_block_get_physical_size(uvm_va_block_t *block,
uvm_processor_id_t processor,
uvm_page_index_t page_index)
{
block_phys_page_t page;

UVM_ASSERT(block);

uvm_assert_mutex_locked(&block->lock);

page = block_phys_page(processor, page_index);

return block_phys_page_size(block, page);
}
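
A short usage sketch for the new helper; per the header comment added in uvm_va_block.h further below, the caller holds the block lock and a return of 0 means the page is not resident on that processor:

static bool page_is_backed_by_2m(uvm_va_block_t *block,
                                 uvm_processor_id_t processor,
                                 uvm_page_index_t page_index)
{
    NvU32 size;

    uvm_assert_mutex_locked(&block->lock);

    // 0 means the page is not resident on this processor.
    size = uvm_va_block_get_physical_size(block, processor, page_index);
    return size == UVM_PAGE_SIZE_2M;
}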
|
||||
|
||||
static uvm_pte_bits_cpu_t get_cpu_pte_bit_index(uvm_prot_t prot)
|
||||
{
|
||||
uvm_pte_bits_cpu_t pte_bit_index = UVM_PTE_BITS_CPU_MAX;
|
||||
@ -8248,14 +8263,6 @@ void uvm_va_block_munmap_region(uvm_va_block_t *va_block,
|
||||
event_data.block_munmap.region = region;
|
||||
uvm_perf_event_notify(&va_space->perf_events, UVM_PERF_EVENT_BLOCK_MUNMAP, &event_data);
|
||||
|
||||
// Set a flag so that GPU fault events are flushed since they might refer
|
||||
// to the region being unmapped.
|
||||
// Note that holding the va_block lock prevents GPU VA spaces from
|
||||
// being removed so the registered_gpu_va_spaces mask is stable.
|
||||
for_each_gpu_id_in_mask(gpu_id, &va_space->registered_gpu_va_spaces) {
|
||||
uvm_processor_mask_set_atomic(&va_space->needs_fault_buffer_flush, gpu_id);
|
||||
}
|
||||
|
||||
// Release any remaining vidmem chunks in the given region.
|
||||
for_each_gpu_id(gpu_id) {
|
||||
uvm_va_block_gpu_state_t *gpu_state = uvm_va_block_gpu_state_get(va_block, gpu_id);
|
||||
@ -10155,6 +10162,34 @@ static uvm_processor_id_t block_select_residency(uvm_va_block_t *va_block,
|
||||
uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(preferred_location)], processor_id))
|
||||
return preferred_location;
|
||||
|
||||
// Check if we should map the closest resident processor remotely on remote CPU fault
|
||||
//
|
||||
// When faulting on CPU, there's a linux process on behalf of it, which is associated
|
||||
// with a unique VM pointed by current->mm. A block of memory residing on GPU is also
|
||||
// associated with VM, pointed by va_block_context->mm. If they match, it's a regular
|
||||
// (local) fault, and we may want to migrate a page from GPU to CPU.
|
||||
// If it's a 'remote' fault, i.e. linux process differs from one associated with block
|
||||
// VM, we might preserve residence.
|
||||
//
|
||||
// Establishing a remote fault without access counters means the memory could stay in
|
||||
// the wrong spot for a long time, which is why we prefer to avoid creating remote
|
||||
// mappings. However when NIC accesses a memory residing on GPU, it's worth to keep it
|
||||
// in place for NIC accesses.
|
||||
//
|
||||
// The logic that's used to detect remote faulting also keeps memory in place for
|
||||
// ptrace accesses. We would prefer to control those policies separately, but the
|
||||
// NIC case takes priority.
|
||||
// If the accessing processor is CPU, we're either handling a fault
|
||||
// from other than owning process, or we're handling an MOMC
|
||||
// notification. Only prevent migration for the former.
|
||||
if (UVM_ID_IS_CPU(processor_id) &&
|
||||
operation != UVM_SERVICE_OPERATION_ACCESS_COUNTERS &&
|
||||
uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(closest_resident_processor)], processor_id) &&
|
||||
va_block_context->mm != current->mm) {
|
||||
UVM_ASSERT(va_block_context->mm != NULL);
|
||||
return closest_resident_processor;
|
||||
}
|
||||
|
||||
// If the page is resident on a processor other than the preferred location,
|
||||
// or the faulting processor can't access the preferred location, we select
|
||||
// the faulting processor as the new residency.
|
||||
@ -10713,7 +10748,7 @@ NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t processor_id,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_fault_type_t access_type,
|
||||
uvm_fault_access_type_t access_type,
|
||||
bool allow_migration)
|
||||
{
|
||||
uvm_va_range_t *va_range = va_block->va_range;
|
||||
|
@ -1000,7 +1000,7 @@ NV_STATUS uvm_va_block_check_logical_permissions(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_processor_id_t processor_id,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_fault_type_t access_type,
|
||||
uvm_fault_access_type_t access_type,
|
||||
bool allow_migration);
|
||||
|
||||
// API for access privilege revocation
|
||||
@ -2072,6 +2072,14 @@ void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
|
||||
// Locking: The va_block lock must be held.
|
||||
void uvm_va_block_remove_cpu_chunks(uvm_va_block_t *va_block, uvm_va_block_region_t region);
|
||||
|
||||
// Get the size of the physical allocation backing the page at page_index on the
|
||||
// specified processor in the block. Returns 0 if the address is not resident on
|
||||
// the specified processor.
|
||||
// Locking: The va_block lock must be held.
|
||||
NvU32 uvm_va_block_get_physical_size(uvm_va_block_t *block,
|
||||
uvm_processor_id_t processor,
|
||||
uvm_page_index_t page_index);
|
||||
|
||||
// Get CPU page size or 0 if it is not mapped
|
||||
NvU32 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block,
|
||||
uvm_page_index_t page_index);
|
||||
|
@ -193,7 +193,8 @@ uvm_va_policy_node_t *uvm_va_policy_node_iter_next(uvm_va_block_t *va_block, uvm
for ((node) = uvm_va_policy_node_iter_first((va_block), (start), (end)), \
(next) = uvm_va_policy_node_iter_next((va_block), (node), (end)); \
(node); \
(node) = (next))
(node) = (next), \
(next) = uvm_va_policy_node_iter_next((va_block), (node), (end)))
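
The old increment assigned (node) = (next) but never refreshed (next), so the loop could spin on the second node; the new increment advances both, which also keeps the "safe" property that the current node may be unlinked inside the body. A hedged usage sketch -- the macro name and remove_policy_node() are assumed for illustration, only the loop body above is from this diff:

static void drop_policy_nodes(uvm_va_block_t *va_block, NvU64 start, NvU64 end)
{
    uvm_va_policy_node_t *node, *next;

    uvm_for_each_va_policy_node_in_range_safe(node, next, va_block, start, end) {
        // `next` was computed before this body ran, so unlinking/freeing
        // `node` here does not break the iteration.
        remove_policy_node(va_block, node);
    }
}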
|
||||
|
||||
// Returns the first policy in the range [start, end], if any.
|
||||
// Locking: The va_block lock must be held.
|
||||
|
@ -1540,7 +1540,6 @@ static void remove_gpu_va_space(uvm_gpu_va_space_t *gpu_va_space,
|
||||
atomic_inc(&va_space->gpu_va_space_deferred_free.num_pending);
|
||||
|
||||
uvm_processor_mask_clear(&va_space->registered_gpu_va_spaces, gpu_va_space->gpu->id);
|
||||
uvm_processor_mask_clear_atomic(&va_space->needs_fault_buffer_flush, gpu_va_space->gpu->id);
|
||||
va_space->gpu_va_spaces[uvm_id_gpu_index(gpu_va_space->gpu->id)] = NULL;
|
||||
gpu_va_space->state = UVM_GPU_VA_SPACE_STATE_DEAD;
|
||||
}
|
||||
|
@ -253,17 +253,6 @@ struct uvm_va_space_struct
|
||||
// corrupting state.
|
||||
uvm_processor_mask_t gpu_unregister_in_progress;
|
||||
|
||||
// On VMA destruction, the fault buffer needs to be flushed for all the GPUs
|
||||
// registered in the VA space to avoid leaving stale entries of the VA range
|
||||
// that is going to be destroyed. Otherwise, these fault entries can be
|
||||
// attributed to new VA ranges reallocated at the same addresses. However,
|
||||
// uvm_vm_close is called with mm->mmap_lock taken and we cannot take the
|
||||
// ISR lock. Therefore, we use a flag to notify the GPU fault handler that
|
||||
// the fault buffer needs to be flushed, before servicing the faults that
|
||||
// belong to the va_space. The bits are set and cleared atomically so no
|
||||
// va_space lock is required.
|
||||
uvm_processor_mask_t needs_fault_buffer_flush;
|
||||
|
||||
// Mask of processors that are participating in system-wide atomics
|
||||
uvm_processor_mask_t system_wide_atomics_enabled_processors;
|
||||
|
||||
@ -353,6 +342,7 @@ struct uvm_va_space_struct
|
||||
struct
|
||||
{
|
||||
bool page_prefetch_enabled;
|
||||
bool skip_migrate_vma;
|
||||
|
||||
atomic_t migrate_vma_allocation_fail_nth;
|
||||
|
||||
|
@ -215,7 +215,13 @@ bool uvm_va_space_mm_enabled(uvm_va_space_t *va_space)
|
||||
|
||||
static struct mmu_notifier_ops uvm_mmu_notifier_ops_ats =
|
||||
{
|
||||
#if defined(NV_MMU_NOTIFIER_OPS_HAS_INVALIDATE_RANGE)
|
||||
.invalidate_range = uvm_mmu_notifier_invalidate_range_ats,
|
||||
#elif defined(NV_MMU_NOTIFIER_OPS_HAS_ARCH_INVALIDATE_SECONDARY_TLBS)
|
||||
.arch_invalidate_secondary_tlbs = uvm_mmu_notifier_invalidate_range_ats,
|
||||
#else
|
||||
#error One of invalidate_range/arch_invalid_secondary must be present
|
||||
#endif
|
||||
};
|
||||
|
||||
static int uvm_mmu_notifier_register(uvm_va_space_mm_t *va_space_mm)
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2021 NVIDIA Corporation
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -98,4 +98,6 @@ void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
parent_gpu->smc.supported = false;
|
||||
|
||||
parent_gpu->plc_supported = false;
|
||||
|
||||
parent_gpu->no_ats_range_required = false;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2021 NVIDIA Corporation
|
||||
Copyright (c) 2017-2023 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -145,13 +145,20 @@ static NvU64 small_half_pde_volta(uvm_mmu_page_table_alloc_t *phys_alloc)
|
||||
return pde_bits;
|
||||
}
|
||||
|
||||
static void make_pde_volta(void *entry, uvm_mmu_page_table_alloc_t **phys_allocs, NvU32 depth)
|
||||
static void make_pde_volta(void *entry,
|
||||
uvm_mmu_page_table_alloc_t **phys_allocs,
|
||||
uvm_page_directory_t *dir,
|
||||
NvU32 child_index)
|
||||
{
|
||||
NvU32 entry_count = entries_per_index_volta(depth);
|
||||
NvU32 entry_count;
|
||||
NvU64 *entry_bits = (NvU64 *)entry;
|
||||
|
||||
UVM_ASSERT(dir);
|
||||
|
||||
entry_count = entries_per_index_volta(dir->depth);
|
||||
|
||||
if (entry_count == 1) {
|
||||
*entry_bits = single_pde_volta(*phys_allocs, depth);
|
||||
*entry_bits = single_pde_volta(*phys_allocs, dir->depth);
|
||||
}
|
||||
else if (entry_count == 2) {
|
||||
entry_bits[MMU_BIG] = big_half_pde_volta(phys_allocs[MMU_BIG]);
|
||||
|
@ -23,10 +23,16 @@
|
||||
|
||||
#include "internal_crypt_lib.h"
|
||||
|
||||
#ifdef USE_LKCA
|
||||
#ifndef NV_CRYPTO_TFM_CTX_ALIGNED_PRESENT
|
||||
#include <crypto/internal/hash.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
void *lkca_hash_new(const char* alg_name)
|
||||
{
|
||||
#ifndef USE_LKCA
|
||||
return false;
|
||||
return NULL;
|
||||
#else
|
||||
//XXX: can we reuse crypto_shash part and just allocate desc
|
||||
struct crypto_shash *alg;
|
||||
@ -87,9 +93,24 @@ bool lkca_hmac_duplicate(struct shash_desc *dst, struct shash_desc const *src)
|
||||
|
||||
struct crypto_shash *src_tfm = src->tfm;
|
||||
struct crypto_shash *dst_tfm = dst->tfm;
|
||||
int ss = crypto_shash_statesize(dst_tfm);
|
||||
|
||||
#ifdef NV_CRYPTO_TFM_CTX_ALIGNED_PRESENT
|
||||
char *src_ipad = crypto_tfm_ctx_aligned(&src_tfm->base);
|
||||
char *dst_ipad = crypto_tfm_ctx_aligned(&dst_tfm->base);
|
||||
int ss = crypto_shash_statesize(dst_tfm);
|
||||
#else
|
||||
int ctx_size = crypto_shash_alg(dst_tfm)->base.cra_ctxsize;
|
||||
char *src_ipad = crypto_shash_ctx(src_tfm);
|
||||
char *dst_ipad = crypto_shash_ctx(dst_tfm);
|
||||
/*
|
||||
* Actual struct definition is hidden, so I assume data we need is at
|
||||
* the end. In 6.0 the struct has a pointer to crpyto_shash followed by:
|
||||
* 'u8 ipad[statesize];', then 'u8 opad[statesize];'
|
||||
*/
|
||||
src_ipad += ctx_size - 2 * ss;
|
||||
dst_ipad += ctx_size - 2 * ss;
|
||||
#endif
|
||||
|
||||
memcpy(dst_ipad, src_ipad, crypto_shash_blocksize(src->tfm));
|
||||
memcpy(dst_ipad + ss, src_ipad + ss, crypto_shash_blocksize(src->tfm));
|
||||
crypto_shash_clear_flags(dst->tfm, CRYPTO_TFM_NEED_KEY);
|
||||
|
@ -156,7 +156,7 @@ NvS32 NV_API_CALL nv_request_msix_irq(nv_linux_state_t *nvl)
{
for( j = 0; j < i; j++)
{
free_irq(nvl->msix_entries[i].vector, (void *)nvl);
free_irq(nvl->msix_entries[j].vector, (void *)nvl);
}
break;
}
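
The hunk above fixes the unwind path in nv_request_msix_irq(): on a failure at index i, the cleanup loop now frees the vectors that were actually requested (index j over [0, i)) instead of freeing the failing entry i repeatedly. A generic sketch of the pattern under the same assumption; nv_isr_stub, the flags and the device name are placeholders, not the driver's real handler setup:

static int request_all_vectors(nv_linux_state_t *nvl, int count)
{
    int i, j, rc = 0;

    for (i = 0; i < count; i++) {
        rc = request_irq(nvl->msix_entries[i].vector, nv_isr_stub, 0, "nvidia", nvl);
        if (rc != 0) {
            // Unwind only the vectors that were successfully requested:
            // entries [0, i), indexed by j -- never entry i itself.
            for (j = 0; j < i; j++)
                free_irq(nvl->msix_entries[j].vector, (void *)nvl);
            break;
        }
    }
    return rc;
}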
|
||||
|
@ -316,14 +316,14 @@ int nvidia_p2p_init_mapping(
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(nvidia_p2p_init_mapping);
|
||||
NV_EXPORT_SYMBOL(nvidia_p2p_init_mapping);
|
||||
|
||||
int nvidia_p2p_destroy_mapping(uint64_t p2p_token)
|
||||
{
|
||||
return -ENOTSUPP;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(nvidia_p2p_destroy_mapping);
|
||||
NV_EXPORT_SYMBOL(nvidia_p2p_destroy_mapping);
|
||||
|
||||
static void nv_p2p_mem_info_free_callback(void *data)
|
||||
{
|
||||
@ -506,8 +506,13 @@ static int nv_p2p_get_pages(
|
||||
(*page_table)->page_size = page_size_index;
|
||||
|
||||
os_free_mem(physical_addresses);
|
||||
physical_addresses = NULL;
|
||||
|
||||
os_free_mem(wreqmb_h);
|
||||
wreqmb_h = NULL;
|
||||
|
||||
os_free_mem(rreqmb_h);
|
||||
rreqmb_h = NULL;
|
||||
|
||||
if (free_callback != NULL)
|
||||
{
|
||||
@ -582,7 +587,7 @@ int nvidia_p2p_get_pages(
|
||||
p2p_token, va_space, virtual_address,
|
||||
length, page_table, free_callback, data);
|
||||
}
|
||||
EXPORT_SYMBOL(nvidia_p2p_get_pages);
|
||||
NV_EXPORT_SYMBOL(nvidia_p2p_get_pages);
|
||||
|
||||
int nvidia_p2p_get_pages_persistent(
|
||||
uint64_t virtual_address,
|
||||
@ -600,7 +605,7 @@ int nvidia_p2p_get_pages_persistent(
|
||||
virtual_address, length, page_table,
|
||||
NULL, NULL);
|
||||
}
|
||||
EXPORT_SYMBOL(nvidia_p2p_get_pages_persistent);
|
||||
NV_EXPORT_SYMBOL(nvidia_p2p_get_pages_persistent);
|
||||
|
||||
/*
|
||||
* This function is a no-op, but is left in place (for now), in order to allow
|
||||
@ -613,7 +618,7 @@ int nvidia_p2p_free_page_table(struct nvidia_p2p_page_table *page_table)
|
||||
return 0;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(nvidia_p2p_free_page_table);
|
||||
NV_EXPORT_SYMBOL(nvidia_p2p_free_page_table);
|
||||
|
||||
int nvidia_p2p_put_pages(
|
||||
uint64_t p2p_token,
|
||||
@ -645,7 +650,7 @@ int nvidia_p2p_put_pages(
|
||||
|
||||
return nvidia_p2p_map_status(status);
|
||||
}
|
||||
EXPORT_SYMBOL(nvidia_p2p_put_pages);
|
||||
NV_EXPORT_SYMBOL(nvidia_p2p_put_pages);
|
||||
|
||||
int nvidia_p2p_put_pages_persistent(
|
||||
uint64_t virtual_address,
|
||||
@ -685,7 +690,7 @@ int nvidia_p2p_put_pages_persistent(
|
||||
|
||||
return nvidia_p2p_map_status(status);
|
||||
}
|
||||
EXPORT_SYMBOL(nvidia_p2p_put_pages_persistent);
|
||||
NV_EXPORT_SYMBOL(nvidia_p2p_put_pages_persistent);
|
||||
|
||||
int nvidia_p2p_dma_map_pages(
|
||||
struct pci_dev *peer,
|
||||
@ -800,7 +805,7 @@ failed:
|
||||
return nvidia_p2p_map_status(status);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(nvidia_p2p_dma_map_pages);
|
||||
NV_EXPORT_SYMBOL(nvidia_p2p_dma_map_pages);
|
||||
|
||||
int nvidia_p2p_dma_unmap_pages(
|
||||
struct pci_dev *peer,
|
||||
@ -840,7 +845,7 @@ int nvidia_p2p_dma_unmap_pages(
|
||||
return 0;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(nvidia_p2p_dma_unmap_pages);
|
||||
NV_EXPORT_SYMBOL(nvidia_p2p_dma_unmap_pages);
|
||||
|
||||
/*
|
||||
* This function is a no-op, but is left in place (for now), in order to allow
|
||||
@ -855,7 +860,7 @@ int nvidia_p2p_free_dma_mapping(
|
||||
return 0;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(nvidia_p2p_free_dma_mapping);
|
||||
NV_EXPORT_SYMBOL(nvidia_p2p_free_dma_mapping);
|
||||
|
||||
int nvidia_p2p_register_rsync_driver(
|
||||
nvidia_p2p_rsync_driver_t *driver,
|
||||
@ -884,7 +889,7 @@ int nvidia_p2p_register_rsync_driver(
|
||||
driver->wait_for_rsync, data);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(nvidia_p2p_register_rsync_driver);
|
||||
NV_EXPORT_SYMBOL(nvidia_p2p_register_rsync_driver);
|
||||
|
||||
void nvidia_p2p_unregister_rsync_driver(
|
||||
nvidia_p2p_rsync_driver_t *driver,
|
||||
@ -916,7 +921,7 @@ void nvidia_p2p_unregister_rsync_driver(
|
||||
driver->wait_for_rsync, data);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(nvidia_p2p_unregister_rsync_driver);
|
||||
NV_EXPORT_SYMBOL(nvidia_p2p_unregister_rsync_driver);
|
||||
|
||||
int nvidia_p2p_get_rsync_registers(
|
||||
nvidia_p2p_rsync_reg_info_t **reg_info
|
||||
@ -1009,7 +1014,7 @@ int nvidia_p2p_get_rsync_registers(
|
||||
return 0;
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(nvidia_p2p_get_rsync_registers);
|
||||
NV_EXPORT_SYMBOL(nvidia_p2p_get_rsync_registers);
|
||||
|
||||
void nvidia_p2p_put_rsync_registers(
|
||||
nvidia_p2p_rsync_reg_info_t *reg_info
|
||||
@ -1041,4 +1046,4 @@ void nvidia_p2p_put_rsync_registers(
|
||||
os_free_mem(reg_info);
|
||||
}
|
||||
|
||||
EXPORT_SYMBOL(nvidia_p2p_put_rsync_registers);
|
||||
NV_EXPORT_SYMBOL(nvidia_p2p_put_rsync_registers);
|
||||
|
@ -1224,12 +1224,11 @@ static int nv_start_device(nv_state_t *nv, nvidia_stack_t *sp)
    rm_read_registry_dword(sp, nv, NV_REG_ENABLE_MSI, &msi_config);
    if (msi_config == 1)
    {
        if (pci_find_capability(nvl->pci_dev, PCI_CAP_ID_MSIX))
        if (nvl->pci_dev->msix_cap && rm_is_msix_allowed(sp, nv))
        {
            nv_init_msix(nv);
        }
        if (pci_find_capability(nvl->pci_dev, PCI_CAP_ID_MSI) &&
            !(nv->flags & NV_FLAG_USES_MSIX))
        if (nvl->pci_dev->msi_cap && !(nv->flags & NV_FLAG_USES_MSIX))
        {
            nv_init_msi(nv);
        }
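
The new checks read the cached msix_cap/msi_cap offsets from struct pci_dev (plus an RM policy check) instead of re-walking the capability list, and the selection order is unchanged: prefer MSI-X, fall back to MSI only if MSI-X was not enabled. A minimal sketch of that ordering, using illustrative booleans rather than real PCI state:

#include <stdio.h>
#include <stdbool.h>

int main(void)
{
    bool has_msix = true;       /* stand-in for nvl->pci_dev->msix_cap != 0 */
    bool has_msi = true;        /* stand-in for nvl->pci_dev->msi_cap != 0  */
    bool msix_allowed = true;   /* stand-in for rm_is_msix_allowed()        */
    bool uses_msix = false;

    if (has_msix && msix_allowed)
    {
        uses_msix = true;                    /* nv_init_msix(nv)            */
        printf("using MSI-X\n");
    }
    if (has_msi && !uses_msix)
    {
        printf("falling back to MSI\n");     /* nv_init_msi(nv)             */
    }
    return 0;
}
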
@ -195,6 +195,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += devm_clk_bulk_get_all
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += get_task_ioprio
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += mdev_set_iommu_device
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += offline_and_remove_memory
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += crypto_tfm_ctx_aligned
|
||||
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_of_node_to_nid
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_sme_active
|
||||
@ -215,6 +216,7 @@ NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_get_dram_num_channe
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tegra_dram_types
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_pxm_to_node
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_screen_info
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_screen_info
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_i2c_bus_status
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tegra_fuse_control_read
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_tegra_get_platform
|
||||
|
@ -46,6 +46,11 @@ NvlStatus nvlink_lib_unload(void);
|
||||
*/
|
||||
NvlStatus nvlink_lib_ioctl_ctrl(nvlink_ioctrl_params *ctrl_params);
|
||||
|
||||
/*
|
||||
* Gets number of devices with type deviceType
|
||||
*/
|
||||
NvlStatus nvlink_lib_return_device_count_by_type(NvU32 deviceType, NvU32 *numDevices);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -28,6 +28,11 @@
|
||||
|
||||
#include "nv-time.h"
|
||||
|
||||
#include <linux/mmzone.h>
|
||||
#include <linux/numa.h>
|
||||
|
||||
#include <linux/pid.h>
|
||||
|
||||
extern char *NVreg_TemporaryFilePath;
|
||||
|
||||
#define MAX_ERROR_STRING 512
|
||||
@ -1242,9 +1247,12 @@ void NV_API_CALL os_get_screen_info(
|
||||
* SYSFB_SIMPLEFB registers a dummy framebuffer which does not contain the
|
||||
* information required by os_get_screen_info(), therefore you need to
|
||||
* fall back onto the screen_info structure.
|
||||
*
|
||||
* After commit b8466fe82b79 ("efi: move screen_info into efi init code")
|
||||
* in v6.7, 'screen_info' is exported as GPL licensed symbol for ARM64.
|
||||
*/
|
||||
|
||||
#if NV_IS_EXPORT_SYMBOL_PRESENT_screen_info
|
||||
#if NV_CHECK_EXPORT_SYMBOL(screen_info)
|
||||
/*
|
||||
* If there is not a framebuffer console, return 0 size.
|
||||
*
|
||||
@ -2122,6 +2130,43 @@ void NV_API_CALL os_nv_cap_close_fd
|
||||
nv_cap_close_fd(fd);
|
||||
}
|
||||
|
||||
/*
 * Reads the total memory and free memory of a NUMA node from the kernel.
 */
NV_STATUS NV_API_CALL os_get_numa_node_memory_usage
(
    NvS32 node_id,
    NvU64 *free_memory_bytes,
    NvU64 *total_memory_bytes
)
{
    struct pglist_data *pgdat;
    struct zone *zone;
    NvU32 zone_id;

    if (node_id >= MAX_NUMNODES)
    {
        nv_printf(NV_DBG_ERRORS, "Invalid NUMA node ID\n");
        return NV_ERR_INVALID_ARGUMENT;
    }

    pgdat = NODE_DATA(node_id);

    *free_memory_bytes = 0;
    *total_memory_bytes = 0;

    for (zone_id = 0; zone_id < MAX_NR_ZONES; zone_id++)
    {
        zone = &(pgdat->node_zones[zone_id]);
        if (!populated_zone(zone))
            continue;
        *free_memory_bytes += (zone_page_state_snapshot(zone, NR_FREE_PAGES) * PAGE_SIZE);
        *total_memory_bytes += (zone->present_pages * PAGE_SIZE);
    }

    return NV_OK;
}

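os_get_numa_node_memory_usage() above walks every zone of the node, skips unpopulated zones, and accumulates free and present pages into byte totals. A standalone C sketch of the same accounting over fabricated zone data:

#include <stdio.h>

#define PAGE_SIZE 4096UL
#define MAX_NR_ZONES 3

struct zone { unsigned long free_pages; unsigned long present_pages; };

int main(void)
{
    struct zone zones[MAX_NR_ZONES] = { {1000, 4000}, {0, 0}, {250, 1024} };
    unsigned long long free_bytes = 0, total_bytes = 0;

    for (int i = 0; i < MAX_NR_ZONES; i++)
    {
        if (zones[i].present_pages == 0)       /* !populated_zone(zone) */
            continue;
        free_bytes  += zones[i].free_pages    * PAGE_SIZE;
        total_bytes += zones[i].present_pages * PAGE_SIZE;
    }
    printf("free=%llu total=%llu bytes\n", free_bytes, total_bytes);
    return 0;
}
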
typedef struct os_numa_gpu_mem_hotplug_notifier_s
|
||||
{
|
||||
NvU64 start_pa;
|
||||
@ -2373,3 +2418,28 @@ NV_STATUS NV_API_CALL os_offline_page_at_address
|
||||
#endif
|
||||
}
|
||||
|
||||
void* NV_API_CALL os_get_pid_info(void)
{
    return get_task_pid(current, PIDTYPE_PID);
}

void NV_API_CALL os_put_pid_info(void *pid_info)
{
    if (pid_info != NULL)
        put_pid(pid_info);
}

NV_STATUS NV_API_CALL os_find_ns_pid(void *pid_info, NvU32 *ns_pid)
{
    if ((pid_info == NULL) || (ns_pid == NULL))
        return NV_ERR_INVALID_ARGUMENT;

    *ns_pid = pid_vnr((struct pid *)pid_info);

    // The call returns 0 if the PID is not found in the current ns
    if (*ns_pid == 0)
        return NV_ERR_OBJECT_NOT_FOUND;

    return NV_OK;
}

@ -1360,7 +1360,7 @@ bool ConnectorImpl::compoundQueryAttach(Group * target,
    if (dev->pconCaps.maxHdmiLinkBandwidthGbps != 0)
    {
        NvU64 requiredBW = (NvU64)(modesetParams.modesetInfo.pixelClockHz * modesetParams.modesetInfo.depth);
        NvU64 availableBw = (NvU64)(dev->pconCaps.maxHdmiLinkBandwidthGbps * 1000000000);
        NvU64 availableBw = (NvU64)(dev->pconCaps.maxHdmiLinkBandwidthGbps * (NvU64)1000000000);
        if (requiredBW > availableBw)
        {
            compoundQueryResult = false;
@ -1375,10 +1375,10 @@ bool ConnectorImpl::compoundQueryAttach(Group * target,
    else if (dev->pconCaps.maxTmdsClkRate != 0)
    {
        NvU64 maxTmdsClkRateU64 = (NvU64)(dev->pconCaps.maxTmdsClkRate);
        NvU64 requireBw = (NvU64)(modesetParams.modesetInfo.pixelClockHz * modesetParams.modesetInfo.depth);
        NvU64 requiredBw = (NvU64)(modesetParams.modesetInfo.pixelClockHz * modesetParams.modesetInfo.depth);
        if (modesetParams.colorFormat == dpColorFormat_YCbCr420)
        {
            if (maxTmdsClkRateU64 < ((requireBw/24)/2))
            if (maxTmdsClkRateU64 < ((requiredBw/24)/2))
            {
                compoundQueryResult = false;
                return false;
@ -1386,7 +1386,7 @@ bool ConnectorImpl::compoundQueryAttach(Group * target,
            }
        }
        else
        {
            if (maxTmdsClkRateU64 < (requireBw/24))
            if (maxTmdsClkRateU64 < (requiredBw/24))
            {
                compoundQueryResult = false;
                return false;
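
The availableBw change above matters because maxHdmiLinkBandwidthGbps * 1000000000 is evaluated in 32-bit arithmetic before the outer cast, so anything above roughly 4 Gbps wraps; casting one operand to NvU64 first forces a 64-bit multiply. A standalone demonstration, using unsigned arithmetic so the wraparound is well defined:

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    unsigned int gbps = 48;     /* illustrative PCON link bandwidth */

    uint64_t wrong = (uint64_t)(gbps * 1000000000u);   /* multiply wraps in 32 bits */
    uint64_t right = (uint64_t)gbps * 1000000000u;     /* widened before multiply   */

    printf("wrong = %llu\n", (unsigned long long)wrong);   /* 755359744   */
    printf("right = %llu\n", (unsigned long long)right);   /* 48000000000 */
    return 0;
}
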
@ -4740,7 +4740,7 @@ bool ConnectorImpl::train(const LinkConfiguration & lConfig, bool force,
|
||||
{
|
||||
LinkTrainingType preferredTrainingType = trainType;
|
||||
bool result;
|
||||
bool bEnableFecOnSor;
|
||||
|
||||
//
|
||||
// Validate link config against caps
|
||||
//
|
||||
@ -4832,16 +4832,7 @@ bool ConnectorImpl::train(const LinkConfiguration & lConfig, bool force,
|
||||
result = postLTAdjustment(activeLinkConfig, force);
|
||||
}
|
||||
|
||||
bEnableFecOnSor = lConfig.bEnableFEC;
|
||||
|
||||
if (main->isEDP())
|
||||
{
|
||||
DeviceImpl * nativeDev = findDeviceInList(Address());
|
||||
if (nativeDev && nativeDev->bIsPreviouslyFakedMuxDevice)
|
||||
bEnableFecOnSor = activeLinkConfig.bEnableFEC;
|
||||
}
|
||||
|
||||
if((lConfig.lanes != 0) && result && bEnableFecOnSor)
|
||||
if((lConfig.lanes != 0) && result && activeLinkConfig.bEnableFEC)
|
||||
{
|
||||
//
|
||||
// Extended latency from link-train end to FEC enable pattern
|
||||
@ -6057,7 +6048,7 @@ void ConnectorImpl::notifyLongPulseInternal(bool statusConnected)
|
||||
if (this->bReassessMaxLink)
|
||||
{
|
||||
//
|
||||
// If the highest assessed LC is not equal to
|
||||
// If the highest assessed LC is not equal to
|
||||
// max possible link config, re-assess link
|
||||
//
|
||||
NvU8 retries = 0U;
|
||||
|
@ -43,18 +43,18 @@
|
||||
#endif
|
||||
|
||||
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
|
||||
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/VK535_87-147"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33800935)
|
||||
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/VK535_87-148"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33833102)
|
||||
#define NV_BUILD_TYPE "Official"
|
||||
#define NV_BUILD_NAME "rel/gpu_drv/r535/VK535_87-147"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33800935)
|
||||
#define NV_BUILD_NAME "rel/gpu_drv/r535/VK535_87-148"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33833102)
|
||||
|
||||
#else /* Windows builds */
|
||||
#define NV_BUILD_BRANCH_VERSION "VK535_87-24"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33800935)
|
||||
#define NV_BUILD_BRANCH_VERSION "VK535_87-25"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33833102)
|
||||
#define NV_BUILD_TYPE "Official"
|
||||
#define NV_BUILD_NAME "538.31"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33800935)
|
||||
#define NV_BUILD_NAME "538.35"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33833102)
|
||||
#define NV_BUILD_BRANCH_BASE_VERSION R535
|
||||
#endif
|
||||
// End buildmeister python edited section
|
||||
|
@ -4,7 +4,7 @@
|
||||
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
|
||||
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
|
||||
|
||||
#define NV_VERSION_STRING "535.43.23"
|
||||
#define NV_VERSION_STRING "535.43.24"
|
||||
|
||||
#else
|
||||
|
||||
|
@ -3,7 +3,7 @@
|
||||
#define NV_COMPANY_NAME_STRING_SHORT "NVIDIA"
|
||||
#define NV_COMPANY_NAME_STRING_FULL "NVIDIA Corporation"
|
||||
#define NV_COMPANY_NAME_STRING NV_COMPANY_NAME_STRING_FULL
|
||||
#define NV_COPYRIGHT_YEAR "2023"
|
||||
#define NV_COPYRIGHT_YEAR "2024"
|
||||
#define NV_COPYRIGHT "(C) " NV_COPYRIGHT_YEAR " NVIDIA Corporation. All rights reserved." // Please do not use the non-ascii copyright symbol for (C).
|
||||
|
||||
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
|
||||
|
@ -39,48 +39,63 @@ extern "C" {
|
||||
#endif //NV_UNIX
|
||||
#endif //!__cplusplus
|
||||
|
||||
// Surprise removal capable TB3 and TB2 BUS Device ID
|
||||
#define BUS_DEVICE_ID_TB3_ALPINE_RIDGE_01 0x1578
|
||||
#define BUS_DEVICE_ID_TB3_02 0x1576
|
||||
#define BUS_DEVICE_ID_TB3_03 0x15C0
|
||||
#define BUS_DEVICE_ID_TB3_04 0x15D3
|
||||
#define BUS_DEVICE_ID_TB3_05 0x15DA
|
||||
#define BUS_DEVICE_ID_TB3_06 0x15EA
|
||||
#define BUS_DEVICE_ID_TB3_07 0x15E7
|
||||
#define BUS_DEVICE_ID_TB3_08 0x15EF
|
||||
#define BUS_DEVICE_ID_TB3_09 0x1133
|
||||
#define BUS_DEVICE_ID_TB3_10 0x1136
|
||||
#define PARENT_EGPU_BUS_DEVICE_43 0x57A4
|
||||
#define PARENT_EGPU_BUS_DEVICE_42 0x5786
|
||||
#define PARENT_EGPU_BUS_DEVICE_41 0x1578
|
||||
#define PARENT_EGPU_BUS_DEVICE_40 0x1576
|
||||
#define PARENT_EGPU_BUS_DEVICE_39 0x15C0
|
||||
#define PARENT_EGPU_BUS_DEVICE_38 0x15D3
|
||||
#define PARENT_EGPU_BUS_DEVICE_37 0x15DA
|
||||
#define PARENT_EGPU_BUS_DEVICE_36 0x15EA
|
||||
#define PARENT_EGPU_BUS_DEVICE_35 0x15E7
|
||||
#define PARENT_EGPU_BUS_DEVICE_34 0x15EF
|
||||
#define PARENT_EGPU_BUS_DEVICE_33 0x1133
|
||||
#define PARENT_EGPU_BUS_DEVICE_32 0x1136
|
||||
|
||||
// IceLake-U TB3 device ids. Below TB3 would be integrated to CPU.
|
||||
#define BUS_DEVICE_ID_ICELAKE_TB3_01 0x8A1D
|
||||
#define BUS_DEVICE_ID_ICELAKE_TB3_02 0x8A1F
|
||||
#define BUS_DEVICE_ID_ICELAKE_TB3_03 0x8A21
|
||||
#define BUS_DEVICE_ID_ICELAKE_TB3_04 0x8A23
|
||||
#define BUS_DEVICE_ID_ICELAKE_TB3_05 0x8A0D
|
||||
#define BUS_DEVICE_ID_ICELAKE_TB3_06 0x8A17
|
||||
#define PARENT_EGPU_BUS_DEVICE_31 0x8A1D
|
||||
#define PARENT_EGPU_BUS_DEVICE_30 0x8A1F
|
||||
#define PARENT_EGPU_BUS_DEVICE_29 0x8A21
|
||||
#define PARENT_EGPU_BUS_DEVICE_28 0x8A23
|
||||
#define PARENT_EGPU_BUS_DEVICE_27 0x8A0D
|
||||
#define PARENT_EGPU_BUS_DEVICE_26 0x8A17
|
||||
|
||||
// TigerLake Thunderbolt device ids.
|
||||
#define BUS_DEVICE_ID_TIGERLAKE_TB3_01 0x9A1B
|
||||
#define BUS_DEVICE_ID_TIGERLAKE_TB3_02 0x9A1D
|
||||
#define BUS_DEVICE_ID_TIGERLAKE_TB3_03 0x9A1F
|
||||
#define BUS_DEVICE_ID_TIGERLAKE_TB3_04 0x9A21
|
||||
#define BUS_DEVICE_ID_TIGERLAKE_TB3_05 0x9A23
|
||||
#define BUS_DEVICE_ID_TIGERLAKE_TB3_06 0x9A25
|
||||
#define BUS_DEVICE_ID_TIGERLAKE_TB3_07 0x9A27
|
||||
#define BUS_DEVICE_ID_TIGERLAKE_TB3_08 0x9A29
|
||||
#define BUS_DEVICE_ID_TIGERLAKE_TB3_09 0x9A2B
|
||||
#define BUS_DEVICE_ID_TIGERLAKE_TB3_10 0x9A2D
|
||||
|
||||
//#define BUS_DEVICE_ID_TB2_FALCON_RIDGE_DSL5520_01 0X156C // obsolete
|
||||
#define BUS_DEVICE_ID_TB2_FALCON_RIDGE_DSL5520_02 0X156D
|
||||
#define BUS_DEVICE_ID_TB2_03 0x157E
|
||||
#define BUS_DEVICE_ID_TB2_04 0x156B
|
||||
#define BUS_DEVICE_ID_TB2_05 0x1567
|
||||
#define BUS_DEVICE_ID_TB2_06 0x1569
|
||||
//#define BUS_DEVICE_ID_TB2_07 0x1548 // obsolete
|
||||
#define BUS_DEVICE_ID_TB2_08 0x151B
|
||||
#define BUS_DEVICE_ID_TB2_09 0x1549
|
||||
#define BUS_DEVICE_ID_TB2_10 0x1513
|
||||
#define PARENT_EGPU_BUS_DEVICE_25 0x9A1B
|
||||
#define PARENT_EGPU_BUS_DEVICE_24 0x9A1D
|
||||
#define PARENT_EGPU_BUS_DEVICE_23 0x9A1F
|
||||
#define PARENT_EGPU_BUS_DEVICE_22 0x9A21
|
||||
#define PARENT_EGPU_BUS_DEVICE_21 0x9A23
|
||||
#define PARENT_EGPU_BUS_DEVICE_20 0x9A25
|
||||
#define PARENT_EGPU_BUS_DEVICE_19 0x9A27
|
||||
#define PARENT_EGPU_BUS_DEVICE_18 0x9A29
|
||||
#define PARENT_EGPU_BUS_DEVICE_17 0x9A2B
|
||||
#define PARENT_EGPU_BUS_DEVICE_16 0x9A2D
|
||||
|
||||
#define PARENT_EGPU_BUS_DEVICE_15 0x7EB2
|
||||
#define PARENT_EGPU_BUS_DEVICE_14 0x7EC2
|
||||
#define PARENT_EGPU_BUS_DEVICE_13 0x7EC3
|
||||
#define PARENT_EGPU_BUS_DEVICE_12 0x7EB4
|
||||
#define PARENT_EGPU_BUS_DEVICE_11 0x7EC4
|
||||
#define PARENT_EGPU_BUS_DEVICE_10 0x7EB5
|
||||
#define PARENT_EGPU_BUS_DEVICE_09 0x7EC5
|
||||
#define PARENT_EGPU_BUS_DEVICE_08 0x7EC6
|
||||
#define PARENT_EGPU_BUS_DEVICE_07 0x7EC7
|
||||
|
||||
#define PARENT_EGPU_BUS_DEVICE_06 0xA73E
|
||||
#define PARENT_EGPU_BUS_DEVICE_05 0xA76D
|
||||
#define PARENT_EGPU_BUS_DEVICE_04 0x466E
|
||||
#define PARENT_EGPU_BUS_DEVICE_03 0x463F
|
||||
#define PARENT_EGPU_BUS_DEVICE_02 0x462F
|
||||
#define PARENT_EGPU_BUS_DEVICE_01 0x461F
|
||||
|
||||
#define PARENT_EGPU_BUS_DEVICE_02_08 0X156D
|
||||
#define PARENT_EGPU_BUS_DEVICE_02_07 0x157E
|
||||
#define PARENT_EGPU_BUS_DEVICE_02_06 0x156B
|
||||
#define PARENT_EGPU_BUS_DEVICE_02_05 0x1567
|
||||
#define PARENT_EGPU_BUS_DEVICE_02_04 0x1569
|
||||
#define PARENT_EGPU_BUS_DEVICE_02_03 0x151B
|
||||
#define PARENT_EGPU_BUS_DEVICE_02_02 0x1549
|
||||
#define PARENT_EGPU_BUS_DEVICE_02_01 0x1513
|
||||
|
||||
//*****************************************************************************
|
||||
// Function: isTB3DeviceID
|
||||
@ -103,33 +118,51 @@ extern "C" {
|
||||
EGPU_INLINE NvBool isTB3DeviceID(NvU16 deviceID)
|
||||
{
|
||||
NvU32 index;
|
||||
NvU16 tb3DeviceIDList[]={ BUS_DEVICE_ID_TB3_ALPINE_RIDGE_01,
|
||||
BUS_DEVICE_ID_TB3_02,
|
||||
BUS_DEVICE_ID_TB3_03,
|
||||
BUS_DEVICE_ID_TB3_04,
|
||||
BUS_DEVICE_ID_TB3_05,
|
||||
BUS_DEVICE_ID_TB3_06,
|
||||
BUS_DEVICE_ID_TB3_07,
|
||||
BUS_DEVICE_ID_TB3_08,
|
||||
BUS_DEVICE_ID_TB3_09,
|
||||
BUS_DEVICE_ID_TB3_10,
|
||||
BUS_DEVICE_ID_ICELAKE_TB3_01,
|
||||
BUS_DEVICE_ID_ICELAKE_TB3_02,
|
||||
BUS_DEVICE_ID_ICELAKE_TB3_03,
|
||||
BUS_DEVICE_ID_ICELAKE_TB3_04,
|
||||
BUS_DEVICE_ID_ICELAKE_TB3_05,
|
||||
BUS_DEVICE_ID_ICELAKE_TB3_06,
|
||||
BUS_DEVICE_ID_TIGERLAKE_TB3_01,
|
||||
BUS_DEVICE_ID_TIGERLAKE_TB3_02,
|
||||
BUS_DEVICE_ID_TIGERLAKE_TB3_03,
|
||||
BUS_DEVICE_ID_TIGERLAKE_TB3_04,
|
||||
BUS_DEVICE_ID_TIGERLAKE_TB3_05,
|
||||
BUS_DEVICE_ID_TIGERLAKE_TB3_06,
|
||||
BUS_DEVICE_ID_TIGERLAKE_TB3_07,
|
||||
BUS_DEVICE_ID_TIGERLAKE_TB3_08,
|
||||
BUS_DEVICE_ID_TIGERLAKE_TB3_09,
|
||||
BUS_DEVICE_ID_TIGERLAKE_TB3_10
|
||||
};
|
||||
NvU16 tb3DeviceIDList[]={ PARENT_EGPU_BUS_DEVICE_01,
|
||||
PARENT_EGPU_BUS_DEVICE_02,
|
||||
PARENT_EGPU_BUS_DEVICE_03,
|
||||
PARENT_EGPU_BUS_DEVICE_04,
|
||||
PARENT_EGPU_BUS_DEVICE_05,
|
||||
PARENT_EGPU_BUS_DEVICE_06,
|
||||
PARENT_EGPU_BUS_DEVICE_07,
|
||||
PARENT_EGPU_BUS_DEVICE_08,
|
||||
PARENT_EGPU_BUS_DEVICE_09,
|
||||
PARENT_EGPU_BUS_DEVICE_10,
|
||||
PARENT_EGPU_BUS_DEVICE_11,
|
||||
PARENT_EGPU_BUS_DEVICE_12,
|
||||
PARENT_EGPU_BUS_DEVICE_13,
|
||||
PARENT_EGPU_BUS_DEVICE_14,
|
||||
PARENT_EGPU_BUS_DEVICE_15,
|
||||
PARENT_EGPU_BUS_DEVICE_16,
|
||||
PARENT_EGPU_BUS_DEVICE_17,
|
||||
PARENT_EGPU_BUS_DEVICE_18,
|
||||
PARENT_EGPU_BUS_DEVICE_19,
|
||||
PARENT_EGPU_BUS_DEVICE_20,
|
||||
PARENT_EGPU_BUS_DEVICE_21,
|
||||
PARENT_EGPU_BUS_DEVICE_22,
|
||||
PARENT_EGPU_BUS_DEVICE_23,
|
||||
PARENT_EGPU_BUS_DEVICE_24,
|
||||
PARENT_EGPU_BUS_DEVICE_25,
|
||||
PARENT_EGPU_BUS_DEVICE_26,
|
||||
PARENT_EGPU_BUS_DEVICE_27,
|
||||
PARENT_EGPU_BUS_DEVICE_28,
|
||||
PARENT_EGPU_BUS_DEVICE_29,
|
||||
PARENT_EGPU_BUS_DEVICE_30,
|
||||
PARENT_EGPU_BUS_DEVICE_31,
|
||||
PARENT_EGPU_BUS_DEVICE_32,
|
||||
PARENT_EGPU_BUS_DEVICE_33,
|
||||
PARENT_EGPU_BUS_DEVICE_34,
|
||||
PARENT_EGPU_BUS_DEVICE_35,
|
||||
PARENT_EGPU_BUS_DEVICE_36,
|
||||
PARENT_EGPU_BUS_DEVICE_37,
|
||||
PARENT_EGPU_BUS_DEVICE_38,
|
||||
PARENT_EGPU_BUS_DEVICE_39,
|
||||
PARENT_EGPU_BUS_DEVICE_40,
|
||||
PARENT_EGPU_BUS_DEVICE_41,
|
||||
PARENT_EGPU_BUS_DEVICE_42,
|
||||
PARENT_EGPU_BUS_DEVICE_43
|
||||
};
|
||||
|
||||
for (index = 0; index < (sizeof(tb3DeviceIDList)/sizeof(NvU16)); index++)
|
||||
{
|
||||
if (deviceID == tb3DeviceIDList[index])
|
||||
@ -161,11 +194,14 @@ EGPU_INLINE NvBool isTB3DeviceID(NvU16 deviceID)
|
||||
EGPU_INLINE NvBool isTB2DeviceID(NvU16 deviceID)
|
||||
{
|
||||
NvU32 index;
|
||||
NvU16 tb2DeviceIDList[]={ BUS_DEVICE_ID_TB2_FALCON_RIDGE_DSL5520_02,
|
||||
BUS_DEVICE_ID_TB2_03, BUS_DEVICE_ID_TB2_04,
|
||||
BUS_DEVICE_ID_TB2_05, BUS_DEVICE_ID_TB2_06,
|
||||
BUS_DEVICE_ID_TB2_08, BUS_DEVICE_ID_TB2_09,
|
||||
BUS_DEVICE_ID_TB2_10
|
||||
NvU16 tb2DeviceIDList[]={ PARENT_EGPU_BUS_DEVICE_02_01,
|
||||
PARENT_EGPU_BUS_DEVICE_02_02,
|
||||
PARENT_EGPU_BUS_DEVICE_02_03,
|
||||
PARENT_EGPU_BUS_DEVICE_02_04,
|
||||
PARENT_EGPU_BUS_DEVICE_02_05,
|
||||
PARENT_EGPU_BUS_DEVICE_02_06,
|
||||
PARENT_EGPU_BUS_DEVICE_02_07,
|
||||
PARENT_EGPU_BUS_DEVICE_02_08
|
||||
};
|
||||
for (index = 0; index < (sizeof(tb2DeviceIDList)/sizeof(NvU16)); index++)
|
||||
{
|
||||
|
src/common/inc/swref/published/ampere/ga100/hwproject.h (new file, 28 lines)
@ -0,0 +1,28 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#ifndef __ga100_hwproject_h__
|
||||
#define __ga100_hwproject_h__
|
||||
|
||||
#define NV_SCAL_LITTER_NUM_FBPAS 24
|
||||
|
||||
#endif // __ga100_hwproject_h__
|
@ -20,7 +20,7 @@
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
|
||||
#ifndef __gh100_dev_fb_h_
|
||||
#define __gh100_dev_fb_h_
|
||||
#define NV_PFB_NISO_FLUSH_SYSMEM_ADDR_SHIFT 8 /* */
|
||||
@ -29,4 +29,5 @@
|
||||
#define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI 0x00100A38 /* RW-4R */
|
||||
#define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI_ADR 31:0 /* RWIVF */
|
||||
#define NV_PFB_FBHUB_PCIE_FLUSH_SYSMEM_ADDR_HI_ADR_MASK 0x000FFFFF /* ----V */
|
||||
|
||||
#endif // __gh100_dev_fb_h_
|
||||
|
src/common/inc/swref/published/hopper/gh100/dev_fbpa.h (new file, 29 lines)
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __gh100_dev_fbpa_h_
|
||||
#define __gh100_dev_fbpa_h_
|
||||
|
||||
#define NV_PFB_FBPA_0_ECC_DED_COUNT__SIZE_1 4 /* */
|
||||
#define NV_PFB_FBPA_0_ECC_DED_COUNT(i) (0x009025A0+(i)*4) /* RW-4A */
|
||||
#endif // __gh100_dev_fbpa_h_
|
@ -31,4 +31,22 @@
|
||||
#define NV_PGSP_FALCON_ENGINE_RESET_STATUS_ASSERTED 0x00000000 /* R-E-V */
|
||||
#define NV_PGSP_FALCON_ENGINE_RESET_STATUS_DEASSERTED 0x00000002 /* R---V */
|
||||
#define NV_PGSP_MAILBOX(i) (0x110804+(i)*4) /* RW-4A */
|
||||
#define NV_PGSP_EMEMC(i) (0x110ac0+(i)*8) /* RW-4A */
|
||||
#define NV_PGSP_EMEMC__SIZE_1 8 /* */
|
||||
#define NV_PGSP_EMEMC_OFFS 7:2 /* RWIVF */
|
||||
#define NV_PGSP_EMEMC_OFFS_INIT 0x00000000 /* RWI-V */
|
||||
#define NV_PGSP_EMEMC_BLK 15:8 /* RWIVF */
|
||||
#define NV_PGSP_EMEMC_BLK_INIT 0x00000000 /* RWI-V */
|
||||
#define NV_PGSP_EMEMC_AINCW 24:24 /* RWIVF */
|
||||
#define NV_PGSP_EMEMC_AINCW_INIT 0x00000000 /* RWI-V */
|
||||
#define NV_PGSP_EMEMC_AINCW_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_PGSP_EMEMC_AINCW_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_PGSP_EMEMC_AINCR 25:25 /* RWIVF */
|
||||
#define NV_PGSP_EMEMC_AINCR_INIT 0x00000000 /* RWI-V */
|
||||
#define NV_PGSP_EMEMC_AINCR_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_PGSP_EMEMC_AINCR_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_PGSP_EMEMD(i) (0x110ac4+(i)*8) /* RW-4A */
|
||||
#define NV_PGSP_EMEMD__SIZE_1 8 /* */
|
||||
#define NV_PGSP_EMEMD_DATA 31:0 /* RWXVF */
|
||||
|
||||
#endif // __gh100_dev_gsp_h__
|
||||
|
src/common/inc/swref/published/hopper/gh100/dev_nv_xpl.h (new file, 52 lines)
@ -0,0 +1,52 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __gh100_dev_nv_xpl_h_
|
||||
#define __gh100_dev_nv_xpl_h_
|
||||
#define NV_XPL_DL_ERR_COUNT_RBUF 0x00000a54 /* R--4R */
|
||||
#define NV_XPL_DL_ERR_COUNT_RBUF__PRIV_LEVEL_MASK 0x00000b08 /* */
|
||||
#define NV_XPL_DL_ERR_COUNT_RBUF_CORR_ERR 15:0 /* R-EVF */
|
||||
#define NV_XPL_DL_ERR_COUNT_RBUF_CORR_ERR_INIT 0x0000 /* R-E-V */
|
||||
#define NV_XPL_DL_ERR_COUNT_RBUF_UNCORR_ERR 31:16 /* R-EVF */
|
||||
#define NV_XPL_DL_ERR_COUNT_RBUF_UNCORR_ERR_INIT 0x0000 /* R-E-V */
|
||||
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT 0x00000a58 /* R--4R */
|
||||
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT__PRIV_LEVEL_MASK 0x00000b08 /* */
|
||||
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_CORR_ERR 15:0 /* R-EVF */
|
||||
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_CORR_ERR_INIT 0x0000 /* R-E-V */
|
||||
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_UNCORR_ERR 31:16 /* R-EVF */
|
||||
#define NV_XPL_DL_ERR_COUNT_SEQ_LUT_UNCORR_ERR_INIT 0x0000 /* R-E-V */
|
||||
|
||||
#define NV_XPL_DL_ERR_RESET 0x00000a5c /* RW-4R */
|
||||
#define NV_XPL_DL_ERR_RESET_RBUF_CORR_ERR_COUNT 0:0 /* RWCVF */
|
||||
#define NV_XPL_DL_ERR_RESET_RBUF_CORR_ERR_COUNT_DONE 0x0 /* RWC-V */
|
||||
#define NV_XPL_DL_ERR_RESET_RBUF_CORR_ERR_COUNT_PENDING 0x1 /* -W--T */
|
||||
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_CORR_ERR_COUNT 1:1 /* RWCVF */
|
||||
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_CORR_ERR_COUNT_DONE 0x0 /* RWC-V */
|
||||
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_CORR_ERR_COUNT_PENDING 0x1 /* -W--T */
|
||||
#define NV_XPL_DL_ERR_RESET_RBUF_UNCORR_ERR_COUNT 16:16 /* RWCVF */
|
||||
#define NV_XPL_DL_ERR_RESET_RBUF_UNCORR_ERR_COUNT_DONE 0x0 /* RWC-V */
|
||||
#define NV_XPL_DL_ERR_RESET_RBUF_UNCORR_ERR_COUNT_PENDING 0x1 /* -W--T */
|
||||
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_UNCORR_ERR_COUNT 17:17 /* RWCVF */
|
||||
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_UNCORR_ERR_COUNT_DONE 0x0 /* RWC-V */
|
||||
#define NV_XPL_DL_ERR_RESET_SEQ_LUT_UNCORR_ERR_COUNT_PENDING 0x1 /* -W--T */
|
||||
#endif // __gh100_dev_nv_xpl_h__
|
@ -24,4 +24,7 @@
|
||||
#ifndef __gh100_dev_xtl_ep_pri_h__
|
||||
#define __gh100_dev_xtl_ep_pri_h__
|
||||
#define NV_EP_PCFGM 0x92FFF:0x92000 /* RW--D */
|
||||
|
||||
#define NV_XTL_EP_PRI_DED_ERROR_STATUS 0x0000043C /* RW-4R */
|
||||
#define NV_XTL_EP_PRI_RAM_ERROR_INTR_STATUS 0x000003C8 /* RW-4R */
|
||||
#endif // __gh100_dev_xtl_ep_pri_h__
|
||||
|
@ -21,3 +21,6 @@
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
#define NV_CHIP_EXTENDED_SYSTEM_PHYSICAL_ADDRESS_BITS 52
|
||||
#define NV_XPL_BASE_ADDRESS 540672
|
||||
#define NV_XTL_BASE_ADDRESS 593920
|
||||
#define NV_FBPA_PRI_STRIDE 16384
|
||||
|
@ -47,5 +47,17 @@
|
||||
#define NV_XAL_EP_INTR_0_PRI_RSP_TIMEOUT 3:3
|
||||
#define NV_XAL_EP_INTR_0_PRI_RSP_TIMEOUT_PENDING 0x1
|
||||
#define NV_XAL_EP_SCPM_PRI_DUMMY_DATA_PATTERN_INIT 0xbadf0200
|
||||
|
||||
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT 0x0010f364 /* RW-4R */
|
||||
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWIUF */
|
||||
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0x0000 /* RWI-V */
|
||||
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWIUF */
|
||||
#define NV_XAL_EP_REORDER_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0x0000 /* RWI-V */
|
||||
|
||||
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT 0x0010f37c /* RW-4R */
|
||||
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWIUF */
|
||||
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0x0000 /* RWI-V */
|
||||
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWIUF */
|
||||
#define NV_XAL_EP_P2PREQ_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0x0000 /* RWI-V */
|
||||
#endif // __gh100_pri_nv_xal_ep_h__
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2003-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -635,4 +635,7 @@
|
||||
#define NV_NVLIPT_LNK_CTRL_CAP_LOCAL_LINK_CHANNEL_ALI_SUPPORT 28:28 /* RWIVF */
|
||||
#define NV_NVLIPT_LNK_CTRL_CAP_LOCAL_LINK_CHANNEL_ALI_SUPPORT_SUPPORTED 0x00000001 /* RWI-V */
|
||||
#define NV_NVLIPT_LNK_CTRL_CAP_LOCAL_LINK_CHANNEL_ALI_SUPPORT_NOT_SUPPORTED 0x00000000 /* RW--V */
|
||||
#define NV_NVLIPT_LNK_SCRATCH_WARM 0x000007c0 /* RW-4R */
|
||||
#define NV_NVLIPT_LNK_SCRATCH_WARM_DATA 31:0 /* RWEVF */
|
||||
#define NV_NVLIPT_LNK_SCRATCH_WARM_DATA_INIT 0xdeadbaad /* RWE-V */
|
||||
#endif // __ls10_dev_nvlipt_lnk_ip_h__
|
||||
|
@ -0,0 +1,28 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2003-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the Software),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED AS IS, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __ls10_ptop_discovery_ip_h__
|
||||
#define __ls10_ptop_discovery_ip_h__
|
||||
/* This file is autogenerated. Do not edit */
|
||||
#define NV_PTOP_UNICAST_SW_DEVICE_BASE_SAW_0 0x00028000 /* */
|
||||
#endif // __ls10_ptop_discovery_ip_h__
|
@ -38,4 +38,25 @@
|
||||
#define NV_PFB_PRI_MMU_WPR2_ADDR_HI_VAL 31:4 /* RWEVF */
|
||||
#define NV_PFB_PRI_MMU_WPR2_ADDR_HI_ALIGNMENT 0x0000000c /* */
|
||||
|
||||
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E78 /* RW-4R */
|
||||
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E78 /* RW-4R */
|
||||
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWEVF */
|
||||
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0 /* RWE-V */
|
||||
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWEVF */
|
||||
#define NV_PFB_PRI_MMU_L2TLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0 /* RWE-V */
|
||||
|
||||
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E8C /* RW-4R */
|
||||
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT 0x00100E8C /* RW-4R */
|
||||
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWEVF */
|
||||
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0 /* RWE-V */
|
||||
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWEVF */
|
||||
#define NV_PFB_PRI_MMU_HUBTLB_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0 /* RWE-V */
|
||||
|
||||
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT 0x00100EA0 /* RW-4R */
|
||||
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT 0x00100EA0 /* RW-4R */
|
||||
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWEVF */
|
||||
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0 /* RWE-V */
|
||||
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWEVF */
|
||||
#define NV_PFB_PRI_MMU_FILLUNIT_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0 /* RWE-V */
|
||||
|
||||
#endif // __tu102_dev_fb_h__
|
||||
|
src/common/inc/swref/published/turing/tu102/dev_fbpa.h (new file, 29 lines)
@ -0,0 +1,29 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __tu102_dev_fbpa_h_
|
||||
#define __tu102_dev_fbpa_h_
|
||||
|
||||
#define NV_PFB_FBPA_0_ECC_DED_COUNT__SIZE_1 2 /* */
|
||||
#define NV_PFB_FBPA_0_ECC_DED_COUNT(i) (0x00900488+(i)*4) /* RW-4A */
|
||||
#endif // __tu102_dev_fbpa_h_
|
@ -24,6 +24,7 @@
|
||||
#ifndef __tu102_dev_gc6_island_h__
|
||||
#define __tu102_dev_gc6_island_h__
|
||||
|
||||
#define NV_PGC6 0x118fff:0x118000 /* RW--D */
|
||||
#define NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK 0x00118128 /* RW-4R */
|
||||
#define NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK_READ_PROTECTION 3:0 /* RWIVF */
|
||||
#define NV_PGC6_AON_SECURE_SCRATCH_GROUP_05_PRIV_LEVEL_MASK_READ_PROTECTION_LEVEL0 0:0 /* */
|
||||
|
@ -38,5 +38,22 @@
|
||||
#define NV_PGSP_QUEUE_HEAD(i) (0x110c00+(i)*8) /* RW-4A */
|
||||
#define NV_PGSP_QUEUE_HEAD__SIZE_1 8 /* */
|
||||
#define NV_PGSP_QUEUE_HEAD_ADDRESS 31:0 /* RWIVF */
|
||||
#define NV_PGSP_EMEMC(i) (0x110ac0+(i)*8) /* RW-4A */
|
||||
#define NV_PGSP_EMEMC__SIZE_1 4 /* */
|
||||
#define NV_PGSP_EMEMC_OFFS 7:2 /* RWIVF */
|
||||
#define NV_PGSP_EMEMC_OFFS_INIT 0x00000000 /* RWI-V */
|
||||
#define NV_PGSP_EMEMC_BLK 15:8 /* RWIVF */
|
||||
#define NV_PGSP_EMEMC_BLK_INIT 0x00000000 /* RWI-V */
|
||||
#define NV_PGSP_EMEMC_AINCW 24:24 /* RWIVF */
|
||||
#define NV_PGSP_EMEMC_AINCW_INIT 0x00000000 /* RWI-V */
|
||||
#define NV_PGSP_EMEMC_AINCW_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_PGSP_EMEMC_AINCW_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_PGSP_EMEMC_AINCR 25:25 /* RWIVF */
|
||||
#define NV_PGSP_EMEMC_AINCR_INIT 0x00000000 /* RWI-V */
|
||||
#define NV_PGSP_EMEMC_AINCR_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_PGSP_EMEMC_AINCR_FALSE 0x00000000 /* RW--V */
|
||||
#define NV_PGSP_EMEMD(i) (0x110ac4+(i)*8) /* RW-4A */
|
||||
#define NV_PGSP_EMEMD__SIZE_1 4 /* */
|
||||
#define NV_PGSP_EMEMD_DATA 31:0 /* RW-VF */
|
||||
|
||||
#endif // __tu102_dev_gsp_h__
|
||||
|
src/common/inc/swref/published/turing/tu102/dev_ltc.h (new file, 33 lines)
@ -0,0 +1,33 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
* copy of this software and associated documentation files (the "Software"),
|
||||
* to deal in the Software without restriction, including without limitation
|
||||
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
||||
* and/or sell copies of the Software, and to permit persons to whom the
|
||||
* Software is furnished to do so, subject to the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be included in
|
||||
* all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
* DEALINGS IN THE SOFTWARE.
|
||||
*/
|
||||
|
||||
#ifndef __tu102_dev_ltc_h_
|
||||
#define __tu102_dev_ltc_h_
|
||||
|
||||
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT 0x001404f8 /* RW-4R */
|
||||
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_TOTAL 15:0 /* RWIVF */
|
||||
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_TOTAL_INIT 0x0000 /* RWI-V */
|
||||
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_UNIQUE 31:16 /* RWIVF */
|
||||
#define NV_PLTCG_LTC0_LTS0_L2_CACHE_ECC_UNCORRECTED_ERR_COUNT_UNIQUE_INIT 0x0000 /* RWI-V */
|
||||
|
||||
#endif // __tu102_dev_ltc_h_
|
@ -28,6 +28,10 @@
|
||||
#define NV_XVE_MSIX_CAP_HDR_ENABLE 31:31 /* RWIVF */
|
||||
#define NV_XVE_MSIX_CAP_HDR_ENABLE_ENABLED 0x00000001 /* RW--V */
|
||||
#define NV_XVE_MSIX_CAP_HDR_ENABLE_DISABLED 0x00000000 /* RWI-V */
|
||||
#define NV_XVE_PRIV_MISC_1 0x0000041C /* RW-4R */
|
||||
#define NV_XVE_PRIV_MISC_1_CYA_HIDE_MSIX_CAP 29:29 /* RWCVF */
|
||||
#define NV_XVE_PRIV_MISC_1_CYA_HIDE_MSIX_CAP_TRUE 0x00000001 /* RW--V */
|
||||
#define NV_XVE_PRIV_MISC_1_CYA_HIDE_MSIX_CAP_FALSE 0x00000000 /* RWC-V */
|
||||
#define NV_XVE_SRIOV_CAP_HDR3 0x00000BD8 /* R--4R */
|
||||
#define NV_XVE_SRIOV_CAP_HDR3_TOTAL_VFS 31:16 /* R-EVF */
|
||||
#define NV_XVE_SRIOV_CAP_HDR5 0x00000BE0 /* R--4R */
|
||||
|
@ -25,5 +25,9 @@
|
||||
#define __tu102_hwproject_h__
|
||||
|
||||
#define NV_CHIP_EXTENDED_SYSTEM_PHYSICAL_ADDRESS_BITS 47
|
||||
#define NV_SCAL_LITTER_NUM_FBPAS 16
|
||||
#define NV_FBPA_PRI_STRIDE 16384
|
||||
#define NV_LTC_PRI_STRIDE 8192
|
||||
#define NV_LTS_PRI_STRIDE 512
|
||||
|
||||
#endif // __tu102_hwproject_h__
|
||||
|
@ -439,6 +439,11 @@ NvlStatus nvlink_lib_register_link(nvlink_device *dev, nvlink_link *link);
|
||||
*/
|
||||
NvlStatus nvlink_lib_unregister_link(nvlink_link *link);
|
||||
|
||||
/*
|
||||
* Gets number of devices with type deviceType
|
||||
*/
|
||||
NvlStatus nvlink_lib_return_device_count_by_type(NvU32 deviceType, NvU32 *numDevices);
|
||||
|
||||
|
||||
/************************************************************************************************/
|
||||
/******************************* NVLink link management functions *******************************/
|
||||
|
@ -46,6 +46,11 @@ NvlStatus nvlink_lib_unload(void);
|
||||
*/
|
||||
NvlStatus nvlink_lib_ioctl_ctrl(nvlink_ioctrl_params *ctrl_params);
|
||||
|
||||
/*
|
||||
* Gets number of devices with type deviceType
|
||||
*/
|
||||
NvlStatus nvlink_lib_return_device_count_by_type(NvU32 deviceType, NvU32 *numDevices);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -198,3 +198,48 @@ nvlink_lib_is_registerd_device_with_reduced_config(void)
|
||||
|
||||
return bIsReducedConfg;
|
||||
}
|
||||
|
||||
/*
|
||||
* Get the number of devices that have the device type deviceType
|
||||
*/
|
||||
NvlStatus
|
||||
nvlink_lib_return_device_count_by_type
|
||||
(
|
||||
NvU32 deviceType,
|
||||
NvU32 *numDevices
|
||||
)
|
||||
{
|
||||
NvlStatus lock_status = NVL_SUCCESS;
|
||||
nvlink_device *dev = NULL;
|
||||
NvU32 device_count = 0;
|
||||
|
||||
if (nvlink_lib_is_initialized())
|
||||
{
|
||||
// Acquire top-level lock
|
||||
lock_status = nvlink_lib_top_lock_acquire();
|
||||
if (lock_status != NVL_SUCCESS)
|
||||
{
|
||||
NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
|
||||
"%s: Failed to acquire top-level lock\n",
|
||||
__FUNCTION__));
|
||||
|
||||
return lock_status;
|
||||
}
|
||||
|
||||
// Top-level lock is now acquired
|
||||
|
||||
// Loop through device list
|
||||
FOR_EACH_DEVICE_REGISTERED(dev, nvlinkLibCtx.nv_devicelist_head, node)
|
||||
{
|
||||
if (dev->type == deviceType)
|
||||
{
|
||||
device_count++;
|
||||
}
|
||||
}
|
||||
|
||||
// Release top-level lock
|
||||
nvlink_lib_top_lock_release();
|
||||
}
|
||||
*numDevices = device_count;
|
||||
return NVL_SUCCESS;
|
||||
}
|
||||
|
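
nvlink_lib_return_device_count_by_type() above takes the top-level lock, walks the registered-device list, counts entries whose type matches, releases the lock, and reports the count through an out-parameter. A minimal userspace sketch of that pattern, with a pthread mutex and a hand-rolled list standing in for the library's top-level lock and FOR_EACH_DEVICE_REGISTERED():

#include <pthread.h>
#include <stdio.h>

struct device { int type; struct device *next; };

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static int count_devices_by_type(struct device *head, int type, unsigned *out)
{
    unsigned count = 0;

    pthread_mutex_lock(&list_lock);
    for (struct device *d = head; d != NULL; d = d->next)
    {
        if (d->type == type)
            count++;
    }
    pthread_mutex_unlock(&list_lock);

    *out = count;
    return 0;
}

int main(void)
{
    struct device c = {2, NULL}, b = {1, &c}, a = {2, &b};
    unsigned n = 0;

    count_devices_by_type(&a, 2, &n);
    printf("devices of type 2: %u\n", n);
    return 0;
}
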
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -37,6 +37,17 @@ enum
|
||||
RM_SOE_IFR_BBX_SHUTDOWN,
|
||||
RM_SOE_IFR_BBX_SXID_ADD,
|
||||
RM_SOE_IFR_BBX_SXID_GET,
|
||||
RM_SOE_IFR_BBX_DATA_GET,
|
||||
};
|
||||
|
||||
enum
|
||||
{
|
||||
RM_SOE_IFR_BBX_GET_NONE,
|
||||
RM_SOE_IFR_BBX_GET_SXID,
|
||||
RM_SOE_IFR_BBX_GET_SYS_INFO,
|
||||
RM_SOE_IFR_BBX_GET_TIME_INFO,
|
||||
RM_SOE_IFR_BBX_GET_TEMP_DATA,
|
||||
RM_SOE_IFR_BBX_GET_TEMP_SAMPLES,
|
||||
};
|
||||
|
||||
typedef struct
|
||||
@ -75,6 +86,14 @@ typedef struct
|
||||
RM_FLCN_U64 dmaHandle;
|
||||
} RM_SOE_IFR_CMD_BBX_SXID_GET_PARAMS;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NvU8 cmdType;
|
||||
NvU32 sizeInBytes;
|
||||
RM_FLCN_U64 dmaHandle;
|
||||
NvU8 dataType;
|
||||
} RM_SOE_IFR_CMD_BBX_GET_DATA_PARAMS;
|
||||
|
||||
typedef union
|
||||
{
|
||||
NvU8 cmdType;
|
||||
@ -82,6 +101,7 @@ typedef union
|
||||
RM_SOE_IFR_CMD_BBX_INIT_PARAMS bbxInit;
|
||||
RM_SOE_IFR_CMD_BBX_SXID_ADD_PARAMS bbxSxidAdd;
|
||||
RM_SOE_IFR_CMD_BBX_SXID_GET_PARAMS bbxSxidGet;
|
||||
RM_SOE_IFR_CMD_BBX_GET_DATA_PARAMS bbxDataGet;
|
||||
} RM_SOE_IFR_CMD;
|
||||
|
||||
// entry of getSxid
|
||||
@ -99,4 +119,81 @@ typedef struct
|
||||
RM_SOE_BBX_SXID_ENTRY sxidLast[INFOROM_BBX_OBJ_XID_ENTRIES];
|
||||
} RM_SOE_BBX_GET_SXID_DATA;
|
||||
|
||||
// NVSwitch system version information returning with the command GET_SYS_INFO
|
||||
typedef struct
|
||||
{
|
||||
NvU32 driverLo; //Driver Version Low 32 bits
|
||||
NvU16 driverHi; //Driver Version High 16 bits
|
||||
NvU32 vbiosVersion; //VBIOS Version
|
||||
NvU8 vbiosVersionOem; //VBIOS OEM Version byte
|
||||
NvU8 osType; //OS Type (UNIX/WIN/WIN2K/WIN9x/OTHER)
|
||||
NvU32 osVersion; //OS Version (Build|MINOR|MAJOR)
|
||||
} RM_SOE_BBX_GET_SYS_INFO_DATA;
|
||||
|
||||
// NVSwitch time information returning with the command GET_TIME_INFO
|
||||
typedef struct
|
||||
{
|
||||
NvU32 timeStart; //Timestamp (EPOCH) when the driver was loaded on the GPU for the first time
|
||||
NvU32 timeEnd; //Timestamp (EPOCH) when the data was last flushed
|
||||
NvU32 timeRun; //Amount of time (in seconds) driver was loaded, and GPU has run
|
||||
NvU32 time24Hours; //Timestamp (EPOCH) of when the first 24 operational hours is hit
|
||||
NvU32 time100Hours; //Timestamp (EPOCH) of when the first 100 operational hours is hit
|
||||
} RM_SOE_BBX_GET_TIME_INFO_DATA;
|
||||
|
||||
#define RM_SOE_BBX_TEMP_DAY_ENTRIES 5
|
||||
#define RM_SOE_BBX_TEMP_WEEK_ENTRIES 5
|
||||
#define RM_SOE_BBX_TEMP_MNT_ENTRIES 5
|
||||
#define RM_SOE_BBX_TEMP_ALL_ENTRIES 5
|
||||
#define RM_SOE_BBX_TEMP_SUM_HOUR_ENTRIES 23
|
||||
#define RM_SOE_BBX_TEMP_SUM_DAY_ENTRIES 5
|
||||
#define RM_SOE_BBX_TEMP_SUM_MNT_ENTRIES 3
|
||||
#define RM_SOE_BBX_TEMP_HISTOGRAM_THLD_ENTRIES 20
|
||||
#define RM_SOE_BBX_TEMP_HISTOGRAM_TIME_ENTRIES 21
|
||||
#define RM_SOE_BBX_TEMP_HOURLY_MAX_ENTRIES 168
|
||||
#define RM_SOE_BBX_TEMP_COMPRESS_BUFFER_ENTRIES 1096
|
||||
#define RM_SOE_BBX_NUM_COMPRESSION_PERIODS 8
|
||||
|
||||
// NVSwitch Temperature Entry
|
||||
typedef struct
|
||||
{
|
||||
NvU16 value; //Temperature (SFXP 9.7 format in Celsius)
|
||||
NvU32 timestamp; //Timestamp (EPOCH) of when the entry is recorded
|
||||
} RM_SOE_BBX_TEMP_ENTRY;
|
||||
|
||||
// NVSwitch Temperature Data returning with the command GET_TEMP_DATA
|
||||
typedef struct
|
||||
{
|
||||
NvU32 tempMaxDayIdx;
|
||||
RM_SOE_BBX_TEMP_ENTRY tempMaxDay[RM_SOE_BBX_TEMP_DAY_ENTRIES];
|
||||
NvU32 tempMaxWeekIdx;
|
||||
RM_SOE_BBX_TEMP_ENTRY tempMaxWeek[RM_SOE_BBX_TEMP_WEEK_ENTRIES];
|
||||
NvU32 tempMaxMntIdx;
|
||||
RM_SOE_BBX_TEMP_ENTRY tempMaxMnt[RM_SOE_BBX_TEMP_MNT_ENTRIES];
|
||||
NvU32 tempMaxAllIdx;
|
||||
RM_SOE_BBX_TEMP_ENTRY tempMaxAll[RM_SOE_BBX_TEMP_ALL_ENTRIES];
|
||||
NvU32 tempMinDayIdx;
|
||||
RM_SOE_BBX_TEMP_ENTRY tempMinDay[RM_SOE_BBX_TEMP_DAY_ENTRIES];
|
||||
NvU32 tempMinWeekIdx;
|
||||
RM_SOE_BBX_TEMP_ENTRY tempMinWeek[RM_SOE_BBX_TEMP_WEEK_ENTRIES];
|
||||
NvU32 tempMinMntIdx;
|
||||
RM_SOE_BBX_TEMP_ENTRY tempMinMnt[RM_SOE_BBX_TEMP_MNT_ENTRIES];
|
||||
NvU32 tempMinAllIdx;
|
||||
RM_SOE_BBX_TEMP_ENTRY tempMinAll[RM_SOE_BBX_TEMP_ALL_ENTRIES];
|
||||
NvU32 tempSumDelta;
|
||||
NvU32 tempSumHour[RM_SOE_BBX_TEMP_SUM_HOUR_ENTRIES];
|
||||
NvU32 tempSumDay[RM_SOE_BBX_TEMP_SUM_DAY_ENTRIES];
|
||||
NvU32 tempSumMnt[RM_SOE_BBX_TEMP_SUM_MNT_ENTRIES];
|
||||
NvU32 tempHistogramThld[RM_SOE_BBX_TEMP_HISTOGRAM_THLD_ENTRIES];
|
||||
NvU32 tempHistogramTime[RM_SOE_BBX_TEMP_HISTOGRAM_TIME_ENTRIES];
|
||||
RM_SOE_BBX_TEMP_ENTRY tempHourlyMaxSample[RM_SOE_BBX_TEMP_HOURLY_MAX_ENTRIES];
|
||||
} RM_SOE_BBX_GET_TEMP_DATA;
|
||||
|
||||
// NVSwitch Temperature Compressed Samples returning with the command GET_TEMP_SAMPLES
|
||||
typedef struct
|
||||
{
|
||||
NvU32 compressionPeriodIdx;
|
||||
NvU32 compressionPeriod[RM_SOE_BBX_NUM_COMPRESSION_PERIODS];
|
||||
RM_SOE_BBX_TEMP_ENTRY tempCompressionBuffer[RM_SOE_BBX_TEMP_COMPRESS_BUFFER_ENTRIES];
|
||||
} RM_SOE_BBX_GET_TEMP_SAMPLES;
|
||||
|
||||
#endif // _SOEIFIFR_H_
|
||||
|
@ -830,6 +830,7 @@ typedef enum nvswitch_err_type
|
||||
NVSWITCH_ERR_HW_HOST_THERMAL_SHUTDOWN = 10006,
|
||||
NVSWITCH_ERR_HW_HOST_IO_FAILURE = 10007,
|
||||
NVSWITCH_ERR_HW_HOST_FIRMWARE_INITIALIZATION_FAILURE = 10008,
|
||||
NVSWITCH_ERR_HW_HOST_FIRMWARE_RECOVERY_MODE = 10009,
|
||||
NVSWITCH_ERR_HW_HOST_LAST,
|
||||
|
||||
|
||||
@ -2973,6 +2974,197 @@ typedef struct
|
||||
NVSWITCH_SXID_ENTRY sxidLast[NVSWITCH_SXID_ENTRIES_NUM];
|
||||
} NVSWITCH_GET_SXIDS_PARAMS;
|
||||
|
||||
/*
|
||||
* CTRL_NVSWITCH_GET_SYS_INFO
|
||||
*
|
||||
* Control to get the NVSwitch system version information from inforom cache
|
||||
*
|
||||
* Parameters:
|
||||
* driverLo [OUT]
|
||||
* The driver version low 32 bits. Example: driverLo = 54531 (Driver 545.31)
|
||||
* driverHi [OUT]
|
||||
* The driver version high 16 bits
|
||||
* vbiosVersion [OUT]
|
||||
* The vbios version number. Example: vbiosVersion=0x96104100 (release 96.10.41.00)
|
||||
* vbiosVersionOem [OUT]
|
||||
* The vbios OEM version byte.
|
||||
* osType [OUT]
|
||||
* The OS type. Example: osType=0x05 (UNIX)
|
||||
* osVersion [OUT]
|
||||
* The OS version number. [BUILD[31:16]|MINOR[15:8]|MAJOR[7:0]]
|
||||
*/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NvU32 driverLo;
|
||||
NvU16 driverHi;
|
||||
NvU32 vbiosVersion;
|
||||
NvU8 vbiosVersionOem;
|
||||
NvU8 osType;
|
||||
NvU32 osVersion;
|
||||
} NVSWITCH_GET_SYS_INFO_PARAMS;
|
||||
|
||||
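
Reading the examples in the comment above, driverLo appears to carry the driver version scaled by 100 (54531 -> 545.31), vbiosVersion packs four version bytes, and osVersion is packed as [BUILD|MINOR|MAJOR]. A hedged decoding sketch based only on those examples (the /100 split is an assumption, not documented behaviour, and the osVersion value below is fabricated):

#include <stdio.h>
#include <stdint.h>

int main(void)
{
    uint32_t driverLo     = 54531;                              /* example from the comment */
    uint32_t vbiosVersion = 0x96104100;                         /* example from the comment */
    uint32_t osVersion    = (19045u << 16) | (0u << 8) | 10u;   /* fabricated sample        */

    printf("driver  %u.%02u\n", driverLo / 100, driverLo % 100);
    printf("vbios   %02X.%02X.%02X.%02X\n",
           (vbiosVersion >> 24) & 0xFF, (vbiosVersion >> 16) & 0xFF,
           (vbiosVersion >> 8) & 0xFF,  vbiosVersion & 0xFF);
    printf("os      major %u minor %u build %u\n",
           osVersion & 0xFF, (osVersion >> 8) & 0xFF, osVersion >> 16);
    return 0;
}
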
/*
|
||||
* CTRL_NVSWITCH_GET_TIME_INFO
|
||||
*
|
||||
* Control to get the NVSwitch time information from inforom cache
|
||||
*
|
||||
* Parameters:
|
||||
* timeStart [OUT]
|
||||
* The timestamp (EPOCH) when the driver was loaded onto the NVSwitch for the 1st time
|
||||
* timeEnd [OUT]
|
||||
* The timestamp (EPOCH) when the data was last flushed
|
||||
* timeRun [OUT]
|
||||
* The amount of time (in seconds) driver was loaded/running
|
||||
* time24Hours [OUT]
|
||||
* The timestamp (EPOCH) when the first 24 operational hours is hit
|
||||
* time100Hours [OUT]
|
||||
* The timestamp (EPOCH) when the first 100 operational hours is hit
|
||||
*/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NvU32 timeStart;
|
||||
NvU32 timeEnd;
|
||||
NvU32 timeRun;
|
||||
NvU32 time24Hours;
|
||||
NvU32 time100Hours;
|
||||
} NVSWITCH_GET_TIME_INFO_PARAMS;
|
||||
|
||||
#define NVSWITCH_TEMP_DAY_ENTRIES 5
|
||||
#define NVSWITCH_TEMP_WEEK_ENTRIES 5
|
||||
#define NVSWITCH_TEMP_MNT_ENTRIES 5
|
||||
#define NVSWITCH_TEMP_ALL_ENTRIES 5
|
||||
#define NVSWITCH_TEMP_SUM_HOUR_ENTRIES 23
|
||||
#define NVSWITCH_TEMP_SUM_DAY_ENTRIES 5
|
||||
#define NVSWITCH_TEMP_SUM_MNT_ENTRIES 3
|
||||
#define NVSWITCH_TEMP_HISTOGRAM_THLD_ENTRIES 20
|
||||
#define NVSWITCH_TEMP_HISTOGRAM_TIME_ENTRIES 21
|
||||
#define NVSWITCH_TEMP_HOURLY_MAX_ENTRIES 168
|
||||
|
||||
/*
|
||||
* NVSWITCH_TEMP_ENTRY
|
||||
*
|
||||
* This structure represents the NVSwitch TEMP with its timestamp.
|
||||
*
|
||||
* value
|
||||
* This parameter specifies the NVSwitch Temperature
|
||||
* (SFXP 9.7 format in Celsius).
|
||||
*
|
||||
* timestamp
|
||||
* This parameter specifies the timestamp (EPOCH) of the entry.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
NvU16 value;
|
||||
NvU32 timestamp;
|
||||
} NVSWITCH_TEMP_ENTRY;
|
||||
|
||||
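
The SFXP 9.7 note above suggests a signed fixed-point value with 9 integer bits and 7 fraction bits, i.e. Celsius = (signed 16-bit raw) / 128. A small conversion sketch under that assumption, with fabricated sample values:

#include <stdio.h>
#include <stdint.h>

static double sfxp9_7_to_celsius(uint16_t raw)
{
    return (int16_t)raw / 128.0;   /* reinterpret as signed, scale by 2^-7 */
}

int main(void)
{
    uint16_t samples[] = { 0x1A80, 0x0040, 0xFFC0 };   /* 53.0, 0.5, -0.5 C */

    for (unsigned i = 0; i < sizeof(samples) / sizeof(samples[0]); i++)
        printf("raw 0x%04X -> %.2f C\n", samples[i],
               sfxp9_7_to_celsius(samples[i]));
    return 0;
}
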
/*
|
||||
* CTRL_NVSWITCH_GET_TEMP_DATA
|
||||
*
|
||||
* Control to get the NVSwitch device historical temperature information from inforom cache
|
||||
*
|
||||
* Parameters:
|
||||
* tempMaxDayIdx [OUT]
|
||||
* The current index to the maximum day temperature array
|
||||
* tempMaxDay[] [OUT]
|
||||
* The maximum temperature array for last NVSWITCH_TEMP_DAY_ENTRIES days
|
||||
* tempMaxWeekIdx [OUT]
|
||||
* The current index to the maximum week temperature array
|
||||
* tempMaxWeek[] [OUT]
|
||||
* The maximum temperature array for last NVSWITCH_TEMP_WEEK_ENTRIES weeks
|
||||
* tempMaxMntIdx [OUT]
|
||||
* The current index to the maximum month temperature array
|
||||
* tempMaxMnt[] [OUT]
|
||||
* The maximum temperature array for last NVSWITCH_TEMP_MNT_ENTRIES months
|
||||
* tempMaxAllIdx [OUT]
|
||||
* The current index to the maximum temperature array
|
||||
* tempMaxAll[] [OUT]
|
||||
* The maximum temperature array for the device
|
||||
* tempMinDayIdx [OUT]
|
||||
* The current index to the minimum day temperature array
|
||||
* tempMinDay[] [OUT]
|
||||
* The minimum temperature array for last NVSWITCH_TEMP_DAY_ENTRIES days
|
||||
* tempMinWeekIdx [OUT]
|
||||
* The current index to the minimum week temperature array
|
||||
* tempMinWeek[] [OUT]
|
||||
* The minimum temperature array for last NVSWITCH_TEMP_WEEK_ENTRIES weeks
|
||||
* tempMinMntIdx [OUT]
|
||||
* The current index to the minimum month temperature array
|
||||
* tempMinMnt[] [OUT]
|
||||
* The minimum temperature array for last NVSWITCH_TEMP_MNT_ENTRIES months
|
||||
* tempMinAllIdx [OUT]
|
||||
* The current index to the minimum temperature array
|
||||
* tempMinAll[] [OUT]
|
||||
* The minimum temperature array for the device
|
||||
* tempSumDelta [OUT]
|
||||
* The total sum of temperature change in 0.1C granularity
|
||||
* tempSumHour[] [OUT]
|
||||
* The moving average of temperature per hour, for last NVSWITCH_TEMP_SUM_HOUR_ENTRIES hours
|
||||
* tempSumDay[] [OUT]
|
||||
* The moving average of temperature per day, for last NVSWITCH_TEMP_SUM_DAY_ENTRIES days
|
||||
* tempSumMnt[] [OUT]
|
||||
* The moving average of temperature per month, for last NVSWITCH_TEMP_SUM_MNT_ENTRIES months
|
||||
* tempHistogramThld[] [OUT]
|
||||
* The histogram of temperature crossing various thresholds (5/10/15/.../95/100)
|
||||
* tempHistogramTime[] [OUT]
|
||||
* The histogram of time was in various temperature ranges (0..5/5..10/.../100..)
|
||||
* tempHourlyMaxSample[] [OUT]
|
||||
* The maximum hourly temperature array for the device
|
||||
*/
|
||||
|
||||
typedef struct
|
||||
{
|
||||
NvU32 tempMaxDayIdx;
|
||||
NVSWITCH_TEMP_ENTRY tempMaxDay[NVSWITCH_TEMP_DAY_ENTRIES];
|
||||
NvU32 tempMaxWeekIdx;
|
||||
NVSWITCH_TEMP_ENTRY tempMaxWeek[NVSWITCH_TEMP_WEEK_ENTRIES];
|
||||
NvU32 tempMaxMntIdx;
|
||||
NVSWITCH_TEMP_ENTRY tempMaxMnt[NVSWITCH_TEMP_MNT_ENTRIES];
|
||||
NvU32 tempMaxAllIdx;
|
||||
NVSWITCH_TEMP_ENTRY tempMaxAll[NVSWITCH_TEMP_ALL_ENTRIES];
|
||||
NvU32 tempMinDayIdx;
|
||||
NVSWITCH_TEMP_ENTRY tempMinDay[NVSWITCH_TEMP_DAY_ENTRIES];
|
||||
NvU32 tempMinWeekIdx;
|
||||
NVSWITCH_TEMP_ENTRY tempMinWeek[NVSWITCH_TEMP_WEEK_ENTRIES];
|
||||
NvU32 tempMinMntIdx;
|
||||
NVSWITCH_TEMP_ENTRY tempMinMnt[NVSWITCH_TEMP_MNT_ENTRIES];
|
||||
NvU32 tempMinAllIdx;
|
||||
NVSWITCH_TEMP_ENTRY tempMinAll[NVSWITCH_TEMP_ALL_ENTRIES];
|
||||
NvU32 tempSumDelta;
|
||||
NvU32 tempSumHour[NVSWITCH_TEMP_SUM_HOUR_ENTRIES];
|
||||
NvU32 tempSumDay[NVSWITCH_TEMP_SUM_DAY_ENTRIES];
|
||||
NvU32 tempSumMnt[NVSWITCH_TEMP_SUM_MNT_ENTRIES];
|
||||
NvU32 tempHistogramThld[NVSWITCH_TEMP_HISTOGRAM_THLD_ENTRIES];
|
||||
NvU32 tempHistogramTime[NVSWITCH_TEMP_HISTOGRAM_TIME_ENTRIES];
|
||||
NVSWITCH_TEMP_ENTRY tempHourlyMaxSample[NVSWITCH_TEMP_HOURLY_MAX_ENTRIES];
|
||||
} NVSWITCH_GET_TEMP_DATA_PARAMS;

#define NVSWITCH_TEMP_COMPRESS_BUFFER_ENTRIES 1096
#define NVSWITCH_NUM_COMPRESSION_PERIODS 8

/*
 * CTRL_NVSWITCH_GET_TEMP_SAMPLES
 *
 * Control to get the NVSwitch device temperature samples from the InfoROM cache
 *
 * Parameters:
 *    compressionPeriodIdx [OUT]
 *      The current index to the sample period array
 *    compressionPeriod[] [OUT]
 *      The sample period array (seconds)
 *    tempCompressionBuffer[] [OUT]
 *      The temperature array, sampled at the periods given in compressionPeriod[]
 */

typedef struct
{
    NvU32 compressionPeriodIdx;
    NvU32 compressionPeriod[NVSWITCH_NUM_COMPRESSION_PERIODS];
    NVSWITCH_TEMP_ENTRY tempCompressionBuffer[NVSWITCH_TEMP_COMPRESS_BUFFER_ENTRIES];
} NVSWITCH_GET_TEMP_SAMPLES_PARAMS;
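
A small illustrative helper (editorial, not from the driver): per the description above, compressionPeriodIdx indexes the sample period array, so the currently active sampling period can be read back as below. The function name is invented for the sketch.

static NvU32
example_current_sampling_period_sec(const NVSWITCH_GET_TEMP_SAMPLES_PARAMS *p)
{
    /* Bounds-check the reported index before trusting it. */
    if (p->compressionPeriodIdx >= NVSWITCH_NUM_COMPRESSION_PERIODS)
    {
        return 0;
    }

    return p->compressionPeriod[p->compressionPeriodIdx];
}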

/*
 * CTRL_NVSWITCH_GET_FOM_VALUES
 * This command gives the FOM values to MODS
@ -3848,6 +4040,10 @@ typedef struct

#define CTRL_NVSWITCH_RESERVED_11 0x55
#define CTRL_NVSWITCH_GET_BOARD_PART_NUMBER 0x56
#define CTRL_NVSWITCH_GET_POWER 0x57
#define CTRL_NVSWITCH_GET_SYS_INFO 0x58
#define CTRL_NVSWITCH_GET_TIME_INFO 0x59
#define CTRL_NVSWITCH_GET_TEMP_DATA 0x60
#define CTRL_NVSWITCH_GET_TEMP_SAMPLES 0x61

#ifdef __cplusplus
}

@ -158,6 +158,7 @@
    _op(NvlStatus, nvswitch_bbx_unload, (nvswitch_device *device), _arch) \
    _op(NvlStatus, nvswitch_bbx_load, (nvswitch_device *device, NvU64 time_ns, NvU8 osType, NvU32 osVersion), _arch) \
    _op(NvlStatus, nvswitch_bbx_get_sxid, (nvswitch_device *device, NVSWITCH_GET_SXIDS_PARAMS * params), _arch) \
    _op(NvlStatus, nvswitch_bbx_get_data, (nvswitch_device *device, NvU8 dataType, void * params), _arch) \
    _op(NvlStatus, nvswitch_smbpbi_alloc, (nvswitch_device *device), _arch) \
    _op(NvlStatus, nvswitch_smbpbi_post_init_hal, (nvswitch_device *device), _arch) \
    _op(void, nvswitch_smbpbi_destroy_hal, (nvswitch_device *device), _arch) \
@ -213,6 +214,7 @@
    _op(void, nvswitch_reset_persistent_link_hw_state, (nvswitch_device *device, NvU32 linkNumber), _arch)\
    _op(void, nvswitch_store_topology_information, (nvswitch_device *device, nvlink_link *link), _arch) \
    _op(void, nvswitch_init_lpwr_regs, (nvlink_link *link), _arch) \
    _op(void, nvswitch_program_l1_scratch_reg, (nvswitch_device *device, NvU32 linkNumber), _arch) \
    _op(NvlStatus, nvswitch_set_training_mode, (nvswitch_device *device), _arch) \
    _op(NvU32, nvswitch_get_sublink_width, (nvswitch_device *device, NvU32 linkNumber), _arch) \
    _op(NvBool, nvswitch_i2c_is_device_access_allowed, (nvswitch_device *device, NvU32 port, NvU8 addr, NvBool bIsRead), _arch) \
@ -234,6 +236,7 @@
    _op(NvlStatus, nvswitch_ctrl_therm_read_power, (nvswitch_device *device, NVSWITCH_GET_POWER_PARAMS *info), _arch) \
    _op(NvBool, nvswitch_does_link_need_termination_enabled, (nvswitch_device *device, nvlink_link *link), _arch) \
    _op(NvlStatus, nvswitch_link_termination_setup, (nvswitch_device *device, nvlink_link *link), _arch) \
    _op(NvlStatus, nvswitch_check_io_sanity, (nvswitch_device *device), _arch) \

#define NVSWITCH_HAL_FUNCTION_LIST_LS10(_op, _arch) \
    _op(NvlStatus, nvswitch_launch_ALI, (nvswitch_device *device), _arch) \
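
For readers unfamiliar with the pattern, the _op(...) entries above form an X-macro list: each entry names a return type, a HAL entry point, its argument list, and an architecture tag, and the list is expanded by substituting a caller-supplied _op. The expansion below is purely illustrative; the macro and struct names are invented for the sketch and are not taken from the driver.

/* Hypothetical expansion: one function pointer per listed HAL entry point. */
#define EXAMPLE_DECLARE_HAL_FN(_rettype, _name, _argtypes, _arch) \
    _rettype (*_name) _argtypes;

/*
 * struct example_hal_table
 * {
 *     NVSWITCH_HAL_FUNCTION_LIST_LS10(EXAMPLE_DECLARE_HAL_FN, LS10)
 * };
 */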

@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@ -184,6 +184,7 @@ NvlStatus nvswitch_inforom_bbx_add_sxid(nvswitch_device *device,
                                        NvU32 data1, NvU32 data2);
NvlStatus nvswitch_inforom_bbx_get_sxid(nvswitch_device *device,
                                        NVSWITCH_GET_SXIDS_PARAMS *params);
NvlStatus nvswitch_inforom_bbx_get_data(nvswitch_device *device, NvU8 dataType, void *params);

// InfoROM DEM APIs
NvlStatus nvswitch_inforom_dem_load(nvswitch_device *device);

@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@ -169,4 +169,12 @@ nvswitch_bbx_get_sxid_lr10
    NVSWITCH_GET_SXIDS_PARAMS * params
);

NvlStatus
nvswitch_bbx_get_data_lr10
(
    nvswitch_device *device,
    NvU8 dataType,
    void *params
);

#endif //_INFOROM_LR10_H_

@ -583,9 +583,12 @@ typedef struct
    NvBool bDisabledRemoteEndLinkMaskCached;
} lr10_device;

#define NVSWITCH_NUM_DEVICES_PER_DELTA_LR10 6

typedef struct {
    NvU32 switchPhysicalId;
    NvU64 linkMask;
    NvU64 accessLinkMask;
    NvU64 trunkLinkMask;
} lr10_links_connected_to_disabled_remote_end;
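
As a side note (editorial, not part of the change), the NvU64 fields in this structure are conventional per-link bitmasks, so membership tests are plain shift-and-mask operations; the helper below is an invented illustration of that idiom.

/* Hypothetical helper: nonzero iff linkNumber is set in trunkLinkMask. */
static NvBool
example_is_trunk_link(const lr10_links_connected_to_disabled_remote_end *entry,
                      NvU32 linkNumber)
{
    if (linkNumber >= 64)
    {
        return NV_FALSE;
    }

    return ((entry->trunkLinkMask >> linkNumber) & 0x1ULL) ? NV_TRUE : NV_FALSE;
}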

#define NVSWITCH_GET_CHIP_DEVICE_LR10(_device) \
@ -649,6 +652,7 @@ void nvswitch_setup_link_loopback_mode_lr10(nvswitch_device *device, NvU32
void nvswitch_reset_persistent_link_hw_state_lr10(nvswitch_device *device, NvU32 linkNumber);
void nvswitch_store_topology_information_lr10(nvswitch_device *device, nvlink_link *link);
void nvswitch_init_lpwr_regs_lr10(nvlink_link *link);
void nvswitch_program_l1_scratch_reg_lr10(nvswitch_device *device, NvU32 linkNumber);
NvlStatus nvswitch_set_training_mode_lr10(nvswitch_device *device);
NvBool nvswitch_i2c_is_device_access_allowed_lr10(nvswitch_device *device, NvU32 port, NvU8 addr, NvBool bIsRead);
NvU32 nvswitch_get_sublink_width_lr10(nvswitch_device *device, NvU32 linkNumber);

@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2020-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@ -154,4 +154,11 @@ nvswitch_bbx_get_sxid_ls10
    NVSWITCH_GET_SXIDS_PARAMS * params
);

NvlStatus
nvswitch_bbx_get_data_ls10
(
    nvswitch_device *device,
    NvU8 dataType,
    void *params
);
#endif //_INFOROM_LS10_H_

@ -529,10 +529,20 @@ typedef struct
{
    NvBool bLinkErrorsCallBackEnabled;
    NvBool bLinkStateCallBackEnabled;
    NvBool bResetAndDrainRetry;
    NvU64 lastRetrainTime;
    NvU64 lastLinkUpTime;
} NVLINK_LINK_ERROR_REPORTING_STATE;

typedef struct
{
    NVLINK_LINK_ERROR_INFO_ERR_MASKS fatalIntrMask;
    NVLINK_LINK_ERROR_INFO_ERR_MASKS nonFatalIntrMask;
} NVLINK_LINK_ERROR_REPORTING_DATA;

typedef struct
{
    NVLINK_LINK_ERROR_REPORTING_STATE state;
    NVLINK_LINK_ERROR_REPORTING_DATA data;
} NVLINK_LINK_ERROR_REPORTING;

typedef struct
@ -834,7 +844,6 @@ typedef const struct
#define nvswitch_setup_link_loopback_mode_ls10 nvswitch_setup_link_loopback_mode_lr10

#define nvswitch_link_lane_reversed_ls10 nvswitch_link_lane_reversed_lr10
#define nvswitch_request_tl_link_state_ls10 nvswitch_request_tl_link_state_lr10

#define nvswitch_i2c_get_port_info_ls10 nvswitch_i2c_get_port_info_lr10
#define nvswitch_i2c_set_hw_speed_mode_ls10 nvswitch_i2c_set_hw_speed_mode_lr10
@ -929,6 +938,7 @@ void nvswitch_corelib_clear_link_state_lr10(nvlink_link *link);
NvlStatus nvswitch_corelib_set_dl_link_mode_ls10(nvlink_link *link, NvU64 mode, NvU32 flags);
NvlStatus nvswitch_corelib_set_tx_mode_ls10(nvlink_link *link, NvU64 mode, NvU32 flags);
void nvswitch_init_lpwr_regs_ls10(nvlink_link *link);
void nvswitch_program_l1_scratch_reg_ls10(nvswitch_device *device, NvU32 linkNumber);

NvlStatus nvswitch_minion_service_falcon_interrupts_ls10(nvswitch_device *device, NvU32 instance);

@ -986,6 +996,7 @@ NvlStatus nvswitch_reset_and_drain_links_ls10(nvswitch_device *device, NvU64 lin
void nvswitch_service_minion_all_links_ls10(nvswitch_device *device);
NvlStatus nvswitch_ctrl_get_board_part_number_ls10(nvswitch_device *device, NVSWITCH_GET_BOARD_PART_NUMBER_VECTOR *p);
void nvswitch_create_deferred_link_state_check_task_ls10(nvswitch_device *device, NvU32 nvlipt_instance, NvU32 link);
NvlStatus nvswitch_request_tl_link_state_ls10(nvlink_link *link, NvU32 tlLinkState, NvBool bSync);

//
// SU generated functions

@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@ -46,6 +46,9 @@ typedef enum _MINION_STATUS
    MINION_ALARM_BUSY = 80,
} MINION_STATUS;

#define LINKSTATUS_RESET 0x0
#define LINKSTATUS_UNINIT 0x1
#define LINKSTATUS_LANESHUTDOWN 0x13
#define LINKSTATUS_EMERGENCY_SHUTDOWN 0x29
#define LINKSTATUS_INITPHASE1 0x24
#define LINKSTATUS_ACTIVE_PENDING 0x25
#endif // _MINION_NVLINK_DEFINES_PUBLIC_H_

@ -751,7 +751,7 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
    0x00f0b305, 0x0a09584a, 0x90014afe, 0xafb508aa, 0x010f9801, 0xb60093f0, 0xa9b50294, 0x02afb503,
    0xb2100918, 0x18a9351b, 0xb5020f98, 0x099804af, 0x05a9b503, 0xa0a000bf, 0x005b0b7e, 0xf001a6b0,
    0x9a120b9c, 0x59ab3e01, 0xfb020a00, 0xe27e1c15, 0x943d0059, 0xf001a6b0, 0xa6f00bac, 0xa29a3c01,
    0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a09f8,
    0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a02f8,
    0x12f900f8, 0x000f8c89, 0xf20a99bf, 0x380090b3, 0x000fa881, 0xf10a10bf, 0x2c0004b3, 0x000a747e,
    0x19a00109, 0x000f9889, 0x948990a0, 0xff0f0010, 0x90899fa0, 0x90a0000f, 0x000f9489, 0x587e9fa0,
    0x10a00037, 0x12f911fb, 0x000f8c89, 0xb4bd04bd, 0xb44c90a0, 0x0fac8a00, 0x0b947e00, 0x0cb4bd00,
@ -1370,7 +1370,7 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
    0xb232f900, 0xbdb2b2a1, 0x3ef00304, 0xbf00a6f0, 0x01009019, 0x93a61ab2, 0x0a090df4, 0xa6f73e03,
    0xf493a600, 0x020a091b, 0x00a6f73e, 0x00a6aa7e, 0x08f402a6, 0xfba4bddd, 0xf830f431, 0x0005dcdf,
    0xbf82f900, 0x0149feff, 0xb2289990, 0xb29fa0a3, 0x00a9b3b8, 0xb0b30084, 0x47fe7f00, 0x05a49801,
    0x54bd24bd, 0x779014bd, 0xa7613e24, 0x0c3a9800, 0x02bc94bd, 0xb279a0b0, 0xb65f7e7c, 0x0f79bf00,
    0x14bd54bd, 0x779024bd, 0xa7613e24, 0x0c3a9800, 0x02bc94bd, 0xb279a0b0, 0xb65f7e7c, 0x0f79bf00,
    0xf49fa6ff, 0x643d090b, 0x00a74f3e, 0x90015590, 0x04a60100, 0x33d908f4, 0x90070060, 0x24bc0111,
    0x03399820, 0x18f429a6, 0xbd01060b, 0xa7523e04, 0xb24bb200, 0x16fc7e1a, 0xf45aa600, 0x1190060d,
    0x06399801, 0x19a6f43d, 0x0f050cf4, 0xbd8f2001, 0xa7973ea4, 0xfe020a00, 0x99900149, 0xd99fbf28,
@ -1420,7 +1420,7 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
    0x070b943a, 0xb200804c, 0xb7797e2d, 0x0ca1b000, 0xb600adb3, 0x05291801, 0x76042f18, 0xf4f00894,
    0xe59fffff, 0xe966ff09, 0x01980bf5, 0xffffe9e4, 0x08f589a6, 0xf4bd018e, 0x18902fbc, 0x9d330999,
    0x90018200, 0xf4b301ff, 0xfc3ef207, 0x8e3c00ae, 0xf59f26f2, 0xc4016d08, 0x94f0fffd, 0x529dbcff,
    0x0df456a6, 0x9065b205, 0xe4bd10d9, 0x3db029bc, 0x3ec43da4, 0xb100ada7, 0xf5006fd6, 0xb401450c,
    0x0df456a6, 0x9065b205, 0xa43d10d9, 0x3db029bc, 0x3ee4bdc4, 0xb100ada7, 0xf5006fd6, 0xb401450c,
    0xbe3c0b10, 0xf81e3c98, 0x0bf4f926, 0xff94f017, 0xfd009939, 0x9033049f, 0x010a0600, 0x0ce9bf3c,
    0x01ee9001, 0xa601dd90, 0xce08f4e5, 0xed00c933, 0xf0293f00, 0x0bf40894, 0x00a93308, 0x94bd00d0,
    0x91b03ab2, 0x1391b014, 0x301291b0, 0x4bfe5b91, 0x5bbb9001, 0x00a6f97e, 0xadb3a0b2, 0x3400ef00,
@ -2269,8 +2269,8 @@ const NvU32 soe_ucode_data_lr10_dbg[] = {
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x979b9cb7, 0x7359186e, 0x8b211603, 0x878da8fe,
    0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x269562e0, 0x626d8a06, 0xc3df044b, 0x11ecee8e,
    0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x6073f3d9, 0x573ea3ef, 0xf0764322, 0xf8dacef7,
    0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0xe0830635, 0xb9c7326b, 0x27f96395, 0x7078f754,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,

@ -751,7 +751,7 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
    0x00f0b305, 0x0a09584a, 0x90014afe, 0xafb508aa, 0x010f9801, 0xb60093f0, 0xa9b50294, 0x02afb503,
    0xb2100918, 0x18a9351b, 0xb5020f98, 0x099804af, 0x05a9b503, 0xa0a000bf, 0x005b0b7e, 0xf001a6b0,
    0x9a120b9c, 0x59ab3e01, 0xfb020a00, 0xe27e1c15, 0x943d0059, 0xf001a6b0, 0xa6f00bac, 0xa29a3c01,
    0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a09f8,
    0x548900f8, 0x9ebf0005, 0xb5019f98, 0x9ea0019f, 0x005a267e, 0x0801a4b3, 0x00f8a43d, 0xff0a02f8,
    0x12f900f8, 0x000f8c89, 0xf20a99bf, 0x380090b3, 0x000fa881, 0xf10a10bf, 0x2c0004b3, 0x000a747e,
    0x19a00109, 0x000f9889, 0x948990a0, 0xff0f0010, 0x90899fa0, 0x90a0000f, 0x000f9489, 0x587e9fa0,
    0x10a00037, 0x12f911fb, 0x000f8c89, 0xb4bd04bd, 0xb44c90a0, 0x0fac8a00, 0x0b947e00, 0x0cb4bd00,
@ -1370,7 +1370,7 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
    0xb232f900, 0xbdb2b2a1, 0x3ef00304, 0xbf00a6f0, 0x01009019, 0x93a61ab2, 0x0a090df4, 0xa6f73e03,
    0xf493a600, 0x020a091b, 0x00a6f73e, 0x00a6aa7e, 0x08f402a6, 0xfba4bddd, 0xf830f431, 0x0005dcdf,
    0xbf82f900, 0x0149feff, 0xb2289990, 0xb29fa0a3, 0x00a9b3b8, 0xb0b30084, 0x47fe7f00, 0x05a49801,
    0x54bd24bd, 0x779014bd, 0xa7613e24, 0x0c3a9800, 0x02bc94bd, 0xb279a0b0, 0xb65f7e7c, 0x0f79bf00,
    0x14bd54bd, 0x779024bd, 0xa7613e24, 0x0c3a9800, 0x02bc94bd, 0xb279a0b0, 0xb65f7e7c, 0x0f79bf00,
    0xf49fa6ff, 0x643d090b, 0x00a74f3e, 0x90015590, 0x04a60100, 0x33d908f4, 0x90070060, 0x24bc0111,
    0x03399820, 0x18f429a6, 0xbd01060b, 0xa7523e04, 0xb24bb200, 0x16fc7e1a, 0xf45aa600, 0x1190060d,
    0x06399801, 0x19a6f43d, 0x0f050cf4, 0xbd8f2001, 0xa7973ea4, 0xfe020a00, 0x99900149, 0xd99fbf28,
@ -1420,7 +1420,7 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
    0x070b943a, 0xb200804c, 0xb7797e2d, 0x0ca1b000, 0xb600adb3, 0x05291801, 0x76042f18, 0xf4f00894,
    0xe59fffff, 0xe966ff09, 0x01980bf5, 0xffffe9e4, 0x08f589a6, 0xf4bd018e, 0x18902fbc, 0x9d330999,
    0x90018200, 0xf4b301ff, 0xfc3ef207, 0x8e3c00ae, 0xf59f26f2, 0xc4016d08, 0x94f0fffd, 0x529dbcff,
    0x0df456a6, 0x9065b205, 0xe4bd10d9, 0x3db029bc, 0x3ec43da4, 0xb100ada7, 0xf5006fd6, 0xb401450c,
    0x0df456a6, 0x9065b205, 0xa43d10d9, 0x3db029bc, 0x3ee4bdc4, 0xb100ada7, 0xf5006fd6, 0xb401450c,
    0xbe3c0b10, 0xf81e3c98, 0x0bf4f926, 0xff94f017, 0xfd009939, 0x9033049f, 0x010a0600, 0x0ce9bf3c,
    0x01ee9001, 0xa601dd90, 0xce08f4e5, 0xed00c933, 0xf0293f00, 0x0bf40894, 0x00a93308, 0x94bd00d0,
    0x91b03ab2, 0x1391b014, 0x301291b0, 0x4bfe5b91, 0x5bbb9001, 0x00a6f97e, 0xadb3a0b2, 0x3400ef00,
@ -2269,8 +2269,8 @@ const NvU32 soe_ucode_data_lr10_prd[] = {
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x979b9cb7, 0x7359186e, 0x8b211603, 0x878da8fe,
    0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0x269562e0, 0x626d8a06, 0xc3df044b, 0x11ecee8e,
    0xf0cc97fc, 0xc5e27e17, 0x63cc4ffc, 0xc48564fa, 0x6073f3d9, 0x573ea3ef, 0xf0764322, 0xf8dacef7,
    0x956b7a40, 0x90bcaaf7, 0xdea25edb, 0x9aaef423, 0xe0830635, 0xb9c7326b, 0x27f96395, 0x7078f754,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
    0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,

@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2020-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@ -130,3 +130,21 @@ nvswitch_inforom_bbx_get_sxid
    return status;
}

NvlStatus
nvswitch_inforom_bbx_get_data
(
    nvswitch_device *device,
    NvU8 dataType,
    void *params
)
{
    NvlStatus status;

    status = device->hal.nvswitch_bbx_get_data(device, dataType, params);
    if (status != NVL_SUCCESS)
    {
        NVSWITCH_PRINT(device, ERROR, "%s: (type=%d) failed, status=%d\n", __FUNCTION__, dataType, status);
    }

    return status;
}
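
A hedged caller sketch (editorial addition): the dispatcher above forwards to the per-arch HAL and logs failures, so a caller may treat -NVL_ERR_NOT_SUPPORTED, which the LR10 stub later in this change returns, as "no BBX data available" rather than a hard error. The data-type constant and function name below are placeholders, not definitions from the driver.

/* Placeholder value for illustration; the real dataType encoding is not shown in this diff. */
#define EXAMPLE_BBX_DATA_TYPE_TEMP 0

static NvlStatus
example_query_bbx_temp_data(nvswitch_device *device,
                            NVSWITCH_GET_TEMP_DATA_PARAMS *params)
{
    NvlStatus status;

    status = nvswitch_inforom_bbx_get_data(device, EXAMPLE_BBX_DATA_TYPE_TEMP, params);
    if (status == -NVL_ERR_NOT_SUPPORTED)
    {
        /* No BBX-backed temperature history on this chip; the params buffer
         * can be treated as empty by the caller. */
        status = NVL_SUCCESS;
    }

    return status;
}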

@ -1,5 +1,5 @@
/*
 * SPDX-FileCopyrightText: Copyright (c) 2019-2020 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-FileCopyrightText: Copyright (c) 2019-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@ -32,6 +32,7 @@
#include "nvVer.h"
#include "regkey_nvswitch.h"
#include "inforom/inforom_nvl_v3_nvswitch.h"
#include "soe/soeififr.h"

//
// TODO: Split individual object hals to their own respective files
@ -1280,3 +1281,14 @@ nvswitch_bbx_get_sxid_lr10
    return -NVL_ERR_NOT_SUPPORTED;
}

NvlStatus
nvswitch_bbx_get_data_lr10
(
    nvswitch_device *device,
    NvU8 dataType,
    void *params
)
{
    return -NVL_ERR_NOT_SUPPORTED;
}