Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git
550.67

Commit: 3bf16b890c
Parent: 12933b2d3c

@@ -2,6 +2,8 @@
 
 ## Release 550 Entries
 
+### [550.67] 2024-03-19
+
 ### [550.54.15] 2024-03-18
 
 ### [550.54.14] 2024-02-23

@@ -1,7 +1,7 @@
 # NVIDIA Linux Open GPU Kernel Module Source
 
 This is the source release of the NVIDIA Linux open GPU kernel modules,
-version 550.54.15.
+version 550.67.
 
 
 ## How to Build

@@ -17,7 +17,7 @@ as root:
 
 Note that the kernel modules built here must be used with GSP
 firmware and user-space NVIDIA GPU driver components from a corresponding
-550.54.15 driver release. This can be achieved by installing
+550.67 driver release. This can be achieved by installing
 the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
 option. E.g.,
 

@@ -188,7 +188,7 @@ encountered specific to them.
 For details on feature support and limitations, see the NVIDIA GPU driver
 end user README here:
 
-https://us.download.nvidia.com/XFree86/Linux-x86_64/550.54.15/README/kernel_open.html
+https://us.download.nvidia.com/XFree86/Linux-x86_64/550.67/README/kernel_open.html
 
 For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
 Package for more details.

@@ -867,6 +867,7 @@ Subsystem Device ID.
 | NVIDIA GeForce RTX 4080 SUPER | 2702 |
 | NVIDIA GeForce RTX 4080 | 2704 |
 | NVIDIA GeForce RTX 4070 Ti SUPER | 2705 |
+| NVIDIA GeForce RTX 4070 | 2709 |
 | NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
 | NVIDIA RTX 5000 Ada Generation Laptop GPU | 2730 |
 | NVIDIA GeForce RTX 4090 Laptop GPU | 2757 |

@@ -874,6 +875,7 @@ Subsystem Device ID.
 | NVIDIA GeForce RTX 4070 Ti | 2782 |
 | NVIDIA GeForce RTX 4070 SUPER | 2783 |
 | NVIDIA GeForce RTX 4070 | 2786 |
+| NVIDIA GeForce RTX 4060 Ti | 2788 |
 | NVIDIA GeForce RTX 4080 Laptop GPU | 27A0 |
 | NVIDIA RTX 4000 SFF Ada Generation | 27B0 1028 16FA |
 | NVIDIA RTX 4000 SFF Ada Generation | 27B0 103C 16FA |

@@ -896,6 +898,7 @@ Subsystem Device ID.
 | NVIDIA RTX 3500 Ada Generation Embedded GPU | 27FB |
 | NVIDIA GeForce RTX 4060 Ti | 2803 |
 | NVIDIA GeForce RTX 4060 Ti | 2805 |
+| NVIDIA GeForce RTX 4060 | 2808 |
 | NVIDIA GeForce RTX 4070 Laptop GPU | 2820 |
 | NVIDIA RTX 3000 Ada Generation Laptop GPU | 2838 |
 | NVIDIA GeForce RTX 4070 Laptop GPU | 2860 |

@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
 EXTRA_CFLAGS += -I$(src)
 EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
 EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
-EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.54.15\"
+EXTRA_CFLAGS += -DNV_VERSION_STRING=\"550.67\"
 
 ifneq ($(SYSSRCHOST1X),)
 EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

@@ -170,6 +170,8 @@ NV_CONFTEST_CMD := /bin/sh $(NV_CONFTEST_SCRIPT) \
 NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)
 
 NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
+NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
+NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)
 NV_CONFTEST_CFLAGS += -Wno-error
 
 NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2001-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2001-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a

@@ -1989,31 +1989,6 @@ static inline NvBool nv_platform_use_auto_online(nv_linux_state_t *nvl)
     return nvl->numa_info.use_auto_online;
 }
 
-typedef struct {
-    NvU64 base;
-    NvU64 size;
-    NvU32 nodeId;
-    int ret;
-} remove_numa_memory_info_t;
-
-static void offline_numa_memory_callback
-(
-    void *args
-)
-{
-#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
-    remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
-#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
-    pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->nodeId,
-                                               pNumaInfo->base,
-                                               pNumaInfo->size);
-#else
-    pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->base,
-                                               pNumaInfo->size);
-#endif
-#endif
-}
-
 typedef enum
 {
     NV_NUMA_STATUS_DISABLED = 0,

@@ -3096,6 +3096,22 @@ compile_test() {
 
         ;;
 
+        foll_longterm_present)
+            #
+            # Determine if FOLL_LONGTERM enum is present or not
+            #
+            # Added by commit 932f4a630a69 ("mm/gup: replace
+            # get_user_pages_longterm() with FOLL_LONGTERM") in
+            # v5.2
+            #
+            CODE="
+            #include <linux/mm.h>
+            int foll_longterm = FOLL_LONGTERM;
+            "
+
+            compile_check_conftest "$CODE" "NV_FOLL_LONGTERM_PRESENT" "" "types"
+        ;;
+
         vfio_pin_pages_has_vfio_device_arg)
             #
             # Determine if vfio_pin_pages() kABI accepts "struct vfio_device *"

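Editor's note (not part of the commit): the conftest check above only compiles a probe and, on success, emits a `-DNV_FOLL_LONGTERM_PRESENT` define. The fragment below is a hedged sketch of how such a define is typically consumed; the helper name `nv_gup_flags()` is hypothetical and only the `#if defined(...)` structure mirrors the conftest mechanism.

```c
/*
 * Illustrative only -- not code from this commit. If the probe above builds,
 * conftest.sh defines NV_FOLL_LONGTERM_PRESENT, and callers can request a
 * long-term page pin only on kernels that have FOLL_LONGTERM (v5.2+).
 */
#include <linux/mm.h>

static inline unsigned int nv_gup_flags(int write)
{
    unsigned int flags = write ? FOLL_WRITE : 0;

#if defined(NV_FOLL_LONGTERM_PRESENT)
    /* FOLL_LONGTERM exists on this kernel (commit 932f4a630a69). */
    flags |= FOLL_LONGTERM;
#endif

    return flags;
}
```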
@@ -5152,11 +5168,15 @@ compile_test() {
             # commit 49a3f51dfeee ("drm/gem: Use struct dma_buf_map in GEM
             # vmap ops and convert GEM backends") in v5.11.
             #
+            # Note that the 'map' argument type is changed from 'struct dma_buf_map'
+            # to 'struct iosys_map' by commit 7938f4218168 ("dma-buf-map: Rename
+            # to iosys-map) in v5.18.
+            #
             CODE="
             #include <drm/drm_gem.h>
             int conftest_drm_gem_object_vmap_has_map_arg(
-                    struct drm_gem_object *obj, struct dma_buf_map *map) {
-                return obj->funcs->vmap(obj, map);
+                    struct drm_gem_object *obj) {
+                return obj->funcs->vmap(obj, NULL);
             }"
 
             compile_check_conftest "$CODE" "NV_DRM_GEM_OBJECT_VMAP_HAS_MAP_ARG" "" "types"

@@ -1903,8 +1903,33 @@ void nv_drm_remove_devices(void)
  */
 void nv_drm_suspend_resume(NvBool suspend)
 {
+    static DEFINE_MUTEX(nv_drm_suspend_mutex);
+    static NvU32 nv_drm_suspend_count = 0;
+    struct nv_drm_device *nv_dev;
+
+    mutex_lock(&nv_drm_suspend_mutex);
+
+    /*
+     * Count the number of times the driver is asked to suspend. Suspend all DRM
+     * devices on the first suspend call and resume them on the last resume
+     * call. This is necessary because the kernel may call nvkms_suspend()
+     * simultaneously for each GPU, but NVKMS itself also suspends all GPUs on
+     * the first call.
+     */
+    if (suspend) {
+        if (nv_drm_suspend_count++ > 0) {
+            goto done;
+        }
+    } else {
+        BUG_ON(nv_drm_suspend_count == 0);
+
+        if (--nv_drm_suspend_count > 0) {
+            goto done;
+        }
+    }
+
 #if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
-    struct nv_drm_device *nv_dev = dev_list;
+    nv_dev = dev_list;
 
     /*
      * NVKMS shuts down all heads on suspend. Update DRM state accordingly.

@@ -1930,6 +1955,9 @@ void nv_drm_suspend_resume(NvBool suspend)
         }
     }
 #endif /* NV_DRM_ATOMIC_MODESET_AVAILABLE */
+
+done:
+    mutex_unlock(&nv_drm_suspend_mutex);
 }
 
 #endif /* NV_DRM_AVAILABLE */

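Editor's note (not part of the commit): the counting scheme added here, where only the first suspend request and the last resume request do real work and everything is serialized by a static mutex, is a common pattern. Below is a minimal user-space sketch of the same idea, using pthreads in place of the kernel's `DEFINE_MUTEX` and placeholder `suspend_all()` / `resume_all()` functions.

```c
/* Minimal user-space sketch of the first-suspend/last-resume counting used in
 * nv_drm_suspend_resume(); suspend_all()/resume_all() are placeholders. */
#include <assert.h>
#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t suspend_mutex = PTHREAD_MUTEX_INITIALIZER;
static unsigned int suspend_count;

static void suspend_all(void) { puts("suspending all devices"); }
static void resume_all(void)  { puts("resuming all devices"); }

void suspend_resume(int suspend)
{
    pthread_mutex_lock(&suspend_mutex);

    if (suspend) {
        /* Only the first suspend request does real work. */
        if (suspend_count++ == 0)
            suspend_all();
    } else {
        /* Only the last resume request does real work. */
        assert(suspend_count > 0);
        if (--suspend_count == 0)
            resume_all();
    }

    pthread_mutex_unlock(&suspend_mutex);
}

int main(void)
{
    suspend_resume(1);  /* suspends */
    suspend_resume(1);  /* no-op, count = 2 */
    suspend_resume(0);  /* no-op, count = 1 */
    suspend_resume(0);  /* resumes */
    return 0;
}
```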
@@ -56,7 +56,11 @@
 #include "nv-lock.h"
 #include "nv-chardev-numbers.h"
 
-#if !defined(CONFIG_RETPOLINE)
+/*
+ * Commit aefb2f2e619b ("x86/bugs: Rename CONFIG_RETPOLINE =>
+ * CONFIG_MITIGATION_RETPOLINE) in v6.8 renamed CONFIG_RETPOLINE.
+ */
+#if !defined(CONFIG_RETPOLINE) && !defined(CONFIG_MITIGATION_RETPOLINE)
 #include "nv-retpoline.h"
 #endif
 

@@ -499,8 +503,9 @@ nvkms_event_queue_changed(nvkms_per_open_handle_t *pOpenKernel,
 
 static void nvkms_suspend(NvU32 gpuId)
 {
-    if (gpuId == 0) {
-        nvKmsKapiSuspendResume(NV_TRUE /* suspend */);
+    nvKmsKapiSuspendResume(NV_TRUE /* suspend */);
+
+    if (gpuId == 0) {
         nvkms_write_lock_pm_lock();
     }
 

@@ -517,8 +522,9 @@ static void nvkms_resume(NvU32 gpuId)
 
     if (gpuId == 0) {
         nvkms_write_unlock_pm_lock();
-        nvKmsKapiSuspendResume(NV_FALSE /* suspend */);
     }
+
+    nvKmsKapiSuspendResume(NV_FALSE /* suspend */);
 }
 
 

@@ -691,13 +691,17 @@ static NV_STATUS stress_test_all_gpus_in_va(uvm_va_space_t *va_space,
         if (uvm_test_rng_range_32(&rng, 0, 1) == 0) {
             NvU32 random_stream_index = uvm_test_rng_range_32(&rng, 0, num_streams - 1);
             uvm_test_stream_t *random_stream = &streams[random_stream_index];
-            uvm_push_acquire_tracker(&stream->push, &random_stream->tracker);
-            snapshot_counter(&stream->push,
-                             random_stream->counter_mem,
-                             stream->other_stream_counter_snapshots_mem,
-                             i,
-                             random_stream->queued_counter_repeat);
+
+            if ((random_stream->push.gpu == gpu) || uvm_push_allow_dependencies_across_gpus()) {
+                uvm_push_acquire_tracker(&stream->push, &random_stream->tracker);
+
+                snapshot_counter(&stream->push,
+                                 random_stream->counter_mem,
+                                 stream->other_stream_counter_snapshots_mem,
+                                 i,
+                                 random_stream->queued_counter_repeat);
+            }
         }
 
         uvm_push_end(&stream->push);
         uvm_tracker_clear(&stream->tracker);

@@ -51,8 +51,10 @@ NV_STATUS uvm_test_fault_buffer_flush(UVM_TEST_FAULT_BUFFER_FLUSH_PARAMS *params
 
     uvm_va_space_up_read(va_space);
 
-    if (uvm_processor_mask_empty(retained_gpus))
-        return NV_ERR_INVALID_DEVICE;
+    if (uvm_processor_mask_empty(retained_gpus)) {
+        status = NV_ERR_INVALID_DEVICE;
+        goto out;
+    }
 
     for (i = 0; i < params->iterations; i++) {
         if (fatal_signal_pending(current)) {

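Editor's note (not part of the commit): the early `return` above becomes a jump to a shared exit label so that whatever cleanup follows `out:` (not shown in this hunk) also runs on the error path. A generic, self-contained sketch of that shape, with made-up resource names:

```c
/* Generic single-exit-path shape; the label and the cleanup it runs are
 * placeholders for whatever follows "out:" in the real function. */
#include <stdio.h>
#include <stdlib.h>

int process(int fail_early)
{
    int status = 0;
    char *buffer = malloc(256);

    if (!buffer)
        return -1;            /* nothing acquired yet, plain return is fine */

    if (fail_early) {
        status = -2;
        goto out;             /* a bare "return -2" here would leak buffer */
    }

    snprintf(buffer, 256, "did some work");
    puts(buffer);

out:
    free(buffer);             /* runs on both success and error paths */
    return status;
}
```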
@@ -409,4 +409,10 @@ NV_STATUS uvm_service_block_context_init(void);
 // Release fault service contexts if any exist.
 void uvm_service_block_context_exit(void);
 
+// Allocate a service block context
+uvm_service_block_context_t *uvm_service_block_context_alloc(struct mm_struct *mm);
+
+// Free a servic block context
+void uvm_service_block_context_free(uvm_service_block_context_t *service_context);
+
 #endif // __UVM_GLOBAL_H__

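Editor's note (not part of the commit): the new alloc/free pair implies the usual per-operation lifetime: allocate a context for one operation, bail out on allocation failure, and free it on every exit path. A self-contained sketch of that shape; `work_context_t`, `do_work()` and `run_operation()` are stand-ins, not UVM code.

```c
/* Stand-alone sketch of the allocate / use / free-on-all-paths lifetime that
 * uvm_service_block_context_alloc()/_free() suggest. */
#include <stdlib.h>
#include <string.h>

typedef struct {
    unsigned char scratch[4096];     /* large per-operation state */
} work_context_t;

static int do_work(work_context_t *ctx, int input)
{
    memset(ctx->scratch, 0, sizeof(ctx->scratch));
    return input < 0 ? -1 : 0;       /* pretend negative input is an error */
}

int run_operation(int input)
{
    int status;
    work_context_t *ctx = malloc(sizeof(*ctx));

    if (!ctx)
        return -1;                   /* analogous to NV_ERR_NO_MEMORY */

    status = do_work(ctx, input);

    free(ctx);                       /* freed on success and on failure */
    return status;
}
```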
@@ -160,6 +160,10 @@ struct uvm_service_block_context_struct
     // Pages whose permissions need to be revoked from other processors
     uvm_page_mask_t revocation_mask;
 
+    // Temporary mask used in service_va_block_locked() in
+    // uvm_gpu_access_counters.c.
+    uvm_processor_mask_t update_processors;
+
     struct
     {
         // Per-processor mask with the pages that will be resident after

@@ -593,16 +597,21 @@ typedef enum
     UVM_GPU_LINK_MAX
 } uvm_gpu_link_type_t;
 
-// UVM does not support P2P copies on pre-Pascal GPUs. Pascal+ GPUs only
-// support virtual addresses in P2P copies. Therefore, a peer identity mapping
-// needs to be created.
-// Ampere+ GPUs support physical peer copies, too, so identity mappings are not
-// needed
 typedef enum
 {
+    // Peer copies can be disallowed for a variety of reasons. For example,
+    // P2P transfers are disabled in pre-Pascal GPUs because there is no
+    // compelling use case for direct peer migrations.
     UVM_GPU_PEER_COPY_MODE_UNSUPPORTED,
+
+    // Pascal+ GPUs support virtual addresses in P2P copies. Virtual peer copies
+    // require the creation of peer identity mappings.
     UVM_GPU_PEER_COPY_MODE_VIRTUAL,
+
+    // Ampere+ GPUs support virtual and physical peer copies. Physical peer
+    // copies do not depend on peer identity mappings.
     UVM_GPU_PEER_COPY_MODE_PHYSICAL,
+
     UVM_GPU_PEER_COPY_MODE_COUNT
 } uvm_gpu_peer_copy_mode_t;
 

@@ -1087,12 +1087,12 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
     // pages to be serviced
     if (page_count > 0) {
         uvm_processor_id_t id;
-        uvm_processor_mask_t update_processors;
+        uvm_processor_mask_t *update_processors = &service_context->update_processors;
 
-        uvm_processor_mask_and(&update_processors, &va_block->resident, &service_context->resident_processors);
+        uvm_processor_mask_and(update_processors, &va_block->resident, &service_context->resident_processors);
 
         // Remove pages that are already resident in the destination processors
-        for_each_id_in_mask(id, &update_processors) {
+        for_each_id_in_mask(id, update_processors) {
             bool migrate_pages;
             uvm_page_mask_t *residency_mask = uvm_va_block_resident_mask_get(va_block, id, NUMA_NO_NODE);
             UVM_ASSERT(residency_mask);

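Editor's note (not part of the commit): a `uvm_processor_mask_t` is a bitmap covering every possible processor, so keeping one as a local variable consumes kernel stack in an already deep call chain; this hunk, together with the new `update_processors` field above, moves it into the preallocated service context instead. A small stand-alone illustration of the two layouts; the 512-entry size and all names here are made up for the sketch, not UVM values.

```c
/* Stand-alone illustration of moving a large bitmap out of a function's stack
 * frame and into a caller-provided, longer-lived context. */
#include <limits.h>
#include <string.h>

#define MAX_PROCESSORS 512  /* arbitrary size for the sketch */

typedef struct {
    unsigned long bits[MAX_PROCESSORS / (sizeof(unsigned long) * CHAR_BIT)];
} processor_mask_t;

typedef struct {
    processor_mask_t update_processors;  /* lives with the context, not the stack */
} service_context_t;

/* Before: the mask occupies this function's stack frame. */
void service_with_stack_mask(void)
{
    processor_mask_t update;
    memset(&update, 0, sizeof(update));
}

/* After: the mask is borrowed from the preallocated context. */
void service_with_context_mask(service_context_t *ctx)
{
    processor_mask_t *update = &ctx->update_processors;
    memset(update, 0, sizeof(*update));
}
```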
@@ -357,12 +357,18 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
 {
     NV_STATUS status;
     uvm_push_t push;
-    uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
+    uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer_info.replayable.replay_tracker;
 
+    UVM_ASSERT(tracker != NULL);
+
+    status = uvm_tracker_add_tracker_safe(tracker, replay_tracker);
+    if (status != NV_OK)
+        return status;
+
     if (global_cancel) {
         status = uvm_push_begin_acquire(gpu->channel_manager,
                                         UVM_CHANNEL_TYPE_MEMOPS,
-                                        &replayable_faults->replay_tracker,
+                                        tracker,
                                         &push,
                                         "Cancel targeting instance_ptr {0x%llx:%s}\n",
                                         instance_ptr.address,

@@ -371,7 +377,7 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
     else {
         status = uvm_push_begin_acquire(gpu->channel_manager,
                                         UVM_CHANNEL_TYPE_MEMOPS,
-                                        &replayable_faults->replay_tracker,
+                                        tracker,
                                         &push,
                                         "Cancel targeting instance_ptr {0x%llx:%s} gpc %u client %u\n",
                                         instance_ptr.address,

@@ -382,14 +388,12 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
 
     UVM_ASSERT(status == NV_OK);
     if (status != NV_OK) {
-        UVM_ERR_PRINT("Failed to create push and acquire replay tracker before pushing cancel: %s, GPU %s\n",
+        UVM_ERR_PRINT("Failed to create push and acquire trackers before pushing cancel: %s, GPU %s\n",
                       nvstatusToString(status),
                       uvm_gpu_name(gpu));
         return status;
     }
 
-    uvm_push_acquire_tracker(&push, tracker);
-
     if (global_cancel)
         gpu->parent->host_hal->cancel_faults_global(&push, instance_ptr);
     else

@@ -403,7 +407,9 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
     if (status != NV_OK)
         UVM_ERR_PRINT("Failed to wait for pushed cancel: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
 
-    uvm_tracker_clear(&replayable_faults->replay_tracker);
+    // The cancellation is complete, so the input trackers must be complete too.
+    uvm_tracker_clear(tracker);
+    uvm_tracker_clear(replay_tracker);
 
     return status;
 }

@@ -92,7 +92,7 @@ typedef struct
 {
     uvm_va_block_t *va_block;
     uvm_va_block_retry_t *va_block_retry;
-    uvm_va_block_context_t *va_block_context;
+    uvm_service_block_context_t *service_context;
     uvm_va_block_region_t region;
     uvm_processor_id_t dest_id;
     uvm_make_resident_cause_t cause;

@@ -713,7 +713,7 @@ void uvm_hmm_migrate_finish(uvm_va_block_t *va_block)
 // Migrate the given range [start end] within a va_block to dest_id.
 static NV_STATUS hmm_migrate_range(uvm_va_block_t *va_block,
                                    uvm_va_block_retry_t *va_block_retry,
-                                   uvm_va_block_context_t *va_block_context,
+                                   uvm_service_block_context_t *service_context,
                                    uvm_processor_id_t dest_id,
                                    NvU64 start,
                                    NvU64 end,

@@ -737,7 +737,7 @@ static NV_STATUS hmm_migrate_range(uvm_va_block_t *va_block,
                                    va_block_retry,
                                    uvm_va_block_migrate_locked(va_block,
                                                                va_block_retry,
-                                                               va_block_context,
+                                                               service_context,
                                                                region,
                                                                dest_id,
                                                                mode,

@@ -916,14 +916,14 @@ static NV_STATUS split_block_if_needed(uvm_va_block_t *va_block,
                                        NvU64 end,
                                        uvm_va_block_t **out_va_block)
 {
-    uvm_va_block_context_t *va_block_context;
+    uvm_service_block_context_t *service_context;
     uvm_va_space_t *va_space;
     struct mm_struct *mm;
     struct vm_area_struct *vma;
     uvm_va_block_region_t region;
     NvU64 addr, from, to;
     uvm_va_block_t *new;
-    NV_STATUS status;
+    NV_STATUS status = NV_OK;
 
     if (va_block->start < start) {
         status = hmm_split_block(va_block, start - 1, &new);

@@ -942,15 +942,18 @@ static NV_STATUS split_block_if_needed(uvm_va_block_t *va_block,
         // Keep the right part, the left part will be deleted.
     }
 
-    *out_va_block = va_block;
-
     // Migrate any GPU data to sysmem before destroying the HMM va_block.
     // We do this because the new va_range might be for a UVM external
     // allocation which could be converting an address range that was first
     // operated on by UVM-HMM and the exteral allocation should see that data.
     va_space = va_block->hmm.va_space;
     mm = va_space->va_space_mm.mm;
-    va_block_context = uvm_va_space_block_context(va_space, mm);
+
+    service_context = uvm_service_block_context_alloc(mm);
+    if (!service_context)
+        return NV_ERR_NO_MEMORY;
+
+    *out_va_block = va_block;
 
     for (addr = va_block->start; addr < va_block->end; addr = to + 1) {
         vma = find_vma_intersection(mm, addr, va_block->end);

@@ -964,21 +967,23 @@ static NV_STATUS split_block_if_needed(uvm_va_block_t *va_block,
         if (!uvm_hmm_vma_is_valid(vma, from, false))
             continue;
 
-        va_block_context->hmm.vma = vma;
+        service_context->block_context->hmm.vma = vma;
 
         status = hmm_migrate_range(va_block,
                                    NULL,
-                                   va_block_context,
+                                   service_context,
                                    UVM_ID_CPU,
                                    from,
                                    to,
                                    UVM_MIGRATE_MODE_MAKE_RESIDENT_AND_MAP,
                                    NULL);
         if (status != NV_OK)
-            return status;
+            break;
     }
 
-    return NV_OK;
+    uvm_service_block_context_free(service_context);
+
+    return status;
 }
 
 // Normally, the HMM va_block is destroyed when the va_space is destroyed

@@ -1089,12 +1094,17 @@ static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
                                                    NvU64 end,
                                                    uvm_tracker_t *out_tracker)
 {
-    uvm_processor_mask_t set_accessed_by_processors;
+    uvm_processor_mask_t *set_accessed_by_processors;
     const uvm_va_policy_t *old_policy;
     uvm_va_policy_node_t *node;
     uvm_va_block_region_t region;
     uvm_processor_id_t id;
-    NV_STATUS status, tracker_status;
+    NV_STATUS status = NV_OK;
+    NV_STATUS tracker_status;
+
+    set_accessed_by_processors = uvm_processor_mask_cache_alloc();
+    if (!set_accessed_by_processors)
+        return NV_ERR_NO_MEMORY;
 
     // Note that we can't just call uvm_va_policy_set_range() for the whole
     // range [addr end] because we need to examine the old value of

@@ -1107,25 +1117,27 @@ static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
         // If the old preferred location is a valid processor ID, remote
         // mappings should be established to the new preferred location if
         // accessed-by is set.
-        uvm_processor_mask_zero(&set_accessed_by_processors);
+        uvm_processor_mask_zero(set_accessed_by_processors);
 
         if (UVM_ID_IS_VALID(old_policy->preferred_location) &&
             uvm_processor_mask_test(&old_policy->accessed_by, old_policy->preferred_location))
-            uvm_processor_mask_set(&set_accessed_by_processors, old_policy->preferred_location);
+            uvm_processor_mask_set(set_accessed_by_processors, old_policy->preferred_location);
 
         if (!uvm_va_policy_set_preferred_location(va_block,
                                                   region,
                                                   preferred_location,
                                                   preferred_cpu_nid,
-                                                  old_policy))
-            return NV_ERR_NO_MEMORY;
+                                                  old_policy)) {
+            status = NV_ERR_NO_MEMORY;
+            break;
+        }
 
         // Establish new remote mappings if the old preferred location had
         // accessed-by set.
-        for_each_id_in_mask(id, &set_accessed_by_processors) {
+        for_each_id_in_mask(id, set_accessed_by_processors) {
             status = uvm_va_block_set_accessed_by_locked(va_block, va_block_context, id, region, out_tracker);
             if (status != NV_OK)
-                return status;
+                break;
         }
 
         // Even though the UVM_VA_BLOCK_RETRY_LOCKED() may unlock and relock

@@ -1143,10 +1155,11 @@ static NV_STATUS hmm_set_preferred_location_locked(uvm_va_block_t *va_block,
             status = tracker_status;
 
         if (status != NV_OK)
-            return status;
+            break;
     }
 
-    return NV_OK;
+    uvm_processor_mask_cache_free(set_accessed_by_processors);
+    return status;
 }
 
 NV_STATUS uvm_hmm_set_preferred_location(uvm_va_space_t *va_space,

@@ -2128,6 +2141,7 @@ static NV_STATUS migrate_alloc_on_cpu(uvm_va_block_t *va_block,
+
     return status;
 }
 
 static NV_STATUS uvm_hmm_devmem_fault_alloc_and_copy(uvm_hmm_devmem_fault_context_t *devmem_fault_context)
 {
     uvm_processor_id_t processor_id;

@@ -2400,6 +2414,7 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,
 {
     uvm_va_block_region_t region = service_context->region;
     struct page **pages = service_context->block_context->hmm.pages;
+    struct vm_area_struct *vma = service_context->block_context->hmm.vma;
     int npages;
     uvm_page_index_t page_index;
     uvm_make_resident_cause_t cause;

@@ -2417,12 +2432,9 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,
     else
         cause = UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER;
 
-    status = uvm_hmm_va_block_migrate_locked(va_block,
-                                             va_block_retry,
-                                             service_context->block_context,
-                                             UVM_ID_CPU,
-                                             region,
-                                             cause);
+    UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, vma, region));
+
+    status = uvm_hmm_va_block_migrate_locked(va_block, va_block_retry, service_context, UVM_ID_CPU, region, cause);
     if (status != NV_OK)
         goto done;
 

@@ -2439,7 +2451,7 @@ static NV_STATUS hmm_block_atomic_fault_locked(uvm_processor_id_t processor_id,
     // mmap() files so we check for that here and report a fatal fault.
     // Otherwise with the current Linux 6.1 make_device_exclusive_range(),
     // it doesn't make the page exclusive and we end up in an endless loop.
-    if (service_context->block_context->hmm.vma->vm_flags & (VM_SHARED | VM_HUGETLB)) {
+    if (vma->vm_flags & (VM_SHARED | VM_HUGETLB)) {
         status = NV_ERR_NOT_SUPPORTED;
         goto done;
     }

@@ -2662,6 +2674,8 @@ static NV_STATUS dmamap_src_sysmem_pages(uvm_va_block_t *va_block,
     uvm_page_index_t page_index;
     NV_STATUS status = NV_OK;
 
+    UVM_ASSERT(service_context);
+
     for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
         struct page *src_page;
 

@@ -2966,7 +2980,7 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma,
 {
     uvm_va_block_t *va_block;
     uvm_va_block_retry_t *va_block_retry;
-    uvm_va_block_context_t *va_block_context;
+    uvm_service_block_context_t *service_context;
     const unsigned long *src_pfns;
     unsigned long *dst_pfns;
     uvm_va_block_region_t region;

@@ -2976,9 +2990,9 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma,
 
     va_block = uvm_hmm_migrate_event->va_block;
     va_block_retry = uvm_hmm_migrate_event->va_block_retry;
-    va_block_context = uvm_hmm_migrate_event->va_block_context;
-    src_pfns = va_block_context->hmm.src_pfns;
-    dst_pfns = va_block_context->hmm.dst_pfns;
+    service_context = uvm_hmm_migrate_event->service_context;
+    src_pfns = service_context->block_context->hmm.src_pfns;
+    dst_pfns = service_context->block_context->hmm.dst_pfns;
     region = uvm_hmm_migrate_event->region;
     dest_id = uvm_hmm_migrate_event->dest_id;
     page_mask = &uvm_hmm_migrate_event->page_mask;

@@ -2994,7 +3008,7 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma,
                                      region,
                                      page_mask,
                                      &uvm_hmm_migrate_event->same_devmem_page_mask,
-                                     va_block_context);
+                                     service_context->block_context);
     }
     else {
         status = dmamap_src_sysmem_pages(va_block,

@@ -3004,14 +3018,15 @@ static NV_STATUS uvm_hmm_migrate_alloc_and_copy(struct vm_area_struct *vma,
                                          region,
                                          page_mask,
                                          dest_id,
-                                         NULL);
+                                         service_context);
     }
+
     if (status != NV_OK)
         return status;
 
     status = uvm_va_block_make_resident_copy(va_block,
                                              va_block_retry,
-                                             va_block_context,
+                                             service_context->block_context,
                                              dest_id,
                                              region,
                                              page_mask,

@@ -3050,7 +3065,7 @@ static NV_STATUS uvm_hmm_migrate_finalize(uvm_hmm_migrate_event_t *uvm_hmm_migra
 
     va_block = uvm_hmm_migrate_event->va_block;
     va_block_retry = uvm_hmm_migrate_event->va_block_retry;
-    va_block_context = uvm_hmm_migrate_event->va_block_context;
+    va_block_context = uvm_hmm_migrate_event->service_context->block_context;
     region = uvm_hmm_migrate_event->region;
     dest_id = uvm_hmm_migrate_event->dest_id;
     page_mask = &uvm_hmm_migrate_event->page_mask;

@@ -3090,12 +3105,13 @@ static NV_STATUS uvm_hmm_migrate_finalize(uvm_hmm_migrate_event_t *uvm_hmm_migra
 // TODO: Bug 3900785: investigate ways to implement async migration.
 NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
                                           uvm_va_block_retry_t *va_block_retry,
-                                          uvm_va_block_context_t *va_block_context,
+                                          uvm_service_block_context_t *service_context,
                                           uvm_processor_id_t dest_id,
                                           uvm_va_block_region_t region,
                                           uvm_make_resident_cause_t cause)
 {
     uvm_hmm_migrate_event_t uvm_hmm_migrate_event;
+    uvm_va_block_context_t *va_block_context = service_context->block_context;
     struct vm_area_struct *vma = va_block_context->hmm.vma;
     NvU64 start;
     NvU64 end;

@@ -3106,6 +3122,7 @@ NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
     UVM_ASSERT(vma);
     UVM_ASSERT(va_block_context->mm == vma->vm_mm);
     uvm_assert_mmap_lock_locked(va_block_context->mm);
+    UVM_ASSERT(uvm_hmm_check_context_vma_is_valid(va_block, vma, region));
     uvm_assert_rwsem_locked(&va_block->hmm.va_space->lock);
     uvm_assert_mutex_locked(&va_block->hmm.migrate_lock);
     uvm_assert_mutex_locked(&va_block->lock);

@@ -3116,7 +3133,7 @@ NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
 
     uvm_hmm_migrate_event.va_block = va_block;
     uvm_hmm_migrate_event.va_block_retry = va_block_retry;
-    uvm_hmm_migrate_event.va_block_context = va_block_context;
+    uvm_hmm_migrate_event.service_context = service_context;
     uvm_hmm_migrate_event.region = region;
     uvm_hmm_migrate_event.dest_id = dest_id;
     uvm_hmm_migrate_event.cause = cause;

@@ -3202,7 +3219,7 @@ NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
 }
 
 NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
-                                 uvm_va_block_context_t *va_block_context,
+                                 uvm_service_block_context_t *service_context,
                                  NvU64 base,
                                  NvU64 length,
                                  uvm_processor_id_t dest_id,

@@ -3214,11 +3231,12 @@ NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
     uvm_va_block_retry_t va_block_retry;
     NvU64 addr, end, last_address;
     NV_STATUS status = NV_OK;
+    uvm_va_block_context_t *block_context = service_context->block_context;
 
     if (!uvm_hmm_is_enabled(va_space))
         return NV_ERR_INVALID_ADDRESS;
 
-    mm = va_block_context->mm;
+    mm = block_context->mm;
     UVM_ASSERT(mm == va_space->va_space_mm.mm);
     uvm_assert_mmap_lock_locked(mm);
     uvm_assert_rwsem_locked(&va_space->lock);

@@ -3228,7 +3246,7 @@ NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
     for (addr = base; addr < last_address; addr = end + 1) {
         struct vm_area_struct *vma;
 
-        status = hmm_va_block_find_create(va_space, addr, false, &va_block_context->hmm.vma, &va_block);
+        status = hmm_va_block_find_create(va_space, addr, false, &block_context->hmm.vma, &va_block);
         if (status != NV_OK)
             return status;
 

@@ -3236,18 +3254,11 @@ NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
         if (end > last_address)
             end = last_address;
 
-        vma = va_block_context->hmm.vma;
+        vma = block_context->hmm.vma;
         if (end > vma->vm_end - 1)
             end = vma->vm_end - 1;
 
-        status = hmm_migrate_range(va_block,
-                                   &va_block_retry,
-                                   va_block_context,
-                                   dest_id,
-                                   addr,
-                                   end,
-                                   mode,
-                                   out_tracker);
+        status = hmm_migrate_range(va_block, &va_block_retry, service_context, dest_id, addr, end, mode, out_tracker);
         if (status != NV_OK)
             break;
     }

@@ -3283,12 +3294,13 @@ NV_STATUS uvm_hmm_va_block_evict_chunk_prep(uvm_va_block_t *va_block,
 // Note that the caller must initialize va_block_context->hmm.src_pfns by
 // calling uvm_hmm_va_block_evict_chunk_prep() before calling this.
 static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
-                                           uvm_va_block_context_t *va_block_context,
+                                           uvm_service_block_context_t *service_context,
                                            const uvm_page_mask_t *pages_to_evict,
                                            uvm_va_block_region_t region,
                                            uvm_make_resident_cause_t cause,
                                            bool *out_accessed_by_set)
 {
+    uvm_va_block_context_t *va_block_context = service_context->block_context;
     NvU64 start = uvm_va_block_region_start(va_block, region);
     NvU64 end = uvm_va_block_region_end(va_block, region);
     unsigned long *src_pfns = va_block_context->hmm.src_pfns;

@@ -3296,7 +3308,7 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
     uvm_hmm_migrate_event_t uvm_hmm_migrate_event = {
         .va_block = va_block,
         .va_block_retry = NULL,
-        .va_block_context = va_block_context,
+        .service_context = service_context,
         .region = region,
         .dest_id = UVM_ID_CPU,
         .cause = cause,

@@ -3329,13 +3341,7 @@ static NV_STATUS hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
     // TODO: Bug 3660922: Need to handle read duplication at some point.
     UVM_ASSERT(uvm_page_mask_region_empty(cpu_resident_mask, region));
 
-    status = migrate_alloc_on_cpu(va_block,
-                                  src_pfns,
-                                  dst_pfns,
-                                  region,
-                                  page_mask,
-                                  NULL,
-                                  va_block_context);
+    status = migrate_alloc_on_cpu(va_block, src_pfns, dst_pfns, region, page_mask, NULL, va_block_context);
     if (status != NV_OK)
         goto err;
 

@@ -3369,13 +3375,13 @@ err:
 }
 
 NV_STATUS uvm_hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
-                                        uvm_va_block_context_t *va_block_context,
+                                        uvm_service_block_context_t *service_context,
                                         const uvm_page_mask_t *pages_to_evict,
                                         uvm_va_block_region_t region,
                                         bool *out_accessed_by_set)
 {
     return hmm_va_block_evict_chunks(va_block,
-                                     va_block_context,
+                                     service_context,
                                      pages_to_evict,
                                      region,
                                      UVM_MAKE_RESIDENT_CAUSE_EVICTION,

@@ -3384,11 +3390,12 @@ NV_STATUS uvm_hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
 
 NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
                                                 uvm_gpu_t *gpu,
-                                                uvm_va_block_context_t *va_block_context,
+                                                uvm_service_block_context_t *service_context,
                                                 const uvm_page_mask_t *pages_to_evict,
                                                 uvm_va_block_region_t region)
 {
-    unsigned long *src_pfns = va_block_context->hmm.src_pfns;
+    uvm_va_block_context_t *block_context = service_context->block_context;
+    unsigned long *src_pfns = block_context->hmm.src_pfns;
     uvm_va_block_gpu_state_t *gpu_state;
     uvm_page_index_t page_index;
     uvm_gpu_chunk_t *gpu_chunk;

@@ -3401,7 +3408,7 @@ NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
     UVM_ASSERT(gpu_state->chunks);
 
     // Fill in the src_pfns[] with the ZONE_DEVICE private PFNs of the GPU.
-    memset(src_pfns, 0, sizeof(va_block_context->hmm.src_pfns));
+    memset(src_pfns, 0, sizeof(block_context->hmm.src_pfns));
 
     // TODO: Bug 3368756: add support for large GPU pages.
     for_each_va_block_page_in_region_mask(page_index, pages_to_evict, region) {

@@ -3409,7 +3416,7 @@ NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
                                                      gpu,
                                                      uvm_va_block_cpu_page_address(va_block, page_index));
         status = uvm_hmm_va_block_evict_chunk_prep(va_block,
-                                                   va_block_context,
+                                                   block_context,
                                                    gpu_chunk,
                                                    uvm_va_block_region_for_page(page_index));
         if (status != NV_OK)

@@ -3417,7 +3424,7 @@ NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
     }
 
     return hmm_va_block_evict_chunks(va_block,
-                                     va_block_context,
+                                     service_context,
                                      pages_to_evict,
                                      region,
                                      UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE,

@ -287,16 +287,17 @@ typedef struct
|
|||||||
uvm_va_block_retry_t *va_block_retry,
|
uvm_va_block_retry_t *va_block_retry,
|
||||||
uvm_service_block_context_t *service_context);
|
uvm_service_block_context_t *service_context);
|
||||||
|
|
||||||
// This is called to migrate a region within a HMM va_block.
|
// This is called to migrate a region within a HMM va_block. service_context
|
||||||
// va_block_context must not be NULL and va_block_context->hmm.vma
|
// must not be NULL, service_context->va_block_context must not be NULL and
|
||||||
// must be valid.
|
// service_context->va_block_context->hmm.vma must be valid.
|
||||||
|
//
|
||||||
// Special return values (besides things like NV_ERR_NO_MEMORY):
|
// Special return values (besides things like NV_ERR_NO_MEMORY):
|
||||||
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that one or more pages could
|
// NV_WARN_MORE_PROCESSING_REQUIRED indicates that one or more pages could
|
||||||
// not be migrated and that a retry might succeed after unlocking the
|
// not be migrated and that a retry might succeed after unlocking the
|
||||||
// va_block lock, va_space lock, and mmap lock.
|
// va_block lock, va_space lock, and mmap lock.
|
||||||
NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
|
||||||
uvm_va_block_retry_t *va_block_retry,
|
uvm_va_block_retry_t *va_block_retry,
|
||||||
uvm_va_block_context_t *va_block_context,
|
uvm_service_block_context_t *service_context,
|
||||||
uvm_processor_id_t dest_id,
|
uvm_processor_id_t dest_id,
|
||||||
uvm_va_block_region_t region,
|
uvm_va_block_region_t region,
|
||||||
uvm_make_resident_cause_t cause);
|
uvm_make_resident_cause_t cause);
|
||||||
@ -304,13 +305,14 @@ typedef struct
|
|||||||
// This is called to migrate an address range of HMM allocations via
|
// This is called to migrate an address range of HMM allocations via
|
||||||
// UvmMigrate().
|
// UvmMigrate().
|
||||||
//
|
//
|
||||||
// va_block_context must not be NULL. The caller is not required to set
|
// service_context and service_context->va_block_context must not be NULL.
|
||||||
// va_block_context->hmm.vma.
|
// The caller is not required to set
|
||||||
|
// service_context->va_block_context->hmm.vma.
|
||||||
//
|
//
|
||||||
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked and
|
// Locking: the va_space->va_space_mm.mm mmap_lock must be locked and
|
||||||
// the va_space read lock must be held.
|
// the va_space read lock must be held.
|
||||||
NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
|
NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
|
||||||
uvm_va_block_context_t *va_block_context,
|
uvm_service_block_context_t *service_context,
|
||||||
NvU64 base,
|
NvU64 base,
|
||||||
NvU64 length,
|
NvU64 length,
|
||||||
uvm_processor_id_t dest_id,
|
uvm_processor_id_t dest_id,
|
||||||
@ -329,27 +331,31 @@ typedef struct
|
|||||||
uvm_gpu_chunk_t *gpu_chunk,
|
uvm_gpu_chunk_t *gpu_chunk,
|
||||||
uvm_va_block_region_t chunk_region);
|
uvm_va_block_region_t chunk_region);
|
||||||
|
|
||||||
// Migrate pages to system memory for the given page mask.
|
// Migrate pages to system memory for the given page mask. Note that the
|
||||||
// Note that the mmap lock is not held and there is no MM retained.
|
// mmap lock is not held and there is no MM retained. This must be called
|
||||||
// This must be called after uvm_hmm_va_block_evict_chunk_prep() has
|
// after uvm_hmm_va_block_evict_chunk_prep() has initialized
|
||||||
// initialized va_block_context->hmm.src_pfns[] for the source GPU physical
|
// service_context->va_block_context->hmm.src_pfns[] for the source GPU
|
||||||
-// PFNs being migrated. Note that the input mask 'pages_to_evict' can be
-// modified. If any of the evicted pages has the accessed by policy set,
-// then record that by setting out_accessed_by_set.
+// physical PFNs being migrated. Note that the input mask 'pages_to_evict'
+// can be modified. If any of the evicted pages has the accessed by policy
+// set, then record that by setting out_accessed_by_set.
+// The caller is not required to set
+// service_context->va_block_context->hmm.vma, it will be cleared in
+// uvm_hmm_va_block_evict_chunks().
 // Locking: the va_block lock must be locked.
 NV_STATUS uvm_hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
-                                        uvm_va_block_context_t *va_block_context,
+                                        uvm_service_block_context_t *service_context,
                                         const uvm_page_mask_t *pages_to_evict,
                                         uvm_va_block_region_t region,
                                         bool *out_accessed_by_set);

-// Migrate pages from the given GPU to system memory for the given page
-// mask and region. va_block_context must not be NULL.
-// Note that the mmap lock is not held and there is no MM retained.
+// Migrate pages from the given GPU to system memory for the given page mask
+// and region. uvm_service_block_context_t and
+// uvm_service_block_context_t->va_block_context must not be NULL. Note that
+// the mmap lock is not held and there is no MM retained.
 // Locking: the va_block lock must be locked.
 NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
                                                 uvm_gpu_t *gpu,
-                                                uvm_va_block_context_t *va_block_context,
+                                                uvm_service_block_context_t *service_context,
                                                 const uvm_page_mask_t *pages_to_evict,
                                                 uvm_va_block_region_t region);

@@ -572,7 +578,7 @@ typedef struct

 static NV_STATUS uvm_hmm_va_block_migrate_locked(uvm_va_block_t *va_block,
                                                  uvm_va_block_retry_t *va_block_retry,
-                                                 uvm_va_block_context_t *va_block_context,
+                                                 uvm_service_block_context_t *service_context,
                                                  uvm_processor_id_t dest_id,
                                                  uvm_va_block_region_t region,
                                                  uvm_make_resident_cause_t cause)
@@ -581,7 +587,7 @@ typedef struct
 }

 static NV_STATUS uvm_hmm_migrate_ranges(uvm_va_space_t *va_space,
-                                        uvm_va_block_context_t *va_block_context,
+                                        uvm_service_block_context_t *service_context,
                                         NvU64 base,
                                         NvU64 length,
                                         uvm_processor_id_t dest_id,
@@ -606,7 +612,7 @@ typedef struct
 }

 static NV_STATUS uvm_hmm_va_block_evict_chunks(uvm_va_block_t *va_block,
-                                               uvm_va_block_context_t *va_block_context,
+                                               uvm_service_block_context_t *service_context,
                                                const uvm_page_mask_t *pages_to_evict,
                                                uvm_va_block_region_t region,
                                                bool *out_accessed_by_set)
@@ -616,7 +622,7 @@ typedef struct

 static NV_STATUS uvm_hmm_va_block_evict_pages_from_gpu(uvm_va_block_t *va_block,
                                                        uvm_gpu_t *gpu,
-                                                       uvm_va_block_context_t *va_block_context,
+                                                       uvm_service_block_context_t *service_context,
                                                        const uvm_page_mask_t *pages_to_evict,
                                                        uvm_va_block_region_t region)
 {
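The hunks above switch the HMM eviction entry points from uvm_va_block_context_t to uvm_service_block_context_t. A minimal caller-side sketch of the new call shape (illustrative only, not a call site from this change; it assumes the uvm_service_block_context_alloc()/uvm_service_block_context_free() helpers used elsewhere in this diff, and that the caller already holds the va_block lock as the comments require):

    // Hypothetical helper, not part of the driver: evict the given pages of a
    // block through the new service-context parameter.
    static NV_STATUS example_evict_block_pages(uvm_va_block_t *va_block,
                                               const uvm_page_mask_t *pages_to_evict,
                                               uvm_va_block_region_t region)
    {
        bool accessed_by_set = false;
        NV_STATUS status;

        // The eviction path runs without the mmap lock and with no MM
        // retained, so a NULL mm is assumed to be acceptable here.
        uvm_service_block_context_t *service_context = uvm_service_block_context_alloc(NULL);

        if (!service_context)
            return NV_ERR_NO_MEMORY;

        // Per the comment above, the callee clears the HMM vma field itself,
        // so the caller does not need to initialize it.
        status = uvm_hmm_va_block_evict_chunks(va_block,
                                               service_context,
                                               pages_to_evict,
                                               region,
                                               &accessed_by_set);

        uvm_service_block_context_free(service_context);
        return status;
    }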
@@ -27,6 +27,24 @@
 #include "uvm_mem.h"
 #include "uvm_hopper_fault_buffer.h"

+static uvm_gpu_peer_copy_mode_t hopper_peer_copy_mode(uvm_parent_gpu_t *parent_gpu)
+{
+    // In Confidential Computing the Copy Engine supports encrypted copies
+    // between peers. But in Hopper these transfers require significant
+    // software support (ex: unprotected vidmem), so in practice they are not
+    // allowed.
+    if (g_uvm_global.conf_computing_enabled)
+        return UVM_GPU_PEER_COPY_MODE_UNSUPPORTED;
+
+    // TODO: Bug 4174553: In some Grace Hopper setups, physical peer copies
+    // result on errors. Force peer copies to use virtual addressing until the
+    // issue is clarified.
+    if (uvm_parent_gpu_is_coherent(parent_gpu))
+        return UVM_GPU_PEER_COPY_MODE_VIRTUAL;
+
+    return g_uvm_global.peer_copy_mode;
+}
+
 void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
 {
     parent_gpu->tlb_batch.va_invalidate_supported = true;
@@ -58,14 +76,10 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
     parent_gpu->flat_vidmem_va_base = (64 * UVM_SIZE_1PB) + (32 * UVM_SIZE_1TB);

     // Physical CE writes to vidmem are non-coherent with respect to the CPU on
-    // GH180.
+    // Grace Hopper.
     parent_gpu->ce_phys_vidmem_write_supported = !uvm_parent_gpu_is_coherent(parent_gpu);

-    // TODO: Bug 4174553: [HGX-SkinnyJoe][GH180] channel errors discussion/debug
-    // portion for the uvm tests became nonresponsive after
-    // some time and then failed even after reboot
-    parent_gpu->peer_copy_mode = uvm_parent_gpu_is_coherent(parent_gpu) ?
-                                 UVM_GPU_PEER_COPY_MODE_VIRTUAL : g_uvm_global.peer_copy_mode;
+    parent_gpu->peer_copy_mode = hopper_peer_copy_mode(parent_gpu);

     // All GR context buffers may be mapped to 57b wide VAs. All "compute" units
     // accessing GR context buffers support the 57-bit VA range.
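For reference, the new hopper_peer_copy_mode() helper added above encodes a fixed precedence: Confidential Computing disables peer copies entirely, coherent (Grace Hopper) systems are forced to virtual peer copies, and everything else inherits the global mode. The same precedence can be written as a small self-contained decision function; the enum below is only a stand-in for the driver's uvm_gpu_peer_copy_mode_t and is purely illustrative:

    #include <stdbool.h>

    typedef enum {
        PEER_COPY_UNSUPPORTED,  // stands in for UVM_GPU_PEER_COPY_MODE_UNSUPPORTED
        PEER_COPY_VIRTUAL,      // stands in for UVM_GPU_PEER_COPY_MODE_VIRTUAL
        PEER_COPY_PHYSICAL,     // stands in for UVM_GPU_PEER_COPY_MODE_PHYSICAL
    } peer_copy_mode_t;

    // Mirrors the precedence of hopper_peer_copy_mode(): Confidential
    // Computing wins, then coherence (Grace Hopper), then the global default.
    static peer_copy_mode_t pick_peer_copy_mode(bool conf_computing_enabled,
                                                bool parent_gpu_is_coherent,
                                                peer_copy_mode_t global_mode)
    {
        if (conf_computing_enabled)
            return PEER_COPY_UNSUPPORTED;

        if (parent_gpu_is_coherent)
            return PEER_COPY_VIRTUAL;

        return global_mode;
    }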
@@ -480,7 +480,6 @@ static NvU64 encrypt_iv_address(uvm_push_t *push, uvm_gpu_address_t dst)
     return iv_address;
 }

-// TODO: Bug 3842953: adapt CE encrypt/decrypt for p2p encrypted transfers
 void uvm_hal_hopper_ce_encrypt(uvm_push_t *push,
                                uvm_gpu_address_t dst,
                                uvm_gpu_address_t src,
@@ -530,7 +529,6 @@ void uvm_hal_hopper_ce_encrypt(uvm_push_t *push,
     encrypt_or_decrypt(push, dst, src, size);
 }

-// TODO: Bug 3842953: adapt CE encrypt/decrypt for p2p encrypted transfers
 void uvm_hal_hopper_ce_decrypt(uvm_push_t *push,
                                uvm_gpu_address_t dst,
                                uvm_gpu_address_t src,
@@ -970,7 +970,7 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
 {
     uvm_va_range_t *va_range = NULL;
     uvm_gpu_t *mapping_gpu;
-    uvm_processor_mask_t mapped_gpus;
+    uvm_processor_mask_t *mapped_gpus;
     NV_STATUS status = NV_OK;
     size_t i;
     uvm_map_rm_params_t map_rm_params;
@@ -988,6 +988,10 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
     if (params->gpuAttributesCount == 0 || params->gpuAttributesCount > UVM_MAX_GPUS_V2)
         return NV_ERR_INVALID_ARGUMENT;

+    mapped_gpus = uvm_processor_mask_cache_alloc();
+    if (!mapped_gpus)
+        return NV_ERR_NO_MEMORY;
+
     uvm_va_space_down_read_rm(va_space);
     va_range = uvm_va_range_find(va_space, params->base);

@@ -995,10 +999,11 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
         va_range->type != UVM_VA_RANGE_TYPE_EXTERNAL ||
         va_range->node.end < params->base + params->length - 1) {
         uvm_va_space_up_read_rm(va_space);
+        uvm_processor_mask_cache_free(mapped_gpus);
         return NV_ERR_INVALID_ADDRESS;
     }

-    uvm_processor_mask_zero(&mapped_gpus);
+    uvm_processor_mask_zero(mapped_gpus);
     for (i = 0; i < params->gpuAttributesCount; i++) {
         if (uvm_api_mapping_type_invalid(params->perGpuAttributes[i].gpuMappingType) ||
             uvm_api_caching_type_invalid(params->perGpuAttributes[i].gpuCachingType) ||
@@ -1034,7 +1039,7 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
         if (status != NV_OK)
             goto error;

-        uvm_processor_mask_set(&mapped_gpus, mapping_gpu->id);
+        uvm_processor_mask_set(mapped_gpus, mapping_gpu->id);
     }

     // Wait for outstanding page table operations to finish across all GPUs. We
@@ -1043,6 +1048,8 @@ static NV_STATUS uvm_map_external_allocation(uvm_va_space_t *va_space, UVM_MAP_E
     status = uvm_tracker_wait_deinit(&tracker);

     uvm_va_space_up_read_rm(va_space);
+    uvm_processor_mask_cache_free(mapped_gpus);
+
     return status;

 error:
@@ -1051,7 +1058,7 @@ error:
     (void)uvm_tracker_wait_deinit(&tracker);

     // Tear down only those mappings we created during this call
-    for_each_va_space_gpu_in_mask(mapping_gpu, va_space, &mapped_gpus) {
+    for_each_va_space_gpu_in_mask(mapping_gpu, va_space, mapped_gpus) {
         uvm_ext_gpu_range_tree_t *range_tree = uvm_ext_gpu_range_tree(va_range, mapping_gpu);
         uvm_ext_gpu_map_t *ext_map, *ext_map_next;

@@ -1067,6 +1074,7 @@ error:
     }

     uvm_va_space_up_read_rm(va_space);
+    uvm_processor_mask_cache_free(mapped_gpus);

     return status;
 }
@@ -1356,9 +1364,7 @@ static NV_STATUS uvm_free(uvm_va_space_t *va_space, NvU64 base, NvU64 length)
 {
     uvm_va_range_t *va_range;
     NV_STATUS status = NV_OK;
-    // TODO: Bug 4351121: retained_mask should be pre-allocated, not on the
-    // stack.
-    uvm_processor_mask_t retained_mask;
+    uvm_processor_mask_t *retained_mask = NULL;
     LIST_HEAD(deferred_free_list);

     if (uvm_api_range_invalid_4k(base, length))
@@ -1391,17 +1397,25 @@ static NV_STATUS uvm_free(uvm_va_space_t *va_space, NvU64 base, NvU64 length)
     }

     if (va_range->type == UVM_VA_RANGE_TYPE_EXTERNAL) {
+        retained_mask = va_range->external.retained_mask;
+
+        // Set the retained_mask to NULL to prevent
+        // uvm_va_range_destroy_external() from freeing the mask.
+        va_range->external.retained_mask = NULL;
+
+        UVM_ASSERT(retained_mask);
+
         // External ranges may have deferred free work, so the GPUs may have to
         // be retained. Construct the mask of all the GPUs that need to be
         // retained.
-        uvm_processor_mask_and(&retained_mask, &va_range->external.mapped_gpus, &va_space->registered_gpus);
+        uvm_processor_mask_and(retained_mask, &va_range->external.mapped_gpus, &va_space->registered_gpus);
     }

     uvm_va_range_destroy(va_range, &deferred_free_list);

     // If there is deferred work, retain the required GPUs.
     if (!list_empty(&deferred_free_list))
-        uvm_global_gpu_retain(&retained_mask);
+        uvm_global_gpu_retain(retained_mask);

 out:
     uvm_va_space_up_write(va_space);
@@ -1409,9 +1423,13 @@ out:
     if (!list_empty(&deferred_free_list)) {
         UVM_ASSERT(status == NV_OK);
         uvm_deferred_free_object_list(&deferred_free_list);
-        uvm_global_gpu_release(&retained_mask);
+        uvm_global_gpu_release(retained_mask);
     }

+    // Free the mask allocated in uvm_va_range_create_external() since
+    // uvm_va_range_destroy() won't free this mask.
+    uvm_processor_mask_cache_free(retained_mask);
+
     return status;
 }

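The uvm_map_external_allocation() and uvm_free() hunks above follow the recurring pattern of this release: uvm_processor_mask_t values move off the kernel stack and onto the processor-mask cache. A hedged sketch of that lifecycle (the entry-point name is hypothetical; the alloc/zero/free helpers are the ones used above):

    // Illustrative shape only; uvm_example_operation() is a hypothetical
    // stand-in for a driver entry point that previously used an on-stack mask.
    static NV_STATUS uvm_example_operation(uvm_va_space_t *va_space)
    {
        NV_STATUS status = NV_OK;
        uvm_processor_mask_t *mask;

        // Heap-backed mask replaces "uvm_processor_mask_t mask;" on the stack.
        mask = uvm_processor_mask_cache_alloc();
        if (!mask)
            return NV_ERR_NO_MEMORY;

        uvm_processor_mask_zero(mask);

        // ... set bits and use the mask exactly as before, passing the
        //     pointer instead of &mask ...

        // Every exit path, including early errors, must free the cached mask,
        // which is why the hunks above add frees to the error paths as well.
        uvm_processor_mask_cache_free(mask);
        return status;
    }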
@@ -214,13 +214,14 @@ static NV_STATUS block_migrate_add_mappings(uvm_va_block_t *va_block,

 NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
                                       uvm_va_block_retry_t *va_block_retry,
-                                      uvm_va_block_context_t *va_block_context,
+                                      uvm_service_block_context_t *service_context,
                                       uvm_va_block_region_t region,
                                       uvm_processor_id_t dest_id,
                                       uvm_migrate_mode_t mode,
                                       uvm_tracker_t *out_tracker)
 {
     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
+    uvm_va_block_context_t *va_block_context = service_context->block_context;
     NV_STATUS status, tracker_status = NV_OK;

     uvm_assert_mutex_locked(&va_block->lock);
@@ -229,7 +230,7 @@ NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
     if (uvm_va_block_is_hmm(va_block)) {
         status = uvm_hmm_va_block_migrate_locked(va_block,
                                                  va_block_retry,
-                                                 va_block_context,
+                                                 service_context,
                                                  dest_id,
                                                  region,
                                                  UVM_MAKE_RESIDENT_CAUSE_API_MIGRATE);
@@ -438,7 +439,7 @@ static void preunmap_multi_block(uvm_va_range_t *va_range,
 }

 static NV_STATUS uvm_va_range_migrate_multi_block(uvm_va_range_t *va_range,
-                                                  uvm_va_block_context_t *va_block_context,
+                                                  uvm_service_block_context_t *service_context,
                                                   NvU64 start,
                                                   NvU64 end,
                                                   uvm_processor_id_t dest_id,
@@ -470,10 +471,11 @@ static NV_STATUS uvm_va_range_migrate_multi_block(uvm_va_range_t *va_range,
                                                         max(start, va_block->start),
                                                         min(end, va_block->end));

-        status = UVM_VA_BLOCK_LOCK_RETRY(va_block, &va_block_retry,
+        status = UVM_VA_BLOCK_LOCK_RETRY(va_block,
+                                         &va_block_retry,
                                          uvm_va_block_migrate_locked(va_block,
                                                                      &va_block_retry,
-                                                                     va_block_context,
+                                                                     service_context,
                                                                      region,
                                                                      dest_id,
                                                                      mode,
@@ -486,7 +488,7 @@ static NV_STATUS uvm_va_range_migrate_multi_block(uvm_va_range_t *va_range,
 }

 static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
-                                      uvm_va_block_context_t *va_block_context,
+                                      uvm_service_block_context_t *service_context,
                                       NvU64 start,
                                       NvU64 end,
                                       uvm_processor_id_t dest_id,
@@ -510,7 +512,7 @@ static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
         preunmap_range_end = min(preunmap_range_end - 1, end);

         preunmap_multi_block(va_range,
-                             va_block_context,
+                             service_context->block_context,
                              preunmap_range_start,
                              preunmap_range_end,
                              dest_id);
@@ -520,7 +522,7 @@ static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
     }

     status = uvm_va_range_migrate_multi_block(va_range,
-                                              va_block_context,
+                                              service_context,
                                               preunmap_range_start,
                                               preunmap_range_end,
                                               dest_id,
@@ -536,7 +538,7 @@ static NV_STATUS uvm_va_range_migrate(uvm_va_range_t *va_range,
 }

 static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
-                                    uvm_va_block_context_t *va_block_context,
+                                    uvm_service_block_context_t *service_context,
                                     uvm_va_range_t *first_va_range,
                                     NvU64 base,
                                     NvU64 length,
@@ -552,13 +554,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,

     if (!first_va_range) {
         // For HMM, we iterate over va_blocks since there is no va_range.
-        return uvm_hmm_migrate_ranges(va_space,
-                                      va_block_context,
-                                      base,
-                                      length,
-                                      dest_id,
-                                      mode,
-                                      out_tracker);
+        return uvm_hmm_migrate_ranges(va_space, service_context, base, length, dest_id, mode, out_tracker);
     }

     UVM_ASSERT(first_va_range == uvm_va_space_iter_first(va_space, base, base));
@@ -587,7 +583,9 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
             if (!iter.migratable) {
                 // Only return NV_WARN_MORE_PROCESSING_REQUIRED if the pages aren't
                 // already resident at dest_id.
-                if (!uvm_va_policy_preferred_location_equal(policy, dest_id, va_block_context->make_resident.dest_nid))
+                if (!uvm_va_policy_preferred_location_equal(policy,
+                                                            dest_id,
+                                                            service_context->block_context->make_resident.dest_nid))
                     skipped_migrate = true;
             }
             else if (uvm_processor_mask_test(&va_range->uvm_lite_gpus, dest_id) &&
@@ -599,7 +597,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
             }
             else {
                 status = uvm_va_range_migrate(va_range,
-                                              va_block_context,
+                                              service_context,
                                               iter.start,
                                               iter.end,
                                               dest_id,
@@ -636,7 +634,7 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
                              uvm_tracker_t *out_tracker)
 {
     NV_STATUS status = NV_OK;
-    uvm_va_block_context_t *va_block_context;
+    uvm_service_block_context_t *service_context;
     bool do_mappings;
     bool do_two_passes;
     bool is_single_block;
@@ -654,11 +652,11 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
     else if (!first_va_range)
         return NV_ERR_INVALID_ADDRESS;

-    va_block_context = uvm_va_block_context_alloc(mm);
-    if (!va_block_context)
+    service_context = uvm_service_block_context_alloc(mm);
+    if (!service_context)
         return NV_ERR_NO_MEMORY;

-    va_block_context->make_resident.dest_nid = dest_nid;
+    service_context->block_context->make_resident.dest_nid = dest_nid;

     // We perform two passes (unless the migration only covers a single VA
     // block or UVM_MIGRATE_FLAG_SKIP_CPU_MAP is passed). This helps in the
@@ -688,7 +686,7 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
     should_do_cpu_preunmap = migration_should_do_cpu_preunmap(va_space, UVM_MIGRATE_PASS_FIRST, is_single_block);

     status = uvm_migrate_ranges(va_space,
-                                va_block_context,
+                                service_context,
                                 first_va_range,
                                 base,
                                 length,
@@ -706,7 +704,7 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
         should_do_cpu_preunmap = migration_should_do_cpu_preunmap(va_space, pass, is_single_block);

         status = uvm_migrate_ranges(va_space,
-                                    va_block_context,
+                                    service_context,
                                     first_va_range,
                                     base,
                                     length,
@@ -716,7 +714,7 @@ static NV_STATUS uvm_migrate(uvm_va_space_t *va_space,
                                 out_tracker);
     }

-    uvm_va_block_context_free(va_block_context);
+    uvm_service_block_context_free(service_context);

     return status;
 }
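In uvm_migrate() above, the block context is no longer allocated directly; a uvm_service_block_context_t is allocated once, configured, and reused for both migration passes. A reduced sketch of that setup and teardown (illustrative only; the helper name and simplified types are assumptions, and the real function also handles the single-block, preunmap, and two-pass logic shown in the hunks):

    // Hedged sketch of the new uvm_migrate() setup: allocate the service
    // context once, record the destination NUMA node on its embedded block
    // context, reuse it for every pass, then free it.
    static NV_STATUS example_migrate_setup(struct mm_struct *mm, int dest_nid)
    {
        uvm_service_block_context_t *service_context;

        service_context = uvm_service_block_context_alloc(mm);
        if (!service_context)
            return NV_ERR_NO_MEMORY;

        service_context->block_context->make_resident.dest_nid = dest_nid;

        // ... first and (optionally) second uvm_migrate_ranges() pass here,
        //     both taking service_context ...

        uvm_service_block_context_free(service_context);
        return NV_OK;
    }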
@@ -2357,6 +2357,8 @@ NV_STATUS uvm_mmu_create_peer_identity_mappings(uvm_gpu_t *gpu, uvm_gpu_t *peer)
     NvU64 phys_offset;
     uvm_gpu_identity_mapping_t *peer_mapping;

+    UVM_ASSERT(gpu->parent->peer_copy_mode < UVM_GPU_PEER_COPY_MODE_COUNT);
+
     if (gpu->parent->peer_copy_mode != UVM_GPU_PEER_COPY_MODE_VIRTUAL || peer->mem_info.size == 0)
         return NV_OK;

@@ -901,6 +901,7 @@ static pinned_page_t *find_pinned_page(block_thrashing_info_t *block_thrashing,
 //
 static NV_STATUS thrashing_pin_page(va_space_thrashing_info_t *va_space_thrashing,
                                     uvm_va_block_t *va_block,
+                                    uvm_va_block_context_t *va_block_context,
                                     block_thrashing_info_t *block_thrashing,
                                     page_thrashing_info_t *page_thrashing,
                                     uvm_page_index_t page_index,
@@ -908,17 +909,17 @@ static NV_STATUS thrashing_pin_page(va_space_thrashing_info_t *va_space_thrashin
                                     uvm_processor_id_t residency,
                                     uvm_processor_id_t requester)
 {
-    uvm_processor_mask_t current_residency;
+    uvm_processor_mask_t *current_residency = &va_block_context->scratch_processor_mask;

     uvm_assert_mutex_locked(&va_block->lock);
     UVM_ASSERT(!uvm_processor_mask_test(&page_thrashing->throttled_processors, requester));

-    uvm_va_block_page_resident_processors(va_block, page_index, &current_residency);
+    uvm_va_block_page_resident_processors(va_block, page_index, current_residency);

     // If we are pinning the page for the first time or we are pinning it on a
     // different location that the current location, reset the throttling state
     // to make sure that we flush any pending ThrottlingEnd events.
-    if (!page_thrashing->pinned || !uvm_processor_mask_test(&current_residency, residency))
+    if (!page_thrashing->pinned || !uvm_processor_mask_test(current_residency, residency))
         thrashing_throttling_reset_page(va_block, block_thrashing, page_thrashing, page_index);

     if (!page_thrashing->pinned) {
@@ -1120,8 +1121,7 @@ static NV_STATUS unmap_remote_pinned_pages(uvm_va_block_t *va_block,
             continue;
         }
         else {
-            uvm_page_mask_copy(&va_block_context->caller_page_mask,
-                               &block_thrashing->pinned_pages.mask);
+            uvm_page_mask_copy(&va_block_context->caller_page_mask, &block_thrashing->pinned_pages.mask);
         }

         status = uvm_va_block_unmap(va_block,
@@ -1148,7 +1148,7 @@ NV_STATUS uvm_perf_thrashing_unmap_remote_pinned_pages_all(uvm_va_block_t *va_bl
                                                            uvm_va_block_region_t region)
 {
     block_thrashing_info_t *block_thrashing;
-    uvm_processor_mask_t unmap_processors;
+    uvm_processor_mask_t *unmap_processors = &va_block_context->unmap_processors_mask;
     const uvm_va_policy_t *policy = uvm_va_policy_get_region(va_block, region);

     uvm_assert_mutex_locked(&va_block->lock);
@@ -1162,9 +1162,9 @@ NV_STATUS uvm_perf_thrashing_unmap_remote_pinned_pages_all(uvm_va_block_t *va_bl

     // Unmap all mapped processors (that are not SetAccessedBy) with
     // no copy of the page
-    uvm_processor_mask_andnot(&unmap_processors, &va_block->mapped, &policy->accessed_by);
+    uvm_processor_mask_andnot(unmap_processors, &va_block->mapped, &policy->accessed_by);

-    return unmap_remote_pinned_pages(va_block, va_block_context, block_thrashing, region, &unmap_processors);
+    return unmap_remote_pinned_pages(va_block, va_block_context, block_thrashing, region, unmap_processors);
 }

 // Check that we are not migrating pages away from its pinned location and
@@ -1391,22 +1391,23 @@ static bool thrashing_processors_can_access(uvm_va_space_t *va_space,
 }

 static bool thrashing_processors_have_fast_access_to(uvm_va_space_t *va_space,
+                                                     uvm_va_block_context_t *va_block_context,
                                                      page_thrashing_info_t *page_thrashing,
                                                      uvm_processor_id_t to)
 {
-    uvm_processor_mask_t fast_to;
+    uvm_processor_mask_t *fast_to = &va_block_context->fast_access_mask;

     if (UVM_ID_IS_INVALID(to))
         return false;

     // Combine NVLINK and native atomics mask since we could have PCIe
     // atomics in the future
-    uvm_processor_mask_and(&fast_to,
+    uvm_processor_mask_and(fast_to,
                            &va_space->has_nvlink[uvm_id_value(to)],
                            &va_space->has_native_atomics[uvm_id_value(to)]);
-    uvm_processor_mask_set(&fast_to, to);
+    uvm_processor_mask_set(fast_to, to);

-    return uvm_processor_mask_subset(&page_thrashing->processors, &fast_to);
+    return uvm_processor_mask_subset(&page_thrashing->processors, fast_to);
 }

 static void thrashing_processors_common_locations(uvm_va_space_t *va_space,
@@ -1488,7 +1489,7 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
         hint.pin.residency = preferred_location;
     }
     else if (!preferred_location_is_thrashing(preferred_location, page_thrashing) &&
-             thrashing_processors_have_fast_access_to(va_space, page_thrashing, closest_resident_id)) {
+             thrashing_processors_have_fast_access_to(va_space, va_block_context, page_thrashing, closest_resident_id)){
         // This is a fast path for those scenarios in which all thrashing
         // processors have fast (NVLINK + native atomics) access to the current
         // residency. This is skipped if the preferred location is thrashing and
@@ -1545,15 +1546,15 @@ static uvm_perf_thrashing_hint_t get_hint_for_migration_thrashing(va_space_thras
                 hint.pin.residency = requester;
             }
             else {
-                uvm_processor_mask_t common_locations;
+                uvm_processor_mask_t *common_locations = &va_block_context->scratch_processor_mask;

-                thrashing_processors_common_locations(va_space, page_thrashing, &common_locations);
-                if (uvm_processor_mask_empty(&common_locations)) {
+                thrashing_processors_common_locations(va_space, page_thrashing, common_locations);
+                if (uvm_processor_mask_empty(common_locations)) {
                     hint.pin.residency = requester;
                 }
                 else {
                     // Find the common location that is closest to the requester
-                    hint.pin.residency = uvm_processor_mask_find_closest_id(va_space, &common_locations, requester);
+                    hint.pin.residency = uvm_processor_mask_find_closest_id(va_space, common_locations, requester);
                 }
             }
         }
@@ -1725,6 +1726,7 @@ done:
     if (hint.type == UVM_PERF_THRASHING_HINT_TYPE_PIN) {
         NV_STATUS status = thrashing_pin_page(va_space_thrashing,
                                               va_block,
+                                              va_block_context,
                                               block_thrashing,
                                               page_thrashing,
                                               page_index,
@@ -1207,35 +1207,38 @@ static NV_STATUS test_cpu_chunk_numa_alloc(uvm_va_space_t *va_space)
 NV_STATUS uvm_test_cpu_chunk_api(UVM_TEST_CPU_CHUNK_API_PARAMS *params, struct file *filp)
 {
     uvm_va_space_t *va_space = uvm_va_space_get(filp);
-    uvm_processor_mask_t test_gpus;
+    uvm_processor_mask_t *test_gpus;
     uvm_gpu_t *gpu;
     NV_STATUS status = NV_OK;

-    uvm_va_space_down_read(va_space);
-    uvm_processor_mask_and(&test_gpus,
-                           &va_space->registered_gpus,
-                           &va_space->accessible_from[uvm_id_value(UVM_ID_CPU)]);
+    test_gpus = uvm_processor_mask_cache_alloc();
+    if (!test_gpus)
+        return NV_ERR_NO_MEMORY;
+
+    uvm_va_space_down_read(va_space);
+    uvm_processor_mask_and(test_gpus, &va_space->registered_gpus, &va_space->accessible_from[uvm_id_value(UVM_ID_CPU)]);

-    for_each_va_space_gpu_in_mask(gpu, va_space, &test_gpus) {
+    for_each_va_space_gpu_in_mask(gpu, va_space, test_gpus) {
         TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_basic(gpu, UVM_CPU_CHUNK_ALLOC_FLAGS_NONE), done);
         TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_basic(gpu, UVM_CPU_CHUNK_ALLOC_FLAGS_ZERO), done);
         TEST_NV_CHECK_GOTO(test_cpu_chunk_split_and_merge(gpu), done);
         TEST_NV_CHECK_GOTO(test_cpu_chunk_dirty(gpu), done);
     }

-    TEST_NV_CHECK_GOTO(test_cpu_chunk_free(va_space, &test_gpus), done);
+    TEST_NV_CHECK_GOTO(test_cpu_chunk_free(va_space, test_gpus), done);
     TEST_NV_CHECK_GOTO(test_cpu_chunk_numa_alloc(va_space), done);

-    if (uvm_processor_mask_get_gpu_count(&test_gpus) >= 3) {
+    if (uvm_processor_mask_get_gpu_count(test_gpus) >= 3) {
         uvm_gpu_t *gpu2, *gpu3;

-        gpu = uvm_processor_mask_find_first_va_space_gpu(&test_gpus, va_space);
-        gpu2 = uvm_processor_mask_find_next_va_space_gpu(&test_gpus, va_space, gpu);
-        gpu3 = uvm_processor_mask_find_next_va_space_gpu(&test_gpus, va_space, gpu2);
+        gpu = uvm_processor_mask_find_first_va_space_gpu(test_gpus, va_space);
+        gpu2 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu);
+        gpu3 = uvm_processor_mask_find_next_va_space_gpu(test_gpus, va_space, gpu2);
         TEST_NV_CHECK_GOTO(test_cpu_chunk_mapping_array(gpu, gpu2, gpu3), done);
     }

 done:
     uvm_va_space_up_read(va_space);
+    uvm_processor_mask_cache_free(test_gpus);
     return status;
 }
@@ -720,7 +720,6 @@ static NV_STATUS va_block_unset_read_duplication_locked(uvm_va_block_t *va_block
     uvm_page_mask_t *break_read_duplication_pages = &va_block_context->caller_page_mask;
     const uvm_va_policy_t *policy = uvm_va_range_get_policy(va_block->va_range);
     uvm_processor_id_t preferred_location = policy->preferred_location;
-    uvm_processor_mask_t accessed_by = policy->accessed_by;

     uvm_assert_mutex_locked(&va_block->lock);

@@ -779,7 +778,7 @@ static NV_STATUS va_block_unset_read_duplication_locked(uvm_va_block_t *va_block
     }

     // 2- Re-establish SetAccessedBy mappings
-    for_each_id_in_mask(processor_id, &accessed_by) {
+    for_each_id_in_mask(processor_id, &policy->accessed_by) {
         status = uvm_va_block_set_accessed_by_locked(va_block,
                                                      va_block_context,
                                                      processor_id,
@@ -25,6 +25,7 @@
 #include "uvm_forward_decl.h"
 #include "uvm_push.h"
 #include "uvm_channel.h"
+#include "uvm_global.h"
 #include "uvm_hal.h"
 #include "uvm_kvmalloc.h"
 #include "uvm_linux.h"
@@ -55,6 +56,13 @@ static uvm_push_acquire_info_t *push_acquire_info_from_push(uvm_push_t *push)
     return &channel->push_acquire_infos[push->push_info_index];
 }

+bool uvm_push_allow_dependencies_across_gpus(void)
+{
+    // In Confidential Computing a GPU semaphore release cannot be waited on
+    // (acquired by) any other GPU, due to a mix of HW and SW constraints.
+    return !g_uvm_global.conf_computing_enabled;
+}
+
 // Acquire a single tracker entry. Subsequently pushed GPU work will not start
 // before the work tracked by tracker entry is complete.
 static void push_acquire_tracker_entry(uvm_push_t *push,
@@ -77,9 +85,14 @@ static void push_acquire_tracker_entry(uvm_push_t *push,
     if (channel == entry_channel)
         return;

-    semaphore_va = uvm_channel_tracking_semaphore_get_gpu_va_in_channel(entry_channel, channel);
     gpu = uvm_channel_get_gpu(channel);

+    // If dependencies across GPUs are disallowed, the caller is required to
+    // previously wait on such dependencies.
+    if (gpu != uvm_tracker_entry_gpu(tracker_entry))
+        UVM_ASSERT(uvm_push_allow_dependencies_across_gpus());
+
+    semaphore_va = uvm_channel_tracking_semaphore_get_gpu_va_in_channel(entry_channel, channel);
     gpu->parent->host_hal->semaphore_acquire(push, semaphore_va, (NvU32)tracker_entry->value);

     if (push_acquire_info) {
@@ -188,6 +201,17 @@ static void push_fill_info(uvm_push_t *push,
     push_set_description(push, format, args);
 }

+static NV_STATUS wait_for_other_gpus_if_needed(uvm_tracker_t *tracker, uvm_gpu_t *gpu)
+{
+    if (tracker == NULL)
+        return NV_OK;
+
+    if (uvm_push_allow_dependencies_across_gpus())
+        return NV_OK;
+
+    return uvm_tracker_wait_for_other_gpus(tracker, gpu);
+}
+
 static NV_STATUS push_begin_acquire_with_info(uvm_channel_t *channel,
                                               uvm_tracker_t *tracker,
                                               uvm_push_t *push,
@@ -234,6 +258,10 @@ NV_STATUS __uvm_push_begin_acquire_with_info(uvm_channel_manager_t *manager,
         UVM_ASSERT(dst_gpu != manager->gpu);
     }

+    status = wait_for_other_gpus_if_needed(tracker, manager->gpu);
+    if (status != NV_OK)
+        return status;
+
     status = push_reserve_channel(manager, type, dst_gpu, &channel);
     if (status != NV_OK)
         return status;
@@ -262,6 +290,10 @@ NV_STATUS __uvm_push_begin_acquire_on_channel_with_info(uvm_channel_t *channel,
     va_list args;
     NV_STATUS status;

+    status = wait_for_other_gpus_if_needed(tracker, uvm_channel_get_gpu(channel));
+    if (status != NV_OK)
+        return status;
+
     status = uvm_channel_reserve(channel, 1);
     if (status != NV_OK)
         return status;
@@ -276,9 +308,8 @@ NV_STATUS __uvm_push_begin_acquire_on_channel_with_info(uvm_channel_t *channel,
     return status;
 }

-__attribute__ ((format(printf, 7, 8)))
-NV_STATUS __uvm_push_begin_acquire_on_reserved_channel_with_info(uvm_channel_t *channel,
-                                                                 uvm_tracker_t *tracker,
+__attribute__ ((format(printf, 6, 7)))
+NV_STATUS __uvm_push_begin_on_reserved_channel_with_info(uvm_channel_t *channel,
                                                          uvm_push_t *push,
                                                          const char *filename,
                                                          const char *function,
@@ -289,7 +320,7 @@ NV_STATUS __uvm_push_begin_acquire_on_reserved_channel_with_info(uvm_channel_t *
     NV_STATUS status;

     va_start(args, format);
-    status = push_begin_acquire_with_info(channel, tracker, push, filename, function, line, format, args);
+    status = push_begin_acquire_with_info(channel, NULL, push, filename, function, line, format, args);
     va_end(args);

     return status;
@@ -308,6 +339,7 @@ bool uvm_push_info_is_tracking_acquires(void)
 void uvm_push_end(uvm_push_t *push)
 {
     uvm_push_flag_t flag;
+
     uvm_channel_end_push(push);

     flag = find_first_bit(push->flags, UVM_PUSH_FLAG_COUNT);
@@ -319,6 +351,7 @@ void uvm_push_end(uvm_push_t *push)
 NV_STATUS uvm_push_wait(uvm_push_t *push)
 {
     uvm_tracker_entry_t entry;
+
     uvm_push_get_tracker_entry(push, &entry);

     return uvm_tracker_wait_for_entry(&entry);
@@ -208,9 +208,8 @@ NV_STATUS __uvm_push_begin_acquire_on_channel_with_info(uvm_channel_t *channel,
                                                         const char *format, ...);

 // Internal helper for uvm_push_begin_on_reserved channel
-__attribute__ ((format(printf, 7, 8)))
-NV_STATUS __uvm_push_begin_acquire_on_reserved_channel_with_info(uvm_channel_t *channel,
-                                                                 uvm_tracker_t *tracker,
+__attribute__ ((format(printf, 6, 7)))
+NV_STATUS __uvm_push_begin_on_reserved_channel_with_info(uvm_channel_t *channel,
                                                          uvm_push_t *push,
                                                          const char *filename,
                                                          const char *function,
@@ -270,7 +269,7 @@ NV_STATUS __uvm_push_begin_acquire_on_reserved_channel_with_info(uvm_channel_t *
 // Locking: on success acquires the concurrent push semaphore until
 // uvm_push_end()
 #define uvm_push_begin_on_reserved_channel(channel, push, format, ...) \
-    __uvm_push_begin_acquire_on_reserved_channel_with_info((channel), NULL, (push), \
+    __uvm_push_begin_on_reserved_channel_with_info((channel), (push), \
         __FILE__, __FUNCTION__, __LINE__, (format), ##__VA_ARGS__)

 // Same as uvm_push_begin_on_channel except it also acquires the input tracker
@@ -324,6 +323,11 @@ static void uvm_push_get_tracker_entry(uvm_push_t *push, uvm_tracker_entry_t *en
 // Subsequently pushed GPU work will not start before all the work tracked by
 // tracker is complete.
 // Notably a NULL tracker is handled the same way as an empty tracker.
+//
+// If dependencies across GPUs are not allowed in the current configuration
+// (see uvm_push_allow_dependencies_across_gpus), the caller is responsible for
+// ensuring that the input tracker does not contain dependencies on GPUs other
+// than the one associated with the push.
 void uvm_push_acquire_tracker(uvm_push_t *push, uvm_tracker_t *tracker);

 // Set a push flag
@@ -480,4 +484,8 @@ static uvm_push_info_t *uvm_push_info_from_push(uvm_push_t *push)
     return &channel->push_infos[push->push_info_index];
 }

+// Returns true if a push is allowed to depend on pushes on other GPUs: work
+// dependencies across GPUs are permitted.
+bool uvm_push_allow_dependencies_across_gpus(void);
+
 #endif // __UVM_PUSH_H__
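The uvm_push changes above introduce a new rule: when cross-GPU dependencies are disallowed (Confidential Computing), a push may not acquire semaphores released by other GPUs, so those dependencies must be resolved on the CPU before the push begins; the __uvm_push_begin_acquire_*_with_info() entry points now do this internally through wait_for_other_gpus_if_needed(). A sketch of the same caller-side contract for code that acquires a tracker directly with uvm_push_acquire_tracker() (illustrative; the helper name is hypothetical, the UVM functions are the ones shown above):

    // Hypothetical helper, not a real call site: resolve cross-GPU
    // dependencies before beginning a push on 'channel'.
    static NV_STATUS example_prepare_push_dependencies(uvm_channel_t *channel, uvm_tracker_t *tracker)
    {
        // NULL trackers are treated as empty by the push layer.
        if (tracker == NULL)
            return NV_OK;

        if (uvm_push_allow_dependencies_across_gpus())
            return NV_OK;

        // Cross-GPU dependencies are not allowed: wait for them on the CPU.
        // Entries for the channel's own GPU can still be acquired by the
        // subsequent push.
        return uvm_tracker_wait_for_other_gpus(tracker, uvm_channel_get_gpu(channel));
    }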
@@ -182,7 +182,7 @@ static NV_STATUS uvm_range_group_va_range_migrate_block_locked(uvm_va_range_t *v
     NV_STATUS status;
     NV_STATUS tracker_status;
     uvm_gpu_id_t gpu_id;
-    uvm_processor_mask_t map_mask;
+    uvm_processor_mask_t *map_mask = &va_block_context->caller_processor_mask;
     uvm_va_policy_t *policy = uvm_va_range_get_policy(va_range);

     // Set the migration CPU NUMA node from the policy.
@@ -212,6 +212,7 @@ static NV_STATUS uvm_range_group_va_range_migrate_block_locked(uvm_va_range_t *v
                                               NULL,
                                               UVM_MAKE_RESIDENT_CAUSE_API_SET_RANGE_GROUP);
     }
+
     if (status != NV_OK)
         return status;

@@ -228,12 +229,12 @@ static NV_STATUS uvm_range_group_va_range_migrate_block_locked(uvm_va_range_t *v
         goto out;

     // 2- Map faultable SetAccessedBy GPUs.
-    uvm_processor_mask_and(&map_mask,
+    uvm_processor_mask_and(map_mask,
                            &uvm_va_range_get_policy(va_range)->accessed_by,
                            &va_range->va_space->can_access[uvm_id_value(policy->preferred_location)]);
-    uvm_processor_mask_andnot(&map_mask, &map_mask, &va_range->uvm_lite_gpus);
+    uvm_processor_mask_andnot(map_mask, map_mask, &va_range->uvm_lite_gpus);

-    for_each_gpu_id_in_mask(gpu_id, &map_mask) {
+    for_each_gpu_id_in_mask(gpu_id, map_mask) {
         status = uvm_va_block_add_mappings(va_block,
                                            va_block_context,
                                            gpu_id,
@@ -1538,12 +1538,18 @@ void uvm_tools_record_read_duplicate(uvm_va_block_t *va_block,
                                      uvm_va_block_region_t region,
                                      const uvm_page_mask_t *page_mask)
 {
+    uvm_processor_mask_t *resident_processors;
     uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);

     if (!va_space->tools.enabled)
         return;

+    resident_processors = uvm_processor_mask_cache_alloc();
+    if (!resident_processors)
+        return;
+
     uvm_down_read(&va_space->tools.lock);

     if (tools_is_event_enabled_version(va_space, UvmEventTypeReadDuplicate, UvmToolsEventQueueVersion_V1)) {
         UvmEventEntry_V1 entry;
         UvmEventReadDuplicateInfo_V1 *info_read_duplicate = &entry.eventData.readDuplicate;
@@ -1556,20 +1562,20 @@ void uvm_tools_record_read_duplicate(uvm_va_block_t *va_block,
         info_read_duplicate->timeStamp = NV_GETTIME();

         for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
-            uvm_processor_mask_t resident_processors;
             uvm_processor_id_t id;

             info_read_duplicate->address = uvm_va_block_cpu_page_address(va_block, page_index);
             info_read_duplicate->processors = 0;

-            uvm_va_block_page_resident_processors(va_block, page_index, &resident_processors);
-            for_each_id_in_mask(id, &resident_processors)
-                __set_bit(uvm_parent_id_value_from_processor_id(id),
-                          (unsigned long *)&info_read_duplicate->processors);
+            uvm_va_block_page_resident_processors(va_block, page_index, resident_processors);
+            for_each_id_in_mask(id, resident_processors)
+                __set_bit(uvm_parent_id_value_from_processor_id(id), (unsigned long *)&info_read_duplicate->processors);

             uvm_tools_record_event_v1(va_space, &entry);
         }
     }

     if (tools_is_event_enabled_version(va_space, UvmEventTypeReadDuplicate, UvmToolsEventQueueVersion_V2)) {
         UvmEventEntry_V2 entry;
         UvmEventReadDuplicateInfo_V2 *info_read_duplicate = &entry.eventData.readDuplicate;
@@ -1582,21 +1588,23 @@ void uvm_tools_record_read_duplicate(uvm_va_block_t *va_block,
         info_read_duplicate->timeStamp = NV_GETTIME();

         for_each_va_block_page_in_region_mask(page_index, page_mask, region) {
-            uvm_processor_mask_t resident_processors;
             uvm_processor_id_t id;

             info_read_duplicate->address = uvm_va_block_cpu_page_address(va_block, page_index);
             memset(info_read_duplicate->processors, 0, sizeof(info_read_duplicate->processors));

-            uvm_va_block_page_resident_processors(va_block, page_index, &resident_processors);
-            for_each_id_in_mask(id, &resident_processors)
-                __set_bit(uvm_id_value(id),
-                          (unsigned long *)info_read_duplicate->processors);
+            uvm_va_block_page_resident_processors(va_block, page_index, resident_processors);
+            for_each_id_in_mask(id, resident_processors)
+                __set_bit(uvm_id_value(id), (unsigned long *)info_read_duplicate->processors);

             uvm_tools_record_event_v2(va_space, &entry);
         }
     }

     uvm_up_read(&va_space->tools.lock);
+
+    uvm_processor_mask_cache_free(resident_processors);
 }

 void uvm_tools_record_read_duplicate_invalidate(uvm_va_block_t *va_block,
@@ -200,18 +200,26 @@ NV_STATUS uvm_tracker_add_tracker(uvm_tracker_t *dst, uvm_tracker_t *src)
     NV_STATUS status;
     uvm_tracker_entry_t *src_entry;

+    UVM_ASSERT(dst != NULL);
+
+    if (src == NULL)
+        return NV_OK;
+
     if (src == dst)
         return NV_OK;

+    if (uvm_tracker_is_empty(src))
+        return NV_OK;
+
     status = uvm_tracker_reserve(dst, src->size);
     if (status == NV_ERR_NO_MEMORY) {
         uvm_tracker_remove_completed(dst);
         uvm_tracker_remove_completed(src);
         status = reserve_for_entries_from_tracker(dst, src);
     }
-    if (status != NV_OK) {
+
+    if (status != NV_OK)
         return status;
-    }

     for_each_tracker_entry(src_entry, src) {
         status = uvm_tracker_add_entry(dst, src_entry);
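With the guards added to uvm_tracker_add_tracker() above, a NULL or empty source tracker is now a harmless no-op, while the destination must still be valid. A small usage sketch under that assumption (the wrapper name is hypothetical):

    // Hypothetical wrapper: merge an optional tracker into an output tracker.
    // Since this change, no caller-side NULL or emptiness checks are needed
    // for 'optional_tracker'; 'out_tracker' must still be non-NULL.
    static NV_STATUS example_collect_work(uvm_tracker_t *out_tracker, uvm_tracker_t *optional_tracker)
    {
        return uvm_tracker_add_tracker(out_tracker, optional_tracker);
    }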
@@ -9341,35 +9341,43 @@ void uvm_va_block_unmap_preferred_location_uvm_lite(uvm_va_block_t *va_block, uv
 //
 // Notably the caller needs to support allocation-retry as
 // uvm_va_block_migrate_locked() requires that.
-static NV_STATUS block_evict_pages_from_gpu(uvm_va_block_t *va_block,
-                                            uvm_va_block_context_t *va_block_context,
-                                            uvm_gpu_t *gpu)
+static NV_STATUS block_evict_pages_from_gpu(uvm_va_block_t *va_block, uvm_gpu_t *gpu, struct mm_struct *mm)
 {
     NV_STATUS status = NV_OK;
     const uvm_page_mask_t *resident = uvm_va_block_resident_mask_get(va_block, gpu->id, NUMA_NO_NODE);
     uvm_va_block_region_t region = uvm_va_block_region_from_block(va_block);
     uvm_va_block_region_t subregion;
+    uvm_service_block_context_t *service_context;
+
+    service_context = uvm_service_block_context_alloc(mm);
+    if (!service_context)
+        return NV_ERR_NO_MEMORY;

     // Move all subregions resident on the GPU to the CPU
     for_each_va_block_subregion_in_mask(subregion, resident, region) {
         if (uvm_va_block_is_hmm(va_block)) {
-            status = uvm_hmm_va_block_evict_pages_from_gpu(va_block, gpu, va_block_context, resident, subregion);
+            status = uvm_hmm_va_block_evict_pages_from_gpu(va_block, gpu, service_context, resident, subregion);
         }
         else {
             status = uvm_va_block_migrate_locked(va_block,
                                                  NULL,
-                                                 va_block_context,
+                                                 service_context,
                                                  subregion,
                                                  UVM_ID_CPU,
                                                  UVM_MIGRATE_MODE_MAKE_RESIDENT_AND_MAP,
                                                  NULL);
         }

         if (status != NV_OK)
-            return status;
+            break;
     }

-    UVM_ASSERT(!uvm_processor_mask_test(&va_block->resident, gpu->id));
-    return NV_OK;
+    if (status == NV_OK)
+        UVM_ASSERT(!uvm_processor_mask_test(&va_block->resident, gpu->id));
+
+    uvm_service_block_context_free(service_context);
+
+    return status;
 }

 void uvm_va_block_unregister_gpu_locked(uvm_va_block_t *va_block, uvm_gpu_t *gpu, struct mm_struct *mm)
@@ -9393,7 +9401,7 @@ void uvm_va_block_unregister_gpu_locked(uvm_va_block_t *va_block, uvm_gpu_t *gpu
     // we don't rely on any state of the block across the call.
     // TODO: Bug 4494289: Prevent setting the global error on allocation
     // failures.
-    status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, NULL, block_evict_pages_from_gpu(va_block, va_block_context, gpu));
+    status = UVM_VA_BLOCK_RETRY_LOCKED(va_block, NULL, block_evict_pages_from_gpu(va_block, gpu, mm));
     if (status != NV_OK) {
         UVM_ERR_PRINT("Failed to evict GPU pages on GPU unregister: %s, GPU %s\n",
                       nvstatusToString(status),
||||||
@ -12981,6 +12989,7 @@ NV_STATUS uvm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
|||||||
uvm_va_block_region_t chunk_region;
|
uvm_va_block_region_t chunk_region;
|
||||||
size_t num_gpu_chunks = block_num_gpu_chunks(va_block, gpu);
|
size_t num_gpu_chunks = block_num_gpu_chunks(va_block, gpu);
|
||||||
size_t chunks_to_evict = 0;
|
size_t chunks_to_evict = 0;
|
||||||
|
uvm_service_block_context_t *service_context;
|
||||||
uvm_va_block_context_t *block_context;
|
uvm_va_block_context_t *block_context;
|
||||||
uvm_page_mask_t *pages_to_evict;
|
uvm_page_mask_t *pages_to_evict;
|
||||||
uvm_va_block_test_t *va_block_test = uvm_va_block_get_test(va_block);
|
uvm_va_block_test_t *va_block_test = uvm_va_block_get_test(va_block);
|
||||||
@ -13008,13 +13017,17 @@ NV_STATUS uvm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
|||||||
// allocations. If mappings need to be created,
|
// allocations. If mappings need to be created,
|
||||||
// block_add_eviction_mappings() will be scheduled below.
|
// block_add_eviction_mappings() will be scheduled below.
|
||||||
mm = uvm_va_space_mm_retain(va_space);
|
mm = uvm_va_space_mm_retain(va_space);
|
||||||
block_context = uvm_va_block_context_alloc(mm);
|
|
||||||
if (!block_context) {
|
service_context = uvm_service_block_context_alloc(mm);
|
||||||
|
if (!service_context) {
|
||||||
if (mm)
|
if (mm)
|
||||||
uvm_va_space_mm_release(va_space);
|
uvm_va_space_mm_release(va_space);
|
||||||
|
|
||||||
return NV_ERR_NO_MEMORY;
|
return NV_ERR_NO_MEMORY;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
block_context = service_context->block_context;
|
||||||
|
|
||||||
pages_to_evict = &block_context->caller_page_mask;
|
pages_to_evict = &block_context->caller_page_mask;
|
||||||
uvm_page_mask_zero(pages_to_evict);
|
uvm_page_mask_zero(pages_to_evict);
|
||||||
chunk_region.outer = 0;
|
chunk_region.outer = 0;
|
||||||
@ -13051,7 +13064,7 @@ NV_STATUS uvm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
|||||||
|
|
||||||
if (uvm_va_block_is_hmm(va_block)) {
|
if (uvm_va_block_is_hmm(va_block)) {
|
||||||
status = uvm_hmm_va_block_evict_chunks(va_block,
|
status = uvm_hmm_va_block_evict_chunks(va_block,
|
||||||
block_context,
|
service_context,
|
||||||
pages_to_evict,
|
pages_to_evict,
|
||||||
uvm_va_block_region_from_block(va_block),
|
uvm_va_block_region_from_block(va_block),
|
||||||
&accessed_by_set);
|
&accessed_by_set);
|
||||||
@ -13168,7 +13181,8 @@ NV_STATUS uvm_va_block_evict_chunks(uvm_va_block_t *va_block,
|
|||||||
}
|
}
|
||||||
|
|
||||||
out:
|
out:
|
||||||
uvm_va_block_context_free(block_context);
|
uvm_service_block_context_free(service_context);
|
||||||
|
|
||||||
if (mm)
|
if (mm)
|
||||||
uvm_va_space_mm_release(va_space);
|
uvm_va_space_mm_release(va_space);
|
||||||
|
|
||||||
|
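The hunks above move block_evict_pages_from_gpu() and uvm_va_block_evict_chunks() from a caller-supplied uvm_va_block_context_t to a uvm_service_block_context_t that the function allocates and releases itself. A minimal userspace sketch of that ownership pattern, using plain calloc/free stand-ins rather than the UVM allocators:

    /* Sketch only: the context type and alloc/free helpers below are
     * illustrative stand-ins, not the real UVM API. */
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct {
        int dummy; /* stands in for the embedded block context, masks, etc. */
    } service_context_t;

    static service_context_t *service_context_alloc(void)
    {
        return calloc(1, sizeof(service_context_t));
    }

    static void service_context_free(service_context_t *ctx)
    {
        free(ctx);
    }

    static int evict_pages(void)
    {
        int status = 0;
        service_context_t *ctx = service_context_alloc();

        if (!ctx)
            return -1; /* mirrors the NV_ERR_NO_MEMORY early return */

        /* ... migrate each subregion using ctx; on failure, fall through ... */

        service_context_free(ctx); /* released on success and failure alike */
        return status;
    }

    int main(void)
    {
        printf("evict_pages -> %d\n", evict_pages());
        return 0;
    }

The single free before the final return mirrors how the new code changes the early "return status" into a "break" so every exit path passes through the cleanup.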
@@ -1504,17 +1504,19 @@ uvm_gpu_chunk_t *uvm_va_block_lookup_gpu_chunk(uvm_va_block_t *va_block, uvm_gpu
 // The caller needs to handle allocation-retry. va_block_retry can be NULL if
 // the destination is the CPU.
 //
-// va_block_context must not be NULL and policy for the region must match. See
-// the comments for uvm_va_block_check_policy_is_valid(). If va_block is a HMM
-// block, va_block_context->hmm.vma must be valid. See the comments for
+// service_context and service_context->block_context must not be NULL and
+// policy for the region must match. See the comments for
+// uvm_va_block_check_policy_is_valid(). If va_block is a HMM block,
+// service->block_context->hmm.vma must be valid. See the comments for
 // uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
 //
-// LOCKING: The caller must hold the va_block lock. If va_block_context->mm !=
-//          NULL, va_block_context->mm->mmap_lock must be held in at least
-//          read mode.
+// LOCKING: The caller must hold the va_block lock. If
+//          service_context->va_block_context->mm != NULL,
+//          service_context->va_block_context->mm->mmap_lock must be held in at
+//          least read mode.
 NV_STATUS uvm_va_block_migrate_locked(uvm_va_block_t *va_block,
                                       uvm_va_block_retry_t *va_block_retry,
-                                      uvm_va_block_context_t *va_block_context,
+                                      uvm_service_block_context_t *service_context,
                                       uvm_va_block_region_t region,
                                       uvm_processor_id_t dest_id,
                                       uvm_migrate_mode_t mode,
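The revised comment block describes a two-level context: callers now hand uvm_va_block_migrate_locked() a service context whose embedded block context (and its mm) must satisfy the stated locking rules. A small sketch of that embedding, with illustrative struct names rather than the real UVM definitions:

    /* Sketch only: block_context/service_context here are simplified models. */
    #include <assert.h>
    #include <stddef.h>

    struct block_context { void *mm; };
    struct service_context { struct block_context *block_context; };

    static int migrate_locked(struct service_context *service_context)
    {
        /* Both pointers must be valid, mirroring the updated comment. */
        assert(service_context && service_context->block_context);
        /* If block_context->mm != NULL, its mmap_lock must already be held in
         * at least read mode by the caller (not modeled here). */
        return 0;
    }

    int main(void)
    {
        struct block_context bc = { .mm = NULL };
        struct service_context sc = { .block_context = &bc };
        return migrate_locked(&sc);
    }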
@@ -167,6 +167,10 @@ typedef struct
     // block APIs.
     uvm_page_mask_t caller_page_mask;
 
+    // Available as scratch space for the caller. Not used by any of the VA
+    // block APIs.
+    uvm_processor_mask_t caller_processor_mask;
+
     // Available as scratch space for the internal APIs. This is like a caller-
     // save register: it shouldn't be used across function calls which also take
     // this block_context.
@@ -180,9 +184,15 @@ typedef struct
     // this va_block_context.
     uvm_processor_mask_t scratch_processor_mask;
 
-    // Temporary mask in block_add_eviction_mappings().
+    // Temporary mask used in block_add_eviction_mappings().
     uvm_processor_mask_t map_processors_eviction;
 
+    // Temporary mask used in uvm_perf_thrashing_unmap_remote_pinned_pages_all.
+    uvm_processor_mask_t unmap_processors_mask;
+
+    // Temporary mask used in thrashing_processors_have_fast_access().
+    uvm_processor_mask_t fast_access_mask;
+
     // State used by uvm_va_block_make_resident
     struct uvm_make_resident_context_struct
     {
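The new members follow the "caller-save scratch mask" idea already documented in this struct: preallocated temporaries live in the context so hot paths avoid large stack objects, but any callee that also receives the context may clobber them. A compilable sketch of the convention, with simplified stand-in types:

    /* Sketch only: processor_mask_t and block_context_t are toy models. */
    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    typedef struct { uint64_t bits; } processor_mask_t;

    typedef struct {
        processor_mask_t caller_mask;   /* preserved for the caller          */
        processor_mask_t scratch_mask;  /* may be clobbered by any callee    */
    } block_context_t;

    static void helper(block_context_t *ctx)
    {
        /* Free to reuse the scratch mask; must not touch caller_mask. */
        memset(&ctx->scratch_mask, 0, sizeof(ctx->scratch_mask));
        ctx->scratch_mask.bits = 0x2;
    }

    int main(void)
    {
        block_context_t ctx = { .caller_mask = { .bits = 0x1 } };
        helper(&ctx);
        printf("caller=0x%llx scratch=0x%llx\n",
               (unsigned long long)ctx.caller_mask.bits,
               (unsigned long long)ctx.scratch_mask.bits);
        return 0;
    }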
@@ -222,6 +222,7 @@ NV_STATUS uvm_va_range_create_external(uvm_va_space_t *va_space,
 {
     NV_STATUS status;
     uvm_va_range_t *va_range = NULL;
+    uvm_processor_mask_t *retained_mask = NULL;
     NvU32 i;
 
     status = uvm_va_range_alloc_reclaim(va_space,
@@ -233,6 +234,16 @@ NV_STATUS uvm_va_range_create_external(uvm_va_space_t *va_space,
     if (status != NV_OK)
         return status;
 
+    UVM_ASSERT(!va_range->external.retained_mask);
+
+    retained_mask = uvm_processor_mask_cache_alloc();
+    if (!retained_mask) {
+        status = NV_ERR_NO_MEMORY;
+        goto error;
+    }
+
+    va_range->external.retained_mask = retained_mask;
+
     for (i = 0; i < ARRAY_SIZE(va_range->external.gpu_ranges); i++) {
         uvm_mutex_init(&va_range->external.gpu_ranges[i].lock, UVM_LOCK_ORDER_EXT_RANGE_TREE);
         uvm_range_tree_init(&va_range->external.gpu_ranges[i].tree);
@@ -249,6 +260,7 @@ NV_STATUS uvm_va_range_create_external(uvm_va_space_t *va_space,
 
 error:
     uvm_va_range_destroy(va_range, NULL);
+
     return status;
 }
 
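The retained_mask allocation above follows the usual allocate-then-unwind shape: allocate, hand ownership to the object, and let the shared error label destroy whatever was created so far. A compilable sketch of the same shape, with plain malloc/free standing in for uvm_processor_mask_cache_alloc() and _free():

    /* Sketch only: range_t and the helpers are illustrative, not the UVM API. */
    #include <stdlib.h>

    typedef struct { unsigned long *retained_mask; } range_t;

    static void range_destroy(range_t *r)
    {
        if (!r)
            return;
        free(r->retained_mask);
        free(r);
    }

    static int range_create(range_t **out)
    {
        int status = 0;
        range_t *r = calloc(1, sizeof(*r));
        if (!r)
            return -1;

        r->retained_mask = calloc(1, sizeof(unsigned long));
        if (!r->retained_mask) {
            status = -1;
            goto error;
        }

        *out = r;
        return 0;

    error:
        range_destroy(r); /* one unwind path owns every partial allocation */
        return status;
    }

    int main(void)
    {
        range_t *r = NULL;
        int status = range_create(&r);
        range_destroy(r);
        return status;
    }

Handing the mask to the range object before any further failure point is what lets the matching free in the destroy path (next hunk) stay unconditional.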
@@ -438,6 +450,8 @@ static void uvm_va_range_destroy_external(uvm_va_range_t *va_range, struct list_
 {
     uvm_gpu_t *gpu;
 
+    uvm_processor_mask_cache_free(va_range->external.retained_mask);
+
     if (uvm_processor_mask_empty(&va_range->external.mapped_gpus))
         return;
 
@@ -1318,7 +1332,6 @@ static NV_STATUS range_unmap_mask(uvm_va_range_t *va_range,
     if (uvm_processor_mask_empty(mask))
         return NV_OK;
 
-
     for_each_va_block_in_va_range(va_range, block) {
         NV_STATUS status;
         uvm_va_block_region_t region = uvm_va_block_region_from_block(block);
@@ -1338,14 +1351,19 @@ static NV_STATUS range_unmap_mask(uvm_va_range_t *va_range,
 
 static NV_STATUS range_unmap(uvm_va_range_t *va_range, uvm_processor_id_t processor, uvm_tracker_t *out_tracker)
 {
-    uvm_processor_mask_t mask;
+    uvm_processor_mask_t *mask;
+    uvm_va_space_t *va_space = va_range->va_space;
+
+    uvm_assert_rwsem_locked_write(&va_space->lock);
+
+    mask = &va_space->unmap_mask;
 
     UVM_ASSERT_MSG(va_range->type == UVM_VA_RANGE_TYPE_MANAGED, "type 0x%x\n", va_range->type);
 
-    uvm_processor_mask_zero(&mask);
-    uvm_processor_mask_set(&mask, processor);
+    uvm_processor_mask_zero(mask);
+    uvm_processor_mask_set(mask, processor);
 
-    return range_unmap_mask(va_range, &mask, out_tracker);
+    return range_unmap_mask(va_range, mask, out_tracker);
 }
 
 static NV_STATUS range_map_uvm_lite_gpus(uvm_va_range_t *va_range, uvm_tracker_t *out_tracker)
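range_unmap() now borrows va_space->unmap_mask instead of placing a uvm_processor_mask_t on the kernel stack, which is safe because the function only runs with the VA space lock held for writing. A simplified userspace model of the idea; the real code asserts that the lock is already held, while this sketch takes a pthread rwlock directly, and the mask type is a toy:

    #include <pthread.h>
    #include <stdint.h>

    typedef struct { uint64_t bits[4]; } processor_mask_t; /* deliberately large */

    typedef struct {
        pthread_rwlock_t lock;
        processor_mask_t unmap_mask; /* only touched with the lock held for writing */
    } va_space_t;

    static void range_unmap_one(va_space_t *va_space, unsigned processor)
    {
        pthread_rwlock_wrlock(&va_space->lock);

        processor_mask_t *mask = &va_space->unmap_mask; /* no big stack object */
        for (unsigned i = 0; i < 4; i++)
            mask->bits[i] = 0;
        mask->bits[processor / 64] |= 1ull << (processor % 64);

        /* ... unmap every block for the processors set in *mask ... */

        pthread_rwlock_unlock(&va_space->lock);
    }

    int main(void)
    {
        va_space_t vs = { .lock = PTHREAD_RWLOCK_INITIALIZER };
        range_unmap_one(&vs, 3);
        return 0;
    }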
@@ -1434,10 +1452,10 @@ NV_STATUS uvm_va_range_set_preferred_location(uvm_va_range_t *va_range,
                                               struct mm_struct *mm,
                                               uvm_tracker_t *out_tracker)
 {
-    NV_STATUS status;
-    uvm_processor_mask_t all_uvm_lite_gpus;
-    uvm_processor_mask_t new_uvm_lite_gpus;
-    uvm_processor_mask_t set_accessed_by_processors;
+    NV_STATUS status = NV_OK;
+    uvm_processor_mask_t *all_uvm_lite_gpus = NULL;
+    uvm_processor_mask_t *new_uvm_lite_gpus = NULL;
+    uvm_processor_mask_t *set_accessed_by_processors = NULL;
     uvm_range_group_range_iter_t iter;
     uvm_range_group_range_t *rgr = NULL;
     uvm_va_space_t *va_space = va_range->va_space;
@@ -1448,9 +1466,27 @@ NV_STATUS uvm_va_range_set_preferred_location(uvm_va_range_t *va_range,
     uvm_assert_rwsem_locked_write(&va_space->lock);
     UVM_ASSERT(va_range->type == UVM_VA_RANGE_TYPE_MANAGED);
 
+    all_uvm_lite_gpus = uvm_processor_mask_cache_alloc();
+    if (!all_uvm_lite_gpus) {
+        status = NV_ERR_NO_MEMORY;
+        goto out;
+    }
+
+    new_uvm_lite_gpus = uvm_processor_mask_cache_alloc();
+    if (!new_uvm_lite_gpus) {
+        status = NV_ERR_NO_MEMORY;
+        goto out;
+    }
+
+    set_accessed_by_processors = uvm_processor_mask_cache_alloc();
+    if (!set_accessed_by_processors) {
+        status = NV_ERR_NO_MEMORY;
+        goto out;
+    }
+
     va_range_policy = uvm_va_range_get_policy(va_range);
     if (uvm_va_policy_preferred_location_equal(va_range_policy, preferred_location, preferred_cpu_nid))
-        return NV_OK;
+        goto out;
 
     // Mark all range group ranges within this VA range as migrated since the preferred location has changed.
     uvm_range_group_for_each_range_in(rgr, va_space, va_range->node.start, va_range->node.end) {
@@ -1463,14 +1499,15 @@ NV_STATUS uvm_va_range_set_preferred_location(uvm_va_range_t *va_range,
     // Calculate the new UVM-Lite GPUs mask, but don't update va_range state so
     // that we can keep block_page_check_mappings() happy while updating the
     // mappings.
-    calc_uvm_lite_gpus_mask(va_space, preferred_location, &va_range_policy->accessed_by, &new_uvm_lite_gpus);
+    calc_uvm_lite_gpus_mask(va_space, preferred_location, &va_range_policy->accessed_by, new_uvm_lite_gpus);
 
     // If the range contains non-migratable range groups, check that new UVM-Lite GPUs
     // can all map the new preferred location.
     if (!uvm_range_group_all_migratable(va_space, va_range->node.start, va_range->node.end) &&
         UVM_ID_IS_VALID(preferred_location) &&
-        !uvm_processor_mask_subset(&new_uvm_lite_gpus, &va_space->accessible_from[uvm_id_value(preferred_location)])) {
-        return NV_ERR_INVALID_DEVICE;
+        !uvm_processor_mask_subset(new_uvm_lite_gpus, &va_space->accessible_from[uvm_id_value(preferred_location)])) {
+        status = NV_ERR_INVALID_DEVICE;
+        goto out;
     }
 
     if (UVM_ID_IS_INVALID(preferred_location)) {
@@ -1479,7 +1516,7 @@ NV_STATUS uvm_va_range_set_preferred_location(uvm_va_range_t *va_range,
                 // Clear the range group assocation for any unmigratable ranges if there is no preferred location
                 status = uvm_range_group_assign_range(va_space, NULL, iter.start, iter.end);
                 if (status != NV_OK)
-                    return status;
+                    goto out;
             }
         }
     }
@@ -1489,33 +1526,33 @@ NV_STATUS uvm_va_range_set_preferred_location(uvm_va_range_t *va_range,
     //   have stale mappings to the old preferred location.
     // - GPUs that will continue to be UVM-Lite GPUs or are new UVM-Lite GPUs
     //   need to be unmapped so that the new preferred location can be mapped.
-    uvm_processor_mask_or(&all_uvm_lite_gpus, &va_range->uvm_lite_gpus, &new_uvm_lite_gpus);
-    status = range_unmap_mask(va_range, &all_uvm_lite_gpus, out_tracker);
+    uvm_processor_mask_or(all_uvm_lite_gpus, &va_range->uvm_lite_gpus, new_uvm_lite_gpus);
+    status = range_unmap_mask(va_range, all_uvm_lite_gpus, out_tracker);
     if (status != NV_OK)
-        return status;
+        goto out;
 
     // GPUs that stop being UVM-Lite, but are in the accessed_by mask need to
     // have any possible mappings established.
-    uvm_processor_mask_andnot(&set_accessed_by_processors, &va_range->uvm_lite_gpus, &new_uvm_lite_gpus);
+    uvm_processor_mask_andnot(set_accessed_by_processors, &va_range->uvm_lite_gpus, new_uvm_lite_gpus);
 
     // A GPU which had been in UVM-Lite mode before must still be in UVM-Lite
     // mode if it is the new preferred location. Otherwise we'd have to be more
     // careful below to not establish remote mappings to the new preferred
     // location.
     if (UVM_ID_IS_GPU(preferred_location))
-        UVM_ASSERT(!uvm_processor_mask_test(&set_accessed_by_processors, preferred_location));
+        UVM_ASSERT(!uvm_processor_mask_test(set_accessed_by_processors, preferred_location));
 
     // The old preferred location should establish new remote mappings if it has
     // accessed-by set.
     if (UVM_ID_IS_VALID(va_range_policy->preferred_location))
-        uvm_processor_mask_set(&set_accessed_by_processors, va_range_policy->preferred_location);
+        uvm_processor_mask_set(set_accessed_by_processors, va_range_policy->preferred_location);
 
-    uvm_processor_mask_and(&set_accessed_by_processors, &set_accessed_by_processors, &va_range_policy->accessed_by);
+    uvm_processor_mask_and(set_accessed_by_processors, set_accessed_by_processors, &va_range_policy->accessed_by);
 
     // Now update the va_range state
     va_range_policy->preferred_location = preferred_location;
     va_range_policy->preferred_nid = preferred_cpu_nid;
-    uvm_processor_mask_copy(&va_range->uvm_lite_gpus, &new_uvm_lite_gpus);
+    uvm_processor_mask_copy(&va_range->uvm_lite_gpus, new_uvm_lite_gpus);
 
     va_block_context = uvm_va_space_block_context(va_space, mm);
 
@@ -1523,10 +1560,10 @@ NV_STATUS uvm_va_range_set_preferred_location(uvm_va_range_t *va_range,
         uvm_processor_id_t id;
         uvm_va_block_region_t region = uvm_va_block_region_from_block(va_block);
 
-        for_each_id_in_mask(id, &set_accessed_by_processors) {
+        for_each_id_in_mask(id, set_accessed_by_processors) {
             status = uvm_va_block_set_accessed_by(va_block, va_block_context, id);
             if (status != NV_OK)
-                return status;
+                goto out;
         }
 
         // Also, mark CPU pages as dirty and remove remote mappings from the new
@@ -1549,13 +1586,20 @@ NV_STATUS uvm_va_range_set_preferred_location(uvm_va_range_t *va_range,
         uvm_mutex_unlock(&va_block->lock);
 
         if (status != NV_OK)
-            return status;
+            goto out;
     }
 
     // And lastly map all of the current UVM-Lite GPUs to the resident pages on
     // the new preferred location. Anything that's not resident right now will
     // get mapped on the next PreventMigration().
-    return range_map_uvm_lite_gpus(va_range, out_tracker);
+    status = range_map_uvm_lite_gpus(va_range, out_tracker);
+
+out:
+    uvm_processor_mask_cache_free(set_accessed_by_processors);
+    uvm_processor_mask_cache_free(new_uvm_lite_gpus);
+    uvm_processor_mask_cache_free(all_uvm_lite_gpus);
+
+    return status;
 }
 
 NV_STATUS uvm_va_range_set_accessed_by(uvm_va_range_t *va_range,
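uvm_va_range_set_preferred_location() now allocates its three temporary masks from the mask cache and funnels every early return through one out: label that frees them. A sketch of that cleanup shape; malloc/free stand in for the mask cache, and the free helper is assumed to tolerate NULL, as the error paths above imply:

    #include <stdlib.h>

    static void mask_cache_free(void *mask)
    {
        free(mask); /* free(NULL) is a no-op, matching how the out: path is used */
    }

    static int set_preferred_location(void)
    {
        int status = 0;
        void *all_lite = NULL, *new_lite = NULL, *accessed_by = NULL;

        all_lite = malloc(64);
        if (!all_lite) { status = -1; goto out; }

        new_lite = malloc(64);
        if (!new_lite) { status = -1; goto out; }

        accessed_by = malloc(64);
        if (!accessed_by) { status = -1; goto out; }

        /* ... compute masks, unmap, remap; any failure sets status and
         *     falls through to the same cleanup ... */

    out:
        mask_cache_free(accessed_by);
        mask_cache_free(new_lite);
        mask_cache_free(all_lite);
        return status;
    }

    int main(void)
    {
        return set_preferred_location();
    }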
@@ -1563,50 +1607,60 @@ NV_STATUS uvm_va_range_set_accessed_by(uvm_va_range_t *va_range,
                                        struct mm_struct *mm,
                                        uvm_tracker_t *out_tracker)
 {
-    NV_STATUS status;
+    NV_STATUS status = NV_OK;
     uvm_va_block_t *va_block;
-    uvm_processor_mask_t new_uvm_lite_gpus;
     uvm_va_space_t *va_space = va_range->va_space;
     uvm_va_policy_t *policy = uvm_va_range_get_policy(va_range);
-    uvm_va_block_context_t *va_block_context;
+    uvm_va_block_context_t *va_block_context = uvm_va_space_block_context(va_space, mm);
+    uvm_processor_mask_t *new_uvm_lite_gpus;
+
+    // va_block_context->scratch_processor_mask cannot be used since
+    // range_unmap() calls uvm_va_space_block_context(), which re-
+    // initializes the VA block context structure.
+    new_uvm_lite_gpus = uvm_processor_mask_cache_alloc();
+    if (!new_uvm_lite_gpus)
+        return NV_ERR_NO_MEMORY;
 
     // If the range belongs to a non-migratable range group and that processor_id is a non-faultable GPU,
     // check it can map the preferred location
     if (!uvm_range_group_all_migratable(va_space, va_range->node.start, va_range->node.end) &&
         UVM_ID_IS_GPU(processor_id) &&
         !uvm_processor_mask_test(&va_space->faultable_processors, processor_id) &&
-        !uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(policy->preferred_location)], processor_id))
-        return NV_ERR_INVALID_DEVICE;
+        !uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(policy->preferred_location)], processor_id)) {
+        status = NV_ERR_INVALID_DEVICE;
+        goto out;
+    }
 
     uvm_processor_mask_set(&policy->accessed_by, processor_id);
 
     // If a GPU is already a UVM-Lite GPU then there is nothing else to do.
     if (uvm_processor_mask_test(&va_range->uvm_lite_gpus, processor_id))
-        return NV_OK;
+        goto out;
 
     // Calculate the new UVM-Lite GPUs mask, but don't update it in the va range
     // yet so that we can keep block_page_check_mappings() happy while updating
     // the mappings.
-    calc_uvm_lite_gpus_mask(va_space, policy->preferred_location, &policy->accessed_by, &new_uvm_lite_gpus);
+    calc_uvm_lite_gpus_mask(va_space, policy->preferred_location, &policy->accessed_by, new_uvm_lite_gpus);
 
-    if (uvm_processor_mask_test(&new_uvm_lite_gpus, processor_id)) {
+    if (uvm_processor_mask_test(new_uvm_lite_gpus, processor_id)) {
         // GPUs that become UVM-Lite GPUs need to unmap everything so that they
         // can map the preferred location.
         status = range_unmap(va_range, processor_id, out_tracker);
         if (status != NV_OK)
-            return status;
+            goto out;
     }
 
-    uvm_processor_mask_copy(&va_range->uvm_lite_gpus, &new_uvm_lite_gpus);
-    va_block_context = uvm_va_space_block_context(va_space, mm);
+    uvm_processor_mask_copy(&va_range->uvm_lite_gpus, new_uvm_lite_gpus);
 
     for_each_va_block_in_va_range(va_range, va_block) {
         status = uvm_va_block_set_accessed_by(va_block, va_block_context, processor_id);
         if (status != NV_OK)
-            return status;
+            goto out;
     }
 
-    return NV_OK;
+out:
+    uvm_processor_mask_cache_free(new_uvm_lite_gpus);
+    return status;
 }
 
 void uvm_va_range_unset_accessed_by(uvm_va_range_t *va_range,
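The new comment explains why this function cannot reuse va_block_context->scratch_processor_mask: range_unmap() calls back into uvm_va_space_block_context(), which re-initializes the shared block context and would wipe the scratch state mid-flight. The hazard in miniature, with simplified types:

    /* Sketch only: block_context_t and the helpers are toy stand-ins. */
    #include <assert.h>
    #include <string.h>

    typedef struct { unsigned long scratch_mask; } block_context_t;

    static void context_init(block_context_t *ctx)
    {
        memset(ctx, 0, sizeof(*ctx)); /* wipes any scratch state */
    }

    static void range_unmap(block_context_t *ctx)
    {
        context_init(ctx); /* the callee re-initializes the shared context */
    }

    int main(void)
    {
        block_context_t ctx;
        context_init(&ctx);

        ctx.scratch_mask = 0xff;       /* state we would like to keep...        */
        range_unmap(&ctx);             /* ...but the call clobbers it           */
        assert(ctx.scratch_mask == 0); /* hence the separate heap allocation    */
        return 0;
    }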
@@ -252,6 +252,10 @@ typedef struct
     // range because each GPU is able to map a completely different set of
     // allocations to the same VA range.
     uvm_ext_gpu_range_tree_t gpu_ranges[UVM_ID_MAX_GPUS];
+
+    // Dynamically allocated page mask allocated in
+    // uvm_va_range_create_external() and used and freed in uvm_free().
+    uvm_processor_mask_t *retained_mask;
 } uvm_va_range_external_t;
 
 // va_range state when va_range.type == UVM_VA_RANGE_TYPE_CHANNEL. This
@@ -119,15 +119,27 @@ static NV_STATUS register_gpu_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu)
 static bool va_space_check_processors_masks(uvm_va_space_t *va_space)
 {
     uvm_processor_id_t processor;
-    uvm_processor_mask_t processors;
+    uvm_va_block_context_t *block_context = uvm_va_space_block_context(va_space, NULL);
+    uvm_processor_mask_t *processors = &block_context->scratch_processor_mask;
 
     uvm_assert_rwsem_locked_write(&va_space->lock);
 
-    uvm_processor_mask_copy(&processors, &va_space->registered_gpus);
-    uvm_processor_mask_set(&processors, UVM_ID_CPU);
+    uvm_processor_mask_copy(processors, &va_space->registered_gpus);
+    uvm_processor_mask_set(processors, UVM_ID_CPU);
 
-    for_each_id_in_mask(processor, &processors) {
+    for_each_id_in_mask(processor, processors) {
         uvm_processor_id_t other_processor;
+        bool check_can_copy_from = true;
+
+        if (UVM_ID_IS_GPU(processor)) {
+            uvm_gpu_t *gpu = uvm_va_space_get_gpu(va_space, processor);
+
+            // Peer copies between two processors can be disabled even when they
+            // are NvLink peers, or there is HW support for atomics between
+            // them.
+            if (gpu->parent->peer_copy_mode == UVM_GPU_PEER_COPY_MODE_UNSUPPORTED)
+                check_can_copy_from = false;
+        }
 
         UVM_ASSERT(processor_mask_array_test(va_space->can_access, processor, processor));
         UVM_ASSERT(processor_mask_array_test(va_space->accessible_from, processor, processor));
@@ -137,8 +149,11 @@ static bool va_space_check_processors_masks(uvm_va_space_t *va_space)
 
         // NVLINK
         UVM_ASSERT(!processor_mask_array_test(va_space->has_nvlink, processor, processor));
+
+        if (check_can_copy_from) {
             UVM_ASSERT(uvm_processor_mask_subset(&va_space->has_nvlink[uvm_id_value(processor)],
                                                  &va_space->can_copy_from[uvm_id_value(processor)]));
+        }
 
         // Peers
         UVM_ASSERT(!processor_mask_array_test(va_space->indirect_peers, processor, processor));
@@ -147,8 +162,12 @@ static bool va_space_check_processors_masks(uvm_va_space_t *va_space)
 
         // Atomics
         UVM_ASSERT(processor_mask_array_test(va_space->has_native_atomics, processor, processor));
+
+        if (check_can_copy_from) {
             UVM_ASSERT(uvm_processor_mask_subset(&va_space->has_native_atomics[uvm_id_value(processor)],
                                                  &va_space->can_copy_from[uvm_id_value(processor)]));
+        }
+
         UVM_ASSERT(uvm_processor_mask_subset(&va_space->has_native_atomics[uvm_id_value(processor)],
                                              &va_space->can_access[uvm_id_value(processor)]));
 
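The change above relaxes the "has_nvlink/has_native_atomics must be a subset of can_copy_from" assertions when a GPU's parent reports UVM_GPU_PEER_COPY_MODE_UNSUPPORTED. A small sketch of the subset test and the conditional skip, using plain integer bitmasks instead of uvm_processor_mask_t:

    #include <assert.h>
    #include <stdbool.h>
    #include <stdint.h>

    static bool mask_subset(uint64_t subset, uint64_t mask)
    {
        return (subset & ~mask) == 0;
    }

    int main(void)
    {
        uint64_t has_nvlink    = 0x6; /* peers reachable over NVLink           */
        uint64_t can_copy_from = 0x2; /* CE copies actually allowed            */
        bool peer_copy_unsupported = true;

        bool check_can_copy_from = !peer_copy_unsupported;
        if (check_can_copy_from)
            assert(mask_subset(has_nvlink, can_copy_from));
        /* With peer copies unsupported the invariant is intentionally relaxed. */
        return 0;
    }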
@@ -178,6 +197,7 @@ NV_STATUS uvm_va_space_create(struct address_space *mapping, uvm_va_space_t **va
     }
 
     uvm_init_rwsem(&va_space->lock, UVM_LOCK_ORDER_VA_SPACE);
+    uvm_mutex_init(&va_space->closest_processors.mask_mutex, UVM_LOCK_ORDER_LEAF);
     uvm_mutex_init(&va_space->serialize_writers_lock, UVM_LOCK_ORDER_VA_SPACE_SERIALIZE_WRITERS);
     uvm_mutex_init(&va_space->read_acquire_write_release_lock,
                    UVM_LOCK_ORDER_VA_SPACE_READ_ACQUIRE_WRITE_RELEASE_LOCK);
@@ -329,7 +349,6 @@ static void unregister_gpu(uvm_va_space_t *va_space,
     if (gpu->parent->isr.replayable_faults.handling) {
         UVM_ASSERT(uvm_processor_mask_test(&va_space->faultable_processors, gpu->id));
         uvm_processor_mask_clear(&va_space->faultable_processors, gpu->id);
-        UVM_ASSERT(uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, gpu->id));
         uvm_processor_mask_clear(&va_space->system_wide_atomics_enabled_processors, gpu->id);
     }
     else {
@@ -427,7 +446,7 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
     uvm_va_range_t *va_range, *va_range_next;
     uvm_gpu_t *gpu;
     uvm_gpu_id_t gpu_id;
-    uvm_processor_mask_t retained_gpus;
+    uvm_processor_mask_t *retained_gpus = &va_space->registered_gpus_teardown;
     LIST_HEAD(deferred_free_list);
 
     // Remove the VA space from the global list before we start tearing things
@@ -455,7 +474,7 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
     // registered GPUs in the VA space, so those faults will be canceled.
     uvm_va_space_down_write(va_space);
 
-    uvm_processor_mask_copy(&retained_gpus, &va_space->registered_gpus);
+    uvm_processor_mask_copy(retained_gpus, &va_space->registered_gpus);
 
     bitmap_copy(va_space->enabled_peers_teardown, va_space->enabled_peers, UVM_MAX_UNIQUE_GPU_PAIRS);
 
@@ -507,7 +526,7 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
 
     nv_kthread_q_flush(&g_uvm_global.global_q);
 
-    for_each_gpu_in_mask(gpu, &retained_gpus) {
+    for_each_gpu_in_mask(gpu, retained_gpus) {
         if (!gpu->parent->isr.replayable_faults.handling) {
             UVM_ASSERT(!gpu->parent->isr.non_replayable_faults.handling);
             continue;
@@ -523,6 +542,15 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
 
         if (gpu->parent->access_counters_supported)
             uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
+
+        // Free the processor masks allocated in uvm_va_space_register_gpu().
+        // The mask is also freed in uvm_va_space_unregister_gpu() but that
+        // function won't be called in uvm_release() and uvm_release_deferred()
+        // path.
+        uvm_processor_mask_cache_free(va_space->peers_to_release[uvm_id_value(gpu->id)]);
+
+        // Set the pointer to NULL to avoid accidental re-use and double free.
+        va_space->peers_to_release[uvm_id_value(gpu->id)] = NULL;
     }
 
     // Check that all CPU/GPU affinity masks are empty
@@ -554,14 +582,14 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
     // Release the GPUs and their peer counts. Do not use
     // for_each_gpu_in_mask for the outer loop as it reads the GPU
     // state, which might get destroyed.
-    for_each_gpu_id_in_mask(gpu_id, &retained_gpus) {
+    for_each_gpu_id_in_mask(gpu_id, retained_gpus) {
         uvm_gpu_t *peer_gpu;
 
         gpu = uvm_gpu_get(gpu_id);
 
-        uvm_processor_mask_clear(&retained_gpus, gpu_id);
+        uvm_processor_mask_clear(retained_gpus, gpu_id);
 
-        for_each_gpu_in_mask(peer_gpu, &retained_gpus) {
+        for_each_gpu_in_mask(peer_gpu, retained_gpus) {
             NvU32 peer_table_index = uvm_gpu_peer_table_index(gpu->id, peer_gpu->id);
             if (test_bit(peer_table_index, va_space->enabled_peers_teardown)) {
                 uvm_gpu_peer_t *peer_caps = &g_uvm_global.peers[peer_table_index];
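uvm_va_space_destroy() now consumes a preallocated registered_gpus_teardown mask instead of copying the registered-GPU set onto the stack. A compilable model of that consume-as-you-go loop with a plain 64-bit bitmask standing in for uvm_processor_mask_t:

    #include <stdint.h>
    #include <stdio.h>

    typedef struct {
        uint64_t registered_gpus;
        uint64_t registered_gpus_teardown; /* preallocated scratch for destroy */
    } va_space_t;

    static void va_space_destroy(va_space_t *vs)
    {
        uint64_t *retained = &vs->registered_gpus_teardown; /* no stack copy */

        *retained = vs->registered_gpus;
        while (*retained) {
            int gpu = __builtin_ctzll(*retained);
            *retained &= *retained - 1;   /* clear this GPU, then release it */
            printf("releasing GPU %d\n", gpu);
        }
    }

    int main(void)
    {
        va_space_t vs = { .registered_gpus = 0x5 };
        va_space_destroy(&vs);
        return 0;
    }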
@@ -679,6 +707,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
     uvm_gpu_t *gpu;
     uvm_gpu_t *other_gpu;
     bool gpu_can_access_sysmem = true;
+    uvm_processor_mask_t *peers_to_release = NULL;
 
     status = uvm_gpu_retain_by_uuid(gpu_uuid, user_rm_device, &gpu);
     if (status != NV_OK)
@@ -733,6 +762,16 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
         gpu_can_access_sysmem = false;
     }
 
+    UVM_ASSERT(!va_space->peers_to_release[uvm_id_value(gpu->id)]);
+
+    peers_to_release = uvm_processor_mask_cache_alloc();
+    if (!peers_to_release) {
+        status = NV_ERR_NO_MEMORY;
+        goto done;
+    }
+
+    va_space->peers_to_release[uvm_id_value(gpu->id)] = peers_to_release;
+
     uvm_processor_mask_set(&va_space->registered_gpus, gpu->id);
     va_space->registered_gpus_table[uvm_id_gpu_index(gpu->id)] = gpu;
 
@@ -832,6 +871,10 @@ cleanup:
     // a deferred_free_list, mm, etc.
     unregister_gpu(va_space, gpu, NULL, NULL, NULL);
 
+    va_space->peers_to_release[uvm_id_value(gpu->id)] = NULL;
+
+    uvm_processor_mask_cache_free(peers_to_release);
+
 done:
     UVM_ASSERT(va_space_check_processors_masks(va_space));
 
@@ -856,7 +899,7 @@ NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcesso
     uvm_gpu_va_space_t *gpu_va_space;
     struct mm_struct *mm;
     uvm_gpu_id_t peer_gpu_id;
-    uvm_processor_mask_t peers_to_release;
+    uvm_processor_mask_t *peers_to_release;
     LIST_HEAD(deferred_free_list);
 
     // Stopping channels requires holding the VA space lock in read mode, so do
@@ -917,8 +960,12 @@ NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcesso
     if (uvm_processor_mask_test(&va_space->registered_gpu_va_spaces, gpu->id))
         UVM_ASSERT(uvm_gpu_va_space_get(va_space, gpu) == gpu_va_space);
 
+    peers_to_release = va_space->peers_to_release[uvm_id_value(gpu->id)];
+
+    va_space->peers_to_release[uvm_id_value(gpu->id)] = NULL;
+
     // This will call disable_peers for all GPU's peers, including NVLink
-    unregister_gpu(va_space, gpu, mm, &deferred_free_list, &peers_to_release);
+    unregister_gpu(va_space, gpu, mm, &deferred_free_list, peers_to_release);
 
     UVM_ASSERT(uvm_processor_mask_test(&va_space->gpu_unregister_in_progress, gpu->id));
     uvm_processor_mask_clear(&va_space->gpu_unregister_in_progress, gpu->id);
@@ -939,12 +986,16 @@ NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcesso
 
     // Do not use for_each_gpu_in_mask as it reads the peer GPU state,
     // which might get destroyed when we release the peer entry.
-    for_each_gpu_id_in_mask(peer_gpu_id, &peers_to_release) {
+    UVM_ASSERT(peers_to_release);
+
+    for_each_gpu_id_in_mask(peer_gpu_id, peers_to_release) {
         uvm_gpu_t *peer_gpu = uvm_gpu_get(peer_gpu_id);
         UVM_ASSERT(uvm_gpu_peer_caps(gpu, peer_gpu)->link_type == UVM_GPU_LINK_PCIE);
         uvm_gpu_release_pcie_peer_access(gpu, peer_gpu);
     }
 
+    uvm_processor_mask_cache_free(peers_to_release);
+
     uvm_gpu_release_locked(gpu);
 
     uvm_mutex_unlock(&g_uvm_global.global_lock);
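Registration now allocates the peers_to_release mask and parks it in the VA space; unregistration detaches the pointer, walks it, and frees it. A single-GPU sketch of that ownership handoff, with malloc/free as stand-ins for the mask cache:

    #include <stdio.h>
    #include <stdlib.h>

    static unsigned long *peers_to_release_slot;

    static int register_gpu(void)
    {
        unsigned long *mask = calloc(1, sizeof(*mask));
        if (!mask)
            return -1;
        peers_to_release_slot = mask; /* the VA space now owns the mask */
        return 0;
    }

    static void unregister_gpu(void)
    {
        unsigned long *mask = peers_to_release_slot;
        peers_to_release_slot = NULL;  /* detach before any peer release work */

        /* ... release PCIe peer access for every bit set in *mask ... */

        free(mask);                    /* the unregistering thread frees it   */
    }

    int main(void)
    {
        if (register_gpu() != 0)
            return 1;
        unregister_gpu();
        printf("slot now %p\n", (void *)peers_to_release_slot);
        return 0;
    }

Clearing the slot before consuming the mask is what keeps the destroy path (previous hunks) from freeing the same allocation a second time.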
@@ -1026,7 +1077,6 @@ static NV_STATUS enable_peers(uvm_va_space_t *va_space, uvm_gpu_t *gpu0, uvm_gpu
         return NV_ERR_NOT_COMPATIBLE;
     }
 
-    // TODO: Bug 3848497: Disable GPU Peer Mapping when HCC is enabled
     processor_mask_array_set(va_space->can_access, gpu0->id, gpu1->id);
     processor_mask_array_set(va_space->can_access, gpu1->id, gpu0->id);
     processor_mask_array_set(va_space->accessible_from, gpu0->id, gpu1->id);
@@ -1711,49 +1761,59 @@ uvm_processor_id_t uvm_processor_mask_find_closest_id(uvm_va_space_t *va_space,
                                                       const uvm_processor_mask_t *candidates,
                                                       uvm_processor_id_t src)
 {
-    uvm_processor_mask_t mask;
-    uvm_processor_id_t id;
+    uvm_processor_mask_t *mask = &va_space->closest_processors.mask;
+    uvm_processor_id_t closest_id;
 
     // Highest priority: the local processor itself
     if (uvm_processor_mask_test(candidates, src))
         return src;
 
-    // NvLink peers
-    if (uvm_processor_mask_and(&mask, candidates, &va_space->has_nvlink[uvm_id_value(src)])) {
+    uvm_mutex_lock(&va_space->closest_processors.mask_mutex);
+
+    if (uvm_processor_mask_and(mask, candidates, &va_space->has_nvlink[uvm_id_value(src)])) {
+        // NvLink peers
         uvm_processor_mask_t *indirect_peers;
-        uvm_processor_mask_t direct_peers;
+        uvm_processor_mask_t *direct_peers = &va_space->closest_processors.direct_peers;
 
         indirect_peers = &va_space->indirect_peers[uvm_id_value(src)];
 
-        // Direct peers, prioritizing GPU peers over CPU
-        if (uvm_processor_mask_andnot(&direct_peers, &mask, indirect_peers)) {
-            id = uvm_processor_mask_find_first_gpu_id(&direct_peers);
-            return UVM_ID_IS_INVALID(id)? UVM_ID_CPU : id;
+        if (uvm_processor_mask_andnot(direct_peers, mask, indirect_peers)) {
+            // Direct peers, prioritizing GPU peers over CPU
+            closest_id = uvm_processor_mask_find_first_gpu_id(direct_peers);
+            if (UVM_ID_IS_INVALID(closest_id))
+                closest_id = UVM_ID_CPU;
         }
-
-        // Indirect peers
-        UVM_ASSERT(UVM_ID_IS_GPU(src));
-        UVM_ASSERT(!uvm_processor_mask_test(&mask, UVM_ID_CPU));
-
-        return uvm_processor_mask_find_first_gpu_id(&mask);
+        else {
+            // Indirect peers
+            UVM_ASSERT(UVM_ID_IS_GPU(src));
+            UVM_ASSERT(!uvm_processor_mask_test(mask, UVM_ID_CPU));
+
+            closest_id = uvm_processor_mask_find_first_gpu_id(mask);
+        }
     }
-
-    // If source is GPU, prioritize PCIe peers over CPU
-    if (uvm_processor_mask_and(&mask, candidates, &va_space->can_access[uvm_id_value(src)])) {
+    else if (uvm_processor_mask_and(mask, candidates, &va_space->can_access[uvm_id_value(src)])) {
+        // If source is GPU, prioritize PCIe peers over CPU
         // CPUs only have direct access to GPU memory over NVLINK, not PCIe, and
         // should have been selected above
         UVM_ASSERT(UVM_ID_IS_GPU(src));
 
-        id = uvm_processor_mask_find_first_gpu_id(&mask);
-        return UVM_ID_IS_INVALID(id)? UVM_ID_CPU : id;
+        closest_id = uvm_processor_mask_find_first_gpu_id(mask);
+        if (UVM_ID_IS_INVALID(closest_id))
+            closest_id = UVM_ID_CPU;
     }
-
-    // No GPUs with direct access are in the mask. Just pick the first
-    // processor in the mask, if any.
-    return uvm_processor_mask_find_first_id(candidates);
+    else {
+        // No GPUs with direct access are in the mask. Just pick the first
+        // processor in the mask, if any.
+        closest_id = uvm_processor_mask_find_first_id(candidates);
+    }
 
-static void uvm_deferred_free_object_channel(uvm_deferred_free_object_t *object, uvm_processor_mask_t *flushed_gpus)
+    uvm_mutex_unlock(&va_space->closest_processors.mask_mutex);
+
+    return closest_id;
+}
+
+static void uvm_deferred_free_object_channel(uvm_deferred_free_object_t *object,
+                                             uvm_parent_processor_mask_t *flushed_parent_gpus)
 {
     uvm_user_channel_t *channel = container_of(object, uvm_user_channel_t, deferred_free);
     uvm_gpu_t *gpu = channel->gpu;
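uvm_processor_mask_find_closest_id() now shares two va_space-resident scratch masks between callers and serializes them with the new closest_processors.mask_mutex. A userspace sketch of the same locking discipline, with pthread primitives and a toy mask in place of the UVM types:

    #include <pthread.h>
    #include <stdint.h>

    typedef struct { uint64_t bits[4]; } processor_mask_t;

    typedef struct {
        pthread_mutex_t mask_mutex;     /* protects the two scratch masks */
        processor_mask_t mask;
        processor_mask_t direct_peers;
    } closest_processors_t;

    static int find_closest_id(closest_processors_t *cp, uint64_t candidates, int src)
    {
        int closest_id = -1;

        pthread_mutex_lock(&cp->mask_mutex);
        cp->mask.bits[0] = candidates;            /* shared scratch, not the stack */
        cp->direct_peers.bits[0] = candidates & ~(1ull << src);
        if (cp->direct_peers.bits[0])
            closest_id = __builtin_ctzll(cp->direct_peers.bits[0]);
        pthread_mutex_unlock(&cp->mask_mutex);

        return closest_id;
    }

    int main(void)
    {
        closest_processors_t cp = { .mask_mutex = PTHREAD_MUTEX_INITIALIZER };
        return find_closest_id(&cp, 0xAull, 1) >= 0 ? 0 : 1;
    }

Collapsing the early returns into a single unlock-and-return tail is what makes the mutex safe to hold across the whole selection.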
@@ -1761,9 +1821,10 @@ static void uvm_deferred_free_object_channel(uvm_deferred_free_object_t *object,
     // Flush out any faults with this instance pointer still in the buffer. This
     // prevents us from re-allocating the same instance pointer for a new
     // channel and mis-attributing old faults to it.
-    if (gpu->parent->replayable_faults_supported && !uvm_processor_mask_test(flushed_gpus, gpu->id)) {
+    if (gpu->parent->replayable_faults_supported &&
+        !uvm_parent_processor_mask_test(flushed_parent_gpus, gpu->parent->id)) {
         uvm_gpu_fault_buffer_flush(gpu);
-        uvm_processor_mask_set(flushed_gpus, gpu->id);
+        uvm_parent_processor_mask_set(flushed_parent_gpus, gpu->parent->id);
     }
 
     uvm_user_channel_destroy_detached(channel);
@@ -1772,17 +1833,20 @@ static void uvm_deferred_free_object_channel(uvm_deferred_free_object_t *object,
 void uvm_deferred_free_object_list(struct list_head *deferred_free_list)
 {
     uvm_deferred_free_object_t *object, *next;
-    uvm_processor_mask_t flushed_gpus;
+    uvm_parent_processor_mask_t flushed_parent_gpus;
 
-    // Used if there are any channels in the list
-    uvm_processor_mask_zero(&flushed_gpus);
+    // flushed_parent_gpus prevents redundant fault buffer flushes by tracking
+    // the parent GPUs on which the flush already happened. Flushing the fault
+    // buffer on one GPU instance will flush it for all other instances on that
+    // parent GPU.
+    uvm_parent_processor_mask_zero(&flushed_parent_gpus);
 
     list_for_each_entry_safe(object, next, deferred_free_list, list_node) {
         list_del(&object->list_node);
 
         switch (object->type) {
             case UVM_DEFERRED_FREE_OBJECT_TYPE_CHANNEL:
-                uvm_deferred_free_object_channel(object, &flushed_gpus);
+                uvm_deferred_free_object_channel(object, &flushed_parent_gpus);
                 break;
             case UVM_DEFERRED_FREE_OBJECT_GPU_VA_SPACE:
                 destroy_gpu_va_space(container_of(object, uvm_gpu_va_space_t, deferred_free));
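The deferred-free path now tracks flushes per parent GPU, since flushing the fault buffer on one GPU instance covers every other instance of that parent. A minimal sketch of the dedup bookkeeping, with integer IDs and a 32-bit mask standing in for the UVM types:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static uint32_t flushed_parent_gpus; /* one bit per parent GPU */

    static void flush_fault_buffer(int parent_id)
    {
        printf("flushing parent GPU %d\n", parent_id);
    }

    static void free_channel(int parent_id)
    {
        if (!(flushed_parent_gpus & (1u << parent_id))) {
            flush_fault_buffer(parent_id);
            flushed_parent_gpus |= 1u << parent_id;
        }
        /* ... destroy the detached channel ... */
    }

    int main(void)
    {
        /* Two channels on instances of the same parent: only one flush runs. */
        free_channel(0);
        free_channel(0);
        free_channel(1);
        return 0;
    }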
@@ -2169,6 +2233,31 @@ static LIST_HEAD(g_cpu_service_block_context_list);
 
 static uvm_spinlock_t g_cpu_service_block_context_list_lock;
 
+uvm_service_block_context_t *uvm_service_block_context_alloc(struct mm_struct *mm)
+{
+    uvm_service_block_context_t *service_context = uvm_kvmalloc(sizeof(*service_context));
+
+    if (!service_context)
+        return NULL;
+
+    service_context->block_context = uvm_va_block_context_alloc(mm);
+    if (!service_context->block_context) {
+        uvm_kvfree(service_context);
+        service_context = NULL;
+    }
+
+    return service_context;
+}
+
+void uvm_service_block_context_free(uvm_service_block_context_t *service_context)
+{
+    if (!service_context)
+        return;
+
+    uvm_va_block_context_free(service_context->block_context);
+    uvm_kvfree(service_context);
+}
+
 NV_STATUS uvm_service_block_context_init(void)
 {
     unsigned num_preallocated_contexts = 4;
@@ -2177,17 +2266,11 @@ NV_STATUS uvm_service_block_context_init(void)
 
     // Pre-allocate some fault service contexts for the CPU and add them to the global list
     while (num_preallocated_contexts-- > 0) {
-        uvm_service_block_context_t *service_context = uvm_kvmalloc(sizeof(*service_context));
+        uvm_service_block_context_t *service_context = uvm_service_block_context_alloc(NULL);
 
         if (!service_context)
            return NV_ERR_NO_MEMORY;
 
-        service_context->block_context = uvm_va_block_context_alloc(NULL);
-        if (!service_context->block_context) {
-            uvm_kvfree(service_context);
-            return NV_ERR_NO_MEMORY;
-        }
-
         list_add(&service_context->cpu_fault.service_context_list, &g_cpu_service_block_context_list);
     }
 
@@ -2199,11 +2282,13 @@ void uvm_service_block_context_exit(void)
     uvm_service_block_context_t *service_context, *service_context_tmp;
 
     // Free fault service contexts for the CPU and add clear the global list
-    list_for_each_entry_safe(service_context, service_context_tmp, &g_cpu_service_block_context_list,
+    list_for_each_entry_safe(service_context,
+                             service_context_tmp,
+                             &g_cpu_service_block_context_list,
                              cpu_fault.service_context_list) {
-        uvm_va_block_context_free(service_context->block_context);
-        uvm_kvfree(service_context);
+        uvm_service_block_context_free(service_context);
     }
 
     INIT_LIST_HEAD(&g_cpu_service_block_context_list);
 }
 
@@ -2215,7 +2300,8 @@ static uvm_service_block_context_t *service_block_context_cpu_alloc(void)
 
     uvm_spin_lock(&g_cpu_service_block_context_list_lock);
 
-    service_context = list_first_entry_or_null(&g_cpu_service_block_context_list, uvm_service_block_context_t,
+    service_context = list_first_entry_or_null(&g_cpu_service_block_context_list,
+                                               uvm_service_block_context_t,
                                                cpu_fault.service_context_list);
 
     if (service_context)
@@ -2223,17 +2309,10 @@ static uvm_service_block_context_t *service_block_context_cpu_alloc(void)
 
     uvm_spin_unlock(&g_cpu_service_block_context_list_lock);
 
-    if (!service_context) {
-        service_context = uvm_kvmalloc(sizeof(*service_context));
-        service_context->block_context = uvm_va_block_context_alloc(NULL);
-        if (!service_context->block_context) {
-            uvm_kvfree(service_context);
-            service_context = NULL;
-        }
-    }
-    else {
+    if (!service_context)
+        service_context = uvm_service_block_context_alloc(NULL);
+    else
         uvm_va_block_context_init(service_context->block_context, NULL);
-    }
 
     return service_context;
 }
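uvm_service_block_context_alloc() and uvm_service_block_context_free() factor the two-step construction (outer context plus embedded block context) into one helper pair that the preallocation loop, the CPU fault path, and the exit path all reuse. A standalone sketch of such a pair, with stand-in types and malloc/free instead of uvm_kvmalloc():

    #include <stdlib.h>

    typedef struct { int dummy; } block_context_t;
    typedef struct { block_context_t *block_context; } service_context_t;

    static service_context_t *service_context_alloc(void)
    {
        service_context_t *sc = malloc(sizeof(*sc));
        if (!sc)
            return NULL;

        sc->block_context = malloc(sizeof(*sc->block_context));
        if (!sc->block_context) {
            free(sc);       /* roll back the partial allocation */
            return NULL;
        }
        return sc;
    }

    static void service_context_free(service_context_t *sc)
    {
        if (!sc)
            return;
        free(sc->block_context);
        free(sc);
    }

    int main(void)
    {
        service_context_t *sc = service_context_alloc();
        service_context_free(sc);
        return 0;
    }

Centralizing the rollback in the constructor is what lets the callers above shrink to a single alloc call and a single free call.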
@@ -230,9 +230,11 @@ struct uvm_va_space_struct
     uvm_processor_mask_t accessible_from[UVM_ID_MAX_PROCESSORS];
 
     // Pre-computed masks that contain, for each processor memory, a mask with
-    // the processors that can directly copy to and from its memory. This is
-    // almost the same as accessible_from masks, but also requires peer identity
-    // mappings to be supported for peer access.
+    // the processors that can directly copy to and from its memory, using the
+    // Copy Engine. These masks are usually the same as accessible_from masks.
+    //
+    // In certain configurations, peer identity mappings must be created to
+    // enable CE copies between peers.
     uvm_processor_mask_t can_copy_from[UVM_ID_MAX_PROCESSORS];
 
     // Pre-computed masks that contain, for each processor, a mask of processors
@@ -265,6 +267,22 @@ struct uvm_va_space_struct
     // Mask of processors that are participating in system-wide atomics
     uvm_processor_mask_t system_wide_atomics_enabled_processors;
 
+    // Temporary copy of registered_gpus used to avoid allocation during VA
+    // space destroy.
+    uvm_processor_mask_t registered_gpus_teardown;
+
+    // Allocated in uvm_va_space_register_gpu(), used and free'd in
+    // uvm_va_space_unregister_gpu().
+    uvm_processor_mask_t *peers_to_release[UVM_ID_MAX_PROCESSORS];
+
+    // Mask of processors to unmap. Used in range_unmap().
+    uvm_processor_mask_t unmap_mask;
+
+    // Available as scratch space for the internal APIs. This is like a caller-
+    // save register: it shouldn't be used across function calls which also take
+    // this va_space.
+    uvm_processor_mask_t scratch_processor_mask;
+
     // Mask of physical GPUs where access counters are enabled on this VA space
     uvm_parent_processor_mask_t access_counters_enabled_processors;
 
@@ -349,6 +367,20 @@ struct uvm_va_space_struct
         uvm_hmm_va_space_t hmm;
     };
 
+    struct
+    {
+        // Temporary mask used to calculate closest_processors in
+        // uvm_processor_mask_find_closest_id.
+        uvm_processor_mask_t mask;
+
+        // Temporary mask to hold direct_peers in
+        // uvm_processor_mask_find_closest_id.
+        uvm_processor_mask_t direct_peers;
+
+        // Protects the mask and direct_peers above.
+        uvm_mutex_t mask_mutex;
+    } closest_processors;
+
     struct
     {
         bool page_prefetch_enabled;
@@ -417,9 +417,7 @@ static void uvm_va_space_mm_shutdown(uvm_va_space_t *va_space)
     uvm_va_space_mm_t *va_space_mm = &va_space->va_space_mm;
     uvm_gpu_va_space_t *gpu_va_space;
     uvm_gpu_t *gpu;
-    // TODO: Bug 4351121: retained_gpus should be pre-allocated, not on the
-    // stack.
-    uvm_processor_mask_t retained_gpus;
+    uvm_processor_mask_t *retained_gpus = &va_space_mm->scratch_processor_mask;
     uvm_parent_processor_mask_t flushed_parent_gpus;
     LIST_HEAD(deferred_free_list);

@@ -443,32 +441,34 @@ static void uvm_va_space_mm_shutdown(uvm_va_space_t *va_space)

     // Detach all channels to prevent pending untranslated faults from getting
     // to this VA space. This also removes those channels from the VA space and
-    // puts them on the deferred free list, so only one thread will do this.
+    // puts them on the deferred free list.
     uvm_va_space_down_write(va_space);
     uvm_va_space_detach_all_user_channels(va_space, &deferred_free_list);
-    uvm_processor_mask_and(&retained_gpus, &va_space->registered_gpus, &va_space->faultable_processors);
-    uvm_global_gpu_retain(&retained_gpus);
+    uvm_processor_mask_and(retained_gpus, &va_space->registered_gpus, &va_space->faultable_processors);
+    uvm_global_gpu_retain(retained_gpus);
     uvm_va_space_up_write(va_space);

+    // It's ok to use retained_gpus outside the lock since there can only be one
+    // thread executing in uvm_va_space_mm_shutdown at a time.
+
     // Flush the fault buffer on all registered faultable GPUs.
     // This will avoid spurious cancels of stale pending translated
     // faults after we set UVM_VA_SPACE_MM_STATE_RELEASED later.
     uvm_parent_processor_mask_zero(&flushed_parent_gpus);
-    for_each_gpu_in_mask(gpu, &retained_gpus) {
+    for_each_gpu_in_mask(gpu, retained_gpus) {
         if (!uvm_parent_processor_mask_test_and_set(&flushed_parent_gpus, gpu->parent->id))
             uvm_gpu_fault_buffer_flush(gpu);
     }

-    uvm_global_gpu_release(&retained_gpus);
+    uvm_global_gpu_release(retained_gpus);

     // Call nvUvmInterfaceUnsetPageDirectory. This has no effect on non-MPS.
     // Under MPS this guarantees that no new GPU accesses will be made using
     // this mm.
     //
-    // We need only one thread to make this call, but two threads in here could
-    // race for it, or we could have one thread in here and one in
-    // destroy_gpu_va_space. Serialize these by starting in write mode then
-    // downgrading to read.
+    // We need only one thread to make this call, but we could have one thread
+    // in here and one in destroy_gpu_va_space. Serialize these by starting in
+    // write mode then downgrading to read.
     uvm_va_space_down_write(va_space);
     uvm_va_space_downgrade_write_rm(va_space);
     for_each_gpu_va_space(gpu_va_space, va_space)

@@ -83,6 +83,11 @@ struct uvm_va_space_mm_struct
     // Wait queue for threads waiting for retainers to finish (retained_count
     // going to 0 when not alive).
     wait_queue_head_t last_retainer_wait_queue;
+
+    // Available as scratch space for the internal APIs. This is like a caller-
+    // save register: it shouldn't be used across function calls which also take
+    // this va_space_mm.
+    uvm_processor_mask_t scratch_processor_mask;
 };

 static bool uvm_va_space_mm_alive(struct uvm_va_space_mm_struct *va_space_mm)
@@ -57,7 +57,11 @@
 #include "nv-dmabuf.h"
 #include "nv-caps-imex.h"

-#if !defined(CONFIG_RETPOLINE)
+/*
+ * Commit aefb2f2e619b ("x86/bugs: Rename CONFIG_RETPOLINE =>
+ * CONFIG_MITIGATION_RETPOLINE) in v6.8 renamed CONFIG_RETPOLINE.
+ */
+#if !defined(CONFIG_RETPOLINE) && !defined(CONFIG_MITIGATION_RETPOLINE)
 #include "nv-retpoline.h"
 #endif

@@ -250,6 +250,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += num_registered_fb
 NV_CONFTEST_TYPE_COMPILE_TESTS += pci_driver_has_driver_managed_dma
 NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
 NV_CONFTEST_TYPE_COMPILE_TESTS += memory_failure_has_trapno_arg
+NV_CONFTEST_TYPE_COMPILE_TESTS += foll_longterm_present

 NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present
 NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -2201,6 +2201,8 @@ static int os_numa_verify_gpu_memory_zone(struct notifier_block *nb,
     return NOTIFY_OK;
 }

+#define ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS 4
+
 NV_STATUS NV_API_CALL os_numa_add_gpu_memory
 (
     void *handle,
@@ -2214,7 +2216,12 @@ NV_STATUS NV_API_CALL os_numa_add_gpu_memory
     nv_linux_state_t *nvl = pci_get_drvdata(handle);
     nv_state_t *nv = NV_STATE_PTR(nvl);
     NvU64 base = offset + nvl->coherent_link_info.gpu_mem_pa;
-    int ret;
+    int ret = 0;
+    NvU64 memblock_size;
+    NvU64 size_remaining;
+    NvU64 calculated_segment_size;
+    NvU64 segment_size;
+    NvU64 segment_base;
     os_numa_gpu_mem_hotplug_notifier_t notifier =
     {
         .start_pa = base,
@@ -2247,11 +2254,49 @@ NV_STATUS NV_API_CALL os_numa_add_gpu_memory
         goto failed;
     }

+    //
+    // Adding all memory at once can take a long time. Split up memory into segments
+    // with schedule() in between to prevent soft lockups. Memory segments for
+    // add_memory_driver_managed() need to be aligned to memblock size.
+    //
+    // If there are any issues splitting into segments, then add all memory at once.
+    //
+    if (os_numa_memblock_size(&memblock_size) == NV_OK)
+    {
+        calculated_segment_size = NV_ALIGN_UP(size / ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS, memblock_size);
+    }
+    else
+    {
+        // Don't split into segments, add all memory at once
+        calculated_segment_size = size;
+    }
+
+    segment_size = calculated_segment_size;
+    segment_base = base;
+    size_remaining = size;
+
+    while ((size_remaining > 0) &&
+           (ret == 0))
+    {
+        if (segment_size > size_remaining)
+        {
+            segment_size = size_remaining;
+        }
+
 #ifdef NV_ADD_MEMORY_DRIVER_MANAGED_HAS_MHP_FLAGS_ARG
-    ret = add_memory_driver_managed(node, base, size, "System RAM (NVIDIA)", MHP_NONE);
+        ret = add_memory_driver_managed(node, segment_base, segment_size, "System RAM (NVIDIA)", MHP_NONE);
 #else
-    ret = add_memory_driver_managed(node, base, size, "System RAM (NVIDIA)");
+        ret = add_memory_driver_managed(node, segment_base, segment_size, "System RAM (NVIDIA)");
 #endif
+        nv_printf(NV_DBG_SETUP, "NVRM: add_memory_driver_managed() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
+            ret, segment_base, segment_size);
+
+        segment_base += segment_size;
+        size_remaining -= segment_size;
+
+        // Yield CPU to prevent soft lockups
+        schedule();
+    }
     unregister_memory_notifier(&notifier.memory_notifier);

     if (ret == 0)
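
Aside from the diff itself, the segment-size arithmetic the hunk above introduces is easy to check in isolation. The following standalone C sketch is not driver code; the ALIGN_UP macro, the 128 MiB memblock size and the 96 GiB region size are illustrative assumptions. It mirrors how a region is split into four memblock-aligned segments and walked, with the last chunk clamped to whatever remains:

    #include <stdio.h>
    #include <stdint.h>

    /* Hypothetical stand-in for the driver's NV_ALIGN_UP macro. */
    #define ALIGN_UP(v, gran) ((((v) + (gran) - 1) / (gran)) * (gran))

    int main(void)
    {
        const uint64_t num_segments  = 4;             /* ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS */
        const uint64_t memblock_size = 128ULL << 20;  /* assumed 128 MiB memblock size */
        const uint64_t size          = 96ULL << 30;   /* assumed 96 GiB of GPU memory */

        /* Each segment is size/4 rounded up to a memblock boundary, as above. */
        uint64_t segment   = ALIGN_UP(size / num_segments, memblock_size);
        uint64_t remaining = size;
        uint64_t base      = 0;

        while (remaining > 0) {
            uint64_t chunk = (segment > remaining) ? remaining : segment;
            printf("add segment at 0x%llx, size 0x%llx\n",
                   (unsigned long long)base, (unsigned long long)chunk);
            base      += chunk;
            remaining -= chunk;
        }
        return 0;
    }

With these example numbers the four segments are equal (24 GiB each); the clamp only matters when the division is not exact, and the walked chunks always sum to the original size.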
@@ -2265,14 +2310,33 @@ NV_STATUS NV_API_CALL os_numa_add_gpu_memory
             zone_end_pfn(zone) != end_pfn)
         {
             nv_printf(NV_DBG_ERRORS, "NVRM: GPU memory zone movable auto onlining failed!\n");

 #ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
-#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
-            if (offline_and_remove_memory(node, base, size) != 0)
-#else
-            if (offline_and_remove_memory(base, size) != 0)
-#endif
+            // Since zone movable auto onlining failed, need to remove the added memory.
+            segment_size = calculated_segment_size;
+            segment_base = base;
+            size_remaining = size;
+
+            while (size_remaining > 0)
             {
-                nv_printf(NV_DBG_ERRORS, "NVRM: offline_and_remove_memory failed\n");
+                if (segment_size > size_remaining)
+                {
+                    segment_size = size_remaining;
+                }
+
+#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
+                ret = offline_and_remove_memory(node, segment_base, segment_size);
+#else
+                ret = offline_and_remove_memory(segment_base, segment_size);
+#endif
+                nv_printf(NV_DBG_SETUP, "NVRM: offline_and_remove_memory() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
+                    ret, segment_base, segment_size);
+
+                segment_base += segment_size;
+                size_remaining -= segment_size;
+
+                // Yield CPU to prevent soft lockups
+                schedule();
             }
 #endif
             goto failed;
@@ -2323,6 +2387,77 @@ failed:
     return NV_ERR_NOT_SUPPORTED;
 }

+
+typedef struct {
+    NvU64 base;
+    NvU64 size;
+    NvU32 nodeId;
+    int ret;
+} remove_numa_memory_info_t;
+
+static void offline_numa_memory_callback
+(
+    void *args
+)
+{
+#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
+    remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
+    int ret = 0;
+    NvU64 memblock_size;
+    NvU64 size_remaining;
+    NvU64 calculated_segment_size;
+    NvU64 segment_size;
+    NvU64 segment_base;
+
+    //
+    // Removing all memory at once can take a long time. Split up memory into segments
+    // with schedule() in between to prevent soft lockups. Memory segments for
+    // offline_and_remove_memory() need to be aligned to memblock size.
+    //
+    // If there are any issues splitting into segments, then remove all memory at once.
+    //
+    if (os_numa_memblock_size(&memblock_size) == NV_OK)
+    {
+        calculated_segment_size = NV_ALIGN_UP(pNumaInfo->size / ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS, memblock_size);
+    }
+    else
+    {
+        // Don't split into segments, remove all memory at once
+        calculated_segment_size = pNumaInfo->size;
+    }
+
+    segment_size = calculated_segment_size;
+    segment_base = pNumaInfo->base;
+    size_remaining = pNumaInfo->size;
+
+    while (size_remaining > 0)
+    {
+        if (segment_size > size_remaining)
+        {
+            segment_size = size_remaining;
+        }
+
+#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
+        ret = offline_and_remove_memory(pNumaInfo->nodeId,
+                                        segment_base,
+                                        segment_size);
+#else
+        ret = offline_and_remove_memory(segment_base,
+                                        segment_size);
+#endif
+        nv_printf(NV_DBG_SETUP, "NVRM: offline_and_remove_memory() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
+            ret, segment_base, segment_size);
+        pNumaInfo->ret |= ret;
+
+        segment_base += segment_size;
+        size_remaining -= segment_size;
+
+        // Yield CPU to prevent soft lockups
+        schedule();
+    }
+#endif
+}
+
 NV_STATUS NV_API_CALL os_numa_remove_gpu_memory
 (
     void *handle,
@@ -26,6 +26,12 @@
 #include "os-interface.h"
 #include "nv-linux.h"

+#if defined(NVCPU_FAMILY_X86) && defined(NV_FOLL_LONGTERM_PRESENT) && \
+    (defined(NV_PIN_USER_PAGES_HAS_ARGS_VMAS) || \
+     defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS))
+#define NV_NUM_PIN_PAGES_PER_ITERATION 0x80000
+#endif
+
 static inline int nv_follow_pfn(struct vm_area_struct *vma,
                                 unsigned long address,
                                 unsigned long *pfn)
@@ -163,9 +169,15 @@ NV_STATUS NV_API_CALL os_lock_user_pages(
     NV_STATUS rmStatus;
     struct mm_struct *mm = current->mm;
     struct page **user_pages;
-    NvU64 i, pinned;
+    NvU64 i;
+    NvU64 npages = page_count;
+    NvU64 pinned = 0;
     unsigned int gup_flags = DRF_VAL(_LOCK_USER_PAGES, _FLAGS, _WRITE, flags) ? FOLL_WRITE : 0;
-    int ret;
+    long ret;
+
+#if defined(NVCPU_FAMILY_X86) && defined(NV_FOLL_LONGTERM_PRESENT)
+    gup_flags |= FOLL_LONGTERM;
+#endif

     if (!NV_MAY_SLEEP())
     {
@@ -185,16 +197,51 @@ NV_STATUS NV_API_CALL os_lock_user_pages(

     nv_mmap_read_lock(mm);
     ret = NV_PIN_USER_PAGES((unsigned long)address,
-                            page_count, gup_flags, user_pages);
-    nv_mmap_read_unlock(mm);
-    pinned = ret;
-
-    if (ret < 0)
+                            npages, gup_flags, user_pages);
+    if (ret > 0)
     {
-        os_free_mem(user_pages);
-        return NV_ERR_INVALID_ADDRESS;
+        pinned = ret;
     }
-    else if (pinned < page_count)
+#if defined(NVCPU_FAMILY_X86) && defined(NV_FOLL_LONGTERM_PRESENT) && \
+    (defined(NV_PIN_USER_PAGES_HAS_ARGS_VMAS) || \
+     defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS))
+    //
+    // NV_PIN_USER_PAGES() passes in NULL for the vmas parameter (if required)
+    // in pin_user_pages() (or get_user_pages() if pin_user_pages() does not
+    // exist). For kernels which do not contain the commit 52650c8b466b
+    // (mm/gup: remove the vma allocation from gup_longterm_locked()), if
+    // FOLL_LONGTERM is passed in, this results in the kernel trying to kcalloc
+    // the vmas array, and since the limit for kcalloc is 4 MB, it results in
+    // NV_PIN_USER_PAGES() failing with ENOMEM if more than
+    // NV_NUM_PIN_PAGES_PER_ITERATION pages are requested on 64-bit systems.
+    //
+    // As a workaround, if we requested more than
+    // NV_NUM_PIN_PAGES_PER_ITERATION pages and failed with ENOMEM, try again
+    // with multiple calls of NV_NUM_PIN_PAGES_PER_ITERATION pages at a time.
+    //
+    else if ((ret == -ENOMEM) &&
+             (page_count > NV_NUM_PIN_PAGES_PER_ITERATION))
+    {
+        for (pinned = 0; pinned < page_count; pinned += ret)
+        {
+            npages = page_count - pinned;
+            if (npages > NV_NUM_PIN_PAGES_PER_ITERATION)
+            {
+                npages = NV_NUM_PIN_PAGES_PER_ITERATION;
+            }
+
+            ret = NV_PIN_USER_PAGES(((unsigned long) address) + (pinned * PAGE_SIZE),
+                                    npages, gup_flags, &user_pages[pinned]);
+            if (ret <= 0)
+            {
+                break;
+            }
+        }
+    }
+#endif
+    nv_mmap_read_unlock(mm);
+
+    if (pinned < page_count)
     {
         for (i = 0; i < pinned; i++)
             NV_UNPIN_USER_PAGE(user_pages[i]);
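
The ENOMEM workaround above boils down to "retry the large request in fixed-size chunks and stop at the first failure". A minimal user-space sketch of that control flow follows; all names here are hypothetical stand-ins (the real code calls NV_PIN_USER_PAGES under the mmap lock), and the callback simply simulates success:

    #include <stdio.h>
    #include <stdint.h>

    #define PAGES_PER_ITERATION 0x80000   /* mirrors NV_NUM_PIN_PAGES_PER_ITERATION */

    /* Hypothetical pin callback: returns pages "pinned" or a negative errno. */
    typedef long (*pin_fn)(uint64_t first_page, uint64_t npages);

    /* Pin page_count pages in fixed-size chunks, stopping on the first failure. */
    static uint64_t pin_in_chunks(pin_fn pin, uint64_t page_count)
    {
        uint64_t pinned;
        long ret = 0;

        for (pinned = 0; pinned < page_count; pinned += (uint64_t)ret) {
            uint64_t npages = page_count - pinned;
            if (npages > PAGES_PER_ITERATION)
                npages = PAGES_PER_ITERATION;

            ret = pin(pinned, npages);
            if (ret <= 0)
                break;               /* caller unpins the first `pinned` pages */
        }
        return pinned;
    }

    static long fake_pin(uint64_t first_page, uint64_t npages)
    {
        (void)first_page;
        return (long)npages;         /* pretend every request succeeds */
    }

    int main(void)
    {
        printf("pinned %llu pages\n",
               (unsigned long long)pin_in_chunks(fake_pin, 0x200000));
        return 0;
    }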
@@ -348,6 +348,9 @@ namespace DisplayPort
         //
         bool bPowerDownPhyBeforeD3;

+        // Force DSC on sink irrespective of LT status
+        bool bForceDscOnSink;
+
         //
         // Reset the MSTM_CTRL registers on branch device irrespective of
         // IRQ VECTOR register having stale message. Certain branch devices
@@ -294,8 +294,8 @@ namespace DisplayPort
             else
             {
                 // if FEC is not enabled, link overhead comprises only of
-                // 0.05% downspread.
-                return rate - 5 * rate/ 1000;
+                // 0.6% downspread.
+                return rate - 6 * rate/ 1000;

             }
         }
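
For the downspread change just above, the effective-rate math is simple enough to verify by hand. A small standalone C check (the 8.1 Gbps HBR3 per-lane rate is an assumed example, not taken from the diff):

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        /* Assumed example: HBR3 per-lane link rate expressed in kbps. */
        uint64_t rate = 8100000000ULL / 1000;   /* 8,100,000 kbps */

        /* Without FEC, only the 0.6% downspread overhead is subtracted,
         * matching `rate - 6 * rate / 1000` in the hunk above. */
        uint64_t effective = rate - 6 * rate / 1000;

        printf("effective rate: %llu kbps (overhead %llu kbps)\n",
               (unsigned long long)effective,
               (unsigned long long)(rate - effective));
        return 0;
    }

For 8,100,000 kbps the overhead works out to 48,600 kbps, leaving 8,051,400 kbps of usable link rate.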
@@ -79,6 +79,11 @@
 //
 #define NV_DP_REGKEY_MST_PCON_CAPS_READ_DISABLED "DP_BUG_4388987_WAR"

+//
+// Bug 4459839 : This regkey will enable DSC irrespective of LT status.
+//
+#define NV_DP_REGKEY_FORCE_DSC_ON_SINK "DP_FORCE_DSC_ON_SINK"
+
 //
 // Data Base used to store all the regkey values.
 // The actual data base is declared statically in dp_evoadapter.cpp.
@@ -113,6 +118,7 @@ struct DP_REGKEY_DATABASE
     bool bPowerDownPhyBeforeD3;
     bool bReassessMaxLink;
     bool bMSTPCONCapsReadDisabled;
+    bool bForceDscOnSink;
 };

 #endif //INCLUDED_DP_REGKEYDATABASE_H
@@ -174,6 +174,7 @@ void ConnectorImpl::applyRegkeyOverrides(const DP_REGKEY_DATABASE& dpRegkeyDatab
     this->bDscMstCapBug3143315 = dpRegkeyDatabase.bDscMstCapBug3143315;
     this->bPowerDownPhyBeforeD3 = dpRegkeyDatabase.bPowerDownPhyBeforeD3;
     this->bReassessMaxLink = dpRegkeyDatabase.bReassessMaxLink;
+    this->bForceDscOnSink = dpRegkeyDatabase.bForceDscOnSink;
 }

 void ConnectorImpl::setPolicyModesetOrderMitigation(bool enabled)
@@ -3129,7 +3130,7 @@ bool ConnectorImpl::notifyAttachBegin(Group * target, // Gr

     // if LT is successful, see if panel supports DSC and if so, set DSC enabled/disabled
     // according to the mode requested.
-    if(bLinkTrainingStatus)
+    if(bLinkTrainingStatus || bForceDscOnSink)
     {
         for (Device * dev = target->enumDevices(0); dev; dev = target->enumDevices(dev))
         {
@@ -4631,6 +4632,11 @@ bool ConnectorImpl::trainLinkOptimized(LinkConfiguration lConfig)
             }
         }

+        //
+        // There is no point in fallback here since we are link training
+        // to loweset link config that can support the mode.
+        //
+        lowestSelected.policy.setSkipFallBack(true);
         bLinkTrainingSuccessful = train(lowestSelected, false);
         //
         // If LT failed, check if skipLT was marked. If so, clear the flag and
@@ -4648,16 +4654,37 @@ bool ConnectorImpl::trainLinkOptimized(LinkConfiguration lConfig)
         }
         if (!bLinkTrainingSuccessful)
         {
-            // Try fall back to max link config and if that fails try original assessed link configuration
+            // If optimized link config fails, try max link config with fallback.
             if (!train(getMaxLinkConfig(), false))
             {
+                //
+                // Note here that if highest link config fails and a lower
+                // link config passes, link training will be returned as
+                // failure but activeLinkConfig will be set to that passing config.
+                //
                 if (!willLinkSupportModeSST(activeLinkConfig, groupAttached->lastModesetInfo))
                 {
+                    //
+                    // If none of the link configs pass LT or a fall back link config passed LT
+                    // but cannot support the mode, then we will force the optimized link config
+                    // on the link and mark LT as fail.
+                    //
                     train(lowestSelected, true);
-
-                    // Mark link training as failed since we forced it
                     bLinkTrainingSuccessful = false;
                 }
+                else
+                {
+                    //
+                    // If a fallback link config pass LT and can support
+                    // the mode, mark LT as pass.
+                    //
+                    bLinkTrainingSuccessful = true;
+                }
+            }
+            else
+            {
+                // If LT passes at max link config, mark LT as pass.
+                bLinkTrainingSuccessful = true;
             }
         }
     }
@@ -94,7 +94,8 @@ const struct
     {NV_DP_DSC_MST_CAP_BUG_3143315, &dpRegkeyDatabase.bDscMstCapBug3143315, DP_REG_VAL_BOOL},
     {NV_DP_REGKEY_POWER_DOWN_PHY, &dpRegkeyDatabase.bPowerDownPhyBeforeD3, DP_REG_VAL_BOOL},
     {NV_DP_REGKEY_REASSESS_MAX_LINK, &dpRegkeyDatabase.bReassessMaxLink, DP_REG_VAL_BOOL},
-    {NV_DP_REGKEY_MST_PCON_CAPS_READ_DISABLED, &dpRegkeyDatabase.bMSTPCONCapsReadDisabled, DP_REG_VAL_BOOL}
+    {NV_DP_REGKEY_MST_PCON_CAPS_READ_DISABLED, &dpRegkeyDatabase.bMSTPCONCapsReadDisabled, DP_REG_VAL_BOOL},
+    {NV_DP_REGKEY_FORCE_DSC_ON_SINK, &dpRegkeyDatabase.bForceDscOnSink, DP_REG_VAL_BOOL},
 };

 EvoMainLink::EvoMainLink(EvoInterface * provider, Timer * timer) :
@@ -36,25 +36,25 @@
 // and then checked back in. You cannot make changes to these sections without
 // corresponding changes to the buildmeister script
 #ifndef NV_BUILD_BRANCH
-#define NV_BUILD_BRANCH r551_40
+#define NV_BUILD_BRANCH r550_00
 #endif
 #ifndef NV_PUBLIC_BRANCH
-#define NV_PUBLIC_BRANCH r551_40
+#define NV_PUBLIC_BRANCH r550_00
 #endif

 #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
-#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r550/r551_40-171"
-#define NV_BUILD_CHANGELIST_NUM (33992326)
+#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r550/r550_00-204"
+#define NV_BUILD_CHANGELIST_NUM (34025356)
 #define NV_BUILD_TYPE "Official"
-#define NV_BUILD_NAME "rel/gpu_drv/r550/r551_40-171"
-#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33992326)
+#define NV_BUILD_NAME "rel/gpu_drv/r550/r550_00-204"
+#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34025356)

 #else /* Windows builds */
-#define NV_BUILD_BRANCH_VERSION "r551_40-15"
-#define NV_BUILD_CHANGELIST_NUM (33992326)
+#define NV_BUILD_BRANCH_VERSION "r550_00-192"
+#define NV_BUILD_CHANGELIST_NUM (34025356)
 #define NV_BUILD_TYPE "Official"
-#define NV_BUILD_NAME "551.78"
-#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33992326)
+#define NV_BUILD_NAME "551.86"
+#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34025356)
 #define NV_BUILD_BRANCH_BASE_VERSION R550
 #endif
 // End buildmeister python edited section
@@ -4,7 +4,7 @@
 #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
     (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)

-#define NV_VERSION_STRING "550.54.15"
+#define NV_VERSION_STRING "550.67"

 #else

@@ -2388,7 +2388,8 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)

             if (parseCta861DataBlockInfo(pData_collection, (NvU32)ctaDTD_Offset - 4, NULL) == NVT_STATUS_SUCCESS)
             {
-                pData_collection++;
+                pData_collection++; // go to the next byte. skip Tag+Length byte
+
                 if (ctaBlockTag == NVT_CEA861_TAG_VIDEO)
                 {
                     for (i=0; i < ctaPayload; i++)
@@ -2432,6 +2433,8 @@ NvU32 NvTiming_EDIDStrongValidationMask(NvU8 *pEdid, NvU32 length)
             }
             else
             {
+                pData_collection++; // go to the next byte. skip Tag+Length byte
+
                 ret |= NVT_EDID_VALIDATION_ERR_MASK(NVT_EDID_VALIDATION_ERR_EXT_CTA_INVALID_DATA_BLOCK);
                 pData_collection += ctaPayload;
             }
@@ -1609,7 +1609,6 @@ void getEdidHDM1_4bVsdbTiming(NVT_EDID_INFO *pInfo)
 CODE_SEGMENT(PAGE_DD_CODE)
 NVT_STATUS get861ExtInfo(NvU8 *p, NvU32 size, NVT_EDID_CEA861_INFO *p861info)
 {
-
     NvU32 dtd_offset;
     // sanity check
     if (p == NULL || size < sizeof(EDIDV1STRUC))
@@ -1725,8 +1724,8 @@ NVT_STATUS parseCta861DataBlockInfo(NvU8 *p,
         if (payload >= 1)
         {
             ext_tag = p[i];
-            if (ext_tag == NVT_CEA861_EXT_TAG_VIDEO_CAP && payload < 2) return NVT_STATUS_ERR;
-            else if (ext_tag == NVT_CEA861_EXT_TAG_COLORIMETRY && payload < 3) return NVT_STATUS_ERR;
+            if (ext_tag == NVT_CEA861_EXT_TAG_VIDEO_CAP && (payload != 2)) return NVT_STATUS_ERR;
+            else if (ext_tag == NVT_CEA861_EXT_TAG_COLORIMETRY && payload != 3) return NVT_STATUS_ERR;
             else if (ext_tag == NVT_CEA861_EXT_TAG_VIDEO_FORMAT_PREFERENCE && payload < 2) return NVT_STATUS_ERR;
             else if (ext_tag == NVT_CEA861_EXT_TAG_YCBCR420_VIDEO && payload < 2) return NVT_STATUS_ERR;
             else if (ext_tag == NVT_CEA861_EXT_TAG_YCBCR420_CAP && payload < 1) return NVT_STATUS_ERR;
@@ -1855,6 +1854,8 @@ NVT_STATUS parseCta861DataBlockInfo(NvU8 *p,
             }
         }
         else if (tag == NVT_CTA861_TAG_VIDEO_FORMAT)
+        {
+            if (payload > 0)
             {
                 p861info->vfdb[vfd_index].info.vfd_len = p[i] & 0x03;
                 p861info->vfdb[vfd_index].info.ntsc = (p[i] & 0x40) >> 6;
@@ -1863,13 +1864,14 @@ NVT_STATUS parseCta861DataBlockInfo(NvU8 *p,

                 i++; payload--;

-                for (j = 0; j < payload; j++, i++)
+                for (j = 0; (j < payload) && (p861info->vfdb[vfd_index].total_vfd != 0); j++, i++)
                 {
                     p861info->vfdb[vfd_index].video_format_desc[j] = p[i];
                 }

                 p861info->total_vfdb = ++vfd_index;
             }
+        }
         else if (tag == NVT_CEA861_TAG_EXTENDED_FLAG)
         {
             if (payload >= 1)
@@ -1879,14 +1881,14 @@ NVT_STATUS parseCta861DataBlockInfo(NvU8 *p,
                 {
                     p861info->video_capability = p[i + 1] & NVT_CEA861_VIDEO_CAPABILITY_MASK;
                     p861info->valid.VCDB = 1;
-                    i += 2;
+                    i += payload;
                 }
                 else if (ext_tag == NVT_CEA861_EXT_TAG_COLORIMETRY && payload >= 3)
                 {
                     p861info->colorimetry.byte1 = p[i + 1] & NVT_CEA861_COLORIMETRY_MASK;
                     p861info->colorimetry.byte2 = p[i + 2] & NVT_CEA861_GAMUT_METADATA_MASK;
                     p861info->valid.colorimetry = 1;
-                    i += 3;
+                    i += payload;
                 }
                 else if (ext_tag == NVT_CEA861_EXT_TAG_VIDEO_FORMAT_PREFERENCE && payload >= 2)
                 {
@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2022-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -80,6 +80,7 @@ typedef struct

 #define NVLINK_INBAND_GPU_PROBE_CAPS_SRIOV_ENABLED NVBIT(0)
 #define NVLINK_INBAND_GPU_PROBE_CAPS_PROBE_UPDATE NVBIT(1)
+#define NVLINK_INBAND_GPU_PROBE_CAPS_EGM_SUPPORT NVBIT(2)

 /* Add more caps as need in the future */

@@ -378,6 +378,21 @@ nvlink_lib_powerdown_links_from_active_to_off
             lockLinkCount++;
         }

+        if (lockLinkCount == 0)
+        {
+            if (conns != NULL)
+                nvlink_free((void *)conns);
+
+            if (lockLinks != NULL)
+                nvlink_free((void *)lockLinks);
+
+            // Release the top-level lock
+            nvlink_lib_top_lock_release();
+            NVLINK_PRINT((DBG_MODULE_NVLINK_CORE, NVLINK_DBG_LEVEL_ERRORS,
+                "%s: No conns were found\n", __FUNCTION__));
+            return NVL_NOT_FOUND;
+        }
+
         // Acquire the per-link locks for all links captured
         status = nvlink_lib_link_locks_acquire(lockLinks, lockLinkCount);
         if (status != NVL_SUCCESS)
@@ -923,4 +938,3 @@ nvlink_core_powerdown_floorswept_conns_to_off_end:

     return status;
 }
-
@@ -38,6 +38,7 @@
 #define NVSWITCH_BOARD_LS10_5612_0002_ES 0x03D6
 #define NVSWITCH_BOARD_LS10_4697_0000_895 0x03B9
 #define NVSWITCH_BOARD_LS10_4262_0000_895 0x04FE
+#define NVSWITCH_BOARD_LS10_4300_0000_895 0x0571

 #define NVSWITCH_BOARD_UNKNOWN_NAME "UNKNOWN"

@@ -48,5 +49,6 @@
 #define NVSWITCH_BOARD_LS10_5612_0002_ES_NAME "LS10_5612_0002_ES"
 #define NVSWITCH_BOARD_LS10_4697_0000_895_NAME "LS10_4697_0000_895"
 #define NVSWITCH_BOARD_LS10_4262_0000_895_NAME "LS10_4262_0000_895"
+#define NVSWITCH_BOARD_LS10_4300_0000_895_NAME "LS10_4300_0000_895"

 #endif // _BOARDS_NVSWITCH_H_
@@ -894,9 +894,9 @@ _nvswitch_collect_error_info_ls10
         {
             data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_ROUTE_HDR;
             NVSWITCH_PRINT(device, INFO,
-                "ROUTE: HEADER: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x,\n",
-                data->data[i-8], data->data[i-7], data->data[i-6], data->data[i-5],
-                data->data[i-4], data->data[i-3], data->data[i-2], data->data[i-1]);
+                "ROUTE: HEADER: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x,\n",
+                data->data[i-7], data->data[i-6], data->data[i-5], data->data[i-4],
+                data->data[i-3], data->data[i-2], data->data[i-1]);
         }
     }
 }
@@ -940,9 +940,9 @@ _nvswitch_collect_error_info_ls10
         {
             data->flags |= NVSWITCH_RAW_ERROR_LOG_DATA_FLAG_INGRESS_HDR;
             NVSWITCH_PRINT(device, INFO,
-                "INGRESS: HEADER: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x,\n",
-                data->data[i-7], data->data[i-6], data->data[i-5], data->data[i-4],
-                data->data[i-3], data->data[i-2], data->data[i-1]);
+                "INGRESS: HEADER: 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x, 0x%08x,\n",
+                data->data[i-6], data->data[i-5], data->data[i-4], data->data[i-3],
+                data->data[i-2], data->data[i-1]);
         }
     }
 }
@@ -32,6 +32,7 @@
 #include "export_nvswitch.h"
 #include "soe/soe_nvswitch.h"
 #include "soe/soeifcore.h"
+#include "boards_nvswitch.h"

 #include "nvswitch/ls10/dev_pmgr.h"

@@ -176,6 +177,16 @@ static const NVSWITCH_GPIO_INFO nvswitch_gpio_pin_Default[] =

 static const NvU32 nvswitch_gpio_pin_Default_size = NV_ARRAY_ELEMENTS(nvswitch_gpio_pin_Default);

+static const NVSWITCH_GPIO_INFO nvswitch_gpio_pin_4300[] =
+{
+    NVSWITCH_DESCRIBE_GPIO_PIN( 0, _INSTANCE_ID0, 0, IN), // Instance ID bit 0
+    NVSWITCH_DESCRIBE_GPIO_PIN( 1, _INSTANCE_ID1, 0, IN), // Instance ID bit 1
+    NVSWITCH_DESCRIBE_GPIO_PIN( 2, _INSTANCE_ID2, 0, IN), // Instance ID bit 2
+    NVSWITCH_DESCRIBE_GPIO_PIN( 6, _INSTANCE_ID3, 0, IN), // Instance ID bit 3
+    NVSWITCH_DESCRIBE_GPIO_PIN( 7, _INSTANCE_ID4, 0, IN), // Instance ID bit 4
+};
+static const NvU32 nvswitch_gpio_pin_4300_size = NV_ARRAY_ELEMENTS(nvswitch_gpio_pin_4300);
+
 //
 // Initialize the software state of the switch I2C & GPIO interface
 // Temporarily forcing default GPIO values.
@@ -191,6 +202,8 @@ nvswitch_init_pmgr_devices_ls10
 {
     ls10_device *chip_device = NVSWITCH_GET_CHIP_DEVICE_LS10(device);
     PNVSWITCH_OBJI2C pI2c = device->pI2c;
+    NvlStatus retval;
+    NvU16 boardId;

     if (IS_FMODEL(device) || IS_EMULATION(device) || IS_RTLSIM(device))
     {
@@ -199,10 +212,20 @@ nvswitch_init_pmgr_devices_ls10
         chip_device->gpio_pin_size = 0;
     }
     else
+    {
+        retval = nvswitch_get_board_id(device, &boardId);
+        if (retval == NVL_SUCCESS &&
+            boardId == NVSWITCH_BOARD_LS10_4300_0000_895)
+        {
+            chip_device->gpio_pin = nvswitch_gpio_pin_4300;
+            chip_device->gpio_pin_size = nvswitch_gpio_pin_4300_size;
+        }
+        else
         {
             chip_device->gpio_pin = nvswitch_gpio_pin_Default;
             chip_device->gpio_pin_size = nvswitch_gpio_pin_Default_size;
         }
+    }

     pI2c->device_list = NULL;
     pI2c->device_list_size = 0;
@@ -62,7 +62,7 @@ static NvlStatus _nvswitch_ctrl_inband_flush_data(nvswitch_device *device, NVSWI
 #define NVSWITCH_DEV_CMD_DISPATCH_RESERVED(cmd) \
     case cmd: \
     { \
-        retval = -NVL_ERR_NOT_IMPLEMENTED; \
+        retval = -NVL_ERR_NOT_SUPPORTED; \
         break; \
     } \

@@ -95,6 +95,7 @@ endif
 ifeq ($(TARGET_ARCH),aarch64)
   CFLAGS += -mgeneral-regs-only
   CFLAGS += -march=armv8-a
+  CFLAGS += -ffixed-x18
   CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mno-outline-atomics)
 endif

@@ -90,6 +90,7 @@ ifeq ($(TARGET_ARCH),aarch64)
   CFLAGS += -mgeneral-regs-only
   CFLAGS += -march=armv8-a
   CFLAGS += -mstrict-align
+  CFLAGS += -ffixed-x18
   CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mno-outline-atomics)
 endif

@@ -74,7 +74,7 @@ NV_STATUS hypervisorInjectInterrupt_IMPL
     NV_STATUS status = NV_ERR_NOT_SUPPORTED;

     if (pVgpuNsIntr->pVgpuVfioRef)
-        status = osVgpuInjectInterrupt(pVgpuNsIntr->pVgpuVfioRef);
+        return NV_ERR_NOT_SUPPORTED;
     else
     {
         if (pVgpuNsIntr->guestMSIAddr && pVgpuNsIntr->guestMSIData)
@@ -142,14 +142,22 @@ static NV_STATUS get_available_instances(

     swizzIdInUseMask = kmigmgrGetSwizzIdInUseMask(pGpu, pKernelMIGManager);

+    if (!vgpuTypeInfo->gpuInstanceSize)
+    {
+        // Query for a non MIG vgpuType
+        NV_PRINTF(LEVEL_INFO, "%s Query for a non MIG vGPU type \n",
+                  __FUNCTION__);
+        rmStatus = NV_OK;
+        goto exit;
+    }
+
     rmStatus = kvgpumgrGetPartitionFlag(vgpuTypeInfo->vgpuTypeId,
                                         &partitionFlag);
     if (rmStatus != NV_OK)
     {
         // Query for a non MIG vgpuType
-        NV_PRINTF(LEVEL_ERROR, "%s Query for a non MIG vGPU type \n",
+        NV_PRINTF(LEVEL_ERROR, "%s failed to get partition flags.\n",
                   __FUNCTION__);
-        rmStatus = NV_OK;
         goto exit;
     }

@@ -192,7 +200,7 @@ static NV_STATUS get_available_instances(
     if (vgpuTypeInfo->gpuInstanceSize)
     {
         // Query for a MIG vgpuType
-        NV_PRINTF(LEVEL_ERROR, "%s Query for a MIG vGPU type \n",
+        NV_PRINTF(LEVEL_INFO, "%s Query for a MIG vGPU type \n",
                   __FUNCTION__);
         rmStatus = NV_OK;
         goto exit;
@@ -1255,6 +1255,7 @@ struct OBJGPU {
     TMR_EVENT *pVideoTimerEvent;
     NVENC_SESSION_LIST nvencSessionList;
     NvU32 encSessionStatsReportingState;
+    NvBool bNvEncSessionDataProcessingWorkItemPending;
     NVFBC_SESSION_LIST nvfbcSessionList;
     struct OBJVASPACE *pFabricVAS;
     NvBool bPipelinedPteMemEnabled;
@@ -1014,6 +1014,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
     { 0x2702, 0x0000, 0x0000, "NVIDIA GeForce RTX 4080 SUPER" },
     { 0x2704, 0x0000, 0x0000, "NVIDIA GeForce RTX 4080" },
     { 0x2705, 0x0000, 0x0000, "NVIDIA GeForce RTX 4070 Ti SUPER" },
+    { 0x2709, 0x0000, 0x0000, "NVIDIA GeForce RTX 4070" },
     { 0x2717, 0x0000, 0x0000, "NVIDIA GeForce RTX 4090 Laptop GPU" },
     { 0x2730, 0x0000, 0x0000, "NVIDIA RTX 5000 Ada Generation Laptop GPU" },
     { 0x2757, 0x0000, 0x0000, "NVIDIA GeForce RTX 4090 Laptop GPU" },
@@ -1021,6 +1022,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
     { 0x2782, 0x0000, 0x0000, "NVIDIA GeForce RTX 4070 Ti" },
     { 0x2783, 0x0000, 0x0000, "NVIDIA GeForce RTX 4070 SUPER" },
     { 0x2786, 0x0000, 0x0000, "NVIDIA GeForce RTX 4070" },
+    { 0x2788, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Ti" },
     { 0x27A0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4080 Laptop GPU" },
     { 0x27B0, 0x16fa, 0x1028, "NVIDIA RTX 4000 SFF Ada Generation" },
     { 0x27B0, 0x16fa, 0x103c, "NVIDIA RTX 4000 SFF Ada Generation" },
@@ -1043,6 +1045,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
     { 0x27FB, 0x0000, 0x0000, "NVIDIA RTX 3500 Ada Generation Embedded GPU" },
     { 0x2803, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Ti" },
     { 0x2805, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Ti" },
+    { 0x2808, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060" },
     { 0x2820, 0x0000, 0x0000, "NVIDIA GeForce RTX 4070 Laptop GPU" },
     { 0x2838, 0x0000, 0x0000, "NVIDIA RTX 3000 Ada Generation Laptop GPU" },
     { 0x2860, 0x0000, 0x0000, "NVIDIA GeForce RTX 4070 Laptop GPU" },
@@ -7,7 +7,7 @@ extern "C" {
 #endif

 /*
- * SPDX-FileCopyrightText: Copyright (c) 2021-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -108,6 +108,9 @@ struct Spdm {
     NvU32 sessionMsgCount;
     PTMR_EVENT pHeartbeatEvent;
     NvU32 heartbeatPeriodSec;
+    NvU8 *pTransportBuffer;
+    NvU32 transportBufferSize;
+    NvU32 pendingResponseSize;
 };

 #ifndef __NVOC_CLASS_Spdm_TYPEDEF__
@@ -7,7 +7,7 @@ extern "C" {
 #endif

 /*
- * SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
  * SPDX-License-Identifier: MIT
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -103,4 +103,24 @@ typedef struct MESSAGE_QUEUE_COLLECTION
 #define GSP_MSG_QUEUE_HEADER_SIZE RM_PAGE_SIZE
 #define GSP_MSG_QUEUE_HEADER_ALIGN 4 // 2 ^ 4 = 16

+/*!
+ * Calculate 32-bit checksum
+ *
+ * This routine assumes that the data is padded out with zeros to the next
+ * 8-byte alignment, and it is OK to read past the end to the 8-byte alignment.
+ */
+static NV_INLINE NvU32 _checkSum32(void *pData, NvU32 uLen)
+{
+    NvU64 *p = (NvU64 *)pData;
+    NvU64 *pEnd = (NvU64 *)((NvUPtr)pData + uLen);
+    NvU64 checkSum = 0;
+
+    NV_ASSERT_CHECKED(uLen > 0);
+
+    while (p < pEnd)
+        checkSum ^= *p++;
+
+    return NvU64_HI32(checkSum) ^ NvU64_LO32(checkSum);
+}
+
 #endif // _MESSAGE_QUEUE_PRIV_H_
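
The _checkSum32 helper moved into this header is an XOR fold over 64-bit words. The standalone C sketch below (plain C types, not the driver's) shows the same idea, including the zero-the-field-then-store property that the send and receive paths rely on; the 16-byte "element" and its layout are illustrative assumptions:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    /* XOR all 64-bit words, then fold the high and low halves into 32 bits.
     * The buffer must be zero-padded to an 8-byte boundary, as the header
     * comment above requires. */
    static uint32_t checksum32(const uint64_t *p, size_t nwords)
    {
        uint64_t sum = 0;
        while (nwords--)
            sum ^= *p++;
        return (uint32_t)(sum >> 32) ^ (uint32_t)sum;
    }

    int main(void)
    {
        uint64_t elem[2] = {0};              /* 16-byte "queue element", zero padded */
        uint8_t *bytes = (uint8_t *)elem;

        memcpy(bytes + 4, "payload", 7);     /* bytes 0-3 stand in for the checkSum field */

        uint32_t c = checksum32(elem, 2);    /* computed while the field is zero */
        memcpy(bytes, &c, sizeof(c));        /* store it in the zeroed field */

        /* Re-checksumming the whole element now yields 0, which is how the
         * receive path validates an element. */
        printf("stored 0x%08x, verify -> 0x%08x\n", c, checksum32(elem, 2));
        return 0;
    }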
@ -585,6 +585,13 @@ kbifRestorePcieConfigRegisters_GM107
|
|||||||
NvU64 timeStampStart;
|
NvU64 timeStampStart;
|
||||||
NvU64 timeStampEnd;
|
NvU64 timeStampEnd;
|
||||||
|
|
||||||
|
if (pKernelBif->xveRegmapRef[0].bufBootConfigSpace == NULL)
|
||||||
|
{
|
||||||
|
NV_PRINTF(LEVEL_ERROR, "Config space buffer is NULL!\n");
|
||||||
|
NV_ASSERT(0);
|
||||||
|
return NV_ERR_OBJECT_NOT_FOUND;
|
||||||
|
}
|
||||||
|
|
||||||
// Restore pcie config space for function 0
|
// Restore pcie config space for function 0
|
||||||
status = _kbifRestorePcieConfigRegisters_GM107(pGpu, pKernelBif,
|
status = _kbifRestorePcieConfigRegisters_GM107(pGpu, pKernelBif,
|
||||||
&pKernelBif->xveRegmapRef[0]);
|
&pKernelBif->xveRegmapRef[0]);
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
@ -259,32 +259,50 @@ kfspPollForQueueEmpty_IMPL
|
|||||||
KernelFsp *pKernelFsp
|
KernelFsp *pKernelFsp
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
NV_STATUS status = NV_OK;
|
||||||
RMTIMEOUT timeout;
|
RMTIMEOUT timeout;
|
||||||
|
|
||||||
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_OSTIMER | GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
|
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout,
|
||||||
|
GPU_TIMEOUT_FLAGS_OSTIMER |
|
||||||
|
GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
|
||||||
|
|
||||||
while (!kfspIsQueueEmpty(pGpu, pKernelFsp))
|
while (!kfspIsQueueEmpty(pGpu, pKernelFsp))
|
||||||
{
|
{
|
||||||
//
|
//
|
||||||
// For now we assume that any response from FSP before RM message send is complete
|
// For now we assume that any response from FSP before RM message
|
||||||
// indicates an error and we should abort.
|
// send is complete indicates an error and we should abort.
|
||||||
|
//
|
||||||
|
// Ongoing dicussion on usefullness of this check. Bug to be filed.
|
||||||
//
|
//
|
||||||
if (!kfspIsMsgQueueEmpty(pGpu, pKernelFsp))
|
if (!kfspIsMsgQueueEmpty(pGpu, pKernelFsp))
|
||||||
{
|
{
|
||||||
kfspReadMessage(pGpu, pKernelFsp, NULL, 0);
|
kfspReadMessage(pGpu, pKernelFsp, NULL, 0);
|
||||||
NV_PRINTF(LEVEL_ERROR, "Received error message from FSP while waiting for CMDQ to be empty.\n");
|
NV_PRINTF(LEVEL_ERROR,
|
||||||
return NV_ERR_GENERIC;
|
"Received error message from FSP while waiting for CMDQ to be empty.\n");
|
||||||
|
status = NV_ERR_GENERIC;
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (gpuCheckTimeout(pGpu, &timeout) == NV_ERR_TIMEOUT)
|
|
||||||
{
|
|
||||||
NV_PRINTF(LEVEL_ERROR, "Timed out waiting for FSP command queue to be empty.\n");
|
|
||||||
return NV_ERR_TIMEOUT;
|
|
||||||
}
|
|
||||||
osSpinLoop();
|
osSpinLoop();
|
||||||
|
|
||||||
|
status = gpuCheckTimeout(pGpu, &timeout);
|
||||||
|
if (status != NV_OK)
|
||||||
|
{
|
||||||
|
if ((status == NV_ERR_TIMEOUT) &&
|
||||||
|
kfspIsQueueEmpty(pGpu, pKernelFsp))
|
||||||
|
{
|
||||||
|
status = NV_OK;
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
NV_PRINTF(LEVEL_ERROR,
|
||||||
|
"Timed out waiting for FSP command queue to be empty.\n");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return NV_OK;
|
return status;
|
||||||
}
|
}
|
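The kfspPollForQueueEmpty_IMPL rework above stops treating a timeout as an immediate failure: errors now set a status and break so the function has a single exit, and after gpuCheckTimeout() fires the queue is re-checked once so a timeout that races with the queue draining is still reported as success. A small stand-alone sketch of that wait-loop shape, assuming a one-second budget and a simulated queue_empty() in place of the driver's gpuSetTimeout()/gpuCheckTimeout()/osSpinLoop() primitives:

    #include <stdbool.h>
    #include <stdio.h>
    #include <time.h>

    /* Simulated queue: reports empty after a few polls. */
    static int polls_remaining = 3;
    static bool queue_empty(void) { return --polls_remaining <= 0; }

    static int poll_for_queue_empty(void)
    {
        int status = 0;
        time_t deadline = time(NULL) + 1;       /* illustrative timeout budget */

        while (!queue_empty()) {
            /* spin / yield here (osSpinLoop() in the driver) */

            if (time(NULL) > deadline) {
                /* Same idea as the new code: re-check once, since a timeout
                 * that races with the queue draining is not an error. */
                if (queue_empty()) {
                    status = 0;
                } else {
                    fprintf(stderr, "Timed out waiting for queue to be empty.\n");
                    status = -1;                /* analogous to NV_ERR_TIMEOUT */
                }
                break;
            }
        }
        return status;
    }

    int main(void)
    {
        return poll_for_queue_empty() == 0 ? 0 : 1;
    }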
||||||
|
|
||||||
/*!
|
/*!
|
||||||
|
@ -846,6 +846,14 @@ _kgspRpcEventIsGpuDegradedCallback
|
|||||||
OBJRPC *pRpc
|
OBJRPC *pRpc
|
||||||
)
|
)
|
||||||
{
|
{
|
||||||
|
RPC_PARAMS(nvlink_is_gpu_degraded, _v17_00);
|
||||||
|
KernelNvlink *pKernelNvlink = GPU_GET_KERNEL_NVLINK(pGpu);
|
||||||
|
NV2080_CTRL_NVLINK_IS_GPU_DEGRADED_PARAMS_v17_00 *dest = &rpc_params->params;
|
||||||
|
|
||||||
|
if(dest->bIsGpuDegraded)
|
||||||
|
{
|
||||||
|
knvlinkSetDegradedMode(pGpu, pKernelNvlink, dest->linkId);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
|
@ -476,24 +476,6 @@ void GspMsgQueuesCleanup(MESSAGE_QUEUE_COLLECTION **ppMQCollection)
|
|||||||
*ppMQCollection = NULL;
|
*ppMQCollection = NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*!
|
|
||||||
* Calculate 32-bit checksum
|
|
||||||
*
|
|
||||||
* This routine assumes that the data is padded out with zeros to the next
|
|
||||||
* 8-byte alignment, and it is OK to read past the end to the 8-byte alignment.
|
|
||||||
*/
|
|
||||||
static NV_INLINE NvU32 _checkSum32(void *pData, NvU32 uLen)
|
|
||||||
{
|
|
||||||
NvU64 *p = (NvU64 *)pData;
|
|
||||||
NvU64 *pEnd = (NvU64 *)((NvUPtr)pData + uLen);
|
|
||||||
NvU64 checkSum = 0;
|
|
||||||
|
|
||||||
while (p < pEnd)
|
|
||||||
checkSum ^= *p++;
|
|
||||||
|
|
||||||
return NvU64_HI32(checkSum) ^ NvU64_LO32(checkSum);
|
|
||||||
}
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* GspMsgQueueSendCommand
|
* GspMsgQueueSendCommand
|
||||||
*
|
*
|
||||||
@ -532,7 +514,7 @@ NV_STATUS GspMsgQueueSendCommand(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
|
|||||||
|
|
||||||
pCQE->seqNum = pMQI->txSeqNum;
|
pCQE->seqNum = pMQI->txSeqNum;
|
||||||
pCQE->elemCount = GSP_MSG_QUEUE_BYTES_TO_ELEMENTS(uElementSize);
|
pCQE->elemCount = GSP_MSG_QUEUE_BYTES_TO_ELEMENTS(uElementSize);
|
||||||
pCQE->checkSum = 0;
|
pCQE->checkSum = 0; // The checkSum field is included in the checksum calculation, so zero it.
|
||||||
|
|
||||||
if (gpuIsCCFeatureEnabled(pGpu))
|
if (gpuIsCCFeatureEnabled(pGpu))
|
||||||
{
|
{
|
||||||
@ -666,7 +648,8 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
|
|||||||
NvU32 nRetries;
|
NvU32 nRetries;
|
||||||
NvU32 nMaxRetries = 3;
|
NvU32 nMaxRetries = 3;
|
||||||
NvU32 nElements = 1; // Assume record fits in one queue element for now.
|
NvU32 nElements = 1; // Assume record fits in one queue element for now.
|
||||||
NvU32 uElementSize = 0;
|
NvU32 uElementSize;
|
||||||
|
NvU32 checkSum;
|
||||||
NvU32 seqMismatchDiff = NV_U32_MAX;
|
NvU32 seqMismatchDiff = NV_U32_MAX;
|
||||||
NV_STATUS nvStatus = NV_OK;
|
NV_STATUS nvStatus = NV_OK;
|
||||||
|
|
||||||
@ -717,15 +700,23 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
|
|||||||
// Retry if checksum fails.
|
// Retry if checksum fails.
|
||||||
if (gpuIsCCFeatureEnabled(pGpu))
|
if (gpuIsCCFeatureEnabled(pGpu))
|
||||||
{
|
{
|
||||||
// In Confidential Compute scenario, checksum includes complete element range.
|
//
|
||||||
if (_checkSum32(pMQI->pCmdQueueElement, (nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN)) != 0)
|
// In the Confidential Compute scenario, the actual message length
|
||||||
{
|
// is inside the encrypted payload, and we can't access it before
|
||||||
NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n");
|
// decryption, therefore the checksum encompasses the whole element
|
||||||
nvStatus = NV_ERR_INVALID_DATA;
|
// range. This makes checksum verification significantly slower
|
||||||
continue;
|
// because messages are typically much smaller than element size.
|
||||||
}
|
//
|
||||||
|
checkSum = _checkSum32(pMQI->pCmdQueueElement,
|
||||||
|
(nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN));
|
||||||
} else
|
} else
|
||||||
if (_checkSum32(pMQI->pCmdQueueElement, uElementSize) != 0)
|
{
|
||||||
|
checkSum = _checkSum32(pMQI->pCmdQueueElement,
|
||||||
|
(GSP_MSG_QUEUE_ELEMENT_HDR_SIZE +
|
||||||
|
pMQI->pCmdQueueElement->rpc.length));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (checkSum != 0)
|
||||||
{
|
{
|
||||||
NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n");
|
NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n");
|
||||||
nvStatus = NV_ERR_INVALID_DATA;
|
nvStatus = NV_ERR_INVALID_DATA;
|
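These receive-path hunks pair with the earlier send-path change that zeroes pCQE->checkSum before computing it: the stored checksum is part of the checksummed range, so an intact element folds back to zero on verification. In the Confidential Compute path the checksum spans nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN, while the normal path covers only the header plus rpc.length. A self-contained sketch of the XOR-fold checksum (same shape as the driver's _checkSum32(), which assumes zero padding to 8-byte alignment) and the fold-to-zero verification, using a toy element layout rather than the real GSP queue structures:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* XOR over 8-byte words, folded to 32 bits. */
    static uint32_t checksum32(const void *data, size_t len)
    {
        const uint64_t *p   = data;
        const uint64_t *end = (const uint64_t *)((const uint8_t *)data + len);
        uint64_t sum = 0;
        while (p < end)
            sum ^= *p++;
        return (uint32_t)(sum >> 32) ^ (uint32_t)sum;
    }

    int main(void)
    {
        /* Toy "queue element": checksum field first, then payload. */
        struct { uint32_t checkSum; uint32_t pad; uint64_t payload[3]; } elem;
        memset(&elem, 0, sizeof elem);
        elem.payload[0] = 0x1122334455667788ull;

        /* Sender: zero the field, then store the checksum over the whole range. */
        elem.checkSum = 0;
        elem.checkSum = checksum32(&elem, sizeof elem);

        /* Receiver: checksumming the same range must fold to zero. */
        printf("verify = %u\n", (unsigned)checksum32(&elem, sizeof elem)); /* 0 */
        return 0;
    }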
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
@ -475,8 +475,11 @@ _kmemsysGetFbInfos
|
|||||||
// It will be zero unless VGA display memory is reserved
|
// It will be zero unless VGA display memory is reserved
|
||||||
if (pKernelMemorySystem->fbOverrideStartKb != 0)
|
if (pKernelMemorySystem->fbOverrideStartKb != 0)
|
||||||
{
|
{
|
||||||
|
status = NV_OK;
|
||||||
data = NvU64_LO32(pKernelMemorySystem->fbOverrideStartKb);
|
data = NvU64_LO32(pKernelMemorySystem->fbOverrideStartKb);
|
||||||
NV_ASSERT(((NvU64) data << 10ULL) == pKernelMemorySystem->fbOverrideStartKb);
|
NV_ASSERT_OR_ELSE((NvU64) data == pKernelMemorySystem->fbOverrideStartKb,
|
||||||
|
status = NV_ERR_INVALID_DATA);
|
||||||
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
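The hunk above replaces a bare NV_ASSERT (which also shifted the 32-bit value left by 10 before comparing it against the KB count) with NV_ASSERT_OR_ELSE, so a KB value that does not round-trip through the 32-bit field now reports NV_ERR_INVALID_DATA rather than only tripping an assert. A small sketch of that truncation check; report_fb_start() and its parameters are illustrative, not driver code:

    #include <stdint.h>
    #include <stdio.h>

    /* The override is kept in KB as a 64-bit value but reported through a
     * 32-bit field, so the low 32 bits must round-trip exactly. */
    static int report_fb_start(uint64_t fb_override_start_kb, uint32_t *out)
    {
        uint32_t data = (uint32_t)fb_override_start_kb;     /* NvU64_LO32() */
        if ((uint64_t)data != fb_override_start_kb)
            return -1;                    /* analogous to NV_ERR_INVALID_DATA */
        *out = data;
        return 0;
    }

    int main(void)
    {
        uint32_t kb;
        printf("%d\n", report_fb_start(0x180000ull, &kb));      /* fits: 0 */
        printf("%d\n", report_fb_start(0x100000000ull, &kb));   /* truncates: -1 */
        return 0;
    }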
||||||
|
@ -1,5 +1,5 @@
|
|||||||
/*
|
/*
|
||||||
* SPDX-FileCopyrightText: Copyright (c) 2012-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
* SPDX-FileCopyrightText: Copyright (c) 2012-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||||
* SPDX-License-Identifier: MIT
|
* SPDX-License-Identifier: MIT
|
||||||
*
|
*
|
||||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||||
@ -159,6 +159,7 @@ nvencsessionConstruct_IMPL
|
|||||||
(listCount(&(pGpu->nvencSessionList)) == 1))
|
(listCount(&(pGpu->nvencSessionList)) == 1))
|
||||||
{
|
{
|
||||||
// Register 1Hz timer callback for this GPU.
|
// Register 1Hz timer callback for this GPU.
|
||||||
|
pGpu->bNvEncSessionDataProcessingWorkItemPending = NV_FALSE;
|
||||||
status = osSchedule1HzCallback(pGpu,
|
status = osSchedule1HzCallback(pGpu,
|
||||||
_gpuNvEncSessionDataProcessingCallback,
|
_gpuNvEncSessionDataProcessingCallback,
|
||||||
NULL,
|
NULL,
|
||||||
@ -379,8 +380,7 @@ _gpuNvEncSessionProcessBuffer(POBJGPU pGpu, NvencSession *pNvencSession)
|
|||||||
portMemFree(pLocalSessionInfoBuffer);
|
portMemFree(pLocalSessionInfoBuffer);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void _gpuNvEncSessionDataProcessing(OBJGPU *pGpu)
|
||||||
_gpuNvEncSessionDataProcessingCallback(POBJGPU pGpu, void *data)
|
|
||||||
{
|
{
|
||||||
PNVENC_SESSION_LIST_ITEM pNvencSessionListItem;
|
PNVENC_SESSION_LIST_ITEM pNvencSessionListItem;
|
||||||
PNVENC_SESSION_LIST_ITEM pNvencSessionListItemNext;
|
PNVENC_SESSION_LIST_ITEM pNvencSessionListItemNext;
|
||||||
@ -416,3 +416,46 @@ _gpuNvEncSessionDataProcessingCallback(POBJGPU pGpu, void *data)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void _gpuNvEncSessionDataProcessingWorkItem(NvU32 gpuInstance, void *pArgs)
|
||||||
|
{
|
||||||
|
OBJGPU *pGpu;
|
||||||
|
|
||||||
|
pGpu = gpumgrGetGpu(gpuInstance);
|
||||||
|
if (pGpu == NULL)
|
||||||
|
{
|
||||||
|
NV_PRINTF(LEVEL_ERROR, "NVENC Sessions GPU instance is invalid\n");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
_gpuNvEncSessionDataProcessing(pGpu);
|
||||||
|
pGpu->bNvEncSessionDataProcessingWorkItemPending = NV_FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
_gpuNvEncSessionDataProcessingCallback(POBJGPU pGpu, void *data)
|
||||||
|
{
|
||||||
|
NV_STATUS status;
|
||||||
|
|
||||||
|
if (!pGpu->bNvEncSessionDataProcessingWorkItemPending)
|
||||||
|
{
|
||||||
|
status = osQueueWorkItemWithFlags(pGpu,
|
||||||
|
_gpuNvEncSessionDataProcessingWorkItem,
|
||||||
|
NULL,
|
||||||
|
OS_QUEUE_WORKITEM_FLAGS_LOCK_SEMA
|
||||||
|
| OS_QUEUE_WORKITEM_FLAGS_LOCK_GPU_GROUP_DEVICE_RW);
|
||||||
|
if (status != NV_OK)
|
||||||
|
{
|
||||||
|
NV_PRINTF(LEVEL_ERROR,
|
||||||
|
"NVENC session queuing async callback failed, status=%x\n",
|
||||||
|
status);
|
||||||
|
|
||||||
|
// Call directly to do NVENC session data processing
|
||||||
|
_gpuNvEncSessionDataProcessing(pGpu);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
pGpu->bNvEncSessionDataProcessingWorkItemPending = NV_TRUE;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
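The NVENC hunks above move the session-data processing out of the 1 Hz timer callback into a work item queued with osQueueWorkItemWithFlags(); the bNvEncSessionDataProcessingWorkItemPending flag keeps at most one item in flight, and a direct call is the fallback when queuing fails. A compact sketch of that defer-with-pending-flag pattern, using a simulated queue_work_item() in place of the RM work-item API:

    #include <stdbool.h>
    #include <stdio.h>

    static bool work_item_pending = false;

    static void do_session_data_processing(void)
    {
        printf("processing NVENC session data\n");
    }

    static void work_item(void)
    {
        do_session_data_processing();
        work_item_pending = false;      /* allow the next callback to re-queue */
    }

    /* Stand-in for osQueueWorkItemWithFlags(); simulate a queuing failure. */
    static int queue_work_item(void (*fn)(void))
    {
        (void)fn;
        return -1;
    }

    /* 1 Hz timer callback: defer the heavy work, or run it inline on failure. */
    static void timer_callback(void)
    {
        if (work_item_pending)
            return;                     /* a work item is already queued */

        if (queue_work_item(work_item) != 0) {
            fprintf(stderr, "queuing work item failed, processing inline\n");
            do_session_data_processing();
        } else {
            work_item_pending = true;
        }
    }

    int main(void)
    {
        timer_callback();
        return 0;
    }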
||||||
|
@ -1034,6 +1034,7 @@ knvlinkCoreShutdownDeviceLinks_IMPL
|
|||||||
OBJSYS *pSys = SYS_GET_INSTANCE();
|
OBJSYS *pSys = SYS_GET_INSTANCE();
|
||||||
NvU32 count = 0;
|
NvU32 count = 0;
|
||||||
NvU32 linkId;
|
NvU32 linkId;
|
||||||
|
NvlStatus status = NV_OK;
|
||||||
|
|
||||||
// Skip link shutdown where fabric manager is present, for nvlink versions below 4.0
|
// Skip link shutdown where fabric manager is present, for nvlink versions below 4.0
|
||||||
if ((pKernelNvlink->ipVerNvlink < NVLINK_VERSION_40 &&
|
if ((pKernelNvlink->ipVerNvlink < NVLINK_VERSION_40 &&
|
||||||
@ -1096,8 +1097,17 @@ knvlinkCoreShutdownDeviceLinks_IMPL
|
|||||||
// Trigger laneshutdown through core lib if shutdown is supported
|
// Trigger laneshutdown through core lib if shutdown is supported
|
||||||
if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED) && (count > 0))
|
if (pKernelNvlink->getProperty(pKernelNvlink, PDB_PROP_KNVLINK_LANE_SHUTDOWN_ENABLED) && (count > 0))
|
||||||
{
|
{
|
||||||
if (nvlink_lib_powerdown_links_from_active_to_off(
|
status = nvlink_lib_powerdown_links_from_active_to_off(
|
||||||
pLinks, count, NVLINK_STATE_CHANGE_SYNC))
|
pLinks, count, NVLINK_STATE_CHANGE_SYNC);
|
||||||
|
if (status != NVL_SUCCESS)
|
||||||
|
{
|
||||||
|
if (status == NVL_NOT_FOUND)
|
||||||
|
{
|
||||||
|
// Bug 4419022
|
||||||
|
NV_PRINTF(LEVEL_ERROR, "Need to shutdown all links unilaterally for GPU%d\n",
|
||||||
|
pGpu->gpuInstance);
|
||||||
|
}
|
||||||
|
else
|
||||||
{
|
{
|
||||||
NV_PRINTF(LEVEL_ERROR, "Unable to turn off links for the GPU%d\n",
|
NV_PRINTF(LEVEL_ERROR, "Unable to turn off links for the GPU%d\n",
|
||||||
pGpu->gpuInstance);
|
pGpu->gpuInstance);
|
||||||
@ -1105,6 +1115,7 @@ knvlinkCoreShutdownDeviceLinks_IMPL
|
|||||||
return NV_ERR_INVALID_STATE;
|
return NV_ERR_INVALID_STATE;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
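The shutdown hunk above starts inspecting the NvlStatus returned by nvlink_lib_powerdown_links_from_active_to_off(): NVL_NOT_FOUND is tolerated and only logged (per the Bug 4419022 note, the links are then shut down unilaterally), while any other failure still returns NV_ERR_INVALID_STATE. A minimal sketch of that status dispatch, with stand-in status codes and a fake powerdown callback:

    #include <stdio.h>

    /* Illustrative status codes standing in for NvlStatus values. */
    enum { LIB_OK = 0, LIB_NOT_FOUND = 1, LIB_ERROR = 2 };

    /* "Not found" is tolerated; anything else is a hard failure. */
    static int shutdown_links(int gpu, int (*powerdown_links)(void))
    {
        int status = powerdown_links();

        if (status != LIB_OK) {
            if (status == LIB_NOT_FOUND) {
                fprintf(stderr,
                        "Need to shut down all links unilaterally for GPU%d\n", gpu);
            } else {
                fprintf(stderr, "Unable to turn off links for the GPU%d\n", gpu);
                return -1;              /* analogous to NV_ERR_INVALID_STATE */
            }
        }
        return 0;
    }

    static int fake_powerdown(void) { return LIB_NOT_FOUND; }

    int main(void)
    {
        return shutdown_links(0, fake_powerdown) == 0 ? 0 : 1;
    }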
||||||
|
@ -51,6 +51,14 @@
|
|||||||
// Regardless of whether Requester is configured to support these,
|
// Regardless of whether Requester is configured to support these,
|
||||||
// we only expect Responder to provide these capabilities.
|
// we only expect Responder to provide these capabilities.
|
||||||
//
|
//
|
||||||
|
|
||||||
|
//
|
||||||
|
// TODO: SPDM_CAPABILITIES_FLAGS_GH100 and g_SpdmAlgoCheckTable_GH100 are the expected capability flags
|
||||||
|
// and attributes that GH100 receives from the responder. Currently, we have only 1 responder,
|
||||||
|
// which returns fixed capability flags and attributes.
|
||||||
|
// If we want to support different returned capabilities and attributes later, we need
|
||||||
|
// to refactor spdmCheckConnection_GH100().
|
||||||
|
//
|
||||||
#define SPDM_CAPABILITIES_FLAGS_GH100 \
|
#define SPDM_CAPABILITIES_FLAGS_GH100 \
|
||||||
SPDM_GET_CAPABILITIES_RESPONSE_FLAGS_CERT_CAP | \
|
SPDM_GET_CAPABILITIES_RESPONSE_FLAGS_CERT_CAP | \
|
||||||
SPDM_GET_CAPABILITIES_RESPONSE_FLAGS_MEAS_CAP_SIG | \
|
SPDM_GET_CAPABILITIES_RESPONSE_FLAGS_MEAS_CAP_SIG | \
|
||||||
@ -64,21 +72,6 @@
|
|||||||
SPDM_GET_CAPABILITIES_RESPONSE_FLAGS_HBEAT_CAP;
|
SPDM_GET_CAPABILITIES_RESPONSE_FLAGS_HBEAT_CAP;
|
||||||
|
|
||||||
/* ------------------------ Static Variables ------------------------------- */
|
/* ------------------------ Static Variables ------------------------------- */
|
||||||
//
|
|
||||||
// For transport functionality, we require access to the GPU and Spdm objects,
|
|
||||||
// as well as additional state (temporary response buffer).
|
|
||||||
//
|
|
||||||
// However, libspdm transport layer is implemented via callbacks which currently
|
|
||||||
// do not support passing any custom parameters, meaning we must use static variables
|
|
||||||
// to access these objects. If we ever require multiple instances of the Spdm object,
|
|
||||||
// this will be an issue.
|
|
||||||
//
|
|
||||||
static OBJGPU *g_pGpu = NULL;
|
|
||||||
static Spdm *g_pSpdm = NULL;
|
|
||||||
static NvU8 *g_pTransportBuffer = NULL;
|
|
||||||
static NvU32 g_transportBufferSize = 0;
|
|
||||||
static NvU32 g_pendingResponseSize = 0;
|
|
||||||
|
|
||||||
static SPDM_ALGO_CHECK_ENTRY g_SpdmAlgoCheckTable_GH100[] =
|
static SPDM_ALGO_CHECK_ENTRY g_SpdmAlgoCheckTable_GH100[] =
|
||||||
{
|
{
|
||||||
{ LIBSPDM_DATA_MEASUREMENT_SPEC, SPDM_MEASUREMENT_SPECIFICATION_DMTF },
|
{ LIBSPDM_DATA_MEASUREMENT_SPEC, SPDM_MEASUREMENT_SPECIFICATION_DMTF },
|
||||||
@ -127,7 +120,6 @@ static libspdm_return_t _spdmSendMessageGsp(void *spdm_context, size_t message_s
|
|||||||
static libspdm_return_t _spdmReceiveMessageGsp(void *spdm_context, size_t *message_size,
|
static libspdm_return_t _spdmReceiveMessageGsp(void *spdm_context, size_t *message_size,
|
||||||
void **message, uint64_t timeout);
|
void **message, uint64_t timeout);
|
||||||
|
|
||||||
|
|
||||||
/* ------------------------ Static Functions ------------------------------- */
|
/* ------------------------ Static Functions ------------------------------- */
|
||||||
//
|
//
|
||||||
// Hardcoding check for libspdm secured message callbacks version.
|
// Hardcoding check for libspdm secured message callbacks version.
|
||||||
@ -311,6 +303,8 @@ _spdmEncodeMessageGsp
|
|||||||
void *pSecuredMessageContext = NULL;
|
void *pSecuredMessageContext = NULL;
|
||||||
NV_SPDM_DESC_HEADER *pNvSpdmDescHdr = NULL;
|
NV_SPDM_DESC_HEADER *pNvSpdmDescHdr = NULL;
|
||||||
NvU32 payloadSize = 0;
|
NvU32 payloadSize = 0;
|
||||||
|
Spdm *pSpdm = NULL;
|
||||||
|
size_t dataSize = sizeof(void *);
|
||||||
|
|
||||||
// Check libspdm parameters.
|
// Check libspdm parameters.
|
||||||
if (spdm_context == NULL || message == NULL || message_size == 0 ||
|
if (spdm_context == NULL || message == NULL || message_size == 0 ||
|
||||||
@ -332,6 +326,21 @@ _spdmEncodeMessageGsp
|
|||||||
return LIBSPDM_STATUS_INVALID_MSG_FIELD;
|
return LIBSPDM_STATUS_INVALID_MSG_FIELD;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
status = libspdm_get_data(spdm_context, LIBSPDM_DATA_APP_CONTEXT_DATA,
|
||||||
|
NULL, (void *)&pSpdm, &dataSize);
|
||||||
|
|
||||||
|
if (status != LIBSPDM_STATUS_SUCCESS)
|
||||||
|
{
|
||||||
|
NV_PRINTF(LEVEL_ERROR, ", spdmStatus != LIBSPDM_STATUS_SUCCESS \n ");
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pSpdm == NULL)
|
||||||
|
{
|
||||||
|
NV_PRINTF(LEVEL_ERROR, " pSpdm == NULL, SPDM context probably corrupted !! \n ");
|
||||||
|
return LIBSPDM_STATUS_INVALID_STATE_LOCAL;
|
||||||
|
}
|
||||||
|
|
||||||
// Initialize descriptor header.
|
// Initialize descriptor header.
|
||||||
pNvSpdmDescHdr = (NV_SPDM_DESC_HEADER *)*transport_message;
|
pNvSpdmDescHdr = (NV_SPDM_DESC_HEADER *)*transport_message;
|
||||||
portMemSet(pNvSpdmDescHdr, 0, sizeof(NV_SPDM_DESC_HEADER));
|
portMemSet(pNvSpdmDescHdr, 0, sizeof(NV_SPDM_DESC_HEADER));
|
||||||
@ -401,7 +410,7 @@ _spdmEncodeMessageGsp
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Check final encrypted message size.
|
// Check final encrypted message size.
|
||||||
if (*transport_message_size > g_pSpdm->payloadBufferSize)
|
if (*transport_message_size > pSpdm->payloadBufferSize)
|
||||||
{
|
{
|
||||||
return LIBSPDM_STATUS_BUFFER_TOO_SMALL;
|
return LIBSPDM_STATUS_BUFFER_TOO_SMALL;
|
||||||
}
|
}
|
||||||
@ -432,6 +441,8 @@ _spdmDecodeMessageGsp
|
|||||||
void *pSecuredMessageContext = NULL;
|
void *pSecuredMessageContext = NULL;
|
||||||
libspdm_return_t status = LIBSPDM_STATUS_SUCCESS;
|
libspdm_return_t status = LIBSPDM_STATUS_SUCCESS;
|
||||||
spdm_secured_message_a_data_header1_t *pSpdmSecuredMsgHdr = NULL;
|
spdm_secured_message_a_data_header1_t *pSpdmSecuredMsgHdr = NULL;
|
||||||
|
Spdm *pSpdm = NULL;
|
||||||
|
size_t dataSize = sizeof(void *);
|
||||||
|
|
||||||
// Check libspdm parameters.
|
// Check libspdm parameters.
|
||||||
if (spdm_context == NULL || session_id == NULL || is_app_message == NULL ||
|
if (spdm_context == NULL || session_id == NULL || is_app_message == NULL ||
|
||||||
@ -447,10 +458,25 @@ _spdmDecodeMessageGsp
|
|||||||
return LIBSPDM_STATUS_INVALID_PARAMETER;
|
return LIBSPDM_STATUS_INVALID_PARAMETER;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
status = libspdm_get_data(spdm_context, LIBSPDM_DATA_APP_CONTEXT_DATA,
|
||||||
|
NULL, (void *)&pSpdm, &dataSize);
|
||||||
|
|
||||||
|
if (status != LIBSPDM_STATUS_SUCCESS)
|
||||||
|
{
|
||||||
|
NV_PRINTF(LEVEL_ERROR, " spdmStatus != LIBSPDM_STATUS_SUCCESS \n ");
|
||||||
|
return status;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pSpdm == NULL)
|
||||||
|
{
|
||||||
|
NV_PRINTF(LEVEL_ERROR, " pSpdm == NULL, SPDM context probably corrupted !! \n ");
|
||||||
|
return LIBSPDM_STATUS_INVALID_STATE_LOCAL;
|
||||||
|
}
|
||||||
|
|
||||||
// Retrieve NV-header from message, and perform basic validation.
|
// Retrieve NV-header from message, and perform basic validation.
|
||||||
pNvSpdmDescHdr = (NV_SPDM_DESC_HEADER *)transport_message;
|
pNvSpdmDescHdr = (NV_SPDM_DESC_HEADER *)transport_message;
|
||||||
if (transport_message_size < sizeof(NV_SPDM_DESC_HEADER) ||
|
if (transport_message_size < sizeof(NV_SPDM_DESC_HEADER) ||
|
||||||
transport_message_size > g_pSpdm->payloadBufferSize)
|
transport_message_size > pSpdm->payloadBufferSize)
|
||||||
{
|
{
|
||||||
return LIBSPDM_STATUS_INVALID_MSG_FIELD;
|
return LIBSPDM_STATUS_INVALID_MSG_FIELD;
|
||||||
}
|
}
|
||||||
@ -568,9 +594,9 @@ _spdmSendMessageGsp
|
|||||||
{
|
{
|
||||||
NV_STATUS nvStatus = NV_OK;
|
NV_STATUS nvStatus = NV_OK;
|
||||||
libspdm_return_t spdmStatus = LIBSPDM_STATUS_SUCCESS;
|
libspdm_return_t spdmStatus = LIBSPDM_STATUS_SUCCESS;
|
||||||
|
Spdm *pSpdm = NULL;
|
||||||
// Ensure size is cleared to indicate no response pending in buffer yet
|
OBJGPU *pGpu = NULL;
|
||||||
g_pendingResponseSize = 0;
|
size_t dataSize = sizeof(void *);
|
||||||
|
|
||||||
// Check libspdm parameters.
|
// Check libspdm parameters.
|
||||||
if (message_size == 0 || message == NULL)
|
if (message_size == 0 || message == NULL)
|
||||||
@ -578,23 +604,44 @@ _spdmSendMessageGsp
|
|||||||
return LIBSPDM_STATUS_INVALID_PARAMETER;
|
return LIBSPDM_STATUS_INVALID_PARAMETER;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_pGpu == NULL || g_pSpdm == NULL)
|
spdmStatus = libspdm_get_data(spdm_context, LIBSPDM_DATA_APP_CONTEXT_DATA,
|
||||||
|
NULL, (void *)&pSpdm, &dataSize);
|
||||||
|
|
||||||
|
if (spdmStatus != LIBSPDM_STATUS_SUCCESS)
|
||||||
{
|
{
|
||||||
|
NV_PRINTF(LEVEL_ERROR," spdmStatus != LIBSPDM_STATUS_SUCCESS \n ");
|
||||||
|
return spdmStatus;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (pSpdm == NULL)
|
||||||
|
{
|
||||||
|
NV_PRINTF(LEVEL_ERROR, " pSpdm == NULL, SPDM context probably corrupted !! \n ");
|
||||||
return LIBSPDM_STATUS_INVALID_STATE_LOCAL;
|
return LIBSPDM_STATUS_INVALID_STATE_LOCAL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_transportBufferSize < message_size)
|
pGpu = ENG_GET_GPU(pSpdm);
|
||||||
|
|
||||||
|
if (pGpu == NULL)
|
||||||
|
{
|
||||||
|
NV_PRINTF(LEVEL_ERROR, " pGpu == NULL, SPDM context probably corrupted !! \n ");
|
||||||
|
return LIBSPDM_STATUS_INVALID_STATE_LOCAL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ensure size is cleared to indicate no response pending in buffer yet
|
||||||
|
pSpdm->pendingResponseSize = 0;
|
||||||
|
|
||||||
|
if (pSpdm->transportBufferSize < message_size)
|
||||||
{
|
{
|
||||||
return LIBSPDM_STATUS_BUFFER_TOO_SMALL;
|
return LIBSPDM_STATUS_BUFFER_TOO_SMALL;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fill transport buffer with message and send
|
// Fill transport buffer with message and send
|
||||||
g_pendingResponseSize = g_transportBufferSize;
|
pSpdm->pendingResponseSize = pSpdm->transportBufferSize;
|
||||||
portMemCopy(g_pTransportBuffer, g_transportBufferSize, message, message_size);
|
portMemCopy(pSpdm->pTransportBuffer, pSpdm->transportBufferSize, message, message_size);
|
||||||
|
|
||||||
nvStatus = spdmMessageProcess_HAL(g_pGpu, g_pSpdm,
|
nvStatus = spdmMessageProcess_HAL(pGpu, pSpdm,
|
||||||
g_pTransportBuffer, message_size,
|
pSpdm->pTransportBuffer, message_size,
|
||||||
g_pTransportBuffer, &g_pendingResponseSize);
|
pSpdm->pTransportBuffer, &pSpdm->pendingResponseSize);
|
||||||
if (nvStatus != NV_OK)
|
if (nvStatus != NV_OK)
|
||||||
{
|
{
|
||||||
spdmStatus = LIBSPDM_STATUS_SEND_FAIL;
|
spdmStatus = LIBSPDM_STATUS_SEND_FAIL;
|
||||||
@ -603,7 +650,7 @@ _spdmSendMessageGsp
|
|||||||
if (spdmStatus != LIBSPDM_STATUS_SUCCESS)
|
if (spdmStatus != LIBSPDM_STATUS_SUCCESS)
|
||||||
{
|
{
|
||||||
// If message failed, size is cleared to indicate no response pending
|
// If message failed, size is cleared to indicate no response pending
|
||||||
g_pendingResponseSize = 0;
|
pSpdm->pendingResponseSize = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
return spdmStatus;
|
return spdmStatus;
|
||||||
@ -624,6 +671,8 @@ _spdmReceiveMessageGsp
|
|||||||
)
|
)
|
||||||
{
|
{
|
||||||
libspdm_return_t spdmStatus = LIBSPDM_STATUS_SUCCESS;
|
libspdm_return_t spdmStatus = LIBSPDM_STATUS_SUCCESS;
|
||||||
|
Spdm *pSpdm = NULL;
|
||||||
|
size_t dataSize = sizeof(void *);
|
||||||
|
|
||||||
// Check libspdm parameters.
|
// Check libspdm parameters.
|
||||||
if (message_size == NULL || message == NULL || *message == NULL)
|
if (message_size == NULL || message == NULL || *message == NULL)
|
||||||
@ -631,25 +680,36 @@ _spdmReceiveMessageGsp
|
|||||||
return LIBSPDM_STATUS_INVALID_PARAMETER;
|
return LIBSPDM_STATUS_INVALID_PARAMETER;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g_pGpu == NULL || g_pSpdm == NULL)
|
spdmStatus = libspdm_get_data(spdm_context, LIBSPDM_DATA_APP_CONTEXT_DATA,
|
||||||
|
NULL, (void *)&pSpdm, &dataSize);
|
||||||
|
|
||||||
|
if (spdmStatus != LIBSPDM_STATUS_SUCCESS)
|
||||||
{
|
{
|
||||||
return LIBSPDM_STATUS_INVALID_STATE_LOCAL;
|
NV_PRINTF(LEVEL_ERROR, " spdmStatus != LIBSPDM_STATUS_SUCCESS \n ");
|
||||||
|
return spdmStatus;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (pSpdm == NULL)
|
||||||
|
{
|
||||||
|
NV_PRINTF(LEVEL_ERROR, " pSpdm == NULL, SPDM context probably corrupted !! \n ");
|
||||||
|
return LIBSPDM_STATUS_INVALID_STATE_LOCAL;
|
||||||
|
}
|
||||||
// Basic validation to ensure we have a real response.
|
// Basic validation to ensure we have a real response.
|
||||||
if (g_pendingResponseSize == 0 || g_pendingResponseSize > *message_size)
|
if (pSpdm->pendingResponseSize == 0 ||
|
||||||
|
pSpdm->pendingResponseSize > *message_size)
|
||||||
{
|
{
|
||||||
spdmStatus = LIBSPDM_STATUS_RECEIVE_FAIL;
|
spdmStatus = LIBSPDM_STATUS_RECEIVE_FAIL;
|
||||||
goto ErrorExit;
|
goto ErrorExit;
|
||||||
}
|
}
|
||||||
|
|
||||||
portMemCopy(*message, *message_size, g_pTransportBuffer, g_pendingResponseSize);
|
portMemCopy(*message, *message_size,
|
||||||
*message_size = g_pendingResponseSize;
|
pSpdm->pTransportBuffer, pSpdm->pendingResponseSize);
|
||||||
|
*message_size = pSpdm->pendingResponseSize;
|
||||||
|
|
||||||
ErrorExit:
|
ErrorExit:
|
||||||
|
|
||||||
// Ensure size is cleared to indicate no response pending in buffer
|
// Ensure size is cleared to indicate no response pending in buffer
|
||||||
g_pendingResponseSize = 0;
|
pSpdm->pendingResponseSize = 0;
|
||||||
|
|
||||||
return spdmStatus;
|
return spdmStatus;
|
||||||
}
|
}
|
||||||
@ -673,18 +733,14 @@ spdmDeviceInit_GH100
|
|||||||
return NV_ERR_INVALID_ARGUMENT;
|
return NV_ERR_INVALID_ARGUMENT;
|
||||||
}
|
}
|
||||||
|
|
||||||
g_pGpu = pGpu;
|
pSpdm->pendingResponseSize = 0;
|
||||||
g_pSpdm = pSpdm;
|
pSpdm->pTransportBuffer = portMemAllocNonPaged(pSpdm->payloadBufferSize);
|
||||||
g_pendingResponseSize = 0;
|
if (pSpdm->pTransportBuffer == NULL)
|
||||||
g_pTransportBuffer = portMemAllocNonPaged(pSpdm->payloadBufferSize);
|
|
||||||
|
|
||||||
if (g_pTransportBuffer == NULL)
|
|
||||||
{
|
{
|
||||||
g_transportBufferSize = 0;
|
pSpdm->transportBufferSize = 0;
|
||||||
return NV_ERR_NO_MEMORY;
|
return NV_ERR_NO_MEMORY;
|
||||||
}
|
}
|
||||||
|
pSpdm->transportBufferSize = pSpdm->payloadBufferSize;
|
||||||
g_transportBufferSize = pSpdm->payloadBufferSize;
|
|
||||||
|
|
||||||
// Register transport layer functionality with library.
|
// Register transport layer functionality with library.
|
||||||
libspdm_register_transport_layer_func(pSpdm->pLibspdmContext,
|
libspdm_register_transport_layer_func(pSpdm->pLibspdmContext,
|
||||||
@ -703,7 +759,6 @@ spdmDeviceInit_GH100
|
|||||||
return NV_OK;
|
return NV_OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
/*!
|
/*!
|
||||||
* To deinitialize the GSP SPDM Responder, we need to release the surface for
|
* To deinitialize the GSP SPDM Responder, we need to release the surface for
|
||||||
* SPDM communication. GSP-RM will handle the rest.
|
* SPDM communication. GSP-RM will handle the rest.
|
||||||
@ -717,10 +772,10 @@ spdmDeviceDeinit_GH100
|
|||||||
)
|
)
|
||||||
{
|
{
|
||||||
// Just-in-case, portMemFree handles NULL.
|
// Just-in-case, portMemFree handles NULL.
|
||||||
portMemFree(g_pTransportBuffer);
|
portMemFree(pSpdm->pTransportBuffer);
|
||||||
g_pTransportBuffer = NULL;
|
pSpdm->pTransportBuffer = NULL;
|
||||||
g_transportBufferSize = 0;
|
pSpdm->transportBufferSize = 0;
|
||||||
g_pendingResponseSize = 0;
|
pSpdm->pendingResponseSize = 0;
|
||||||
|
|
||||||
return NV_OK;
|
return NV_OK;
|
||||||
}
|
}
|
||||||
|
@ -432,6 +432,11 @@ spdmContextInit_IMPL
|
|||||||
|
|
||||||
libspdm_init_msg_log(pSpdm->pLibspdmContext, pSpdm->pMsgLog, pSpdm->msgLogMaxSize);
|
libspdm_init_msg_log(pSpdm->pLibspdmContext, pSpdm->pMsgLog, pSpdm->msgLogMaxSize);
|
||||||
|
|
||||||
|
|
||||||
|
// Store SPDM object pointer to libspdm context
|
||||||
|
CHECK_SPDM_STATUS(libspdm_set_data(pSpdm->pLibspdmContext, LIBSPDM_DATA_APP_CONTEXT_DATA,
|
||||||
|
NULL, (void *)&pSpdm, sizeof(void *)));
|
||||||
|
|
||||||
//
|
//
|
||||||
// Perform any device-specific initialization. spdmDeviceInit is also
|
// Perform any device-specific initialization. spdmDeviceInit is also
|
||||||
// responsible for registering transport layer functions with libspdm.
|
// responsible for registering transport layer functions with libspdm.
|
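The SPDM hunks in this change drop the static g_pGpu/g_pSpdm/g_pTransportBuffer globals: the Spdm pointer is stored in the libspdm context at init time via libspdm_set_data() with LIBSPDM_DATA_APP_CONTEXT_DATA and fetched back with libspdm_get_data() inside each transport callback, with the transport buffer and pending-response size now living on the Spdm object. A stand-alone sketch of the same store-a-pointer-in-the-library-context pattern; ctx_set_app_data()/ctx_get_app_data() and the structs below are local stand-ins, not libspdm APIs:

    #include <stdio.h>
    #include <stddef.h>

    typedef struct {            /* stand-in for the libspdm context */
        void *app_context;
    } lib_context_t;

    typedef struct {            /* stand-in for the driver's Spdm object */
        size_t transport_buffer_size;
    } spdm_t;

    static void  ctx_set_app_data(lib_context_t *ctx, void *ptr) { ctx->app_context = ptr; }
    static void *ctx_get_app_data(lib_context_t *ctx)            { return ctx->app_context; }

    /* Transport callback: no globals, everything comes from the context. */
    static int send_message(lib_context_t *ctx, size_t message_size)
    {
        spdm_t *spdm = ctx_get_app_data(ctx);
        if (spdm == NULL)
            return -1;          /* context corrupted */
        if (spdm->transport_buffer_size < message_size)
            return -2;          /* buffer too small */
        return 0;
    }

    int main(void)
    {
        spdm_t spdm = { 4096 };
        lib_context_t ctx = { 0 };

        ctx_set_app_data(&ctx, &spdm);              /* done once at context init */
        printf("%d\n", send_message(&ctx, 1024));   /* prints 0 */
        return 0;
    }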
||||||
|
@ -606,7 +606,8 @@ _memoryexportVerifyMem
|
|||||||
if (pGpu == NULL)
|
if (pGpu == NULL)
|
||||||
return NV_OK;
|
return NV_OK;
|
||||||
|
|
||||||
if (pKernelMIGGpuInstance != NULL)
|
// MIG is about vidmem partitioning, so limit the check.
|
||||||
|
if ((pKernelMIGGpuInstance != NULL) && (addrSpace == ADDR_FBMEM))
|
||||||
{
|
{
|
||||||
if ((pKernelMIGGpuInstance->pMemoryPartitionHeap != pSrcMemory->pHeap))
|
if ((pKernelMIGGpuInstance->pMemoryPartitionHeap != pSrcMemory->pHeap))
|
||||||
return NV_ERR_INVALID_OBJECT_PARENT;
|
return NV_ERR_INVALID_OBJECT_PARENT;
|
||||||
|
@ -1396,15 +1396,9 @@ NvU32 kvgpumgrGetPgpuSubdevIdEncoding(OBJGPU *pGpu, NvU8 *pgpuString,
|
|||||||
return NV_U32_MAX;
|
return NV_U32_MAX;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (chipID)
|
|
||||||
{
|
|
||||||
default:
|
|
||||||
// The encoding of the subdevice ID is its value converted to string
|
// The encoding of the subdevice ID is its value converted to string
|
||||||
bytes = NvU32ToAsciiStr(subID, SUBDEVID_ENCODED_VALUE_SIZE,
|
bytes = NvU32ToAsciiStr(subID, SUBDEVID_ENCODED_VALUE_SIZE,
|
||||||
pgpuString, NV_FALSE);
|
pgpuString, NV_FALSE);
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
return bytes;
|
return bytes;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
NVIDIA_VERSION = 550.54.15
|
NVIDIA_VERSION = 550.67
|
||||||
|
|
||||||
# This file.
|
# This file.
|
||||||
VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))
|
VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))
|
||||||
|