535.171.04

Bernhard Stoeckner 2024-03-21 14:22:31 +01:00
parent 044f70bbb8
commit c042c7903d
No known key found for this signature in database
GPG Key ID: 7D23DC2750FAC2E1
36 changed files with 691 additions and 265 deletions

View File

@@ -2,6 +2,8 @@
 ## Release 535 Entries
+### [535.171.04] 2024-03-21
 ### [535.161.08] 2024-03-18
 ### [535.161.07] 2024-02-22

View File

@@ -1,7 +1,7 @@
 # NVIDIA Linux Open GPU Kernel Module Source
 This is the source release of the NVIDIA Linux open GPU kernel modules,
-version 535.161.08.
+version 535.171.04.
 ## How to Build
@@ -17,7 +17,7 @@ as root:
 Note that the kernel modules built here must be used with GSP
 firmware and user-space NVIDIA GPU driver components from a corresponding
-535.161.08 driver release. This can be achieved by installing
+535.171.04 driver release. This can be achieved by installing
 the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
 option. E.g.,
@@ -180,7 +180,7 @@ software applications.
 ## Compatible GPUs
 The open-gpu-kernel-modules can be used on any Turing or later GPU
-(see the table below). However, in the 535.161.08 release,
+(see the table below). However, in the 535.171.04 release,
 GeForce and Workstation support is still considered alpha-quality.
 To enable use of the open kernel modules on GeForce and Workstation GPUs,
@@ -188,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
 parameter to 1. For more details, see the NVIDIA GPU driver end user
 README here:
-https://us.download.nvidia.com/XFree86/Linux-x86_64/535.161.08/README/kernel_open.html
+https://us.download.nvidia.com/XFree86/Linux-x86_64/535.171.04/README/kernel_open.html
 In the below table, if three IDs are listed, the first is the PCI Device
 ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@@ -892,6 +892,8 @@ Subsystem Device ID.
 | NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
 | NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
 | NVIDIA RTX 2000 Ada Generation Laptop GPU | 28B8 |
+| NVIDIA RTX 1000 Ada Generation Laptop GPU | 28B9 |
+| NVIDIA RTX 500 Ada Generation Laptop GPU | 28BB |
 | NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
 | NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
 | NVIDIA RTX 2000 Ada Generation Embedded GPU | 28F8 |

View File

@@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
 EXTRA_CFLAGS += -I$(src)
 EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
 EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
-EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.161.08\"
+EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.171.04\"
 ifneq ($(SYSSRCHOST1X),)
 EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
@@ -152,6 +152,8 @@ NV_CONFTEST_CMD := /bin/sh $(NV_CONFTEST_SCRIPT) \
 NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)
 NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
+NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
+NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)
 NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h
 NV_CONFTEST_COMPILE_TEST_HEADERS += $(obj)/conftest/functions.h

View File

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 2001-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 2001-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -1982,31 +1982,6 @@ static inline NvBool nv_platform_use_auto_online(nv_linux_state_t *nvl)
     return nvl->numa_info.use_auto_online;
 }
-typedef struct {
-    NvU64 base;
-    NvU64 size;
-    NvU32 nodeId;
-    int ret;
-} remove_numa_memory_info_t;
-static void offline_numa_memory_callback
-(
-    void *args
-)
-{
-#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
-    remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
-#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
-    pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->nodeId,
-                                               pNumaInfo->base,
-                                               pNumaInfo->size);
-#else
-    pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->base,
-                                               pNumaInfo->size);
-#endif
-#endif
-}
 typedef enum
 {
     NV_NUMA_STATUS_DISABLED = 0,

View File

@@ -3032,6 +3032,22 @@ compile_test() {
         ;;
+        foll_longterm_present)
+            #
+            # Determine if FOLL_LONGTERM enum is present or not
+            #
+            # Added by commit 932f4a630a69 ("mm/gup: replace
+            # get_user_pages_longterm() with FOLL_LONGTERM") in
+            # v5.2
+            #
+            CODE="
+            #include <linux/mm.h>
+            int foll_longterm = FOLL_LONGTERM;
+            "
+            compile_check_conftest "$CODE" "NV_FOLL_LONGTERM_PRESENT" "" "types"
+        ;;
         vfio_pin_pages_has_vfio_device_arg)
             #
             # Determine if vfio_pin_pages() kABI accepts "struct vfio_device *"
@@ -5081,11 +5097,15 @@
             # vmap ops and convert GEM backends") update
             # drm_gem_object_funcs::vmap to take 'map' argument.
             #
+            # Note that the 'map' argument type is changed from 'struct dma_buf_map'
+            # to 'struct iosys_map' by commit 7938f4218168 ("dma-buf-map: Rename
+            # to iosys-map) in v5.18.
+            #
             CODE="
             #include <drm/drm_gem.h>
             int conftest_drm_gem_object_vmap_has_map_arg(
-                    struct drm_gem_object *obj, struct dma_buf_map *map) {
-                return obj->funcs->vmap(obj, map);
+                    struct drm_gem_object *obj) {
+                return obj->funcs->vmap(obj, NULL);
             }"
             compile_check_conftest "$CODE" "NV_DRM_GEM_OBJECT_VMAP_HAS_MAP_ARG" "" "types"
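Both tests above follow the same conftest pattern: compile_check_conftest() tries to build the small CODE fragment against the target kernel and, on success, emits a preprocessor define (here NV_FOLL_LONGTERM_PRESENT and NV_DRM_GEM_OBJECT_VMAP_HAS_MAP_ARG) that the C sources can branch on. As a rough, hypothetical sketch of how such a define is typically consumed (the helper name below is invented for illustration and is not part of this commit):

    /*
     * Hypothetical consumer of the NV_FOLL_LONGTERM_PRESENT define generated by
     * the foll_longterm_present test above. example_gup_flags() is an invented
     * name; only the #if pattern and the kernel's FOLL_* flags are assumed.
     */
    #include <linux/mm.h>

    static unsigned int example_gup_flags(int write)
    {
        unsigned int gup_flags = write ? FOLL_WRITE : 0;

    #if defined(NV_FOLL_LONGTERM_PRESENT)
        /* Long-lived pins are flagged so the kernel keeps them out of
         * CMA/ZONE_MOVABLE regions. */
        gup_flags |= FOLL_LONGTERM;
    #endif

        return gup_flags;
    }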

View File

@@ -54,7 +54,11 @@
 #include "nv-time.h"
 #include "nv-lock.h"
-#if !defined(CONFIG_RETPOLINE)
+/*
+ * Commit aefb2f2e619b ("x86/bugs: Rename CONFIG_RETPOLINE =>
+ * CONFIG_MITIGATION_RETPOLINE) in v6.8 renamed CONFIG_RETPOLINE.
+ */
+#if !defined(CONFIG_RETPOLINE) && !defined(CONFIG_MITIGATION_RETPOLINE)
 #include "nv-retpoline.h"
 #endif

View File

@@ -34,16 +34,6 @@
 #define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())
-// ATS prefetcher uses hmm_range_fault() to query residency information.
-// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
-// of memory regions while hmm_range_fault() is being called, MMU interval
-// notifiers are needed.
-#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
-#define UVM_ATS_PREFETCH_SUPPORTED() 1
-#else
-#define UVM_ATS_PREFETCH_SUPPORTED() 0
-#endif
 typedef struct
 {
     // Mask of gpu_va_spaces which are registered for ATS access. The mask is

View File

@@ -30,23 +30,36 @@
 #include <linux/mempolicy.h>
 #include <linux/mmu_notifier.h>
-#if UVM_ATS_PREFETCH_SUPPORTED()
+#if UVM_HMM_RANGE_FAULT_SUPPORTED()
 #include <linux/hmm.h>
 #endif
-static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
-                                    struct vm_area_struct *vma,
-                                    NvU64 start,
-                                    size_t length,
-                                    uvm_fault_access_type_t access_type,
-                                    uvm_ats_fault_context_t *ats_context)
+typedef enum
+{
+    UVM_ATS_SERVICE_TYPE_FAULTS = 0,
+    UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS,
+    UVM_ATS_SERVICE_TYPE_COUNT
+} uvm_ats_service_type_t;
+static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
+                                      struct vm_area_struct *vma,
+                                      NvU64 start,
+                                      size_t length,
+                                      uvm_fault_access_type_t access_type,
+                                      uvm_ats_service_type_t service_type,
+                                      uvm_ats_fault_context_t *ats_context)
 {
     uvm_va_space_t *va_space = gpu_va_space->va_space;
     struct mm_struct *mm = va_space->va_space_mm.mm;
-    bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
     NV_STATUS status;
     NvU64 user_space_start;
     NvU64 user_space_length;
+    bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
+    bool fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
+    uvm_populate_permissions_t populate_permissions = fault_service_type ?
+        (write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY) :
+        UVM_POPULATE_PERMISSIONS_INHERIT;
     // Request uvm_migrate_pageable() to touch the corresponding page after
     // population.
@@ -83,10 +96,10 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
         .dst_node_id = ats_context->residency_node,
         .start = start,
         .length = length,
-        .populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
-        .touch = true,
-        .skip_mapped = true,
-        .populate_on_cpu_alloc_failures = true,
+        .populate_permissions = populate_permissions,
+        .touch = fault_service_type,
+        .skip_mapped = fault_service_type,
+        .populate_on_cpu_alloc_failures = fault_service_type,
         .user_space_start = &user_space_start,
         .user_space_length = &user_space_length,
     };
@@ -233,7 +246,7 @@ static uvm_va_block_region_t uvm_ats_region_from_vma(struct vm_area_struct *vma,
     return uvm_ats_region_from_start_end(start, end);
 }
-#if UVM_ATS_PREFETCH_SUPPORTED()
+#if UVM_HMM_RANGE_FAULT_SUPPORTED()
 static bool uvm_ats_invalidate_notifier(struct mmu_interval_notifier *mni, unsigned long cur_seq)
 {
@@ -271,12 +284,12 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
                                             uvm_ats_fault_context_t *ats_context)
 {
     NV_STATUS status = NV_OK;
+    uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
-#if UVM_ATS_PREFETCH_SUPPORTED()
+#if UVM_HMM_RANGE_FAULT_SUPPORTED()
     int ret;
     NvU64 start;
     NvU64 end;
-    uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
     struct hmm_range range;
     uvm_page_index_t page_index;
     uvm_va_block_region_t vma_region;
@@ -357,78 +370,83 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
     mmu_interval_notifier_remove(range.notifier);
+#else
+    uvm_page_mask_zero(residency_mask);
 #endif
     return status;
 }
-static void ats_expand_fault_region(uvm_gpu_va_space_t *gpu_va_space,
-                                    struct vm_area_struct *vma,
-                                    uvm_ats_fault_context_t *ats_context,
-                                    uvm_va_block_region_t max_prefetch_region,
-                                    uvm_page_mask_t *faulted_mask)
+static void ats_compute_prefetch_mask(uvm_gpu_va_space_t *gpu_va_space,
+                                      struct vm_area_struct *vma,
+                                      uvm_ats_fault_context_t *ats_context,
+                                      uvm_va_block_region_t max_prefetch_region)
 {
-    uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
-    uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
+    uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
     uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
     uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
     uvm_perf_prefetch_bitmap_tree_t *bitmap_tree = &ats_context->prefetch_state.bitmap_tree;
-    if (uvm_page_mask_empty(faulted_mask))
+    if (uvm_page_mask_empty(accessed_mask))
         return;
     uvm_perf_prefetch_compute_ats(gpu_va_space->va_space,
-                                  faulted_mask,
-                                  uvm_va_block_region_from_mask(NULL, faulted_mask),
+                                  accessed_mask,
+                                  uvm_va_block_region_from_mask(NULL, accessed_mask),
                                   max_prefetch_region,
                                   residency_mask,
                                   bitmap_tree,
                                   prefetch_mask);
-    uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
-    if (vma->vm_flags & VM_WRITE)
-        uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
 }
-static NV_STATUS ats_fault_prefetch(uvm_gpu_va_space_t *gpu_va_space,
-                                    struct vm_area_struct *vma,
-                                    NvU64 base,
-                                    uvm_ats_fault_context_t *ats_context)
+static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
+                                      struct vm_area_struct *vma,
+                                      NvU64 base,
+                                      uvm_ats_service_type_t service_type,
+                                      uvm_ats_fault_context_t *ats_context)
 {
-    NV_STATUS status = NV_OK;
-    uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
-    uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
-    uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
+    NV_STATUS status;
+    uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
    uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
     uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
+    // Residency mask needs to be computed even if prefetching is disabled since
+    // the residency information is also needed by access counters servicing in
+    // uvm_ats_service_access_counters()
+    status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
+    if (status != NV_OK)
+        return status;
     if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
         return status;
-    if (uvm_page_mask_empty(faulted_mask))
-        return status;
-    status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
-    if (status != NV_OK)
+    if (uvm_page_mask_empty(accessed_mask))
         return status;
     // Prefetch the entire region if none of the pages are resident on any node
     // and if preferred_location is the faulting GPU.
     if (ats_context->prefetch_state.has_preferred_location &&
-        ats_context->prefetch_state.first_touch &&
-        uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->parent->id)) {
+        (ats_context->prefetch_state.first_touch || (service_type == UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS)) &&
+        uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->id)) {
         uvm_page_mask_init_from_region(prefetch_mask, max_prefetch_region, NULL);
+    }
+    else {
+        ats_compute_prefetch_mask(gpu_va_space, vma, ats_context, max_prefetch_region);
+    }
+    if (service_type == UVM_ATS_SERVICE_TYPE_FAULTS) {
+        uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
+        uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
         uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
         if (vma->vm_flags & VM_WRITE)
             uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
-        return status;
     }
-    ats_expand_fault_region(gpu_va_space, vma, ats_context, max_prefetch_region, faulted_mask);
+    else {
+        uvm_page_mask_or(accessed_mask, accessed_mask, prefetch_mask);
+    }
     return status;
 }
@@ -446,6 +464,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
     uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
     uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
     uvm_fault_client_type_t client_type = ats_context->client_type;
+    uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_FAULTS;
     UVM_ASSERT(vma);
     UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
@@ -454,6 +473,9 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
     UVM_ASSERT(gpu_va_space->ats.enabled);
     UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
+    uvm_assert_mmap_lock_locked(vma->vm_mm);
+    uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
     uvm_page_mask_zero(faults_serviced_mask);
     uvm_page_mask_zero(reads_serviced_mask);
@@ -479,7 +501,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
     ats_batch_select_residency(gpu_va_space, vma, ats_context);
-    ats_fault_prefetch(gpu_va_space, vma, base, ats_context);
+    ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
     for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
         NvU64 start = base + (subregion.first * PAGE_SIZE);
@@ -491,7 +513,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
         UVM_ASSERT(start >= vma->vm_start);
         UVM_ASSERT((start + length) <= vma->vm_end);
-        status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
+        status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
         if (status != NV_OK)
             return status;
@@ -526,7 +548,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
         UVM_ASSERT(start >= vma->vm_start);
         UVM_ASSERT((start + length) <= vma->vm_end);
-        status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
+        status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
         if (status != NV_OK)
             return status;
@@ -598,3 +620,53 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
     return status;
 }
+NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
+                                          struct vm_area_struct *vma,
+                                          NvU64 base,
+                                          uvm_ats_fault_context_t *ats_context)
+{
+    uvm_va_block_region_t subregion;
+    uvm_va_block_region_t region = uvm_va_block_region(0, PAGES_PER_UVM_VA_BLOCK);
+    uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS;
+    UVM_ASSERT(vma);
+    UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
+    UVM_ASSERT(g_uvm_global.ats.enabled);
+    UVM_ASSERT(gpu_va_space);
+    UVM_ASSERT(gpu_va_space->ats.enabled);
+    UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
+    uvm_assert_mmap_lock_locked(vma->vm_mm);
+    uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
+    ats_batch_select_residency(gpu_va_space, vma, ats_context);
+    // Ignoring the return value of ats_compute_prefetch is ok since prefetching
+    // is just an optimization and servicing access counter migrations is still
+    // worthwhile even without any prefetching added. So, let servicing continue
+    // instead of returning early even if the prefetch computation fails.
+    ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
+    // Remove pages which are already resident at the intended destination from
+    // the accessed_mask.
+    uvm_page_mask_andnot(&ats_context->accessed_mask,
+                         &ats_context->accessed_mask,
+                         &ats_context->prefetch_state.residency_mask);
+    for_each_va_block_subregion_in_mask(subregion, &ats_context->accessed_mask, region) {
+        NV_STATUS status;
+        NvU64 start = base + (subregion.first * PAGE_SIZE);
+        size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
+        uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_COUNT;
+        UVM_ASSERT(start >= vma->vm_start);
+        UVM_ASSERT((start + length) <= vma->vm_end);
+        status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
+        if (status != NV_OK)
+            return status;
+    }
+    return NV_OK;
+}

View File

@@ -42,11 +42,31 @@
 // corresponding bit in read_fault_mask. These returned masks are only valid if
 // the return status is NV_OK. Status other than NV_OK indicate system global
 // fault servicing failures.
+//
+// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
+// lock.
 NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
                                  struct vm_area_struct *vma,
                                  NvU64 base,
                                  uvm_ats_fault_context_t *ats_context);
+// Service access counter notifications on ATS regions in the range (base, base
+// + UVM_VA_BLOCK_SIZE) for individual pages in the range requested by page_mask
+// set in ats_context->accessed_mask. base must be aligned to UVM_VA_BLOCK_SIZE.
+// The caller is responsible for ensuring that the addresses in the
+// accessed_mask is completely covered by the VMA. The caller is also
+// responsible for handling any errors returned by this function.
+//
+// Returns NV_OK if servicing was successful. Any other error indicates an error
+// while servicing the range.
+//
+// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
+// lock.
+NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
+                                          struct vm_area_struct *vma,
+                                          NvU64 base,
+                                          uvm_ats_fault_context_t *ats_context);
 // Return whether there are any VA ranges (and thus GMMU mappings) within the
 // UVM_GMMU_ATS_GRANULARITY-aligned region containing address.
 bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next);
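As a reading aid for the uvm_ats_service_access_counters() contract documented above, here is a minimal, hypothetical caller sketch. example_service_one_page() is an invented name; it only rearranges identifiers that already appear in this change, and it assumes the caller has taken mmap_lock and the va_space lock as the LOCKING comment requires.

    static NV_STATUS example_service_one_page(uvm_gpu_va_space_t *gpu_va_space,
                                              struct vm_area_struct *vma,
                                              NvU64 address,
                                              uvm_ats_fault_context_t *ats_context)
    {
        // Base of the UVM_VA_BLOCK_SIZE-aligned region containing the address.
        NvU64 base = UVM_VA_BLOCK_ALIGN_DOWN(address);

        // Request servicing for the single accessed page, expressed relative to
        // base; the mask must be fully covered by the VMA.
        uvm_page_mask_zero(&ats_context->accessed_mask);
        uvm_page_mask_set(&ats_context->accessed_mask, (address - base) / PAGE_SIZE);

        // The callee only asserts the locking contract; taking the locks is the
        // caller's responsibility (assumed already done in this sketch).
        uvm_assert_mmap_lock_locked(vma->vm_mm);
        uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);

        return uvm_ats_service_access_counters(gpu_va_space, vma, base, ats_context);
    }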

View File

@@ -181,23 +181,28 @@ struct uvm_service_block_context_struct
 typedef struct
 {
     // Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
-    // VMA. Used for batching ATS faults in a vma.
+    // VMA. Used for batching ATS faults in a vma. This is unused for access
+    // counter service requests.
     uvm_page_mask_t read_fault_mask;
     // Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
-    // SAM VMA. Used for batching ATS faults in a vma.
+    // SAM VMA. Used for batching ATS faults in a vma. This is unused for access
+    // counter service requests.
     uvm_page_mask_t write_fault_mask;
     // Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE aligned region
-    // of a SAM VMA. Used to return ATS fault status.
+    // of a SAM VMA. Used to return ATS fault status. This is unused for access
+    // counter service requests.
     uvm_page_mask_t faults_serviced_mask;
     // Mask of successfully serviced read faults on pages in write_fault_mask.
+    // This is unused for access counter service requests.
     uvm_page_mask_t reads_serviced_mask;
-    // Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
-    // SAM VMA. This is used as input to the prefetcher.
-    uvm_page_mask_t faulted_mask;
+    // Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
+    // VMA. This is used as input for access counter service requests and output
+    // of fault service requests.
+    uvm_page_mask_t accessed_mask;
     // Client type of the service requestor.
     uvm_fault_client_type_t client_type;
@@ -466,6 +471,9 @@ struct uvm_access_counter_service_batch_context_struct
     // Structure used to coalesce access counter servicing in a VA block
     uvm_service_block_context_t block_service_context;
+    // Structure used to service access counter migrations in an ATS block.
+    uvm_ats_fault_context_t ats_context;
     // Unique id (per-GPU) generated for tools events recording
     NvU32 batch_id;
 };

View File

@@ -33,7 +33,8 @@
 #include "uvm_va_space_mm.h"
 #include "uvm_pmm_sysmem.h"
 #include "uvm_perf_module.h"
-#include "uvm_ats_ibm.h"
+#include "uvm_ats.h"
+#include "uvm_ats_faults.h"
 #define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN 1
 #define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_DEFAULT 256
@@ -125,7 +126,7 @@ static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va
 // Whether access counter migrations are enabled or not. The policy is as
 // follows:
-// - MIMC migrations are disabled by default on all systems except P9.
+// - MIMC migrations are disabled by default on all non-ATS systems.
 // - MOMC migrations are disabled by default on all systems
 // - Users can override this policy by specifying on/off
 static bool is_migration_enabled(uvm_access_counter_type_t type)
@@ -148,7 +149,7 @@ static bool is_migration_enabled(uvm_access_counter_type_t type)
     if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
         return false;
-    if (UVM_ATS_IBM_SUPPORTED())
+    if (UVM_ATS_SUPPORTED())
         return g_uvm_global.ats.supported;
     return false;
@@ -1507,8 +1508,7 @@ static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
                                                accessed_pages));
 }
-static void expand_notification_block(struct mm_struct *mm,
-                                      uvm_gpu_va_space_t *gpu_va_space,
+static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
                                       uvm_va_block_t *va_block,
                                       uvm_page_mask_t *accessed_pages,
                                       const uvm_access_counter_buffer_entry_t *current_entry)
@@ -1543,7 +1543,7 @@ static void expand_notification_block(struct mm_struct *mm,
     // which received the notification if the memory was already migrated before
     // acquiring the locks either during the servicing of previous notifications
     // or during faults or because of explicit migrations or if the VA range was
-    // freed after receving the notification. Return NV_OK in such cases.
+    // freed after receiving the notification. Return NV_OK in such cases.
     if (!UVM_ID_IS_VALID(resident_id) || uvm_id_equal(resident_id, gpu->id))
         return;
@@ -1578,14 +1578,14 @@ static void expand_notification_block(struct mm_struct *mm,
     }
 }
-static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
-                                                     uvm_gpu_va_space_t *gpu_va_space,
+static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_space,
+                                                     struct mm_struct *mm,
                                                      uvm_va_block_t *va_block,
                                                      uvm_access_counter_service_batch_context_t *batch_context,
                                                      NvU32 index,
                                                      NvU32 *out_index)
 {
-    NvU32 i = index;
+    NvU32 i;
     NvU32 flags = 0;
     NV_STATUS status = NV_OK;
     NV_STATUS flags_status;
@@ -1595,7 +1595,7 @@ static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
     uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
     UVM_ASSERT(va_block);
-    UVM_ASSERT(i < batch_context->virt.num_notifications);
+    UVM_ASSERT(index < batch_context->virt.num_notifications);
     uvm_assert_rwsem_locked(&va_space->lock);
@@ -1603,28 +1603,25 @@ static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
     uvm_mutex_lock(&va_block->lock);
-    while (i < batch_context->virt.num_notifications) {
+    for (i = index; i < batch_context->virt.num_notifications; i++) {
         uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
         NvU64 address = current_entry->address.address;
-        if ((current_entry->virtual_info.va_space != va_space) || (address > va_block->end)) {
-            *out_index = i;
+        if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end))
+            expand_notification_block(gpu_va_space, va_block, accessed_pages, current_entry);
+        else
             break;
-        }
-        expand_notification_block(mm, gpu_va_space, va_block, accessed_pages, current_entry);
-        i++;
-        *out_index = i;
     }
+    *out_index = i;
+    // Atleast one notification should have been processed.
+    UVM_ASSERT(index < *out_index);
     status = service_notification_va_block_helper(mm, va_block, gpu->id, batch_context);
     uvm_mutex_unlock(&va_block->lock);
-    // Atleast one notification should have been processed.
-    UVM_ASSERT(index < *out_index);
     if (status == NV_OK)
         flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
@@ -1636,62 +1633,154 @@ static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
     return status;
 }
-static NV_STATUS service_virt_notifications_batch(struct mm_struct *mm,
-                                                  uvm_gpu_va_space_t *gpu_va_space,
+static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
+                                               struct mm_struct *mm,
+                                               uvm_access_counter_service_batch_context_t *batch_context,
+                                               NvU32 index,
+                                               NvU32 *out_index)
+{
+    NvU32 i;
+    NvU64 base;
+    NvU64 end;
+    NvU64 address;
+    NvU32 flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
+    NV_STATUS status = NV_OK;
+    NV_STATUS flags_status;
+    struct vm_area_struct *vma = NULL;
+    uvm_gpu_t *gpu = gpu_va_space->gpu;
+    uvm_va_space_t *va_space = gpu_va_space->va_space;
+    uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
+    uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
+    UVM_ASSERT(index < batch_context->virt.num_notifications);
+    uvm_assert_mmap_lock_locked(mm);
+    uvm_assert_rwsem_locked(&va_space->lock);
+    address = notifications[index]->address.address;
+    vma = find_vma_intersection(mm, address, address + 1);
+    if (!vma) {
+        // Clear the notification entry to continue receiving access counter
+        // notifications when a new VMA is allocated in this range.
+        status = notify_tools_and_process_flags(gpu, &notifications[index], 1, flags);
+        *out_index = index + 1;
+        return status;
+    }
+    base = UVM_VA_BLOCK_ALIGN_DOWN(address);
+    end = min(base + UVM_VA_BLOCK_SIZE, (NvU64)vma->vm_end);
+    uvm_page_mask_zero(&ats_context->accessed_mask);
+    for (i = index; i < batch_context->virt.num_notifications; i++) {
+        uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
+        address = current_entry->address.address;
+        if ((current_entry->virtual_info.va_space == va_space) && (address < end))
+            uvm_page_mask_set(&ats_context->accessed_mask, (address - base) / PAGE_SIZE);
+        else
+            break;
+    }
+    *out_index = i;
+    // Atleast one notification should have been processed.
+    UVM_ASSERT(index < *out_index);
+    // TODO: Bug 2113632: [UVM] Don't clear access counters when the preferred
+    // location is set
+    // If no pages were actually migrated, don't clear the access counters.
+    status = uvm_ats_service_access_counters(gpu_va_space, vma, base, ats_context);
+    if (status != NV_OK)
+        flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
+    flags_status = notify_tools_and_process_flags(gpu, &notifications[index], *out_index - index, flags);
+    if ((status == NV_OK) && (flags_status != NV_OK))
+        status = flags_status;
+    return status;
+}
+static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_space,
+                                                  struct mm_struct *mm,
                                                   uvm_access_counter_service_batch_context_t *batch_context,
                                                   NvU32 index,
                                                   NvU32 *out_index)
 {
     NV_STATUS status;
-    uvm_va_block_t *va_block;
+    uvm_va_range_t *va_range;
     uvm_va_space_t *va_space = gpu_va_space->va_space;
     uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[index];
     NvU64 address = current_entry->address.address;
     UVM_ASSERT(va_space);
+    if (mm)
+        uvm_assert_mmap_lock_locked(mm);
     uvm_assert_rwsem_locked(&va_space->lock);
     // Virtual address notifications are always 64K aligned
     UVM_ASSERT(IS_ALIGNED(address, UVM_PAGE_SIZE_64K));
-    // TODO: Bug 4309292: [UVM][HMM] Re-enable access counter HMM block
-    // migrations for virtual notifications on configs with
-    // 4KB page size
-    status = uvm_va_block_find(va_space, address, &va_block);
-    if ((status == NV_OK) && !uvm_va_block_is_hmm(va_block)) {
-        UVM_ASSERT(va_block);
-        status = service_virt_notifications_in_block(mm, gpu_va_space, va_block, batch_context, index, out_index);
+    va_range = uvm_va_range_find(va_space, address);
+    if (va_range) {
+        // Avoid clearing the entry by default.
+        NvU32 flags = 0;
+        uvm_va_block_t *va_block = NULL;
+        if (va_range->type == UVM_VA_RANGE_TYPE_MANAGED) {
+            size_t index = uvm_va_range_block_index(va_range, address);
+            va_block = uvm_va_range_block(va_range, index);
+            // If the va_range is a managed range, the notification belongs to a
+            // recently freed va_range if va_block is NULL. If va_block is not
+            // NULL, service_virt_notifications_in_block will process flags.
+            // Clear the notification entry to continue receiving notifications
+            // when a new va_range is allocated in that region.
+            flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
+        }
+        if (va_block) {
+            status = service_virt_notifications_in_block(gpu_va_space, mm, va_block, batch_context, index, out_index);
+        }
+        else {
+            status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
+            *out_index = index + 1;
+        }
+    }
+    else if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
+        status = service_virt_notification_ats(gpu_va_space, mm, batch_context, index, out_index);
     }
     else {
-        NvU32 flags = 0;
+        NvU32 flags;
+        uvm_va_block_t *va_block = NULL;
+        status = uvm_hmm_va_block_find(va_space, address, &va_block);
+        // TODO: Bug 4309292: [UVM][HMM] Re-enable access counter HMM block
+        // migrations for virtual notifications
+        //
+        // - If the va_block is HMM, don't clear the notification since HMM
+        //   migrations are currently disabled.
+        //
+        // - If the va_block isn't HMM, the notification belongs to a recently
+        //   freed va_range. Clear the notification entry to continue receiving
+        //   notifications when a new va_range is allocated in this region.
+        flags = va_block ? 0 : UVM_ACCESS_COUNTER_ACTION_CLEAR;
         UVM_ASSERT((status == NV_ERR_OBJECT_NOT_FOUND) ||
                    (status == NV_ERR_INVALID_ADDRESS) ||
                    uvm_va_block_is_hmm(va_block));
-        // NV_ERR_OBJECT_NOT_FOUND is returned if the VA range is valid but no
-        // VA block has been allocated yet. This can happen if there are stale
-        // notifications in the batch. A new VA range may have been allocated in
-        // that range. So, clear the notification entry to continue getting
-        // notifications for the new VA range.
-        if (status == NV_ERR_OBJECT_NOT_FOUND)
-            flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
-        // NV_ERR_INVALID_ADDRESS is returned if the corresponding VA range
-        // doesn't exist or it's not a managed range. Access counter migrations
-        // are not currently supported on such ranges.
-        //
-        // TODO: Bug 1990466: [uvm] Use access counters to trigger migrations
-        // When support for SAM migrations is addded, clear the notification
-        // entry if the VA range doesn't exist in order to receive notifications
-        // when a new VA range is allocated in that region.
-        status = notify_tools_and_process_flags(gpu_va_space->gpu, &batch_context->virt.notifications[index], 1, flags);
+        // Clobber status to continue processing the rest of the notifications
+        // in the batch.
+        status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
         *out_index = index + 1;
+        status = NV_OK;
     }
     return status;
@@ -1745,7 +1834,7 @@ static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
     }
     if (va_space && gpu_va_space && uvm_va_space_has_access_counter_migrations(va_space)) {
-        status = service_virt_notifications_batch(mm, gpu_va_space, batch_context, i, &i);
+        status = service_virt_notifications_batch(gpu_va_space, mm, batch_context, i, &i);
     }
     else {
         status = notify_tools_and_process_flags(gpu, &batch_context->virt.notifications[i], 1, 0);

View File

@@ -1632,23 +1632,23 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
     const uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
     const uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
     uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
-    uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
+    uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
     UVM_ASSERT(vma);
     ats_context->client_type = UVM_FAULT_CLIENT_TYPE_GPC;
-    uvm_page_mask_or(faulted_mask, write_fault_mask, read_fault_mask);
+    uvm_page_mask_or(accessed_mask, write_fault_mask, read_fault_mask);
     status = uvm_ats_service_faults(gpu_va_space, vma, base, &batch_context->ats_context);
     // Remove prefetched pages from the serviced mask since fault servicing
     // failures belonging to prefetch pages need to be ignored.
-    uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, faulted_mask);
+    uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, accessed_mask);
-    UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, faulted_mask));
+    UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, accessed_mask));
-    if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, faulted_mask)) {
+    if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, accessed_mask)) {
         (*block_faults) += (fault_index_end - fault_index_start);
         return status;
     }

View File

@@ -114,6 +114,16 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
 #define UVM_IS_CONFIG_HMM() 0
 #endif
+// ATS prefetcher uses hmm_range_fault() to query residency information.
+// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
+// of memory regions while hmm_range_fault() is being called, MMU interval
+// notifiers are needed.
+#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
+#define UVM_HMM_RANGE_FAULT_SUPPORTED() 1
+#else
+#define UVM_HMM_RANGE_FAULT_SUPPORTED() 0
+#endif
 // Various issues prevent us from using mmu_notifiers in older kernels. These
 // include:
 // - ->release being called under RCU instead of SRCU: fixed by commit

View File

@@ -280,7 +280,9 @@ NV_STATUS uvm_va_space_mm_register(uvm_va_space_t *va_space)
         }
     }
-    if ((UVM_IS_CONFIG_HMM() || UVM_ATS_PREFETCH_SUPPORTED()) && uvm_va_space_pageable_mem_access_supported(va_space)) {
+    if ((UVM_IS_CONFIG_HMM() || UVM_HMM_RANGE_FAULT_SUPPORTED()) &&
+        uvm_va_space_pageable_mem_access_supported(va_space)) {
 #if UVM_CAN_USE_MMU_NOTIFIERS()
         // Initialize MMU interval notifiers for this process. This allows
         // mmu_interval_notifier_insert() to be called without holding the

View File

@@ -56,7 +56,11 @@
 #include "nv-pat.h"
 #include "nv-dmabuf.h"
-#if !defined(CONFIG_RETPOLINE)
+/*
+ * Commit aefb2f2e619b ("x86/bugs: Rename CONFIG_RETPOLINE =>
+ * CONFIG_MITIGATION_RETPOLINE) in v6.8 renamed CONFIG_RETPOLINE.
+ */
+#if !defined(CONFIG_RETPOLINE) && !defined(CONFIG_MITIGATION_RETPOLINE)
 #include "nv-retpoline.h"
 #endif

View File

@@ -250,6 +250,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += num_registered_fb
 NV_CONFTEST_TYPE_COMPILE_TESTS += pci_driver_has_driver_managed_dma
 NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
 NV_CONFTEST_TYPE_COMPILE_TESTS += memory_failure_has_trapno_arg
+NV_CONFTEST_TYPE_COMPILE_TESTS += foll_longterm_present
 NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present
 NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build

View File

@@ -1,5 +1,5 @@
 /*
- * SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+ * SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
@@ -2130,6 +2130,8 @@ static int os_numa_verify_gpu_memory_zone(struct notifier_block *nb,
     return NOTIFY_OK;
 }
+#define ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS 4
 NV_STATUS NV_API_CALL os_numa_add_gpu_memory
 (
     void *handle,
@@ -2143,7 +2145,12 @@ NV_STATUS NV_API_CALL os_numa_add_gpu_memory
     nv_linux_state_t *nvl = pci_get_drvdata(handle);
     nv_state_t *nv = NV_STATE_PTR(nvl);
     NvU64 base = offset + nvl->coherent_link_info.gpu_mem_pa;
-    int ret;
+    int ret = 0;
+    NvU64 memblock_size;
+    NvU64 size_remaining;
+    NvU64 calculated_segment_size;
+    NvU64 segment_size;
+    NvU64 segment_base;
     os_numa_gpu_mem_hotplug_notifier_t notifier =
     {
         .start_pa = base,
@@ -2176,11 +2183,49 @@ NV_STATUS NV_API_CALL os_numa_add_gpu_memory
         goto failed;
     }
+    //
+    // Adding all memory at once can take a long time. Split up memory into segments
+    // with schedule() in between to prevent soft lockups. Memory segments for
+    // add_memory_driver_managed() need to be aligned to memblock size.
+    //
+    // If there are any issues splitting into segments, then add all memory at once.
+    //
+    if (os_numa_memblock_size(&memblock_size) == NV_OK)
+    {
+        calculated_segment_size = NV_ALIGN_UP(size / ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS, memblock_size);
+    }
+    else
+    {
+        // Don't split into segments, add all memory at once
+        calculated_segment_size = size;
+    }
+    segment_size = calculated_segment_size;
+    segment_base = base;
+    size_remaining = size;
+    while ((size_remaining > 0) &&
+           (ret == 0))
+    {
+        if (segment_size > size_remaining)
+        {
+            segment_size = size_remaining;
+        }
 #ifdef NV_ADD_MEMORY_DRIVER_MANAGED_HAS_MHP_FLAGS_ARG
-    ret = add_memory_driver_managed(node, base, size, "System RAM (NVIDIA)", MHP_NONE);
+        ret = add_memory_driver_managed(node, segment_base, segment_size, "System RAM (NVIDIA)", MHP_NONE);
 #else
-    ret = add_memory_driver_managed(node, base, size, "System RAM (NVIDIA)");
+        ret = add_memory_driver_managed(node, segment_base, segment_size, "System RAM (NVIDIA)");
 #endif
+        nv_printf(NV_DBG_SETUP, "NVRM: add_memory_driver_managed() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
+                  ret, segment_base, segment_size);
+        segment_base += segment_size;
+        size_remaining -= segment_size;
+        // Yield CPU to prevent soft lockups
+        schedule();
+    }
     unregister_memory_notifier(&notifier.memory_notifier);
     if (ret == 0)
@@ -2194,14 +2239,33 @@ NV_STATUS NV_API_CALL os_numa_add_gpu_memory
             zone_end_pfn(zone) != end_pfn)
         {
             nv_printf(NV_DBG_ERRORS, "NVRM: GPU memory zone movable auto onlining failed!\n");
 #ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
-#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
-            if (offline_and_remove_memory(node, base, size) != 0)
-#else
-            if (offline_and_remove_memory(base, size) != 0)
-#endif
+            // Since zone movable auto onlining failed, need to remove the added memory.
+            segment_size = calculated_segment_size;
+            segment_base = base;
+            size_remaining = size;
+            while (size_remaining > 0)
             {
-                nv_printf(NV_DBG_ERRORS, "NVRM: offline_and_remove_memory failed\n");
+                if (segment_size > size_remaining)
+                {
+                    segment_size = size_remaining;
+                }
+#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
+                ret = offline_and_remove_memory(node, segment_base, segment_size);
+#else
+                ret = offline_and_remove_memory(segment_base, segment_size);
+#endif
+                nv_printf(NV_DBG_SETUP, "NVRM: offline_and_remove_memory() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
+                          ret, segment_base, segment_size);
+                segment_base += segment_size;
+                size_remaining -= segment_size;
+                // Yield CPU to prevent soft lockups
+                schedule();
             }
 #endif
             goto failed;
@@ -2221,6 +2285,77 @@ failed:
     return NV_ERR_NOT_SUPPORTED;
 }
+typedef struct {
+    NvU64 base;
+    NvU64 size;
+    NvU32 nodeId;
+    int ret;
+} remove_numa_memory_info_t;
+static void offline_numa_memory_callback
+(
+    void *args
+)
+{
+#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
+    remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
+    int ret = 0;
+    NvU64 memblock_size;
+    NvU64 size_remaining;
+    NvU64 calculated_segment_size;
+    NvU64 segment_size;
+    NvU64 segment_base;
+    //
+    // Removing all memory at once can take a long time. Split up memory into segments
+    // with schedule() in between to prevent soft lockups. Memory segments for
+    // offline_and_remove_memory() need to be aligned to memblock size.
+    //
+    // If there are any issues splitting into segments, then remove all memory at once.
+    //
+    if (os_numa_memblock_size(&memblock_size) == NV_OK)
+    {
+        calculated_segment_size = NV_ALIGN_UP(pNumaInfo->size / ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS, memblock_size);
+    }
+    else
+    {
+        // Don't split into segments, remove all memory at once
+        calculated_segment_size = pNumaInfo->size;
+    }
+    segment_size = calculated_segment_size;
+    segment_base = pNumaInfo->base;
+    size_remaining = pNumaInfo->size;
+    while (size_remaining > 0)
+    {
+        if (segment_size > size_remaining)
+        {
+            segment_size = size_remaining;
+        }
+#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
+        ret = offline_and_remove_memory(pNumaInfo->nodeId,
+                                        segment_base,
+                                        segment_size);
+#else
+        ret = offline_and_remove_memory(segment_base,
+                                        segment_size);
+#endif
+        nv_printf(NV_DBG_SETUP, "NVRM: offline_and_remove_memory() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
+                  ret, segment_base, segment_size);
+        pNumaInfo->ret |= ret;
+        segment_base += segment_size;
+        size_remaining -= segment_size;
+        // Yield CPU to prevent soft lockups
+        schedule();
+    }
+#endif
+}
 NV_STATUS NV_API_CALL os_numa_remove_gpu_memory
 (
     void *handle,
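To make the segment sizing used in os_numa_add_gpu_memory() above concrete, here is a small, self-contained user-space sketch with invented numbers (ALIGN_UP stands in for the driver's NV_ALIGN_UP): a 62 GiB region with 1 GiB memblocks and four segments yields three 16 GiB passes and a final 14 GiB remainder.

    #include <stdio.h>
    #include <stdint.h>

    /* Stand-in for NV_ALIGN_UP: round x up to a multiple of a. */
    #define ALIGN_UP(x, a) ((((x) + (a) - 1) / (a)) * (a))

    int main(void)
    {
        /* Hypothetical inputs: 62 GiB of GPU memory, 1 GiB memblock size. */
        const uint64_t GiB = 1ull << 30;
        uint64_t size = 62 * GiB;
        uint64_t memblock_size = 1 * GiB;
        uint64_t segment_size = ALIGN_UP(size / 4, memblock_size);
        uint64_t offset = 0, remaining = size;

        while (remaining > 0) {
            /* Clamp the last pass to whatever is left, as the driver loop does. */
            uint64_t this_segment = segment_size > remaining ? remaining : segment_size;
            printf("segment at offset %llu GiB, size %llu GiB\n",
                   (unsigned long long)(offset / GiB),
                   (unsigned long long)(this_segment / GiB));
            offset += this_segment;
            remaining -= this_segment;
        }
        return 0;
    }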

View File

@@ -26,6 +26,12 @@
 #include "os-interface.h"
 #include "nv-linux.h"
+#if defined(NVCPU_FAMILY_X86) && defined(NV_FOLL_LONGTERM_PRESENT) && \
+    (defined(NV_PIN_USER_PAGES_HAS_ARGS_VMAS) || \
+     defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS))
+#define NV_NUM_PIN_PAGES_PER_ITERATION 0x80000
+#endif
 static inline int nv_follow_pfn(struct vm_area_struct *vma,
                                 unsigned long address,
                                 unsigned long *pfn)
@@ -163,9 +169,15 @@ NV_STATUS NV_API_CALL os_lock_user_pages(
     NV_STATUS rmStatus;
     struct mm_struct *mm = current->mm;
     struct page **user_pages;
-    NvU64 i, pinned;
+    NvU64 i;
+    NvU64 npages = page_count;
+    NvU64 pinned = 0;
     unsigned int gup_flags = DRF_VAL(_LOCK_USER_PAGES, _FLAGS, _WRITE, flags) ? FOLL_WRITE : 0;
-    int ret;
+    long ret;
+#if defined(NVCPU_FAMILY_X86) && defined(NV_FOLL_LONGTERM_PRESENT)
+    gup_flags |= FOLL_LONGTERM;
+#endif
     if (!NV_MAY_SLEEP())
     {
@@ -185,16 +197,51 @@ NV_STATUS NV_API_CALL os_lock_user_pages(
     nv_mmap_read_lock(mm);
     ret = NV_PIN_USER_PAGES((unsigned long)address,
-                            page_count, gup_flags, user_pages, NULL);
-    nv_mmap_read_unlock(mm);
-    pinned = ret;
-    if (ret < 0)
+                            npages, gup_flags, user_pages, NULL);
+    if (ret > 0)
     {
-        os_free_mem(user_pages);
-        return NV_ERR_INVALID_ADDRESS;
+        pinned = ret;
     }
-    else if (pinned < page_count)
+#if defined(NVCPU_FAMILY_X86) && defined(NV_FOLL_LONGTERM_PRESENT) && \
+    (defined(NV_PIN_USER_PAGES_HAS_ARGS_VMAS) || \
+     defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS))
+    //
+    // NV_PIN_USER_PAGES() passes in NULL for the vmas parameter (if required)
+    // in pin_user_pages() (or get_user_pages() if pin_user_pages() does not
+    // exist). For kernels which do not contain the commit 52650c8b466b
+    // (mm/gup: remove the vma allocation from gup_longterm_locked()), if
+    // FOLL_LONGTERM is passed in, this results in the kernel trying to kcalloc
+    // the vmas array, and since the limit for kcalloc is 4 MB, it results in
+    // NV_PIN_USER_PAGES() failing with ENOMEM if more than
+    // NV_NUM_PIN_PAGES_PER_ITERATION pages are requested on 64-bit systems.
+    //
+    // As a workaround, if we requested more than
+    // NV_NUM_PIN_PAGES_PER_ITERATION pages and failed with ENOMEM, try again
+    // with multiple calls of NV_NUM_PIN_PAGES_PER_ITERATION pages at a time.
+    //
+    else if ((ret == -ENOMEM) &&
+             (page_count > NV_NUM_PIN_PAGES_PER_ITERATION))
+    {
+        for (pinned = 0; pinned < page_count; pinned += ret)
+        {
+            npages = page_count - pinned;
+            if (npages > NV_NUM_PIN_PAGES_PER_ITERATION)
+            {
+                npages = NV_NUM_PIN_PAGES_PER_ITERATION;
+            }
+            ret = NV_PIN_USER_PAGES(((unsigned long) address) + (pinned * PAGE_SIZE),
+                                    npages, gup_flags, &user_pages[pinned], NULL);
+            if (ret <= 0)
+            {
+                break;
+            }
+        }
+    }
+#endif
+    nv_mmap_read_unlock(mm);
+    if (pinned < page_count)
     {
         for (i = 0; i < pinned; i++)
             NV_UNPIN_USER_PAGE(user_pages[i]);

View File

@@ -36,25 +36,25 @@
 // and then checked back in. You cannot make changes to these sections without
 // corresponding changes to the buildmeister script
 #ifndef NV_BUILD_BRANCH
-#define NV_BUILD_BRANCH r538_27
+#define NV_BUILD_BRANCH r538_49
 #endif
 #ifndef NV_PUBLIC_BRANCH
-#define NV_PUBLIC_BRANCH r538_27
+#define NV_PUBLIC_BRANCH r538_49
 #endif
 #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
-#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r538_27-451"
+#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r538_49-495"
-#define NV_BUILD_CHANGELIST_NUM (33992350)
+#define NV_BUILD_CHANGELIST_NUM (34058561)
 #define NV_BUILD_TYPE "Official"
-#define NV_BUILD_NAME "rel/gpu_drv/r535/r538_27-451"
+#define NV_BUILD_NAME "rel/gpu_drv/r535/r538_49-495"
-#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33992350)
+#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34058561)
 #else /* Windows builds */
-#define NV_BUILD_BRANCH_VERSION "r538_27-6"
+#define NV_BUILD_BRANCH_VERSION "r538_49-2"
-#define NV_BUILD_CHANGELIST_NUM (33992350)
+#define NV_BUILD_CHANGELIST_NUM (34058561)
 #define NV_BUILD_TYPE "Official"
-#define NV_BUILD_NAME "538.46"
+#define NV_BUILD_NAME "538.52"
-#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33992350)
+#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34058561)
 #define NV_BUILD_BRANCH_BASE_VERSION R535
 #endif
 // End buildmeister python edited section

View File

@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \ #if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1) (defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
#define NV_VERSION_STRING "535.161.08" #define NV_VERSION_STRING "535.171.04"
#else #else

View File

@ -95,6 +95,7 @@ endif
ifeq ($(TARGET_ARCH),aarch64) ifeq ($(TARGET_ARCH),aarch64)
CFLAGS += -mgeneral-regs-only CFLAGS += -mgeneral-regs-only
CFLAGS += -march=armv8-a CFLAGS += -march=armv8-a
CFLAGS += -ffixed-x18
CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mno-outline-atomics) CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mno-outline-atomics)
endif endif

View File

@ -38,7 +38,7 @@ void nvUpdateHdmiInfoFrames(const NVDispEvoRec *pDispEvo,
void nvDpyUpdateHdmiPreModesetEvo(NVDpyEvoPtr pDpyEvo); void nvDpyUpdateHdmiPreModesetEvo(NVDpyEvoPtr pDpyEvo);
void nvDpyUpdateHdmiVRRCaps(NVDpyEvoPtr pDpyEvo); void nvDpyUpdateHdmiVRRCaps(NVDpyEvoPtr pDpyEvo);
void nvUpdateHdmiCaps(NVDpyEvoPtr pDpyEvo); void nvSendHdmiCapsToRm(NVDpyEvoPtr pDpyEvo);
void nvLogEdidCea861InfoEvo(NVDpyEvoPtr pDpyEvo, void nvLogEdidCea861InfoEvo(NVDpyEvoPtr pDpyEvo,
NVEvoInfoStringPtr pInfoString); NVEvoInfoStringPtr pInfoString);

View File

@ -71,7 +71,7 @@ static NvBool ValidateEdid (const NVDpyEvoRec *pDpyEvo,
const NvBool ignoreEdidChecksum); const NvBool ignoreEdidChecksum);
static void LogEdid (NVDpyEvoPtr pDpyEvo, static void LogEdid (NVDpyEvoPtr pDpyEvo,
NVEvoInfoStringPtr pInfoString); NVEvoInfoStringPtr pInfoString);
static void ClearEdid (NVDpyEvoPtr pDpyEvo); static void ClearEdid (NVDpyEvoPtr pDpyEvo, const NvBool bSendHdmiCapsToRm);
static void ClearCustomEdid (const NVDpyEvoRec *pDpyEvo); static void ClearCustomEdid (const NVDpyEvoRec *pDpyEvo);
static void WriteEdidToResman (const NVDpyEvoRec *pDpyEvo, static void WriteEdidToResman (const NVDpyEvoRec *pDpyEvo,
const NVEdidRec *pEdid); const NVEdidRec *pEdid);
@ -90,14 +90,14 @@ static void AssignDpyEvoName (NVDpyEvoPtr pDpyEvo);
static NvBool IsConnectorTMDS (NVConnectorEvoPtr); static NvBool IsConnectorTMDS (NVConnectorEvoPtr);
static void DpyDisconnectEvo(NVDpyEvoPtr pDpyEvo) static void DpyDisconnectEvo(NVDpyEvoPtr pDpyEvo, const NvBool bSendHdmiCapsToRm)
{ {
NVDispEvoPtr pDispEvo = pDpyEvo->pDispEvo; NVDispEvoPtr pDispEvo = pDpyEvo->pDispEvo;
pDispEvo->connectedDisplays = pDispEvo->connectedDisplays =
nvDpyIdListMinusDpyId(pDispEvo->connectedDisplays, pDpyEvo->id); nvDpyIdListMinusDpyId(pDispEvo->connectedDisplays, pDpyEvo->id);
ClearEdid(pDpyEvo); ClearEdid(pDpyEvo, bSendHdmiCapsToRm);
} }
static NvBool DpyConnectEvo( static NvBool DpyConnectEvo(
@ -351,6 +351,7 @@ static void ApplyNewEdid(
NVDpyEvoPtr pDpyEvo, NVDpyEvoPtr pDpyEvo,
const NVEdidRec *pEdid, const NVEdidRec *pEdid,
const NVParsedEdidEvoRec *pParsedEdid, const NVParsedEdidEvoRec *pParsedEdid,
const NvBool bSendHdmiCapsToRm,
NVEvoInfoStringPtr pInfoString) NVEvoInfoStringPtr pInfoString)
{ {
if (pDpyEvo->edid.buffer != NULL) { if (pDpyEvo->edid.buffer != NULL) {
@ -392,7 +393,9 @@ static void ApplyNewEdid(
DpyAssignColorSpaceCaps(pDpyEvo, pInfoString); DpyAssignColorSpaceCaps(pDpyEvo, pInfoString);
} }
nvUpdateHdmiCaps(pDpyEvo); if (bSendHdmiCapsToRm) {
nvSendHdmiCapsToRm(pDpyEvo);
}
nvDpyProbeMaxPixelClock(pDpyEvo); nvDpyProbeMaxPixelClock(pDpyEvo);
@ -574,7 +577,8 @@ static void ReadAndApplyEdidEvo(
* worrying that this request has different parameters (like CustomEdid * worrying that this request has different parameters (like CustomEdid
* or mode validation overrides). * or mode validation overrides).
*/ */
ApplyNewEdid(pDpyEvo, &edid, pParsedEdid, &infoString); ApplyNewEdid(pDpyEvo, &edid, pParsedEdid, TRUE /* bSendHdmiCapsToRm */,
&infoString);
} else { } else {
nvFree(edid.buffer); nvFree(edid.buffer);
} }
@ -1844,14 +1848,15 @@ static void LogEdid(NVDpyEvoPtr pDpyEvo, NVEvoInfoStringPtr pInfoString)
* structure. * structure.
*/ */
static void ClearEdid(NVDpyEvoPtr pDpyEvo) static void ClearEdid(NVDpyEvoPtr pDpyEvo, const NvBool bSendHdmiCapsToRm)
{ {
NVEdidRec edid = { }; NVEdidRec edid = { };
NVEvoInfoStringRec infoString; NVEvoInfoStringRec infoString;
nvInitInfoString(&infoString, NULL, 0); nvInitInfoString(&infoString, NULL, 0);
if (EdidHasChanged(pDpyEvo, &edid, NULL)) { if (EdidHasChanged(pDpyEvo, &edid, NULL)) {
ApplyNewEdid(pDpyEvo, &edid, NULL, &infoString); ApplyNewEdid(pDpyEvo, &edid, NULL,
bSendHdmiCapsToRm, &infoString);
} }
} }
@ -2283,7 +2288,7 @@ NVDpyEvoPtr nvAllocDpyEvo(NVDispEvoPtr pDispEvo,
void nvFreeDpyEvo(NVDispEvoPtr pDispEvo, NVDpyEvoPtr pDpyEvo) void nvFreeDpyEvo(NVDispEvoPtr pDispEvo, NVDpyEvoPtr pDpyEvo)
{ {
DpyDisconnectEvo(pDpyEvo); DpyDisconnectEvo(pDpyEvo, FALSE /* bSendHdmiCapsToRm */);
// Let the DP library host implementation handle deleting a pDpy as if the // Let the DP library host implementation handle deleting a pDpy as if the
// library had notified it of a lost device. // library had notified it of a lost device.
@ -2826,7 +2831,7 @@ NvBool nvDpyGetDynamicData(
return FALSE; return FALSE;
} }
} else { } else {
DpyDisconnectEvo(pDpyEvo); DpyDisconnectEvo(pDpyEvo, TRUE /* bSendHdmiCapsToRm */);
} }
if (nvConnectorUsesDPLib(pConnectorEvo)) { if (nvConnectorUsesDPLib(pConnectorEvo)) {
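
The hunks above thread a bSendHdmiCapsToRm flag from the two DpyDisconnectEvo() call sites down through ClearEdid() into ApplyNewEdid(), so that freeing a pDpy skips the RM control call while a real hot-unplug still clears the sink caps in RM. Reduced to its shape (names and types simplified; not the driver's code):

    typedef int NvBoolSketch;                       /* stand-in for NvBool */

    static void ApplyNewEdidSketch(void *pDpy, NvBoolSketch bSendHdmiCapsToRm)
    {
        /* ... store/parse the (possibly empty) EDID ... */
        if (bSendHdmiCapsToRm)
        {
            /* nvSendHdmiCapsToRm(pDpy): RM round-trip, skipped on teardown */
        }
    }

    static void DpyDisconnectSketch(void *pDpy, NvBoolSketch bSendHdmiCapsToRm)
    {
        ApplyNewEdidSketch(pDpy, bSendHdmiCapsToRm);   /* reached via ClearEdid() */
    }

    static void FreeDpySketch(void *pDpy)              /* nvFreeDpyEvo() path */
    {
        DpyDisconnectSketch(pDpy, 0 /* FALSE */);
    }

    static void HotUnplugSketch(void *pDpy)            /* nvDpyGetDynamicData() path */
    {
        DpyDisconnectSketch(pDpy, 1 /* TRUE */);
    }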

View File

@ -8602,6 +8602,7 @@ void nvEvoEnableMergeModePreModeset(NVDispEvoRec *pDispEvo,
pHC->serverLock = NV_EVO_RASTER_LOCK; pHC->serverLock = NV_EVO_RASTER_LOCK;
pHC->serverLockPin = NV_EVO_LOCK_PIN_INTERNAL(primaryHead); pHC->serverLockPin = NV_EVO_LOCK_PIN_INTERNAL(primaryHead);
pHC->setLockOffsetX = TRUE; pHC->setLockOffsetX = TRUE;
pHC->crashLockUnstallMode = FALSE;
} else { } else {
pHC->clientLock = NV_EVO_RASTER_LOCK; pHC->clientLock = NV_EVO_RASTER_LOCK;
pHC->clientLockPin = NV_EVO_LOCK_PIN_INTERNAL(primaryHead); pHC->clientLockPin = NV_EVO_LOCK_PIN_INTERNAL(primaryHead);
@ -8612,11 +8613,10 @@ void nvEvoEnableMergeModePreModeset(NVDispEvoRec *pDispEvo,
} else { } else {
pHC->clientLockoutWindow = 2; pHC->clientLockoutWindow = 2;
} }
pHC->crashLockUnstallMode =
(pTimings->vrr.type != NVKMS_DPY_VRR_TYPE_NONE);
} }
if (pTimings->vrr.type != NVKMS_DPY_VRR_TYPE_NONE) {
pHC->crashLockUnstallMode = TRUE;
}
pHC->stereoLocked = FALSE; pHC->stereoLocked = FALSE;
EvoUpdateHeadParams(pDispEvo, head, pUpdateState); EvoUpdateHeadParams(pDispEvo, head, pUpdateState);
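
The net effect of this hunk is easier to see untangled: the server (primary) head now starts with crash-lock unstall explicitly disabled, and the VRR check moves after the server/client split so it applies to whichever head is being configured, not only the client head as before. A simplified restatement (isPrimaryHead is a stand-in for the actual condition used above):

    if (isPrimaryHead)
    {
        pHC->serverLock = NV_EVO_RASTER_LOCK;
        /* ... */
        pHC->crashLockUnstallMode = FALSE;       /* new: explicit default */
    }
    else
    {
        pHC->clientLock = NV_EVO_RASTER_LOCK;
        /* ... the old code set crashLockUnstallMode only in this branch ... */
    }

    if (pTimings->vrr.type != NVKMS_DPY_VRR_TYPE_NONE)
    {
        pHC->crashLockUnstallMode = TRUE;        /* new: applies to either head */
    }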

View File

@ -6639,12 +6639,19 @@ static void EvoSetStallLockC3(NVDispEvoPtr pDispEvo, const int head,
NVEvoChannelPtr pChannel = pDevEvo->core; NVEvoChannelPtr pChannel = pDevEvo->core;
NVEvoSubDevPtr pEvoSubDev = &pDevEvo->gpus[pDispEvo->displayOwner]; NVEvoSubDevPtr pEvoSubDev = &pDevEvo->gpus[pDispEvo->displayOwner];
NVEvoHeadControlPtr pHC = &pEvoSubDev->headControl[head]; NVEvoHeadControlPtr pHC = &pEvoSubDev->headControl[head];
NvU32 data = 0x0;
nvUpdateUpdateState(pDevEvo, updateState, pChannel); nvUpdateUpdateState(pDevEvo, updateState, pChannel);
if (pHC->crashLockUnstallMode) {
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _UNSTALL_MODE, _CRASH_LOCK);
} else {
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _UNSTALL_MODE, _LINE_LOCK);
}
if (enable) { if (enable) {
NvU32 data = DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _ENABLE, _TRUE) | data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _ENABLE, _TRUE) |
DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _MODE, _ONE_SHOT); DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _MODE, _ONE_SHOT);
if (!pHC->useStallLockPin) { if (!pHC->useStallLockPin) {
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _LOCK_PIN, _LOCK_PIN_NONE); data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _LOCK_PIN, _LOCK_PIN_NONE);
@ -6657,20 +6664,12 @@ static void EvoSetStallLockC3(NVDispEvoPtr pDispEvo, const int head,
data |= DRF_NUM(C37D, _HEAD_SET_STALL_LOCK, _LOCK_PIN, data |= DRF_NUM(C37D, _HEAD_SET_STALL_LOCK, _LOCK_PIN,
NVC37D_HEAD_SET_STALL_LOCK_LOCK_PIN_LOCK_PIN(pin)); NVC37D_HEAD_SET_STALL_LOCK_LOCK_PIN_LOCK_PIN(pin));
} }
if (pHC->crashLockUnstallMode) {
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _UNSTALL_MODE, _CRASH_LOCK);
} else {
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _UNSTALL_MODE, _LINE_LOCK);
}
nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_STALL_LOCK(head), 1);
nvDmaSetEvoMethodData(pChannel, data);
} else { } else {
nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_STALL_LOCK(head), 1); data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _ENABLE, _FALSE);
nvDmaSetEvoMethodData(pChannel,
DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _ENABLE, _FALSE));
} }
nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_STALL_LOCK(head), 1);
nvDmaSetEvoMethodData(pChannel, data);
} }
static NvBool GetChannelState(NVDevEvoPtr pDevEvo, static NvBool GetChannelState(NVDevEvoPtr pDevEvo,
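
Untangled, the reworked EvoSetStallLockC3() composes the method data once and issues a single method write for both the enable and disable paths; only the field values differ. A condensed restatement using the same macros that appear in the diff (lock-pin selection elided):

    NvU32 data = pHC->crashLockUnstallMode
               ? DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _UNSTALL_MODE, _CRASH_LOCK)
               : DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _UNSTALL_MODE, _LINE_LOCK);

    if (enable)
    {
        data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _ENABLE, _TRUE) |
                DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _MODE, _ONE_SHOT);
        /* ... lock-pin handling exactly as shown above ... */
    }
    else
    {
        data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _ENABLE, _FALSE);
    }

    nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_STALL_LOCK(head), 1);
    nvDmaSetEvoMethodData(pChannel, data);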

View File

@ -203,7 +203,7 @@ NvBool nvDpyIsHdmiEvo(const NVDpyEvoRec *pDpyEvo)
/*! /*!
* Updates the display's HDMI 2.0 capabilities to the RM. * Updates the display's HDMI 2.0 capabilities to the RM.
*/ */
void nvUpdateHdmiCaps(NVDpyEvoPtr pDpyEvo) void nvSendHdmiCapsToRm(NVDpyEvoPtr pDpyEvo)
{ {
NV0073_CTRL_SPECIFIC_SET_HDMI_SINK_CAPS_PARAMS params = { 0 }; NV0073_CTRL_SPECIFIC_SET_HDMI_SINK_CAPS_PARAMS params = { 0 };
NVParsedEdidEvoPtr pParsedEdid = &pDpyEvo->parsedEdid; NVParsedEdidEvoPtr pParsedEdid = &pDpyEvo->parsedEdid;
@ -221,7 +221,7 @@ void nvUpdateHdmiCaps(NVDpyEvoPtr pDpyEvo)
params.caps = 0; params.caps = 0;
/* /*
* nvUpdateHdmiCaps() gets called on dpy's connect/disconnect events * nvSendHdmiCapsToRm() gets called on dpy's connect/disconnect events
* to set/clear capabilities, clear capabilities if parsed edid * to set/clear capabilities, clear capabilities if parsed edid
* is not valid. * is not valid.
*/ */

View File

@ -91,6 +91,7 @@ ifeq ($(TARGET_ARCH),aarch64)
CFLAGS += -mgeneral-regs-only CFLAGS += -mgeneral-regs-only
CFLAGS += -march=armv8-a CFLAGS += -march=armv8-a
CFLAGS += -mstrict-align CFLAGS += -mstrict-align
CFLAGS += -ffixed-x18
CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mno-outline-atomics) CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mno-outline-atomics)
endif endif

View File

@ -1,5 +1,5 @@
/* /*
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a
@ -77,6 +77,9 @@
#define NV_CTRL_INTR_GPU_VECTOR_TO_SUBTREE(i) \ #define NV_CTRL_INTR_GPU_VECTOR_TO_SUBTREE(i) \
((NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(i)) / 2) ((NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(i)) / 2)
// First index of doorbell which is controlled by VF
#define NV_CTRL_INTR_GPU_DOORBELL_INDEX_VF_START 2048
// The max number of leaf registers we expect // The max number of leaf registers we expect
#define NV_MAX_INTR_LEAVES 16 #define NV_MAX_INTR_LEAVES 16

View File

@ -1042,6 +1042,8 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x28A0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Laptop GPU" }, { 0x28A0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Laptop GPU" },
{ 0x28A1, 0x0000, 0x0000, "NVIDIA GeForce RTX 4050 Laptop GPU" }, { 0x28A1, 0x0000, 0x0000, "NVIDIA GeForce RTX 4050 Laptop GPU" },
{ 0x28B8, 0x0000, 0x0000, "NVIDIA RTX 2000 Ada Generation Laptop GPU" }, { 0x28B8, 0x0000, 0x0000, "NVIDIA RTX 2000 Ada Generation Laptop GPU" },
{ 0x28B9, 0x0000, 0x0000, "NVIDIA RTX 1000 Ada Generation Laptop GPU" },
{ 0x28BB, 0x0000, 0x0000, "NVIDIA RTX 500 Ada Generation Laptop GPU" },
{ 0x28E0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Laptop GPU" }, { 0x28E0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Laptop GPU" },
{ 0x28E1, 0x0000, 0x0000, "NVIDIA GeForce RTX 4050 Laptop GPU" }, { 0x28E1, 0x0000, 0x0000, "NVIDIA GeForce RTX 4050 Laptop GPU" },
{ 0x28F8, 0x0000, 0x0000, "NVIDIA RTX 2000 Ada Generation Embedded GPU" }, { 0x28F8, 0x0000, 0x0000, "NVIDIA RTX 2000 Ada Generation Embedded GPU" },

View File

@ -7,7 +7,7 @@ extern "C" {
#endif #endif
/* /*
* SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. * SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT * SPDX-License-Identifier: MIT
* *
* Permission is hereby granted, free of charge, to any person obtaining a * Permission is hereby granted, free of charge, to any person obtaining a

View File

@ -103,4 +103,24 @@ typedef struct MESSAGE_QUEUE_COLLECTION
#define GSP_MSG_QUEUE_HEADER_SIZE RM_PAGE_SIZE #define GSP_MSG_QUEUE_HEADER_SIZE RM_PAGE_SIZE
#define GSP_MSG_QUEUE_HEADER_ALIGN 4 // 2 ^ 4 = 16 #define GSP_MSG_QUEUE_HEADER_ALIGN 4 // 2 ^ 4 = 16
/*!
* Calculate 32-bit checksum
*
* This routine assumes that the data is padded out with zeros to the next
* 8-byte alignment, and it is OK to read past the end to the 8-byte alignment.
*/
static NV_INLINE NvU32 _checkSum32(void *pData, NvU32 uLen)
{
NvU64 *p = (NvU64 *)pData;
NvU64 *pEnd = (NvU64 *)((NvUPtr)pData + uLen);
NvU64 checkSum = 0;
NV_ASSERT_CHECKED(uLen > 0);
while (p < pEnd)
checkSum ^= *p++;
return NvU64_HI32(checkSum) ^ NvU64_LO32(checkSum);
}
#endif // _MESSAGE_QUEUE_PRIV_H_ #endif // _MESSAGE_QUEUE_PRIV_H_
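
_checkSum32() XOR-folds the buffer in 64-bit words, then folds the two 32-bit halves together. Because XOR is its own inverse, a sender that computes the checksum with the element's checkSum field zeroed and then stores the result in that field produces a buffer whose whole-range fold is zero, which is what the receive path in GspMsgQueueReceiveStatus() verifies. A minimal, self-contained illustration of the property (the element layout here is hypothetical, not the GSP queue element, and stdint types stand in for NvU32/NvU64):

    #include <assert.h>
    #include <stdint.h>
    #include <string.h>

    /* Same folding scheme as _checkSum32(), written with stdint types. */
    static uint32_t xor_fold32(const void *data, size_t len /* multiple of 8 */)
    {
        const uint64_t *p   = (const uint64_t *)data;
        const uint64_t *end = (const uint64_t *)((const char *)data + len);
        uint64_t sum = 0;

        while (p < end)
            sum ^= *p++;

        return (uint32_t)(sum >> 32) ^ (uint32_t)sum;
    }

    struct element {                 /* hypothetical 8-byte-aligned layout */
        uint32_t checkSum;
        uint32_t seqNum;
        uint64_t payload[4];
    };

    int main(void)
    {
        struct element e;
        memset(&e, 0, sizeof(e));
        e.seqNum = 7;
        e.payload[0] = 0x1234;

        e.checkSum = 0;                             /* the field participates, so zero it first */
        e.checkSum = xor_fold32(&e, sizeof(e));     /* sender stores the fold */

        assert(xor_fold32(&e, sizeof(e)) == 0);     /* receiver expects a zero fold */
        return 0;
    }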

View File

@ -244,32 +244,50 @@ kfspPollForQueueEmpty_IMPL
KernelFsp *pKernelFsp KernelFsp *pKernelFsp
) )
{ {
NV_STATUS status = NV_OK;
RMTIMEOUT timeout; RMTIMEOUT timeout;
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_OSTIMER | GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE); gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout,
GPU_TIMEOUT_FLAGS_OSTIMER |
GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
while (!kfspIsQueueEmpty(pGpu, pKernelFsp)) while (!kfspIsQueueEmpty(pGpu, pKernelFsp))
{ {
// //
// For now we assume that any response from FSP before RM message send is complete // For now we assume that any response from FSP before RM message
// indicates an error and we should abort. // send is complete indicates an error and we should abort.
//
// Ongoing discussion on usefulness of this check. Bug to be filed.
// //
if (!kfspIsMsgQueueEmpty(pGpu, pKernelFsp)) if (!kfspIsMsgQueueEmpty(pGpu, pKernelFsp))
{ {
kfspReadMessage(pGpu, pKernelFsp, NULL, 0); kfspReadMessage(pGpu, pKernelFsp, NULL, 0);
NV_PRINTF(LEVEL_ERROR, "Received error message from FSP while waiting for CMDQ to be empty.\n"); NV_PRINTF(LEVEL_ERROR,
return NV_ERR_GENERIC; "Received error message from FSP while waiting for CMDQ to be empty.\n");
status = NV_ERR_GENERIC;
break;
} }
if (gpuCheckTimeout(pGpu, &timeout) == NV_ERR_TIMEOUT)
{
NV_PRINTF(LEVEL_ERROR, "Timed out waiting for FSP command queue to be empty.\n");
return NV_ERR_TIMEOUT;
}
osSpinLoop(); osSpinLoop();
status = gpuCheckTimeout(pGpu, &timeout);
if (status != NV_OK)
{
if ((status == NV_ERR_TIMEOUT) &&
kfspIsQueueEmpty(pGpu, pKernelFsp))
{
status = NV_OK;
}
else
{
NV_PRINTF(LEVEL_ERROR,
"Timed out waiting for FSP command queue to be empty.\n");
}
break;
}
} }
return NV_OK; return status;
} }
/*! /*!
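
The reworked loop above also closes a common polling race: gpuCheckTimeout() can report NV_ERR_TIMEOUT even though the queue drained just before the timeout was observed, so the status is downgraded to NV_OK when a final emptiness check passes. The general pattern, independent of the FSP specifics (is_done, check_timeout, and relax are placeholders):

    typedef int status_t;
    #define STATUS_OK       0
    #define STATUS_TIMEOUT  1

    /*
     * Poll until is_done() or until check_timeout() fires, but never report a
     * timeout if the condition became true by the time the timeout was seen.
     */
    static status_t poll_until_done(int (*is_done)(void *),
                                    status_t (*check_timeout)(void *),
                                    void (*relax)(void), void *ctx)
    {
        status_t status = STATUS_OK;

        while (!is_done(ctx))
        {
            relax();                              /* e.g. osSpinLoop() */

            status = check_timeout(ctx);
            if (status != STATUS_OK)
            {
                if (status == STATUS_TIMEOUT && is_done(ctx))
                    status = STATUS_OK;           /* condition met; not a real timeout */
                break;
            }
        }

        return status;
    }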

View File

@ -476,24 +476,6 @@ void GspMsgQueuesCleanup(MESSAGE_QUEUE_COLLECTION **ppMQCollection)
*ppMQCollection = NULL; *ppMQCollection = NULL;
} }
/*!
* Calculate 32-bit checksum
*
* This routine assumes that the data is padded out with zeros to the next
* 8-byte alignment, and it is OK to read past the end to the 8-byte alignment.
*/
static NV_INLINE NvU32 _checkSum32(void *pData, NvU32 uLen)
{
NvU64 *p = (NvU64 *)pData;
NvU64 *pEnd = (NvU64 *)((NvUPtr)pData + uLen);
NvU64 checkSum = 0;
while (p < pEnd)
checkSum ^= *p++;
return NvU64_HI32(checkSum) ^ NvU64_LO32(checkSum);
}
/*! /*!
* GspMsgQueueSendCommand * GspMsgQueueSendCommand
* *
@ -533,7 +515,7 @@ NV_STATUS GspMsgQueueSendCommand(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
pCQE->seqNum = pMQI->txSeqNum; pCQE->seqNum = pMQI->txSeqNum;
pCQE->elemCount = GSP_MSG_QUEUE_BYTES_TO_ELEMENTS(uElementSize); pCQE->elemCount = GSP_MSG_QUEUE_BYTES_TO_ELEMENTS(uElementSize);
pCQE->checkSum = 0; pCQE->checkSum = 0; // The checkSum field is included in the checksum calculation, so zero it.
ConfidentialCompute *pCC = GPU_GET_CONF_COMPUTE(pGpu); ConfidentialCompute *pCC = GPU_GET_CONF_COMPUTE(pGpu);
if (pCC != NULL && pCC->getProperty(pCC, PDB_PROP_CONFCOMPUTE_ENCRYPT_ENABLED)) if (pCC != NULL && pCC->getProperty(pCC, PDB_PROP_CONFCOMPUTE_ENCRYPT_ENABLED))
@ -660,7 +642,8 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
NvU32 nRetries; NvU32 nRetries;
NvU32 nMaxRetries = 3; NvU32 nMaxRetries = 3;
NvU32 nElements = 1; // Assume record fits in one queue element for now. NvU32 nElements = 1; // Assume record fits in one queue element for now.
NvU32 uElementSize = 0; NvU32 uElementSize;
NvU32 checkSum;
NvU32 seqMismatchDiff = NV_U32_MAX; NvU32 seqMismatchDiff = NV_U32_MAX;
NV_STATUS nvStatus = NV_OK; NV_STATUS nvStatus = NV_OK;
ConfidentialCompute *pCC = NULL; ConfidentialCompute *pCC = NULL;
@ -713,15 +696,23 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
pCC = GPU_GET_CONF_COMPUTE(pGpu); pCC = GPU_GET_CONF_COMPUTE(pGpu);
if (pCC != NULL && pCC->getProperty(pCC, PDB_PROP_CONFCOMPUTE_ENCRYPT_READY)) if (pCC != NULL && pCC->getProperty(pCC, PDB_PROP_CONFCOMPUTE_ENCRYPT_READY))
{ {
// In Confidential Compute scenario, checksum includes complete element range. //
if (_checkSum32(pMQI->pCmdQueueElement, (nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN)) != 0) // In the Confidential Compute scenario, the actual message length
{ // is inside the encrypted payload, and we can't access it before
NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n"); // decryption, therefore the checksum encompasses the whole element
nvStatus = NV_ERR_INVALID_DATA; // range. This makes checksum verification significantly slower
continue; // because messages are typically much smaller than element size.
} //
checkSum = _checkSum32(pMQI->pCmdQueueElement,
(nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN));
} else } else
if (_checkSum32(pMQI->pCmdQueueElement, uElementSize) != 0) {
checkSum = _checkSum32(pMQI->pCmdQueueElement,
(GSP_MSG_QUEUE_ELEMENT_HDR_SIZE +
pMQI->pCmdQueueElement->rpc.length));
}
if (checkSum != 0)
{ {
NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n"); NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n");
nvStatus = NV_ERR_INVALID_DATA; nvStatus = NV_ERR_INVALID_DATA;
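
As the new comment explains, with Confidential Compute enabled the receive path cannot read the RPC length before decryption, so it folds the full element range; otherwise it folds only the header plus the advertised rpc.length. Reduced to the length selection (same identifiers as the diff; bEncrypted stands in for the pCC->getProperty() test):

    NvU32 len = bEncrypted
              ? nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN          /* whole element range */
              : GSP_MSG_QUEUE_ELEMENT_HDR_SIZE +
                pMQI->pCmdQueueElement->rpc.length;                 /* header + payload */

    if (_checkSum32(pMQI->pCmdQueueElement, len) != 0)
    {
        /* bad checksum: nvStatus = NV_ERR_INVALID_DATA, retry */
    }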

View File

@ -1587,6 +1587,7 @@ memdescFree
} }
if (pMemDesc->_addressSpace != ADDR_FBMEM && if (pMemDesc->_addressSpace != ADDR_FBMEM &&
pMemDesc->_addressSpace != ADDR_EGM &&
pMemDesc->_addressSpace != ADDR_SYSMEM) pMemDesc->_addressSpace != ADDR_SYSMEM)
{ {
return; return;
@ -1991,6 +1992,7 @@ memdescUnmap
switch (pMemDesc->_addressSpace) switch (pMemDesc->_addressSpace)
{ {
case ADDR_SYSMEM: case ADDR_SYSMEM:
case ADDR_EGM:
{ {
osUnmapSystemMemory(pMemDesc, Kernel, ProcessId, Address, Priv); osUnmapSystemMemory(pMemDesc, Kernel, ProcessId, Address, Priv);
break; break;

View File

@ -733,8 +733,9 @@ memUnmap_IMPL
// //
} }
// System Memory case // System Memory case
else if ((pGpu == NULL) || ((memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM) && else if ((pGpu == NULL) || (((memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM)
FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags))) || (memdescGetAddressSpace(pMemDesc) == ADDR_EGM)
) && FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags)))
{ {
if (FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags)) if (FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags))
{ {
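
The restructured condition is hard to read across the wrapped lines above; its intent is to take the system-memory unmap path when there is no GPU, or when the descriptor lives in SYSMEM or EGM and the CPU mapping was created as a DIRECT mapping. Spelled out with a helper variable (a restatement, not the driver's code):

    NvBool bSysmemLike = (memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM) ||
                         (memdescGetAddressSpace(pMemDesc) == ADDR_EGM);

    if ((pGpu == NULL) ||
        (bSysmemLike &&
         FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags)))
    {
        /* system-memory unmap path */
    }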

View File

@ -1,4 +1,4 @@
NVIDIA_VERSION = 535.161.08 NVIDIA_VERSION = 535.171.04
# This file. # This file.
VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST)) VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))