Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git (synced 2025-02-27 09:54:14 +01:00)

Commit: c042c7903d ("535.171.04")
Parent: 044f70bbb8
@ -2,6 +2,8 @@
## Release 535 Entries

### [535.171.04] 2024-03-21

### [535.161.08] 2024-03-18

### [535.161.07] 2024-02-22
README.md (10 lines changed)
@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source

This is the source release of the NVIDIA Linux open GPU kernel modules,
version 535.161.08.
version 535.171.04.

## How to Build

@ -17,7 +17,7 @@ as root:

Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
535.161.08 driver release. This can be achieved by installing
535.171.04 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
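A typical invocation might look like the following sketch (the installer file
name is an assumption for illustration; the actual `.run` file name depends on
the architecture and the package downloaded):

```sh
# Install only the user-space driver components; skip the pre-built kernel
# modules so the ones built from this source release are used instead.
# (File name assumed for illustration.)
sh ./NVIDIA-Linux-x86_64-535.171.04.run --no-kernel-modules
```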
@ -180,7 +180,7 @@ software applications.

## Compatible GPUs

The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 535.161.08 release,
(see the table below). However, in the 535.171.04 release,
GeForce and Workstation support is still considered alpha-quality.

To enable use of the open kernel modules on GeForce and Workstation GPUs,
@ -188,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:

https://us.download.nvidia.com/XFree86/Linux-x86_64/535.161.08/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.171.04/README/kernel_open.html
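As a minimal sketch of how such a module parameter is commonly made persistent
(the modprobe.d file name is an assumption, not taken from this README):

```sh
# Ask nvidia.ko to allow the open kernel modules on GeForce/Workstation GPUs
# on every load (configuration file name assumed for illustration).
echo "options nvidia NVreg_OpenRmEnableUnsupportedGpus=1" | sudo tee /etc/modprobe.d/nvidia-open.conf
```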

In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@ -892,6 +892,8 @@ Subsystem Device ID.

| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
| NVIDIA RTX 2000 Ada Generation Laptop GPU | 28B8 |
| NVIDIA RTX 1000 Ada Generation Laptop GPU | 28B9 |
| NVIDIA RTX 500 Ada Generation Laptop GPU | 28BB |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
| NVIDIA RTX 2000 Ada Generation Embedded GPU | 28F8 |
@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.161.08\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.171.04\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

@ -152,6 +152,8 @@ NV_CONFTEST_CMD := /bin/sh $(NV_CONFTEST_SCRIPT) \
NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)

NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)

NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h
NV_CONFTEST_COMPILE_TEST_HEADERS += $(obj)/conftest/functions.h
@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2001-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2001-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

@ -1982,31 +1982,6 @@ static inline NvBool nv_platform_use_auto_online(nv_linux_state_t *nvl)
return nvl->numa_info.use_auto_online;
}

typedef struct {
NvU64 base;
NvU64 size;
NvU32 nodeId;
int ret;
} remove_numa_memory_info_t;

static void offline_numa_memory_callback
(
void *args
)
{
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->nodeId,
pNumaInfo->base,
pNumaInfo->size);
#else
pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->base,
pNumaInfo->size);
#endif
#endif
}

typedef enum
{
NV_NUMA_STATUS_DISABLED = 0,
@ -3032,6 +3032,22 @@ compile_test() {
;;

foll_longterm_present)
#
# Determine if FOLL_LONGTERM enum is present or not
#
# Added by commit 932f4a630a69 ("mm/gup: replace
# get_user_pages_longterm() with FOLL_LONGTERM") in
# v5.2
#
CODE="
#include <linux/mm.h>
int foll_longterm = FOLL_LONGTERM;
"

compile_check_conftest "$CODE" "NV_FOLL_LONGTERM_PRESENT" "" "types"
;;

vfio_pin_pages_has_vfio_device_arg)
#
# Determine if vfio_pin_pages() kABI accepts "struct vfio_device *"
@ -5081,11 +5097,15 @@ compile_test() {
# vmap ops and convert GEM backends") update
# drm_gem_object_funcs::vmap to take 'map' argument.
#
# Note that the 'map' argument type is changed from 'struct dma_buf_map'
# to 'struct iosys_map' by commit 7938f4218168 ("dma-buf-map: Rename
# to iosys-map) in v5.18.
#
CODE="
#include <drm/drm_gem.h>
int conftest_drm_gem_object_vmap_has_map_arg(
struct drm_gem_object *obj, struct dma_buf_map *map) {
return obj->funcs->vmap(obj, map);
struct drm_gem_object *obj) {
return obj->funcs->vmap(obj, NULL);
}"

compile_check_conftest "$CODE" "NV_DRM_GEM_OBJECT_VMAP_HAS_MAP_ARG" "" "types"
@ -54,7 +54,11 @@
#include "nv-time.h"
#include "nv-lock.h"

#if !defined(CONFIG_RETPOLINE)
/*
* Commit aefb2f2e619b ("x86/bugs: Rename CONFIG_RETPOLINE =>
* CONFIG_MITIGATION_RETPOLINE) in v6.8 renamed CONFIG_RETPOLINE.
*/
#if !defined(CONFIG_RETPOLINE) && !defined(CONFIG_MITIGATION_RETPOLINE)
#include "nv-retpoline.h"
#endif

@ -34,16 +34,6 @@

#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())

// ATS prefetcher uses hmm_range_fault() to query residency information.
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
// of memory regions while hmm_range_fault() is being called, MMU interval
// notifiers are needed.
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
#define UVM_ATS_PREFETCH_SUPPORTED() 1
#else
#define UVM_ATS_PREFETCH_SUPPORTED() 0
#endif

typedef struct
{
// Mask of gpu_va_spaces which are registered for ATS access. The mask is
@ -30,23 +30,36 @@
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
|
||||
#include <linux/hmm.h>
|
||||
#endif
|
||||
|
||||
static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
typedef enum
|
||||
{
|
||||
UVM_ATS_SERVICE_TYPE_FAULTS = 0,
|
||||
UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS,
|
||||
UVM_ATS_SERVICE_TYPE_COUNT
|
||||
} uvm_ats_service_type_t;
|
||||
|
||||
static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 start,
|
||||
size_t length,
|
||||
uvm_fault_access_type_t access_type,
|
||||
uvm_ats_service_type_t service_type,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
|
||||
NV_STATUS status;
|
||||
NvU64 user_space_start;
|
||||
NvU64 user_space_length;
|
||||
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
|
||||
bool fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
|
||||
uvm_populate_permissions_t populate_permissions = fault_service_type ?
|
||||
(write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY) :
|
||||
UVM_POPULATE_PERMISSIONS_INHERIT;
|
||||
|
||||
|
||||
// Request uvm_migrate_pageable() to touch the corresponding page after
|
||||
// population.
|
||||
@ -83,10 +96,10 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
.dst_node_id = ats_context->residency_node,
|
||||
.start = start,
|
||||
.length = length,
|
||||
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
|
||||
.touch = true,
|
||||
.skip_mapped = true,
|
||||
.populate_on_cpu_alloc_failures = true,
|
||||
.populate_permissions = populate_permissions,
|
||||
.touch = fault_service_type,
|
||||
.skip_mapped = fault_service_type,
|
||||
.populate_on_cpu_alloc_failures = fault_service_type,
|
||||
.user_space_start = &user_space_start,
|
||||
.user_space_length = &user_space_length,
|
||||
};
|
||||
@ -233,7 +246,7 @@ static uvm_va_block_region_t uvm_ats_region_from_vma(struct vm_area_struct *vma,
|
||||
return uvm_ats_region_from_start_end(start, end);
|
||||
}
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
|
||||
|
||||
static bool uvm_ats_invalidate_notifier(struct mmu_interval_notifier *mni, unsigned long cur_seq)
|
||||
{
|
||||
@ -271,12 +284,12 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
|
||||
int ret;
|
||||
NvU64 start;
|
||||
NvU64 end;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
struct hmm_range range;
|
||||
uvm_page_index_t page_index;
|
||||
uvm_va_block_region_t vma_region;
|
||||
@ -357,78 +370,83 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
mmu_interval_notifier_remove(range.notifier);
|
||||
|
||||
#else
|
||||
uvm_page_mask_zero(residency_mask);
|
||||
#endif
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static void ats_expand_fault_region(uvm_gpu_va_space_t *gpu_va_space,
|
||||
static void ats_compute_prefetch_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_ats_fault_context_t *ats_context,
|
||||
uvm_va_block_region_t max_prefetch_region,
|
||||
uvm_page_mask_t *faulted_mask)
|
||||
uvm_va_block_region_t max_prefetch_region)
|
||||
{
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree = &ats_context->prefetch_state.bitmap_tree;
|
||||
|
||||
if (uvm_page_mask_empty(faulted_mask))
|
||||
if (uvm_page_mask_empty(accessed_mask))
|
||||
return;
|
||||
|
||||
uvm_perf_prefetch_compute_ats(gpu_va_space->va_space,
|
||||
faulted_mask,
|
||||
uvm_va_block_region_from_mask(NULL, faulted_mask),
|
||||
accessed_mask,
|
||||
uvm_va_block_region_from_mask(NULL, accessed_mask),
|
||||
max_prefetch_region,
|
||||
residency_mask,
|
||||
bitmap_tree,
|
||||
prefetch_mask);
|
||||
|
||||
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
|
||||
|
||||
if (vma->vm_flags & VM_WRITE)
|
||||
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
|
||||
}
|
||||
|
||||
static NV_STATUS ats_fault_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_service_type_t service_type,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
|
||||
NV_STATUS status;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
|
||||
|
||||
// Residency mask needs to be computed even if prefetching is disabled since
|
||||
// the residency information is also needed by access counters servicing in
|
||||
// uvm_ats_service_access_counters()
|
||||
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
|
||||
return status;
|
||||
|
||||
if (uvm_page_mask_empty(faulted_mask))
|
||||
return status;
|
||||
|
||||
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
|
||||
if (status != NV_OK)
|
||||
if (uvm_page_mask_empty(accessed_mask))
|
||||
return status;
|
||||
|
||||
// Prefetch the entire region if none of the pages are resident on any node
|
||||
// and if preferred_location is the faulting GPU.
|
||||
if (ats_context->prefetch_state.has_preferred_location &&
|
||||
ats_context->prefetch_state.first_touch &&
|
||||
uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->parent->id)) {
|
||||
(ats_context->prefetch_state.first_touch || (service_type == UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS)) &&
|
||||
uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->id)) {
|
||||
|
||||
uvm_page_mask_init_from_region(prefetch_mask, max_prefetch_region, NULL);
|
||||
}
|
||||
else {
|
||||
ats_compute_prefetch_mask(gpu_va_space, vma, ats_context, max_prefetch_region);
|
||||
}
|
||||
|
||||
if (service_type == UVM_ATS_SERVICE_TYPE_FAULTS) {
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
|
||||
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
|
||||
|
||||
if (vma->vm_flags & VM_WRITE)
|
||||
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
ats_expand_fault_region(gpu_va_space, vma, ats_context, max_prefetch_region, faulted_mask);
|
||||
else {
|
||||
uvm_page_mask_or(accessed_mask, accessed_mask, prefetch_mask);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
@ -446,6 +464,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
|
||||
uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
|
||||
uvm_fault_client_type_t client_type = ats_context->client_type;
|
||||
uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_FAULTS;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
|
||||
@ -454,6 +473,9 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
UVM_ASSERT(gpu_va_space->ats.enabled);
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
|
||||
|
||||
uvm_assert_mmap_lock_locked(vma->vm_mm);
|
||||
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
|
||||
|
||||
uvm_page_mask_zero(faults_serviced_mask);
|
||||
uvm_page_mask_zero(reads_serviced_mask);
|
||||
|
||||
@ -479,7 +501,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
ats_batch_select_residency(gpu_va_space, vma, ats_context);
|
||||
|
||||
ats_fault_prefetch(gpu_va_space, vma, base, ats_context);
|
||||
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
|
||||
|
||||
for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
@ -491,7 +513,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
|
||||
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
@ -526,7 +548,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
|
||||
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
@ -598,3 +620,53 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
uvm_va_block_region_t subregion;
|
||||
uvm_va_block_region_t region = uvm_va_block_region(0, PAGES_PER_UVM_VA_BLOCK);
|
||||
uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
|
||||
UVM_ASSERT(g_uvm_global.ats.enabled);
|
||||
UVM_ASSERT(gpu_va_space);
|
||||
UVM_ASSERT(gpu_va_space->ats.enabled);
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
|
||||
|
||||
uvm_assert_mmap_lock_locked(vma->vm_mm);
|
||||
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
|
||||
|
||||
ats_batch_select_residency(gpu_va_space, vma, ats_context);
|
||||
|
||||
// Ignoring the return value of ats_compute_prefetch is ok since prefetching
|
||||
// is just an optimization and servicing access counter migrations is still
|
||||
// worthwhile even without any prefetching added. So, let servicing continue
|
||||
// instead of returning early even if the prefetch computation fails.
|
||||
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
|
||||
|
||||
// Remove pages which are already resident at the intended destination from
|
||||
// the accessed_mask.
|
||||
uvm_page_mask_andnot(&ats_context->accessed_mask,
|
||||
&ats_context->accessed_mask,
|
||||
&ats_context->prefetch_state.residency_mask);
|
||||
|
||||
for_each_va_block_subregion_in_mask(subregion, &ats_context->accessed_mask, region) {
|
||||
NV_STATUS status;
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
|
||||
uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_COUNT;
|
||||
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
@ -42,11 +42,31 @@
|
||||
// corresponding bit in read_fault_mask. These returned masks are only valid if
|
||||
// the return status is NV_OK. Status other than NV_OK indicate system global
|
||||
// fault servicing failures.
|
||||
//
|
||||
// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
|
||||
// lock.
|
||||
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context);
|
||||
|
||||
// Service access counter notifications on ATS regions in the range (base, base
|
||||
// + UVM_VA_BLOCK_SIZE) for individual pages in the range requested by page_mask
|
||||
// set in ats_context->accessed_mask. base must be aligned to UVM_VA_BLOCK_SIZE.
|
||||
// The caller is responsible for ensuring that the addresses in the
|
||||
// accessed_mask is completely covered by the VMA. The caller is also
|
||||
// responsible for handling any errors returned by this function.
|
||||
//
|
||||
// Returns NV_OK if servicing was successful. Any other error indicates an error
|
||||
// while servicing the range.
|
||||
//
|
||||
// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
|
||||
// lock.
|
||||
NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context);
|
||||
|
||||
// Return whether there are any VA ranges (and thus GMMU mappings) within the
|
||||
// UVM_GMMU_ATS_GRANULARITY-aligned region containing address.
|
||||
bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next);
|
||||
|
@ -181,23 +181,28 @@ struct uvm_service_block_context_struct
|
||||
typedef struct
|
||||
{
|
||||
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
|
||||
// VMA. Used for batching ATS faults in a vma.
|
||||
// VMA. Used for batching ATS faults in a vma. This is unused for access
|
||||
// counter service requests.
|
||||
uvm_page_mask_t read_fault_mask;
|
||||
|
||||
// Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
|
||||
// SAM VMA. Used for batching ATS faults in a vma.
|
||||
// SAM VMA. Used for batching ATS faults in a vma. This is unused for access
|
||||
// counter service requests.
|
||||
uvm_page_mask_t write_fault_mask;
|
||||
|
||||
// Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA. Used to return ATS fault status.
|
||||
// of a SAM VMA. Used to return ATS fault status. This is unused for access
|
||||
// counter service requests.
|
||||
uvm_page_mask_t faults_serviced_mask;
|
||||
|
||||
// Mask of successfully serviced read faults on pages in write_fault_mask.
|
||||
// This is unused for access counter service requests.
|
||||
uvm_page_mask_t reads_serviced_mask;
|
||||
|
||||
// Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
|
||||
// SAM VMA. This is used as input to the prefetcher.
|
||||
uvm_page_mask_t faulted_mask;
|
||||
// Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
|
||||
// VMA. This is used as input for access counter service requests and output
|
||||
// of fault service requests.
|
||||
uvm_page_mask_t accessed_mask;
|
||||
|
||||
// Client type of the service requestor.
|
||||
uvm_fault_client_type_t client_type;
|
||||
@ -466,6 +471,9 @@ struct uvm_access_counter_service_batch_context_struct
|
||||
// Structure used to coalesce access counter servicing in a VA block
|
||||
uvm_service_block_context_t block_service_context;
|
||||
|
||||
// Structure used to service access counter migrations in an ATS block.
|
||||
uvm_ats_fault_context_t ats_context;
|
||||
|
||||
// Unique id (per-GPU) generated for tools events recording
|
||||
NvU32 batch_id;
|
||||
};
|
||||
|
@ -33,7 +33,8 @@
|
||||
#include "uvm_va_space_mm.h"
|
||||
#include "uvm_pmm_sysmem.h"
|
||||
#include "uvm_perf_module.h"
|
||||
#include "uvm_ats_ibm.h"
|
||||
#include "uvm_ats.h"
|
||||
#include "uvm_ats_faults.h"
|
||||
|
||||
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN 1
|
||||
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_DEFAULT 256
|
||||
@ -125,7 +126,7 @@ static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va
|
||||
|
||||
// Whether access counter migrations are enabled or not. The policy is as
|
||||
// follows:
|
||||
// - MIMC migrations are disabled by default on all systems except P9.
|
||||
// - MIMC migrations are disabled by default on all non-ATS systems.
|
||||
// - MOMC migrations are disabled by default on all systems
|
||||
// - Users can override this policy by specifying on/off
|
||||
static bool is_migration_enabled(uvm_access_counter_type_t type)
|
||||
@ -148,7 +149,7 @@ static bool is_migration_enabled(uvm_access_counter_type_t type)
|
||||
if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
|
||||
return false;
|
||||
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
if (UVM_ATS_SUPPORTED())
|
||||
return g_uvm_global.ats.supported;
|
||||
|
||||
return false;
|
||||
@ -1507,8 +1508,7 @@ static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
|
||||
accessed_pages));
|
||||
}
|
||||
|
||||
static void expand_notification_block(struct mm_struct *mm,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_page_mask_t *accessed_pages,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry)
|
||||
@ -1543,7 +1543,7 @@ static void expand_notification_block(struct mm_struct *mm,
|
||||
// which received the notification if the memory was already migrated before
|
||||
// acquiring the locks either during the servicing of previous notifications
|
||||
// or during faults or because of explicit migrations or if the VA range was
|
||||
// freed after receving the notification. Return NV_OK in such cases.
|
||||
// freed after receiving the notification. Return NV_OK in such cases.
|
||||
if (!UVM_ID_IS_VALID(resident_id) || uvm_id_equal(resident_id, gpu->id))
|
||||
return;
|
||||
|
||||
@ -1578,14 +1578,14 @@ static void expand_notification_block(struct mm_struct *mm,
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
{
|
||||
NvU32 i = index;
|
||||
NvU32 i;
|
||||
NvU32 flags = 0;
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS flags_status;
|
||||
@ -1595,7 +1595,7 @@ static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
|
||||
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
|
||||
|
||||
UVM_ASSERT(va_block);
|
||||
UVM_ASSERT(i < batch_context->virt.num_notifications);
|
||||
UVM_ASSERT(index < batch_context->virt.num_notifications);
|
||||
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
@ -1603,28 +1603,25 @@ static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
while (i < batch_context->virt.num_notifications) {
|
||||
for (i = index; i < batch_context->virt.num_notifications; i++) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
NvU64 address = current_entry->address.address;
|
||||
|
||||
if ((current_entry->virtual_info.va_space != va_space) || (address > va_block->end)) {
|
||||
*out_index = i;
|
||||
if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end))
|
||||
expand_notification_block(gpu_va_space, va_block, accessed_pages, current_entry);
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
expand_notification_block(mm, gpu_va_space, va_block, accessed_pages, current_entry);
|
||||
|
||||
i++;
|
||||
*out_index = i;
|
||||
}
|
||||
|
||||
// Atleast one notification should have been processed.
|
||||
UVM_ASSERT(index < *out_index);
|
||||
|
||||
status = service_notification_va_block_helper(mm, va_block, gpu->id, batch_context);
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
// Atleast one notification should have been processed.
|
||||
UVM_ASSERT(index < *out_index);
|
||||
|
||||
if (status == NV_OK)
|
||||
flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
@ -1636,62 +1633,154 @@ static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications_batch(struct mm_struct *mm,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
{
|
||||
|
||||
NvU32 i;
|
||||
NvU64 base;
|
||||
NvU64 end;
|
||||
NvU64 address;
|
||||
NvU32 flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS flags_status;
|
||||
struct vm_area_struct *vma = NULL;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
|
||||
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
|
||||
|
||||
UVM_ASSERT(index < batch_context->virt.num_notifications);
|
||||
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
address = notifications[index]->address.address;
|
||||
|
||||
vma = find_vma_intersection(mm, address, address + 1);
|
||||
if (!vma) {
|
||||
// Clear the notification entry to continue receiving access counter
|
||||
// notifications when a new VMA is allocated in this range.
|
||||
status = notify_tools_and_process_flags(gpu, ¬ifications[index], 1, flags);
|
||||
*out_index = index + 1;
|
||||
return status;
|
||||
}
|
||||
|
||||
base = UVM_VA_BLOCK_ALIGN_DOWN(address);
|
||||
end = min(base + UVM_VA_BLOCK_SIZE, (NvU64)vma->vm_end);
|
||||
|
||||
uvm_page_mask_zero(&ats_context->accessed_mask);
|
||||
|
||||
for (i = index; i < batch_context->virt.num_notifications; i++) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
address = current_entry->address.address;
|
||||
|
||||
if ((current_entry->virtual_info.va_space == va_space) && (address < end))
|
||||
uvm_page_mask_set(&ats_context->accessed_mask, (address - base) / PAGE_SIZE);
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
*out_index = i;
|
||||
|
||||
// Atleast one notification should have been processed.
|
||||
UVM_ASSERT(index < *out_index);
|
||||
|
||||
// TODO: Bug 2113632: [UVM] Don't clear access counters when the preferred
|
||||
// location is set
|
||||
// If no pages were actually migrated, don't clear the access counters.
|
||||
status = uvm_ats_service_access_counters(gpu_va_space, vma, base, ats_context);
|
||||
if (status != NV_OK)
|
||||
flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
flags_status = notify_tools_and_process_flags(gpu, ¬ifications[index], *out_index - index, flags);
|
||||
if ((status == NV_OK) && (flags_status != NV_OK))
|
||||
status = flags_status;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_va_block_t *va_block;
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[index];
|
||||
NvU64 address = current_entry->address.address;
|
||||
|
||||
UVM_ASSERT(va_space);
|
||||
|
||||
if (mm)
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
// Virtual address notifications are always 64K aligned
|
||||
UVM_ASSERT(IS_ALIGNED(address, UVM_PAGE_SIZE_64K));
|
||||
|
||||
// TODO: Bug 4309292: [UVM][HMM] Re-enable access counter HMM block
|
||||
// migrations for virtual notifications on configs with
|
||||
// 4KB page size
|
||||
status = uvm_va_block_find(va_space, address, &va_block);
|
||||
if ((status == NV_OK) && !uvm_va_block_is_hmm(va_block)) {
|
||||
va_range = uvm_va_range_find(va_space, address);
|
||||
if (va_range) {
|
||||
// Avoid clearing the entry by default.
|
||||
NvU32 flags = 0;
|
||||
uvm_va_block_t *va_block = NULL;
|
||||
|
||||
UVM_ASSERT(va_block);
|
||||
if (va_range->type == UVM_VA_RANGE_TYPE_MANAGED) {
|
||||
size_t index = uvm_va_range_block_index(va_range, address);
|
||||
|
||||
status = service_virt_notifications_in_block(mm, gpu_va_space, va_block, batch_context, index, out_index);
|
||||
va_block = uvm_va_range_block(va_range, index);
|
||||
|
||||
// If the va_range is a managed range, the notification belongs to a
|
||||
// recently freed va_range if va_block is NULL. If va_block is not
|
||||
// NULL, service_virt_notifications_in_block will process flags.
|
||||
// Clear the notification entry to continue receiving notifications
|
||||
// when a new va_range is allocated in that region.
|
||||
flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
}
|
||||
|
||||
if (va_block) {
|
||||
status = service_virt_notifications_in_block(gpu_va_space, mm, va_block, batch_context, index, out_index);
|
||||
}
|
||||
else {
|
||||
NvU32 flags = 0;
|
||||
status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
|
||||
*out_index = index + 1;
|
||||
}
|
||||
}
|
||||
else if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
|
||||
status = service_virt_notification_ats(gpu_va_space, mm, batch_context, index, out_index);
|
||||
}
|
||||
else {
|
||||
NvU32 flags;
|
||||
uvm_va_block_t *va_block = NULL;
|
||||
|
||||
status = uvm_hmm_va_block_find(va_space, address, &va_block);
|
||||
|
||||
// TODO: Bug 4309292: [UVM][HMM] Re-enable access counter HMM block
|
||||
// migrations for virtual notifications
|
||||
//
|
||||
// - If the va_block is HMM, don't clear the notification since HMM
|
||||
// migrations are currently disabled.
|
||||
//
|
||||
// - If the va_block isn't HMM, the notification belongs to a recently
|
||||
// freed va_range. Clear the notification entry to continue receiving
|
||||
// notifications when a new va_range is allocated in this region.
|
||||
flags = va_block ? 0 : UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
UVM_ASSERT((status == NV_ERR_OBJECT_NOT_FOUND) ||
|
||||
(status == NV_ERR_INVALID_ADDRESS) ||
|
||||
uvm_va_block_is_hmm(va_block));
|
||||
|
||||
// NV_ERR_OBJECT_NOT_FOUND is returned if the VA range is valid but no
|
||||
// VA block has been allocated yet. This can happen if there are stale
|
||||
// notifications in the batch. A new VA range may have been allocated in
|
||||
// that range. So, clear the notification entry to continue getting
|
||||
// notifications for the new VA range.
|
||||
if (status == NV_ERR_OBJECT_NOT_FOUND)
|
||||
flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
// Clobber status to continue processing the rest of the notifications
|
||||
// in the batch.
|
||||
status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
|
||||
|
||||
// NV_ERR_INVALID_ADDRESS is returned if the corresponding VA range
|
||||
// doesn't exist or it's not a managed range. Access counter migrations
|
||||
// are not currently supported on such ranges.
|
||||
//
|
||||
// TODO: Bug 1990466: [uvm] Use access counters to trigger migrations
|
||||
// When support for SAM migrations is addded, clear the notification
|
||||
// entry if the VA range doesn't exist in order to receive notifications
|
||||
// when a new VA range is allocated in that region.
|
||||
status = notify_tools_and_process_flags(gpu_va_space->gpu, &batch_context->virt.notifications[index], 1, flags);
|
||||
*out_index = index + 1;
|
||||
|
||||
status = NV_OK;
|
||||
}
|
||||
|
||||
return status;
|
||||
@ -1745,7 +1834,7 @@ static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
if (va_space && gpu_va_space && uvm_va_space_has_access_counter_migrations(va_space)) {
|
||||
status = service_virt_notifications_batch(mm, gpu_va_space, batch_context, i, &i);
|
||||
status = service_virt_notifications_batch(gpu_va_space, mm, batch_context, i, &i);
|
||||
}
|
||||
else {
|
||||
status = notify_tools_and_process_flags(gpu, &batch_context->virt.notifications[i], 1, 0);
|
||||
|
@ -1632,23 +1632,23 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
|
||||
const uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
const uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
|
||||
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
|
||||
ats_context->client_type = UVM_FAULT_CLIENT_TYPE_GPC;
|
||||
|
||||
uvm_page_mask_or(faulted_mask, write_fault_mask, read_fault_mask);
|
||||
uvm_page_mask_or(accessed_mask, write_fault_mask, read_fault_mask);
|
||||
|
||||
status = uvm_ats_service_faults(gpu_va_space, vma, base, &batch_context->ats_context);
|
||||
|
||||
// Remove prefetched pages from the serviced mask since fault servicing
|
||||
// failures belonging to prefetch pages need to be ignored.
|
||||
uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, faulted_mask);
|
||||
uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, accessed_mask);
|
||||
|
||||
UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, faulted_mask));
|
||||
UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, accessed_mask));
|
||||
|
||||
if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, faulted_mask)) {
|
||||
if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, accessed_mask)) {
|
||||
(*block_faults) += (fault_index_end - fault_index_start);
|
||||
return status;
|
||||
}
|
||||
|
@ -114,6 +114,16 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
|
||||
#define UVM_IS_CONFIG_HMM() 0
|
||||
#endif
|
||||
|
||||
// ATS prefetcher uses hmm_range_fault() to query residency information.
|
||||
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
|
||||
// of memory regions while hmm_range_fault() is being called, MMU interval
|
||||
// notifiers are needed.
|
||||
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
|
||||
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 1
|
||||
#else
|
||||
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 0
|
||||
#endif
|
||||
|
||||
// Various issues prevent us from using mmu_notifiers in older kernels. These
|
||||
// include:
|
||||
// - ->release being called under RCU instead of SRCU: fixed by commit
|
||||
|
@ -280,7 +280,9 @@ NV_STATUS uvm_va_space_mm_register(uvm_va_space_t *va_space)
|
||||
}
|
||||
}
|
||||
|
||||
if ((UVM_IS_CONFIG_HMM() || UVM_ATS_PREFETCH_SUPPORTED()) && uvm_va_space_pageable_mem_access_supported(va_space)) {
|
||||
if ((UVM_IS_CONFIG_HMM() || UVM_HMM_RANGE_FAULT_SUPPORTED()) &&
|
||||
uvm_va_space_pageable_mem_access_supported(va_space)) {
|
||||
|
||||
#if UVM_CAN_USE_MMU_NOTIFIERS()
|
||||
// Initialize MMU interval notifiers for this process. This allows
|
||||
// mmu_interval_notifier_insert() to be called without holding the
|
||||
|
@ -56,7 +56,11 @@
|
||||
#include "nv-pat.h"
|
||||
#include "nv-dmabuf.h"
|
||||
|
||||
#if !defined(CONFIG_RETPOLINE)
|
||||
/*
|
||||
* Commit aefb2f2e619b ("x86/bugs: Rename CONFIG_RETPOLINE =>
|
||||
* CONFIG_MITIGATION_RETPOLINE) in v6.8 renamed CONFIG_RETPOLINE.
|
||||
*/
|
||||
#if !defined(CONFIG_RETPOLINE) && !defined(CONFIG_MITIGATION_RETPOLINE)
|
||||
#include "nv-retpoline.h"
|
||||
#endif
|
||||
|
||||
|
@ -250,6 +250,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += num_registered_fb
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += pci_driver_has_driver_managed_dma
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += memory_failure_has_trapno_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += foll_longterm_present
|
||||
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -2130,6 +2130,8 @@ static int os_numa_verify_gpu_memory_zone(struct notifier_block *nb,
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
#define ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS 4
|
||||
|
||||
NV_STATUS NV_API_CALL os_numa_add_gpu_memory
|
||||
(
|
||||
void *handle,
|
||||
@ -2143,7 +2145,12 @@ NV_STATUS NV_API_CALL os_numa_add_gpu_memory
|
||||
nv_linux_state_t *nvl = pci_get_drvdata(handle);
|
||||
nv_state_t *nv = NV_STATE_PTR(nvl);
|
||||
NvU64 base = offset + nvl->coherent_link_info.gpu_mem_pa;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
NvU64 memblock_size;
|
||||
NvU64 size_remaining;
|
||||
NvU64 calculated_segment_size;
|
||||
NvU64 segment_size;
|
||||
NvU64 segment_base;
|
||||
os_numa_gpu_mem_hotplug_notifier_t notifier =
|
||||
{
|
||||
.start_pa = base,
|
||||
@ -2176,11 +2183,49 @@ NV_STATUS NV_API_CALL os_numa_add_gpu_memory
|
||||
goto failed;
|
||||
}
|
||||
|
||||
//
|
||||
// Adding all memory at once can take a long time. Split up memory into segments
|
||||
// with schedule() in between to prevent soft lockups. Memory segments for
|
||||
// add_memory_driver_managed() need to be aligned to memblock size.
|
||||
//
|
||||
// If there are any issues splitting into segments, then add all memory at once.
|
||||
//
|
||||
if (os_numa_memblock_size(&memblock_size) == NV_OK)
|
||||
{
|
||||
calculated_segment_size = NV_ALIGN_UP(size / ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS, memblock_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Don't split into segments, add all memory at once
|
||||
calculated_segment_size = size;
|
||||
}
|
||||
|
||||
segment_size = calculated_segment_size;
|
||||
segment_base = base;
|
||||
size_remaining = size;
|
||||
|
||||
while ((size_remaining > 0) &&
|
||||
(ret == 0))
|
||||
{
|
||||
if (segment_size > size_remaining)
|
||||
{
|
||||
segment_size = size_remaining;
|
||||
}
|
||||
|
||||
#ifdef NV_ADD_MEMORY_DRIVER_MANAGED_HAS_MHP_FLAGS_ARG
|
||||
ret = add_memory_driver_managed(node, base, size, "System RAM (NVIDIA)", MHP_NONE);
|
||||
ret = add_memory_driver_managed(node, segment_base, segment_size, "System RAM (NVIDIA)", MHP_NONE);
|
||||
#else
|
||||
ret = add_memory_driver_managed(node, base, size, "System RAM (NVIDIA)");
|
||||
ret = add_memory_driver_managed(node, segment_base, segment_size, "System RAM (NVIDIA)");
|
||||
#endif
|
||||
nv_printf(NV_DBG_SETUP, "NVRM: add_memory_driver_managed() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
|
||||
ret, segment_base, segment_size);
|
||||
|
||||
segment_base += segment_size;
|
||||
size_remaining -= segment_size;
|
||||
|
||||
// Yield CPU to prevent soft lockups
|
||||
schedule();
|
||||
}
|
||||
unregister_memory_notifier(¬ifier.memory_notifier);
|
||||
|
||||
if (ret == 0)
|
||||
@ -2194,14 +2239,33 @@ NV_STATUS NV_API_CALL os_numa_add_gpu_memory
|
||||
zone_end_pfn(zone) != end_pfn)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "NVRM: GPU memory zone movable auto onlining failed!\n");
|
||||
|
||||
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
|
||||
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
|
||||
if (offline_and_remove_memory(node, base, size) != 0)
|
||||
#else
|
||||
if (offline_and_remove_memory(base, size) != 0)
|
||||
#endif
|
||||
// Since zone movable auto onlining failed, need to remove the added memory.
|
||||
segment_size = calculated_segment_size;
|
||||
segment_base = base;
|
||||
size_remaining = size;
|
||||
|
||||
while (size_remaining > 0)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "NVRM: offline_and_remove_memory failed\n");
|
||||
if (segment_size > size_remaining)
|
||||
{
|
||||
segment_size = size_remaining;
|
||||
}
|
||||
|
||||
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
|
||||
ret = offline_and_remove_memory(node, segment_base, segment_size);
|
||||
#else
|
||||
ret = offline_and_remove_memory(segment_base, segment_size);
|
||||
#endif
|
||||
nv_printf(NV_DBG_SETUP, "NVRM: offline_and_remove_memory() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
|
||||
ret, segment_base, segment_size);
|
||||
|
||||
segment_base += segment_size;
|
||||
size_remaining -= segment_size;
|
||||
|
||||
// Yield CPU to prevent soft lockups
|
||||
schedule();
|
||||
}
|
||||
#endif
|
||||
goto failed;
|
||||
@ -2221,6 +2285,77 @@ failed:
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
|
||||
typedef struct {
|
||||
NvU64 base;
|
||||
NvU64 size;
|
||||
NvU32 nodeId;
|
||||
int ret;
|
||||
} remove_numa_memory_info_t;
|
||||
|
||||
static void offline_numa_memory_callback
|
||||
(
|
||||
void *args
|
||||
)
|
||||
{
|
||||
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
|
||||
remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
|
||||
int ret = 0;
|
||||
NvU64 memblock_size;
|
||||
NvU64 size_remaining;
|
||||
NvU64 calculated_segment_size;
|
||||
NvU64 segment_size;
|
||||
NvU64 segment_base;
|
||||
|
||||
//
|
||||
// Removing all memory at once can take a long time. Split up memory into segments
|
||||
// with schedule() in between to prevent soft lockups. Memory segments for
|
||||
// offline_and_remove_memory() need to be aligned to memblock size.
|
||||
//
|
||||
// If there are any issues splitting into segments, then remove all memory at once.
|
||||
//
|
||||
if (os_numa_memblock_size(&memblock_size) == NV_OK)
|
||||
{
|
||||
calculated_segment_size = NV_ALIGN_UP(pNumaInfo->size / ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS, memblock_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Don't split into segments, remove all memory at once
|
||||
calculated_segment_size = pNumaInfo->size;
|
||||
}
|
||||
|
||||
segment_size = calculated_segment_size;
|
||||
segment_base = pNumaInfo->base;
|
||||
size_remaining = pNumaInfo->size;
|
||||
|
||||
while (size_remaining > 0)
|
||||
{
|
||||
if (segment_size > size_remaining)
|
||||
{
|
||||
segment_size = size_remaining;
|
||||
}
|
||||
|
||||
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
|
||||
ret = offline_and_remove_memory(pNumaInfo->nodeId,
|
||||
segment_base,
|
||||
segment_size);
|
||||
#else
|
||||
ret = offline_and_remove_memory(segment_base,
|
||||
segment_size);
|
||||
#endif
|
||||
nv_printf(NV_DBG_SETUP, "NVRM: offline_and_remove_memory() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
|
||||
ret, segment_base, segment_size);
|
||||
pNumaInfo->ret |= ret;
|
||||
|
||||
segment_base += segment_size;
|
||||
size_remaining -= segment_size;
|
||||
|
||||
// Yield CPU to prevent soft lockups
|
||||
schedule();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
NV_STATUS NV_API_CALL os_numa_remove_gpu_memory
|
||||
(
|
||||
void *handle,
|
||||
|
@ -26,6 +26,12 @@
|
||||
#include "os-interface.h"
|
||||
#include "nv-linux.h"
|
||||
|
||||
#if defined(NVCPU_FAMILY_X86) && defined(NV_FOLL_LONGTERM_PRESENT) && \
|
||||
(defined(NV_PIN_USER_PAGES_HAS_ARGS_VMAS) || \
|
||||
defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS))
|
||||
#define NV_NUM_PIN_PAGES_PER_ITERATION 0x80000
|
||||
#endif
|
||||
|
||||
static inline int nv_follow_pfn(struct vm_area_struct *vma,
|
||||
unsigned long address,
|
||||
unsigned long *pfn)
|
||||
@ -163,9 +169,15 @@ NV_STATUS NV_API_CALL os_lock_user_pages(
|
||||
NV_STATUS rmStatus;
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct page **user_pages;
|
||||
NvU64 i, pinned;
|
||||
NvU64 i;
|
||||
NvU64 npages = page_count;
|
||||
NvU64 pinned = 0;
|
||||
unsigned int gup_flags = DRF_VAL(_LOCK_USER_PAGES, _FLAGS, _WRITE, flags) ? FOLL_WRITE : 0;
|
||||
int ret;
|
||||
long ret;
|
||||
|
||||
#if defined(NVCPU_FAMILY_X86) && defined(NV_FOLL_LONGTERM_PRESENT)
|
||||
gup_flags |= FOLL_LONGTERM;
|
||||
#endif
|
||||
|
||||
if (!NV_MAY_SLEEP())
|
||||
{
|
||||
@ -185,16 +197,51 @@ NV_STATUS NV_API_CALL os_lock_user_pages(
|
||||
|
||||
nv_mmap_read_lock(mm);
|
||||
ret = NV_PIN_USER_PAGES((unsigned long)address,
|
||||
page_count, gup_flags, user_pages, NULL);
|
||||
nv_mmap_read_unlock(mm);
|
||||
pinned = ret;
|
||||
|
||||
if (ret < 0)
|
||||
npages, gup_flags, user_pages, NULL);
|
||||
if (ret > 0)
|
||||
{
|
||||
os_free_mem(user_pages);
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
pinned = ret;
|
||||
}
|
||||
else if (pinned < page_count)
|
||||
#if defined(NVCPU_FAMILY_X86) && defined(NV_FOLL_LONGTERM_PRESENT) && \
|
||||
(defined(NV_PIN_USER_PAGES_HAS_ARGS_VMAS) || \
|
||||
defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS))
|
||||
//
|
||||
// NV_PIN_USER_PAGES() passes in NULL for the vmas parameter (if required)
|
||||
// in pin_user_pages() (or get_user_pages() if pin_user_pages() does not
|
||||
// exist). For kernels which do not contain the commit 52650c8b466b
|
||||
// (mm/gup: remove the vma allocation from gup_longterm_locked()), if
|
||||
// FOLL_LONGTERM is passed in, this results in the kernel trying to kcalloc
|
||||
// the vmas array, and since the limit for kcalloc is 4 MB, it results in
|
||||
// NV_PIN_USER_PAGES() failing with ENOMEM if more than
|
||||
// NV_NUM_PIN_PAGES_PER_ITERATION pages are requested on 64-bit systems.
|
||||
//
|
||||
// As a workaround, if we requested more than
|
||||
// NV_NUM_PIN_PAGES_PER_ITERATION pages and failed with ENOMEM, try again
|
||||
// with multiple calls of NV_NUM_PIN_PAGES_PER_ITERATION pages at a time.
|
||||
//
|
||||
else if ((ret == -ENOMEM) &&
|
||||
(page_count > NV_NUM_PIN_PAGES_PER_ITERATION))
|
||||
{
|
||||
for (pinned = 0; pinned < page_count; pinned += ret)
|
||||
{
|
||||
npages = page_count - pinned;
|
||||
if (npages > NV_NUM_PIN_PAGES_PER_ITERATION)
|
||||
{
|
||||
npages = NV_NUM_PIN_PAGES_PER_ITERATION;
|
||||
}
|
||||
|
||||
ret = NV_PIN_USER_PAGES(((unsigned long) address) + (pinned * PAGE_SIZE),
|
||||
npages, gup_flags, &user_pages[pinned], NULL);
|
||||
if (ret <= 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
nv_mmap_read_unlock(mm);
|
||||
|
||||
if (pinned < page_count)
|
||||
{
|
||||
for (i = 0; i < pinned; i++)
|
||||
NV_UNPIN_USER_PAGE(user_pages[i]);
|
||||
|
@ -36,25 +36,25 @@
|
||||
// and then checked back in. You cannot make changes to these sections without
|
||||
// corresponding changes to the buildmeister script
|
||||
#ifndef NV_BUILD_BRANCH
|
||||
#define NV_BUILD_BRANCH r538_27
|
||||
#define NV_BUILD_BRANCH r538_49
|
||||
#endif
|
||||
#ifndef NV_PUBLIC_BRANCH
|
||||
#define NV_PUBLIC_BRANCH r538_27
|
||||
#define NV_PUBLIC_BRANCH r538_49
|
||||
#endif
|
||||
|
||||
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
|
||||
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r538_27-451"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33992350)
|
||||
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r538_49-495"
|
||||
#define NV_BUILD_CHANGELIST_NUM (34058561)
|
||||
#define NV_BUILD_TYPE "Official"
|
||||
#define NV_BUILD_NAME "rel/gpu_drv/r535/r538_27-451"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33992350)
|
||||
#define NV_BUILD_NAME "rel/gpu_drv/r535/r538_49-495"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34058561)
|
||||
|
||||
#else /* Windows builds */
|
||||
#define NV_BUILD_BRANCH_VERSION "r538_27-6"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33992350)
|
||||
#define NV_BUILD_BRANCH_VERSION "r538_49-2"
|
||||
#define NV_BUILD_CHANGELIST_NUM (34058561)
|
||||
#define NV_BUILD_TYPE "Official"
|
||||
#define NV_BUILD_NAME "538.46"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33992350)
|
||||
#define NV_BUILD_NAME "538.52"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34058561)
|
||||
#define NV_BUILD_BRANCH_BASE_VERSION R535
|
||||
#endif
|
||||
// End buildmeister python edited section
|
||||
|
@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)

#define NV_VERSION_STRING "535.161.08"
#define NV_VERSION_STRING "535.171.04"

#else

@ -95,6 +95,7 @@ endif
|
||||
ifeq ($(TARGET_ARCH),aarch64)
|
||||
CFLAGS += -mgeneral-regs-only
|
||||
CFLAGS += -march=armv8-a
|
||||
CFLAGS += -ffixed-x18
|
||||
CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mno-outline-atomics)
|
||||
endif
|
||||
|
||||
|
@ -38,7 +38,7 @@ void nvUpdateHdmiInfoFrames(const NVDispEvoRec *pDispEvo,
|
||||
|
||||
void nvDpyUpdateHdmiPreModesetEvo(NVDpyEvoPtr pDpyEvo);
|
||||
void nvDpyUpdateHdmiVRRCaps(NVDpyEvoPtr pDpyEvo);
|
||||
void nvUpdateHdmiCaps(NVDpyEvoPtr pDpyEvo);
|
||||
void nvSendHdmiCapsToRm(NVDpyEvoPtr pDpyEvo);
|
||||
|
||||
void nvLogEdidCea861InfoEvo(NVDpyEvoPtr pDpyEvo,
|
||||
NVEvoInfoStringPtr pInfoString);
|
||||
|
@ -71,7 +71,7 @@ static NvBool ValidateEdid (const NVDpyEvoRec *pDpyEvo,
|
||||
const NvBool ignoreEdidChecksum);
|
||||
static void LogEdid (NVDpyEvoPtr pDpyEvo,
|
||||
NVEvoInfoStringPtr pInfoString);
|
||||
static void ClearEdid (NVDpyEvoPtr pDpyEvo);
|
||||
static void ClearEdid (NVDpyEvoPtr pDpyEvo, const NvBool bSendHdmiCapsToRm);
|
||||
static void ClearCustomEdid (const NVDpyEvoRec *pDpyEvo);
|
||||
static void WriteEdidToResman (const NVDpyEvoRec *pDpyEvo,
|
||||
const NVEdidRec *pEdid);
|
||||
@ -90,14 +90,14 @@ static void AssignDpyEvoName (NVDpyEvoPtr pDpyEvo);
|
||||
static NvBool IsConnectorTMDS (NVConnectorEvoPtr);
|
||||
|
||||
|
||||
static void DpyDisconnectEvo(NVDpyEvoPtr pDpyEvo)
|
||||
static void DpyDisconnectEvo(NVDpyEvoPtr pDpyEvo, const NvBool bSendHdmiCapsToRm)
|
||||
{
|
||||
NVDispEvoPtr pDispEvo = pDpyEvo->pDispEvo;
|
||||
|
||||
pDispEvo->connectedDisplays =
|
||||
nvDpyIdListMinusDpyId(pDispEvo->connectedDisplays, pDpyEvo->id);
|
||||
|
||||
ClearEdid(pDpyEvo);
|
||||
ClearEdid(pDpyEvo, bSendHdmiCapsToRm);
|
||||
}
|
||||
|
||||
static NvBool DpyConnectEvo(
|
||||
@ -351,6 +351,7 @@ static void ApplyNewEdid(
|
||||
NVDpyEvoPtr pDpyEvo,
|
||||
const NVEdidRec *pEdid,
|
||||
const NVParsedEdidEvoRec *pParsedEdid,
|
||||
const NvBool bSendHdmiCapsToRm,
|
||||
NVEvoInfoStringPtr pInfoString)
|
||||
{
|
||||
if (pDpyEvo->edid.buffer != NULL) {
|
||||
@ -392,7 +393,9 @@ static void ApplyNewEdid(
|
||||
DpyAssignColorSpaceCaps(pDpyEvo, pInfoString);
|
||||
}
|
||||
|
||||
nvUpdateHdmiCaps(pDpyEvo);
|
||||
if (bSendHdmiCapsToRm) {
|
||||
nvSendHdmiCapsToRm(pDpyEvo);
|
||||
}
|
||||
|
||||
nvDpyProbeMaxPixelClock(pDpyEvo);
|
||||
|
||||
@ -574,7 +577,8 @@ static void ReadAndApplyEdidEvo(
|
||||
* worrying that this request has different parameters (like CustomEdid
|
||||
* or mode validation overrides).
|
||||
*/
|
||||
ApplyNewEdid(pDpyEvo, &edid, pParsedEdid, &infoString);
|
||||
ApplyNewEdid(pDpyEvo, &edid, pParsedEdid, TRUE /* bSendHdmiCapsToRm */,
|
||||
&infoString);
|
||||
} else {
|
||||
nvFree(edid.buffer);
|
||||
}
|
||||
@ -1844,14 +1848,15 @@ static void LogEdid(NVDpyEvoPtr pDpyEvo, NVEvoInfoStringPtr pInfoString)
* structure.
*/

static void ClearEdid(NVDpyEvoPtr pDpyEvo)
static void ClearEdid(NVDpyEvoPtr pDpyEvo, const NvBool bSendHdmiCapsToRm)
{
NVEdidRec edid = { };
NVEvoInfoStringRec infoString;
nvInitInfoString(&infoString, NULL, 0);

if (EdidHasChanged(pDpyEvo, &edid, NULL)) {
ApplyNewEdid(pDpyEvo, &edid, NULL, &infoString);
ApplyNewEdid(pDpyEvo, &edid, NULL,
bSendHdmiCapsToRm, &infoString);
}
}

@ -2283,7 +2288,7 @@ NVDpyEvoPtr nvAllocDpyEvo(NVDispEvoPtr pDispEvo,

void nvFreeDpyEvo(NVDispEvoPtr pDispEvo, NVDpyEvoPtr pDpyEvo)
{
DpyDisconnectEvo(pDpyEvo);
DpyDisconnectEvo(pDpyEvo, FALSE /* bSendHdmiCapsToRm */);

// Let the DP library host implementation handle deleting a pDpy as if the
// library had notified it of a lost device.

@ -2826,7 +2831,7 @@ NvBool nvDpyGetDynamicData(
return FALSE;
}
} else {
DpyDisconnectEvo(pDpyEvo);
DpyDisconnectEvo(pDpyEvo, TRUE /* bSendHdmiCapsToRm */);
}

if (nvConnectorUsesDPLib(pConnectorEvo)) {

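The hunks above thread a new bSendHdmiCapsToRm flag through DpyDisconnectEvo(), ClearEdid(), and ApplyNewEdid(): local HDMI capability state is always recomputed via nvUpdateHdmiCaps(), while the RM notification (nvSendHdmiCapsToRm()) is issued only when the caller requests it; nvFreeDpyEvo() passes FALSE and nvDpyGetDynamicData() passes TRUE. A minimal standalone sketch of that pattern, using stand-in types rather than the driver's real structures:

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct { bool capsValid; } Dpy;   /* stand-in for NVDpyEvoRec */

    static void updateLocalHdmiCaps(Dpy *d) { d->capsValid = true; }
    static void sendHdmiCapsToRm(Dpy *d)    { printf("notify RM (caps=%d)\n", d->capsValid); }

    /* Mirrors the ApplyNewEdid() change: refresh local caps unconditionally,
     * notify RM only when requested. */
    static void applyNewEdid(Dpy *d, bool bSendHdmiCapsToRm)
    {
        updateLocalHdmiCaps(d);
        if (bSendHdmiCapsToRm) {
            sendHdmiCapsToRm(d);
        }
    }

    int main(void)
    {
        Dpy dpy = { false };
        applyNewEdid(&dpy, true);    /* hotplug path, as in nvDpyGetDynamicData() */
        applyNewEdid(&dpy, false);   /* teardown path, as in nvFreeDpyEvo() */
        return 0;
    }
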
@ -8602,6 +8602,7 @@ void nvEvoEnableMergeModePreModeset(NVDispEvoRec *pDispEvo,
pHC->serverLock = NV_EVO_RASTER_LOCK;
pHC->serverLockPin = NV_EVO_LOCK_PIN_INTERNAL(primaryHead);
pHC->setLockOffsetX = TRUE;
pHC->crashLockUnstallMode = FALSE;
} else {
pHC->clientLock = NV_EVO_RASTER_LOCK;
pHC->clientLockPin = NV_EVO_LOCK_PIN_INTERNAL(primaryHead);

@ -8612,11 +8613,10 @@ void nvEvoEnableMergeModePreModeset(NVDispEvoRec *pDispEvo,
} else {
pHC->clientLockoutWindow = 2;
}
pHC->crashLockUnstallMode =
(pTimings->vrr.type != NVKMS_DPY_VRR_TYPE_NONE);
}

if (pTimings->vrr.type != NVKMS_DPY_VRR_TYPE_NONE) {
pHC->crashLockUnstallMode = TRUE;
}
pHC->stereoLocked = FALSE;

EvoUpdateHeadParams(pDispEvo, head, pUpdateState);

@ -6639,11 +6639,18 @@ static void EvoSetStallLockC3(NVDispEvoPtr pDispEvo, const int head,
NVEvoChannelPtr pChannel = pDevEvo->core;
NVEvoSubDevPtr pEvoSubDev = &pDevEvo->gpus[pDispEvo->displayOwner];
NVEvoHeadControlPtr pHC = &pEvoSubDev->headControl[head];
NvU32 data = 0x0;

nvUpdateUpdateState(pDevEvo, updateState, pChannel);

if (pHC->crashLockUnstallMode) {
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _UNSTALL_MODE, _CRASH_LOCK);
} else {
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _UNSTALL_MODE, _LINE_LOCK);
}

if (enable) {
NvU32 data = DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _ENABLE, _TRUE) |
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _ENABLE, _TRUE) |
DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _MODE, _ONE_SHOT);

if (!pHC->useStallLockPin) {

@ -6657,20 +6664,12 @@ static void EvoSetStallLockC3(NVDispEvoPtr pDispEvo, const int head,
data |= DRF_NUM(C37D, _HEAD_SET_STALL_LOCK, _LOCK_PIN,
NVC37D_HEAD_SET_STALL_LOCK_LOCK_PIN_LOCK_PIN(pin));
}

if (pHC->crashLockUnstallMode) {
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _UNSTALL_MODE, _CRASH_LOCK);
} else {
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _UNSTALL_MODE, _LINE_LOCK);
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _ENABLE, _FALSE);
}

nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_STALL_LOCK(head), 1);
nvDmaSetEvoMethodData(pChannel, data);
} else {
nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_STALL_LOCK(head), 1);
nvDmaSetEvoMethodData(pChannel,
DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _ENABLE, _FALSE));
}
}

static NvBool GetChannelState(NVDevEvoPtr pDevEvo,

@ -203,7 +203,7 @@ NvBool nvDpyIsHdmiEvo(const NVDpyEvoRec *pDpyEvo)
/*!
* Updates the display's HDMI 2.0 capabilities to the RM.
*/
void nvUpdateHdmiCaps(NVDpyEvoPtr pDpyEvo)
void nvSendHdmiCapsToRm(NVDpyEvoPtr pDpyEvo)
{
NV0073_CTRL_SPECIFIC_SET_HDMI_SINK_CAPS_PARAMS params = { 0 };
NVParsedEdidEvoPtr pParsedEdid = &pDpyEvo->parsedEdid;

@ -221,7 +221,7 @@ void nvUpdateHdmiCaps(NVDpyEvoPtr pDpyEvo)
params.caps = 0;

/*
* nvUpdateHdmiCaps() gets called on dpy's connect/disconnect events
* nvSendHdmiCapsToRm() gets called on dpy's connect/disconnect events
* to set/clear capabilities, clear capabilities if parsed edid
* is not valid.
*/

@ -91,6 +91,7 @@ ifeq ($(TARGET_ARCH),aarch64)
CFLAGS += -mgeneral-regs-only
CFLAGS += -march=armv8-a
CFLAGS += -mstrict-align
CFLAGS += -ffixed-x18
CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mno-outline-atomics)
endif

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

@ -77,6 +77,9 @@
#define NV_CTRL_INTR_GPU_VECTOR_TO_SUBTREE(i) \
((NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(i)) / 2)

// First index of doorbell which is controlled by VF
#define NV_CTRL_INTR_GPU_DOORBELL_INDEX_VF_START 2048

// The max number of leaf registers we expect
#define NV_MAX_INTR_LEAVES 16

@ -1042,6 +1042,8 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x28A0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Laptop GPU" },
{ 0x28A1, 0x0000, 0x0000, "NVIDIA GeForce RTX 4050 Laptop GPU" },
{ 0x28B8, 0x0000, 0x0000, "NVIDIA RTX 2000 Ada Generation Laptop GPU" },
{ 0x28B9, 0x0000, 0x0000, "NVIDIA RTX 1000 Ada Generation Laptop GPU" },
{ 0x28BB, 0x0000, 0x0000, "NVIDIA RTX 500 Ada Generation Laptop GPU" },
{ 0x28E0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Laptop GPU" },
{ 0x28E1, 0x0000, 0x0000, "NVIDIA GeForce RTX 4050 Laptop GPU" },
{ 0x28F8, 0x0000, 0x0000, "NVIDIA RTX 2000 Ada Generation Embedded GPU" },

@ -7,7 +7,7 @@ extern "C" {
#endif

/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

@ -103,4 +103,24 @@ typedef struct MESSAGE_QUEUE_COLLECTION
#define GSP_MSG_QUEUE_HEADER_SIZE RM_PAGE_SIZE
#define GSP_MSG_QUEUE_HEADER_ALIGN 4 // 2 ^ 4 = 16

/*!
* Calculate 32-bit checksum
*
* This routine assumes that the data is padded out with zeros to the next
* 8-byte alignment, and it is OK to read past the end to the 8-byte alignment.
*/
static NV_INLINE NvU32 _checkSum32(void *pData, NvU32 uLen)
{
NvU64 *p = (NvU64 *)pData;
NvU64 *pEnd = (NvU64 *)((NvUPtr)pData + uLen);
NvU64 checkSum = 0;

NV_ASSERT_CHECKED(uLen > 0);

while (p < pEnd)
checkSum ^= *p++;

return NvU64_HI32(checkSum) ^ NvU64_LO32(checkSum);
}

#endif // _MESSAGE_QUEUE_PRIV_H_

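The `_checkSum32()` helper above XOR-folds the buffer as 64-bit words and then XORs the high and low 32-bit halves together. One property the send and receive paths later in this commit rely on: if the 32-bit checkSum field inside the buffer is zeroed before the fold is computed and the result is stored back into that field, a second pass over the same range folds to exactly zero, which is why the receiver only checks for a nonzero result. A small self-contained sketch of that fill-then-verify pattern (hypothetical element layout, standard C types instead of NvU32/NvU64):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Same folding scheme as _checkSum32(): XOR 64-bit words, then fold halves. */
    static uint32_t checksum32(const void *data, size_t len /* multiple of 8 */)
    {
        const uint64_t *p   = (const uint64_t *)data;
        const uint64_t *end = (const uint64_t *)((const uint8_t *)data + len);
        uint64_t acc = 0;

        while (p < end)
            acc ^= *p++;

        return (uint32_t)(acc >> 32) ^ (uint32_t)acc;
    }

    /* Hypothetical queue element: 8-byte aligned and padded to an 8-byte multiple. */
    struct element {
        uint32_t checkSum;
        uint32_t seqNum;
        uint64_t payload[6];
    };

    int main(void)
    {
        struct element e;
        memset(&e, 0, sizeof(e));
        e.seqNum = 7;

        e.checkSum = 0;                          /* the field is part of the checksummed range */
        e.checkSum = checksum32(&e, sizeof(e));  /* sender: store the fold */

        /* receiver: re-folding the same range yields 0 if nothing was corrupted */
        printf("verify = %u\n", checksum32(&e, sizeof(e)));
        return 0;
    }
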
@ -244,32 +244,50 @@ kfspPollForQueueEmpty_IMPL
KernelFsp *pKernelFsp
)
{
NV_STATUS status = NV_OK;
RMTIMEOUT timeout;

gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_OSTIMER | GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout,
GPU_TIMEOUT_FLAGS_OSTIMER |
GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);

while (!kfspIsQueueEmpty(pGpu, pKernelFsp))
{
//
// For now we assume that any response from FSP before RM message send is complete
// indicates an error and we should abort.
// For now we assume that any response from FSP before RM message
// send is complete indicates an error and we should abort.
//
// Ongoing discussion on usefulness of this check. Bug to be filed.
//
if (!kfspIsMsgQueueEmpty(pGpu, pKernelFsp))
{
kfspReadMessage(pGpu, pKernelFsp, NULL, 0);
NV_PRINTF(LEVEL_ERROR, "Received error message from FSP while waiting for CMDQ to be empty.\n");
return NV_ERR_GENERIC;
NV_PRINTF(LEVEL_ERROR,
"Received error message from FSP while waiting for CMDQ to be empty.\n");
status = NV_ERR_GENERIC;
break;
}

if (gpuCheckTimeout(pGpu, &timeout) == NV_ERR_TIMEOUT)
{
NV_PRINTF(LEVEL_ERROR, "Timed out waiting for FSP command queue to be empty.\n");
return NV_ERR_TIMEOUT;
}
osSpinLoop();

status = gpuCheckTimeout(pGpu, &timeout);
if (status != NV_OK)
{
if ((status == NV_ERR_TIMEOUT) &&
kfspIsQueueEmpty(pGpu, pKernelFsp))
{
status = NV_OK;
}
else
{
NV_PRINTF(LEVEL_ERROR,
"Timed out waiting for FSP command queue to be empty.\n");
}
break;
}
}

return NV_OK;
return status;
}

/*!

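One behavioral detail of the reworked loop above is worth calling out: when gpuCheckTimeout() reports NV_ERR_TIMEOUT, the queue is sampled one more time and a late success is still returned as NV_OK, so a completion that races with the timeout is not reported as a failure. A generic, self-contained sketch of that poll-with-final-recheck idiom (stand-in names; the real code uses RMTIMEOUT, gpuCheckTimeout(), and osSpinLoop()):

    #include <stdbool.h>
    #include <time.h>

    typedef bool (*done_fn)(void *ctx);

    /* Returns 0 on success, -1 on timeout.  The condition is re-checked once
     * after the deadline passes, mirroring the kfspPollForQueueEmpty() change. */
    static int poll_until(done_fn done, void *ctx, double timeout_sec)
    {
        struct timespec start, now;
        clock_gettime(CLOCK_MONOTONIC, &start);

        while (!done(ctx)) {
            clock_gettime(CLOCK_MONOTONIC, &now);
            double elapsed = (now.tv_sec - start.tv_sec) +
                             (now.tv_nsec - start.tv_nsec) / 1e9;
            if (elapsed >= timeout_sec) {
                return done(ctx) ? 0 : -1;   /* final re-check before declaring timeout */
            }
            /* a real implementation would spin or yield here (osSpinLoop()) */
        }
        return 0;
    }

    static bool queue_empty_stub(void *ctx) { return *(int *)ctx == 0; }

    int main(void)
    {
        int pending = 0;   /* pretend the queue is already empty */
        return poll_until(queue_empty_stub, &pending, 0.5);
    }
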
@ -476,24 +476,6 @@ void GspMsgQueuesCleanup(MESSAGE_QUEUE_COLLECTION **ppMQCollection)
*ppMQCollection = NULL;
}

/*!
* Calculate 32-bit checksum
*
* This routine assumes that the data is padded out with zeros to the next
* 8-byte alignment, and it is OK to read past the end to the 8-byte alignment.
*/
static NV_INLINE NvU32 _checkSum32(void *pData, NvU32 uLen)
{
NvU64 *p = (NvU64 *)pData;
NvU64 *pEnd = (NvU64 *)((NvUPtr)pData + uLen);
NvU64 checkSum = 0;

while (p < pEnd)
checkSum ^= *p++;

return NvU64_HI32(checkSum) ^ NvU64_LO32(checkSum);
}

/*!
* GspMsgQueueSendCommand
*

@ -533,7 +515,7 @@ NV_STATUS GspMsgQueueSendCommand(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)

pCQE->seqNum = pMQI->txSeqNum;
pCQE->elemCount = GSP_MSG_QUEUE_BYTES_TO_ELEMENTS(uElementSize);
pCQE->checkSum = 0;
pCQE->checkSum = 0; // The checkSum field is included in the checksum calculation, so zero it.

ConfidentialCompute *pCC = GPU_GET_CONF_COMPUTE(pGpu);
if (pCC != NULL && pCC->getProperty(pCC, PDB_PROP_CONFCOMPUTE_ENCRYPT_ENABLED))

@ -660,7 +642,8 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
NvU32 nRetries;
NvU32 nMaxRetries = 3;
NvU32 nElements = 1; // Assume record fits in one queue element for now.
NvU32 uElementSize = 0;
NvU32 uElementSize;
NvU32 checkSum;
NvU32 seqMismatchDiff = NV_U32_MAX;
NV_STATUS nvStatus = NV_OK;
ConfidentialCompute *pCC = NULL;

@ -713,15 +696,23 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
pCC = GPU_GET_CONF_COMPUTE(pGpu);
if (pCC != NULL && pCC->getProperty(pCC, PDB_PROP_CONFCOMPUTE_ENCRYPT_READY))
{
// In Confidential Compute scenario, checksum includes complete element range.
if (_checkSum32(pMQI->pCmdQueueElement, (nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN)) != 0)
{
NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n");
nvStatus = NV_ERR_INVALID_DATA;
continue;
}
//
// In the Confidential Compute scenario, the actual message length
// is inside the encrypted payload, and we can't access it before
// decryption, therefore the checksum encompasses the whole element
// range. This makes checksum verification significantly slower
// because messages are typically much smaller than element size.
//
checkSum = _checkSum32(pMQI->pCmdQueueElement,
(nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN));
} else
if (_checkSum32(pMQI->pCmdQueueElement, uElementSize) != 0)
{
checkSum = _checkSum32(pMQI->pCmdQueueElement,
(GSP_MSG_QUEUE_ELEMENT_HDR_SIZE +
pMQI->pCmdQueueElement->rpc.length));
}

if (checkSum != 0)
{
NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n");
nvStatus = NV_ERR_INVALID_DATA;

@ -1587,6 +1587,7 @@ memdescFree
}

if (pMemDesc->_addressSpace != ADDR_FBMEM &&
pMemDesc->_addressSpace != ADDR_EGM &&
pMemDesc->_addressSpace != ADDR_SYSMEM)
{
return;

@ -1991,6 +1992,7 @@ memdescUnmap
switch (pMemDesc->_addressSpace)
{
case ADDR_SYSMEM:
case ADDR_EGM:
{
osUnmapSystemMemory(pMemDesc, Kernel, ProcessId, Address, Priv);
break;

@ -733,8 +733,9 @@ memUnmap_IMPL
//
}
// System Memory case
else if ((pGpu == NULL) || ((memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM) &&
FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags)))
else if ((pGpu == NULL) || (((memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM)
|| (memdescGetAddressSpace(pMemDesc) == ADDR_EGM)
) && FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags)))
{
if (FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags))
{

@ -1,4 +1,4 @@
NVIDIA_VERSION = 535.161.08
NVIDIA_VERSION = 535.171.04

# This file.
VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))