Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git (synced 2025-02-27 09:54:14 +01:00)

Commit: c042c7903d ("535.171.04")
Parent: 044f70bbb8
@ -2,6 +2,8 @@
## Release 535 Entries

### [535.171.04] 2024-03-21

### [535.161.08] 2024-03-18

### [535.161.07] 2024-02-22
README.md (10 lines changed)
@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source

This is the source release of the NVIDIA Linux open GPU kernel modules,
version 535.161.08.
version 535.171.04.

## How to Build

@ -17,7 +17,7 @@ as root:

Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
535.161.08 driver release. This can be achieved by installing
535.171.04 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
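A typical invocation might look like the following sketch (the installer file
name is an assumption for illustration; the actual `.run` file name depends on
the architecture and the package downloaded):

```sh
# Install only the user-space driver components; skip the pre-built kernel
# modules so the ones built from this source release are used instead.
# (File name assumed for illustration.)
sh ./NVIDIA-Linux-x86_64-535.171.04.run --no-kernel-modules
```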
@ -180,7 +180,7 @@ software applications.

## Compatible GPUs

The open-gpu-kernel-modules can be used on any Turing or later GPU
(see the table below). However, in the 535.161.08 release,
(see the table below). However, in the 535.171.04 release,
GeForce and Workstation support is still considered alpha-quality.

To enable use of the open kernel modules on GeForce and Workstation GPUs,
@ -188,7 +188,7 @@ set the "NVreg_OpenRmEnableUnsupportedGpus" nvidia.ko kernel module
parameter to 1. For more details, see the NVIDIA GPU driver end user
README here:

https://us.download.nvidia.com/XFree86/Linux-x86_64/535.161.08/README/kernel_open.html
https://us.download.nvidia.com/XFree86/Linux-x86_64/535.171.04/README/kernel_open.html
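As a minimal sketch of how such a module parameter is commonly made persistent
(the modprobe.d file name is an assumption, not taken from this README):

```sh
# Ask nvidia.ko to allow the open kernel modules on GeForce/Workstation GPUs
# on every load (configuration file name assumed for illustration).
echo "options nvidia NVreg_OpenRmEnableUnsupportedGpus=1" | sudo tee /etc/modprobe.d/nvidia-open.conf
```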

In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
@ -892,6 +892,8 @@ Subsystem Device ID.

| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
| NVIDIA RTX 2000 Ada Generation Laptop GPU | 28B8 |
| NVIDIA RTX 1000 Ada Generation Laptop GPU | 28B9 |
| NVIDIA RTX 500 Ada Generation Laptop GPU | 28BB |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
| NVIDIA RTX 2000 Ada Generation Embedded GPU | 28F8 |
@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.161.08\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"535.171.04\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

@ -152,6 +152,8 @@ NV_CONFTEST_CMD := /bin/sh $(NV_CONFTEST_SCRIPT) \
NV_CFLAGS_FROM_CONFTEST := $(shell $(NV_CONFTEST_CMD) build_cflags)

NV_CONFTEST_CFLAGS = $(NV_CFLAGS_FROM_CONFTEST) $(EXTRA_CFLAGS) -fno-pie
NV_CONFTEST_CFLAGS += $(call cc-disable-warning,pointer-sign)
NV_CONFTEST_CFLAGS += $(call cc-option,-fshort-wchar,)

NV_CONFTEST_COMPILE_TEST_HEADERS := $(obj)/conftest/macros.h
NV_CONFTEST_COMPILE_TEST_HEADERS += $(obj)/conftest/functions.h
@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2001-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2001-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

@ -1982,31 +1982,6 @@ static inline NvBool nv_platform_use_auto_online(nv_linux_state_t *nvl)
return nvl->numa_info.use_auto_online;
}

typedef struct {
NvU64 base;
NvU64 size;
NvU32 nodeId;
int ret;
} remove_numa_memory_info_t;

static void offline_numa_memory_callback
(
void *args
)
{
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->nodeId,
pNumaInfo->base,
pNumaInfo->size);
#else
pNumaInfo->ret = offline_and_remove_memory(pNumaInfo->base,
pNumaInfo->size);
#endif
#endif
}

typedef enum
{
NV_NUMA_STATUS_DISABLED = 0,
@ -3032,6 +3032,22 @@ compile_test() {
;;

foll_longterm_present)
#
# Determine if FOLL_LONGTERM enum is present or not
#
# Added by commit 932f4a630a69 ("mm/gup: replace
# get_user_pages_longterm() with FOLL_LONGTERM") in
# v5.2
#
CODE="
#include <linux/mm.h>
int foll_longterm = FOLL_LONGTERM;
"

compile_check_conftest "$CODE" "NV_FOLL_LONGTERM_PRESENT" "" "types"
;;

vfio_pin_pages_has_vfio_device_arg)
#
# Determine if vfio_pin_pages() kABI accepts "struct vfio_device *"
@ -5081,11 +5097,15 @@ compile_test() {
# vmap ops and convert GEM backends") update
# drm_gem_object_funcs::vmap to take 'map' argument.
#
# Note that the 'map' argument type is changed from 'struct dma_buf_map'
# to 'struct iosys_map' by commit 7938f4218168 ("dma-buf-map: Rename
# to iosys-map) in v5.18.
#
CODE="
#include <drm/drm_gem.h>
int conftest_drm_gem_object_vmap_has_map_arg(
struct drm_gem_object *obj, struct dma_buf_map *map) {
return obj->funcs->vmap(obj, map);
struct drm_gem_object *obj) {
return obj->funcs->vmap(obj, NULL);
}"

compile_check_conftest "$CODE" "NV_DRM_GEM_OBJECT_VMAP_HAS_MAP_ARG" "" "types"
@ -54,7 +54,11 @@
#include "nv-time.h"
#include "nv-lock.h"

#if !defined(CONFIG_RETPOLINE)
/*
* Commit aefb2f2e619b ("x86/bugs: Rename CONFIG_RETPOLINE =>
* CONFIG_MITIGATION_RETPOLINE) in v6.8 renamed CONFIG_RETPOLINE.
*/
#if !defined(CONFIG_RETPOLINE) && !defined(CONFIG_MITIGATION_RETPOLINE)
#include "nv-retpoline.h"
#endif

@ -34,16 +34,6 @@

#define UVM_ATS_SUPPORTED() (UVM_ATS_IBM_SUPPORTED() || UVM_ATS_SVA_SUPPORTED())

// ATS prefetcher uses hmm_range_fault() to query residency information.
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
// of memory regions while hmm_range_fault() is being called, MMU interval
// notifiers are needed.
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
#define UVM_ATS_PREFETCH_SUPPORTED() 1
#else
#define UVM_ATS_PREFETCH_SUPPORTED() 0
#endif

typedef struct
{
// Mask of gpu_va_spaces which are registered for ATS access. The mask is
@ -30,23 +30,36 @@
|
||||
#include <linux/mempolicy.h>
|
||||
#include <linux/mmu_notifier.h>
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
|
||||
#include <linux/hmm.h>
|
||||
#endif
|
||||
|
||||
static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
typedef enum
|
||||
{
|
||||
UVM_ATS_SERVICE_TYPE_FAULTS = 0,
|
||||
UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS,
|
||||
UVM_ATS_SERVICE_TYPE_COUNT
|
||||
} uvm_ats_service_type_t;
|
||||
|
||||
static NV_STATUS service_ats_requests(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 start,
|
||||
size_t length,
|
||||
uvm_fault_access_type_t access_type,
|
||||
uvm_ats_service_type_t service_type,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
struct mm_struct *mm = va_space->va_space_mm.mm;
|
||||
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
|
||||
NV_STATUS status;
|
||||
NvU64 user_space_start;
|
||||
NvU64 user_space_length;
|
||||
bool write = (access_type >= UVM_FAULT_ACCESS_TYPE_WRITE);
|
||||
bool fault_service_type = (service_type == UVM_ATS_SERVICE_TYPE_FAULTS);
|
||||
uvm_populate_permissions_t populate_permissions = fault_service_type ?
|
||||
(write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY) :
|
||||
UVM_POPULATE_PERMISSIONS_INHERIT;
|
||||
|
||||
|
||||
// Request uvm_migrate_pageable() to touch the corresponding page after
|
||||
// population.
|
||||
@ -83,10 +96,10 @@ static NV_STATUS service_ats_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
.dst_node_id = ats_context->residency_node,
|
||||
.start = start,
|
||||
.length = length,
|
||||
.populate_permissions = write ? UVM_POPULATE_PERMISSIONS_WRITE : UVM_POPULATE_PERMISSIONS_ANY,
|
||||
.touch = true,
|
||||
.skip_mapped = true,
|
||||
.populate_on_cpu_alloc_failures = true,
|
||||
.populate_permissions = populate_permissions,
|
||||
.touch = fault_service_type,
|
||||
.skip_mapped = fault_service_type,
|
||||
.populate_on_cpu_alloc_failures = fault_service_type,
|
||||
.user_space_start = &user_space_start,
|
||||
.user_space_length = &user_space_length,
|
||||
};
|
||||
@ -233,7 +246,7 @@ static uvm_va_block_region_t uvm_ats_region_from_vma(struct vm_area_struct *vma,
|
||||
return uvm_ats_region_from_start_end(start, end);
|
||||
}
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
|
||||
|
||||
static bool uvm_ats_invalidate_notifier(struct mmu_interval_notifier *mni, unsigned long cur_seq)
|
||||
{
|
||||
@ -271,12 +284,12 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
|
||||
#if UVM_ATS_PREFETCH_SUPPORTED()
|
||||
#if UVM_HMM_RANGE_FAULT_SUPPORTED()
|
||||
int ret;
|
||||
NvU64 start;
|
||||
NvU64 end;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
struct hmm_range range;
|
||||
uvm_page_index_t page_index;
|
||||
uvm_va_block_region_t vma_region;
|
||||
@ -357,78 +370,83 @@ static NV_STATUS ats_compute_residency_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
mmu_interval_notifier_remove(range.notifier);
|
||||
|
||||
#else
|
||||
uvm_page_mask_zero(residency_mask);
|
||||
#endif
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static void ats_expand_fault_region(uvm_gpu_va_space_t *gpu_va_space,
|
||||
static void ats_compute_prefetch_mask(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
uvm_ats_fault_context_t *ats_context,
|
||||
uvm_va_block_region_t max_prefetch_region,
|
||||
uvm_page_mask_t *faulted_mask)
|
||||
uvm_va_block_region_t max_prefetch_region)
|
||||
{
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
uvm_page_mask_t *residency_mask = &ats_context->prefetch_state.residency_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_perf_prefetch_bitmap_tree_t *bitmap_tree = &ats_context->prefetch_state.bitmap_tree;
|
||||
|
||||
if (uvm_page_mask_empty(faulted_mask))
|
||||
if (uvm_page_mask_empty(accessed_mask))
|
||||
return;
|
||||
|
||||
uvm_perf_prefetch_compute_ats(gpu_va_space->va_space,
|
||||
faulted_mask,
|
||||
uvm_va_block_region_from_mask(NULL, faulted_mask),
|
||||
accessed_mask,
|
||||
uvm_va_block_region_from_mask(NULL, accessed_mask),
|
||||
max_prefetch_region,
|
||||
residency_mask,
|
||||
bitmap_tree,
|
||||
prefetch_mask);
|
||||
|
||||
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
|
||||
|
||||
if (vma->vm_flags & VM_WRITE)
|
||||
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
|
||||
}
|
||||
|
||||
static NV_STATUS ats_fault_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
static NV_STATUS ats_compute_prefetch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_service_type_t service_type,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
|
||||
NV_STATUS status;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
uvm_page_mask_t *prefetch_mask = &ats_context->prefetch_state.prefetch_pages_mask;
|
||||
uvm_va_block_region_t max_prefetch_region = uvm_ats_region_from_vma(vma, base);
|
||||
|
||||
// Residency mask needs to be computed even if prefetching is disabled since
|
||||
// the residency information is also needed by access counters servicing in
|
||||
// uvm_ats_service_access_counters()
|
||||
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
if (!uvm_perf_prefetch_enabled(gpu_va_space->va_space))
|
||||
return status;
|
||||
|
||||
if (uvm_page_mask_empty(faulted_mask))
|
||||
return status;
|
||||
|
||||
status = ats_compute_residency_mask(gpu_va_space, vma, base, ats_context);
|
||||
if (status != NV_OK)
|
||||
if (uvm_page_mask_empty(accessed_mask))
|
||||
return status;
|
||||
|
||||
// Prefetch the entire region if none of the pages are resident on any node
|
||||
// and if preferred_location is the faulting GPU.
|
||||
if (ats_context->prefetch_state.has_preferred_location &&
|
||||
ats_context->prefetch_state.first_touch &&
|
||||
uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->parent->id)) {
|
||||
(ats_context->prefetch_state.first_touch || (service_type == UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS)) &&
|
||||
uvm_id_equal(ats_context->residency_id, gpu_va_space->gpu->id)) {
|
||||
|
||||
uvm_page_mask_init_from_region(prefetch_mask, max_prefetch_region, NULL);
|
||||
}
|
||||
else {
|
||||
ats_compute_prefetch_mask(gpu_va_space, vma, ats_context, max_prefetch_region);
|
||||
}
|
||||
|
||||
if (service_type == UVM_ATS_SERVICE_TYPE_FAULTS) {
|
||||
uvm_page_mask_t *read_fault_mask = &ats_context->read_fault_mask;
|
||||
uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
|
||||
uvm_page_mask_or(read_fault_mask, read_fault_mask, prefetch_mask);
|
||||
|
||||
if (vma->vm_flags & VM_WRITE)
|
||||
uvm_page_mask_or(write_fault_mask, write_fault_mask, prefetch_mask);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
ats_expand_fault_region(gpu_va_space, vma, ats_context, max_prefetch_region, faulted_mask);
|
||||
else {
|
||||
uvm_page_mask_or(accessed_mask, accessed_mask, prefetch_mask);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
@ -446,6 +464,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
|
||||
uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
|
||||
uvm_fault_client_type_t client_type = ats_context->client_type;
|
||||
uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_FAULTS;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
|
||||
@ -454,6 +473,9 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
UVM_ASSERT(gpu_va_space->ats.enabled);
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
|
||||
|
||||
uvm_assert_mmap_lock_locked(vma->vm_mm);
|
||||
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
|
||||
|
||||
uvm_page_mask_zero(faults_serviced_mask);
|
||||
uvm_page_mask_zero(reads_serviced_mask);
|
||||
|
||||
@ -479,7 +501,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
ats_batch_select_residency(gpu_va_space, vma, ats_context);
|
||||
|
||||
ats_fault_prefetch(gpu_va_space, vma, base, ats_context);
|
||||
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
|
||||
|
||||
for_each_va_block_subregion_in_mask(subregion, write_fault_mask, region) {
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
@ -491,7 +513,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
|
||||
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
@ -526,7 +548,7 @@ NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_faults(gpu_va_space, vma, start, length, access_type, ats_context);
|
||||
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
@ -598,3 +620,53 @@ NV_STATUS uvm_ats_invalidate_tlbs(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context)
|
||||
{
|
||||
uvm_va_block_region_t subregion;
|
||||
uvm_va_block_region_t region = uvm_va_block_region(0, PAGES_PER_UVM_VA_BLOCK);
|
||||
uvm_ats_service_type_t service_type = UVM_ATS_SERVICE_TYPE_ACCESS_COUNTERS;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
UVM_ASSERT(IS_ALIGNED(base, UVM_VA_BLOCK_SIZE));
|
||||
UVM_ASSERT(g_uvm_global.ats.enabled);
|
||||
UVM_ASSERT(gpu_va_space);
|
||||
UVM_ASSERT(gpu_va_space->ats.enabled);
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
|
||||
|
||||
uvm_assert_mmap_lock_locked(vma->vm_mm);
|
||||
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
|
||||
|
||||
ats_batch_select_residency(gpu_va_space, vma, ats_context);
|
||||
|
||||
// Ignoring the return value of ats_compute_prefetch is ok since prefetching
|
||||
// is just an optimization and servicing access counter migrations is still
|
||||
// worthwhile even without any prefetching added. So, let servicing continue
|
||||
// instead of returning early even if the prefetch computation fails.
|
||||
ats_compute_prefetch(gpu_va_space, vma, base, service_type, ats_context);
|
||||
|
||||
// Remove pages which are already resident at the intended destination from
|
||||
// the accessed_mask.
|
||||
uvm_page_mask_andnot(&ats_context->accessed_mask,
|
||||
&ats_context->accessed_mask,
|
||||
&ats_context->prefetch_state.residency_mask);
|
||||
|
||||
for_each_va_block_subregion_in_mask(subregion, &ats_context->accessed_mask, region) {
|
||||
NV_STATUS status;
|
||||
NvU64 start = base + (subregion.first * PAGE_SIZE);
|
||||
size_t length = uvm_va_block_region_num_pages(subregion) * PAGE_SIZE;
|
||||
uvm_fault_access_type_t access_type = UVM_FAULT_ACCESS_TYPE_COUNT;
|
||||
|
||||
UVM_ASSERT(start >= vma->vm_start);
|
||||
UVM_ASSERT((start + length) <= vma->vm_end);
|
||||
|
||||
status = service_ats_requests(gpu_va_space, vma, start, length, access_type, service_type, ats_context);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
@ -42,11 +42,31 @@
|
||||
// corresponding bit in read_fault_mask. These returned masks are only valid if
|
||||
// the return status is NV_OK. Status other than NV_OK indicate system global
|
||||
// fault servicing failures.
|
||||
//
|
||||
// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
|
||||
// lock.
|
||||
NV_STATUS uvm_ats_service_faults(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context);
|
||||
|
||||
// Service access counter notifications on ATS regions in the range (base, base
|
||||
// + UVM_VA_BLOCK_SIZE) for individual pages in the range requested by page_mask
|
||||
// set in ats_context->accessed_mask. base must be aligned to UVM_VA_BLOCK_SIZE.
|
||||
// The caller is responsible for ensuring that the addresses in the
|
||||
// accessed_mask is completely covered by the VMA. The caller is also
|
||||
// responsible for handling any errors returned by this function.
|
||||
//
|
||||
// Returns NV_OK if servicing was successful. Any other error indicates an error
|
||||
// while servicing the range.
|
||||
//
|
||||
// LOCKING: The caller must retain and hold the mmap_lock and hold the va_space
|
||||
// lock.
|
||||
NV_STATUS uvm_ats_service_access_counters(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma,
|
||||
NvU64 base,
|
||||
uvm_ats_fault_context_t *ats_context);
|
||||
|
||||
// Return whether there are any VA ranges (and thus GMMU mappings) within the
|
||||
// UVM_GMMU_ATS_GRANULARITY-aligned region containing address.
|
||||
bool uvm_ats_check_in_gmmu_region(uvm_va_space_t *va_space, NvU64 address, uvm_va_range_t *next);
|
||||
|
@ -181,23 +181,28 @@ struct uvm_service_block_context_struct
|
||||
typedef struct
|
||||
{
|
||||
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
|
||||
// VMA. Used for batching ATS faults in a vma.
|
||||
// VMA. Used for batching ATS faults in a vma. This is unused for access
|
||||
// counter service requests.
|
||||
uvm_page_mask_t read_fault_mask;
|
||||
|
||||
// Mask of write faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
|
||||
// SAM VMA. Used for batching ATS faults in a vma.
|
||||
// SAM VMA. Used for batching ATS faults in a vma. This is unused for access
|
||||
// counter service requests.
|
||||
uvm_page_mask_t write_fault_mask;
|
||||
|
||||
// Mask of successfully serviced pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA. Used to return ATS fault status.
|
||||
// of a SAM VMA. Used to return ATS fault status. This is unused for access
|
||||
// counter service requests.
|
||||
uvm_page_mask_t faults_serviced_mask;
|
||||
|
||||
// Mask of successfully serviced read faults on pages in write_fault_mask.
|
||||
// This is unused for access counter service requests.
|
||||
uvm_page_mask_t reads_serviced_mask;
|
||||
|
||||
// Mask of all faulted pages in a UVM_VA_BLOCK_SIZE aligned region of a
|
||||
// SAM VMA. This is used as input to the prefetcher.
|
||||
uvm_page_mask_t faulted_mask;
|
||||
// Mask of all accessed pages in a UVM_VA_BLOCK_SIZE aligned region of a SAM
|
||||
// VMA. This is used as input for access counter service requests and output
|
||||
// of fault service requests.
|
||||
uvm_page_mask_t accessed_mask;
|
||||
|
||||
// Client type of the service requestor.
|
||||
uvm_fault_client_type_t client_type;
|
||||
@ -466,6 +471,9 @@ struct uvm_access_counter_service_batch_context_struct
|
||||
// Structure used to coalesce access counter servicing in a VA block
|
||||
uvm_service_block_context_t block_service_context;
|
||||
|
||||
// Structure used to service access counter migrations in an ATS block.
|
||||
uvm_ats_fault_context_t ats_context;
|
||||
|
||||
// Unique id (per-GPU) generated for tools events recording
|
||||
NvU32 batch_id;
|
||||
};
|
||||
|
@ -33,7 +33,8 @@
|
||||
#include "uvm_va_space_mm.h"
|
||||
#include "uvm_pmm_sysmem.h"
|
||||
#include "uvm_perf_module.h"
|
||||
#include "uvm_ats_ibm.h"
|
||||
#include "uvm_ats.h"
|
||||
#include "uvm_ats_faults.h"
|
||||
|
||||
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN 1
|
||||
#define UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_DEFAULT 256
|
||||
@ -125,7 +126,7 @@ static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va
|
||||
|
||||
// Whether access counter migrations are enabled or not. The policy is as
|
||||
// follows:
|
||||
// - MIMC migrations are disabled by default on all systems except P9.
|
||||
// - MIMC migrations are disabled by default on all non-ATS systems.
|
||||
// - MOMC migrations are disabled by default on all systems
|
||||
// - Users can override this policy by specifying on/off
|
||||
static bool is_migration_enabled(uvm_access_counter_type_t type)
|
||||
@ -148,7 +149,7 @@ static bool is_migration_enabled(uvm_access_counter_type_t type)
|
||||
if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
|
||||
return false;
|
||||
|
||||
if (UVM_ATS_IBM_SUPPORTED())
|
||||
if (UVM_ATS_SUPPORTED())
|
||||
return g_uvm_global.ats.supported;
|
||||
|
||||
return false;
|
||||
@ -1507,8 +1508,7 @@ static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
|
||||
accessed_pages));
|
||||
}
|
||||
|
||||
static void expand_notification_block(struct mm_struct *mm,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_page_mask_t *accessed_pages,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry)
|
||||
@ -1543,7 +1543,7 @@ static void expand_notification_block(struct mm_struct *mm,
|
||||
// which received the notification if the memory was already migrated before
|
||||
// acquiring the locks either during the servicing of previous notifications
|
||||
// or during faults or because of explicit migrations or if the VA range was
|
||||
// freed after receving the notification. Return NV_OK in such cases.
|
||||
// freed after receiving the notification. Return NV_OK in such cases.
|
||||
if (!UVM_ID_IS_VALID(resident_id) || uvm_id_equal(resident_id, gpu->id))
|
||||
return;
|
||||
|
||||
@ -1578,14 +1578,14 @@ static void expand_notification_block(struct mm_struct *mm,
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
{
|
||||
NvU32 i = index;
|
||||
NvU32 i;
|
||||
NvU32 flags = 0;
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS flags_status;
|
||||
@ -1595,7 +1595,7 @@ static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
|
||||
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
|
||||
|
||||
UVM_ASSERT(va_block);
|
||||
UVM_ASSERT(i < batch_context->virt.num_notifications);
|
||||
UVM_ASSERT(index < batch_context->virt.num_notifications);
|
||||
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
@ -1603,28 +1603,25 @@ static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
while (i < batch_context->virt.num_notifications) {
|
||||
for (i = index; i < batch_context->virt.num_notifications; i++) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
NvU64 address = current_entry->address.address;
|
||||
|
||||
if ((current_entry->virtual_info.va_space != va_space) || (address > va_block->end)) {
|
||||
*out_index = i;
|
||||
if ((current_entry->virtual_info.va_space == va_space) && (address <= va_block->end))
|
||||
expand_notification_block(gpu_va_space, va_block, accessed_pages, current_entry);
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
expand_notification_block(mm, gpu_va_space, va_block, accessed_pages, current_entry);
|
||||
|
||||
i++;
|
||||
*out_index = i;
|
||||
}
|
||||
|
||||
// Atleast one notification should have been processed.
|
||||
UVM_ASSERT(index < *out_index);
|
||||
|
||||
status = service_notification_va_block_helper(mm, va_block, gpu->id, batch_context);
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
// Atleast one notification should have been processed.
|
||||
UVM_ASSERT(index < *out_index);
|
||||
|
||||
if (status == NV_OK)
|
||||
flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
@ -1636,62 +1633,154 @@ static NV_STATUS service_virt_notifications_in_block(struct mm_struct *mm,
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications_batch(struct mm_struct *mm,
|
||||
uvm_gpu_va_space_t *gpu_va_space,
|
||||
static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
{
|
||||
|
||||
NvU32 i;
|
||||
NvU64 base;
|
||||
NvU64 end;
|
||||
NvU64 address;
|
||||
NvU32 flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
NV_STATUS status = NV_OK;
|
||||
NV_STATUS flags_status;
|
||||
struct vm_area_struct *vma = NULL;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
|
||||
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
|
||||
|
||||
UVM_ASSERT(index < batch_context->virt.num_notifications);
|
||||
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
address = notifications[index]->address.address;
|
||||
|
||||
vma = find_vma_intersection(mm, address, address + 1);
|
||||
if (!vma) {
|
||||
// Clear the notification entry to continue receiving access counter
|
||||
// notifications when a new VMA is allocated in this range.
|
||||
status = notify_tools_and_process_flags(gpu, ¬ifications[index], 1, flags);
|
||||
*out_index = index + 1;
|
||||
return status;
|
||||
}
|
||||
|
||||
base = UVM_VA_BLOCK_ALIGN_DOWN(address);
|
||||
end = min(base + UVM_VA_BLOCK_SIZE, (NvU64)vma->vm_end);
|
||||
|
||||
uvm_page_mask_zero(&ats_context->accessed_mask);
|
||||
|
||||
for (i = index; i < batch_context->virt.num_notifications; i++) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
address = current_entry->address.address;
|
||||
|
||||
if ((current_entry->virtual_info.va_space == va_space) && (address < end))
|
||||
uvm_page_mask_set(&ats_context->accessed_mask, (address - base) / PAGE_SIZE);
|
||||
else
|
||||
break;
|
||||
}
|
||||
|
||||
*out_index = i;
|
||||
|
||||
// Atleast one notification should have been processed.
|
||||
UVM_ASSERT(index < *out_index);
|
||||
|
||||
// TODO: Bug 2113632: [UVM] Don't clear access counters when the preferred
|
||||
// location is set
|
||||
// If no pages were actually migrated, don't clear the access counters.
|
||||
status = uvm_ats_service_access_counters(gpu_va_space, vma, base, ats_context);
|
||||
if (status != NV_OK)
|
||||
flags &= ~UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
flags_status = notify_tools_and_process_flags(gpu, ¬ifications[index], *out_index - index, flags);
|
||||
if ((status == NV_OK) && (flags_status != NV_OK))
|
||||
status = flags_status;
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_va_block_t *va_block;
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[index];
|
||||
NvU64 address = current_entry->address.address;
|
||||
|
||||
UVM_ASSERT(va_space);
|
||||
|
||||
if (mm)
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
// Virtual address notifications are always 64K aligned
|
||||
UVM_ASSERT(IS_ALIGNED(address, UVM_PAGE_SIZE_64K));
|
||||
|
||||
// TODO: Bug 4309292: [UVM][HMM] Re-enable access counter HMM block
|
||||
// migrations for virtual notifications on configs with
|
||||
// 4KB page size
|
||||
status = uvm_va_block_find(va_space, address, &va_block);
|
||||
if ((status == NV_OK) && !uvm_va_block_is_hmm(va_block)) {
|
||||
va_range = uvm_va_range_find(va_space, address);
|
||||
if (va_range) {
|
||||
// Avoid clearing the entry by default.
|
||||
NvU32 flags = 0;
|
||||
uvm_va_block_t *va_block = NULL;
|
||||
|
||||
UVM_ASSERT(va_block);
|
||||
if (va_range->type == UVM_VA_RANGE_TYPE_MANAGED) {
|
||||
size_t index = uvm_va_range_block_index(va_range, address);
|
||||
|
||||
status = service_virt_notifications_in_block(mm, gpu_va_space, va_block, batch_context, index, out_index);
|
||||
va_block = uvm_va_range_block(va_range, index);
|
||||
|
||||
// If the va_range is a managed range, the notification belongs to a
|
||||
// recently freed va_range if va_block is NULL. If va_block is not
|
||||
// NULL, service_virt_notifications_in_block will process flags.
|
||||
// Clear the notification entry to continue receiving notifications
|
||||
// when a new va_range is allocated in that region.
|
||||
flags = UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
}
|
||||
|
||||
if (va_block) {
|
||||
status = service_virt_notifications_in_block(gpu_va_space, mm, va_block, batch_context, index, out_index);
|
||||
}
|
||||
else {
|
||||
NvU32 flags = 0;
|
||||
status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
|
||||
*out_index = index + 1;
|
||||
}
|
||||
}
|
||||
else if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
|
||||
status = service_virt_notification_ats(gpu_va_space, mm, batch_context, index, out_index);
|
||||
}
|
||||
else {
|
||||
NvU32 flags;
|
||||
uvm_va_block_t *va_block = NULL;
|
||||
|
||||
status = uvm_hmm_va_block_find(va_space, address, &va_block);
|
||||
|
||||
// TODO: Bug 4309292: [UVM][HMM] Re-enable access counter HMM block
|
||||
// migrations for virtual notifications
|
||||
//
|
||||
// - If the va_block is HMM, don't clear the notification since HMM
|
||||
// migrations are currently disabled.
|
||||
//
|
||||
// - If the va_block isn't HMM, the notification belongs to a recently
|
||||
// freed va_range. Clear the notification entry to continue receiving
|
||||
// notifications when a new va_range is allocated in this region.
|
||||
flags = va_block ? 0 : UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
|
||||
UVM_ASSERT((status == NV_ERR_OBJECT_NOT_FOUND) ||
|
||||
(status == NV_ERR_INVALID_ADDRESS) ||
|
||||
uvm_va_block_is_hmm(va_block));
|
||||
|
||||
// NV_ERR_OBJECT_NOT_FOUND is returned if the VA range is valid but no
|
||||
// VA block has been allocated yet. This can happen if there are stale
|
||||
// notifications in the batch. A new VA range may have been allocated in
|
||||
// that range. So, clear the notification entry to continue getting
|
||||
// notifications for the new VA range.
|
||||
if (status == NV_ERR_OBJECT_NOT_FOUND)
|
||||
flags |= UVM_ACCESS_COUNTER_ACTION_CLEAR;
|
||||
// Clobber status to continue processing the rest of the notifications
|
||||
// in the batch.
|
||||
status = notify_tools_and_process_flags(gpu_va_space->gpu, batch_context->virt.notifications, 1, flags);
|
||||
|
||||
// NV_ERR_INVALID_ADDRESS is returned if the corresponding VA range
|
||||
// doesn't exist or it's not a managed range. Access counter migrations
|
||||
// are not currently supported on such ranges.
|
||||
//
|
||||
// TODO: Bug 1990466: [uvm] Use access counters to trigger migrations
|
||||
// When support for SAM migrations is addded, clear the notification
|
||||
// entry if the VA range doesn't exist in order to receive notifications
|
||||
// when a new VA range is allocated in that region.
|
||||
status = notify_tools_and_process_flags(gpu_va_space->gpu, &batch_context->virt.notifications[index], 1, flags);
|
||||
*out_index = index + 1;
|
||||
|
||||
status = NV_OK;
|
||||
}
|
||||
|
||||
return status;
|
||||
@ -1745,7 +1834,7 @@ static NV_STATUS service_virt_notifications(uvm_gpu_t *gpu,
|
||||
}
|
||||
|
||||
if (va_space && gpu_va_space && uvm_va_space_has_access_counter_migrations(va_space)) {
|
||||
status = service_virt_notifications_batch(mm, gpu_va_space, batch_context, i, &i);
|
||||
status = service_virt_notifications_batch(gpu_va_space, mm, batch_context, i, &i);
|
||||
}
|
||||
else {
|
||||
status = notify_tools_and_process_flags(gpu, &batch_context->virt.notifications[i], 1, 0);
|
||||
|
@ -1632,23 +1632,23 @@ static NV_STATUS service_fault_batch_ats_sub_vma(uvm_gpu_va_space_t *gpu_va_spac
|
||||
const uvm_page_mask_t *write_fault_mask = &ats_context->write_fault_mask;
|
||||
const uvm_page_mask_t *reads_serviced_mask = &ats_context->reads_serviced_mask;
|
||||
uvm_page_mask_t *faults_serviced_mask = &ats_context->faults_serviced_mask;
|
||||
uvm_page_mask_t *faulted_mask = &ats_context->faulted_mask;
|
||||
uvm_page_mask_t *accessed_mask = &ats_context->accessed_mask;
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
|
||||
ats_context->client_type = UVM_FAULT_CLIENT_TYPE_GPC;
|
||||
|
||||
uvm_page_mask_or(faulted_mask, write_fault_mask, read_fault_mask);
|
||||
uvm_page_mask_or(accessed_mask, write_fault_mask, read_fault_mask);
|
||||
|
||||
status = uvm_ats_service_faults(gpu_va_space, vma, base, &batch_context->ats_context);
|
||||
|
||||
// Remove prefetched pages from the serviced mask since fault servicing
|
||||
// failures belonging to prefetch pages need to be ignored.
|
||||
uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, faulted_mask);
|
||||
uvm_page_mask_and(faults_serviced_mask, faults_serviced_mask, accessed_mask);
|
||||
|
||||
UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, faulted_mask));
|
||||
UVM_ASSERT(uvm_page_mask_subset(faults_serviced_mask, accessed_mask));
|
||||
|
||||
if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, faulted_mask)) {
|
||||
if ((status != NV_OK) || uvm_page_mask_equal(faults_serviced_mask, accessed_mask)) {
|
||||
(*block_faults) += (fault_index_end - fault_index_start);
|
||||
return status;
|
||||
}
|
||||
|
@ -114,6 +114,16 @@ static inline const struct cpumask *uvm_cpumask_of_node(int node)
|
||||
#define UVM_IS_CONFIG_HMM() 0
|
||||
#endif
|
||||
|
||||
// ATS prefetcher uses hmm_range_fault() to query residency information.
|
||||
// hmm_range_fault() needs CONFIG_HMM_MIRROR. To detect racing CPU invalidates
|
||||
// of memory regions while hmm_range_fault() is being called, MMU interval
|
||||
// notifiers are needed.
|
||||
#if defined(CONFIG_HMM_MIRROR) && defined(NV_MMU_INTERVAL_NOTIFIER)
|
||||
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 1
|
||||
#else
|
||||
#define UVM_HMM_RANGE_FAULT_SUPPORTED() 0
|
||||
#endif
|
||||
|
||||
// Various issues prevent us from using mmu_notifiers in older kernels. These
|
||||
// include:
|
||||
// - ->release being called under RCU instead of SRCU: fixed by commit
|
||||
|
@ -280,7 +280,9 @@ NV_STATUS uvm_va_space_mm_register(uvm_va_space_t *va_space)
|
||||
}
|
||||
}
|
||||
|
||||
if ((UVM_IS_CONFIG_HMM() || UVM_ATS_PREFETCH_SUPPORTED()) && uvm_va_space_pageable_mem_access_supported(va_space)) {
|
||||
if ((UVM_IS_CONFIG_HMM() || UVM_HMM_RANGE_FAULT_SUPPORTED()) &&
|
||||
uvm_va_space_pageable_mem_access_supported(va_space)) {
|
||||
|
||||
#if UVM_CAN_USE_MMU_NOTIFIERS()
|
||||
// Initialize MMU interval notifiers for this process. This allows
|
||||
// mmu_interval_notifier_insert() to be called without holding the
|
||||
|
@ -56,7 +56,11 @@
|
||||
#include "nv-pat.h"
|
||||
#include "nv-dmabuf.h"
|
||||
|
||||
#if !defined(CONFIG_RETPOLINE)
|
||||
/*
|
||||
* Commit aefb2f2e619b ("x86/bugs: Rename CONFIG_RETPOLINE =>
|
||||
* CONFIG_MITIGATION_RETPOLINE) in v6.8 renamed CONFIG_RETPOLINE.
|
||||
*/
|
||||
#if !defined(CONFIG_RETPOLINE) && !defined(CONFIG_MITIGATION_RETPOLINE)
|
||||
#include "nv-retpoline.h"
|
||||
#endif
|
||||
|
||||
|
@ -250,6 +250,7 @@ NV_CONFTEST_TYPE_COMPILE_TESTS += num_registered_fb
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += pci_driver_has_driver_managed_dma
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += vm_area_struct_has_const_vm_flags
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += memory_failure_has_trapno_arg
|
||||
NV_CONFTEST_TYPE_COMPILE_TESTS += foll_longterm_present
|
||||
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += dom0_kernel_present
|
||||
NV_CONFTEST_GENERIC_COMPILE_TESTS += nvidia_vgpu_kvm_build
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -2130,6 +2130,8 @@ static int os_numa_verify_gpu_memory_zone(struct notifier_block *nb,
|
||||
return NOTIFY_OK;
|
||||
}
|
||||
|
||||
#define ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS 4
|
||||
|
||||
NV_STATUS NV_API_CALL os_numa_add_gpu_memory
|
||||
(
|
||||
void *handle,
|
||||
@ -2143,7 +2145,12 @@ NV_STATUS NV_API_CALL os_numa_add_gpu_memory
|
||||
nv_linux_state_t *nvl = pci_get_drvdata(handle);
|
||||
nv_state_t *nv = NV_STATE_PTR(nvl);
|
||||
NvU64 base = offset + nvl->coherent_link_info.gpu_mem_pa;
|
||||
int ret;
|
||||
int ret = 0;
|
||||
NvU64 memblock_size;
|
||||
NvU64 size_remaining;
|
||||
NvU64 calculated_segment_size;
|
||||
NvU64 segment_size;
|
||||
NvU64 segment_base;
|
||||
os_numa_gpu_mem_hotplug_notifier_t notifier =
|
||||
{
|
||||
.start_pa = base,
|
||||
@ -2176,11 +2183,49 @@ NV_STATUS NV_API_CALL os_numa_add_gpu_memory
|
||||
goto failed;
|
||||
}
|
||||
|
||||
//
|
||||
// Adding all memory at once can take a long time. Split up memory into segments
|
||||
// with schedule() in between to prevent soft lockups. Memory segments for
|
||||
// add_memory_driver_managed() need to be aligned to memblock size.
|
||||
//
|
||||
// If there are any issues splitting into segments, then add all memory at once.
|
||||
//
|
||||
if (os_numa_memblock_size(&memblock_size) == NV_OK)
|
||||
{
|
||||
calculated_segment_size = NV_ALIGN_UP(size / ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS, memblock_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Don't split into segments, add all memory at once
|
||||
calculated_segment_size = size;
|
||||
}
|
||||
|
||||
segment_size = calculated_segment_size;
|
||||
segment_base = base;
|
||||
size_remaining = size;
|
||||
|
||||
while ((size_remaining > 0) &&
|
||||
(ret == 0))
|
||||
{
|
||||
if (segment_size > size_remaining)
|
||||
{
|
||||
segment_size = size_remaining;
|
||||
}
|
||||
|
||||
#ifdef NV_ADD_MEMORY_DRIVER_MANAGED_HAS_MHP_FLAGS_ARG
|
||||
ret = add_memory_driver_managed(node, base, size, "System RAM (NVIDIA)", MHP_NONE);
|
||||
ret = add_memory_driver_managed(node, segment_base, segment_size, "System RAM (NVIDIA)", MHP_NONE);
|
||||
#else
|
||||
ret = add_memory_driver_managed(node, base, size, "System RAM (NVIDIA)");
|
||||
ret = add_memory_driver_managed(node, segment_base, segment_size, "System RAM (NVIDIA)");
|
||||
#endif
|
||||
nv_printf(NV_DBG_SETUP, "NVRM: add_memory_driver_managed() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
|
||||
ret, segment_base, segment_size);
|
||||
|
||||
segment_base += segment_size;
|
||||
size_remaining -= segment_size;
|
||||
|
||||
// Yield CPU to prevent soft lockups
|
||||
schedule();
|
||||
}
|
||||
unregister_memory_notifier(¬ifier.memory_notifier);
|
||||
|
||||
if (ret == 0)
|
||||
@ -2194,14 +2239,33 @@ NV_STATUS NV_API_CALL os_numa_add_gpu_memory
|
||||
zone_end_pfn(zone) != end_pfn)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "NVRM: GPU memory zone movable auto onlining failed!\n");
|
||||
|
||||
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
|
||||
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
|
||||
if (offline_and_remove_memory(node, base, size) != 0)
|
||||
#else
|
||||
if (offline_and_remove_memory(base, size) != 0)
|
||||
#endif
|
||||
// Since zone movable auto onlining failed, need to remove the added memory.
|
||||
segment_size = calculated_segment_size;
|
||||
segment_base = base;
|
||||
size_remaining = size;
|
||||
|
||||
while (size_remaining > 0)
|
||||
{
|
||||
nv_printf(NV_DBG_ERRORS, "NVRM: offline_and_remove_memory failed\n");
|
||||
if (segment_size > size_remaining)
|
||||
{
|
||||
segment_size = size_remaining;
|
||||
}
|
||||
|
||||
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
|
||||
ret = offline_and_remove_memory(node, segment_base, segment_size);
|
||||
#else
|
||||
ret = offline_and_remove_memory(segment_base, segment_size);
|
||||
#endif
|
||||
nv_printf(NV_DBG_SETUP, "NVRM: offline_and_remove_memory() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
|
||||
ret, segment_base, segment_size);
|
||||
|
||||
segment_base += segment_size;
|
||||
size_remaining -= segment_size;
|
||||
|
||||
// Yield CPU to prevent soft lockups
|
||||
schedule();
|
||||
}
|
||||
#endif
|
||||
goto failed;
|
||||
@ -2221,6 +2285,77 @@ failed:
|
||||
return NV_ERR_NOT_SUPPORTED;
|
||||
}
|
||||
|
||||
|
||||
typedef struct {
|
||||
NvU64 base;
|
||||
NvU64 size;
|
||||
NvU32 nodeId;
|
||||
int ret;
|
||||
} remove_numa_memory_info_t;
|
||||
|
||||
static void offline_numa_memory_callback
|
||||
(
|
||||
void *args
|
||||
)
|
||||
{
|
||||
#ifdef NV_OFFLINE_AND_REMOVE_MEMORY_PRESENT
|
||||
remove_numa_memory_info_t *pNumaInfo = (remove_numa_memory_info_t *)args;
|
||||
int ret = 0;
|
||||
NvU64 memblock_size;
|
||||
NvU64 size_remaining;
|
||||
NvU64 calculated_segment_size;
|
||||
NvU64 segment_size;
|
||||
NvU64 segment_base;
|
||||
|
||||
//
|
||||
// Removing all memory at once can take a long time. Split up memory into segments
|
||||
// with schedule() in between to prevent soft lockups. Memory segments for
|
||||
// offline_and_remove_memory() need to be aligned to memblock size.
|
||||
//
|
||||
// If there are any issues splitting into segments, then remove all memory at once.
|
||||
//
|
||||
if (os_numa_memblock_size(&memblock_size) == NV_OK)
|
||||
{
|
||||
calculated_segment_size = NV_ALIGN_UP(pNumaInfo->size / ADD_REMOVE_GPU_MEMORY_NUM_SEGMENTS, memblock_size);
|
||||
}
|
||||
else
|
||||
{
|
||||
// Don't split into segments, remove all memory at once
|
||||
calculated_segment_size = pNumaInfo->size;
|
||||
}
|
||||
|
||||
segment_size = calculated_segment_size;
|
||||
segment_base = pNumaInfo->base;
|
||||
size_remaining = pNumaInfo->size;
|
||||
|
||||
while (size_remaining > 0)
|
||||
{
|
||||
if (segment_size > size_remaining)
|
||||
{
|
||||
segment_size = size_remaining;
|
||||
}
|
||||
|
||||
#ifdef NV_REMOVE_MEMORY_HAS_NID_ARG
|
||||
ret = offline_and_remove_memory(pNumaInfo->nodeId,
|
||||
segment_base,
|
||||
segment_size);
|
||||
#else
|
||||
ret = offline_and_remove_memory(segment_base,
|
||||
segment_size);
|
||||
#endif
|
||||
nv_printf(NV_DBG_SETUP, "NVRM: offline_and_remove_memory() returns: %d for segment_base: 0x%llx, segment_size: 0x%llx\n",
|
||||
ret, segment_base, segment_size);
|
||||
pNumaInfo->ret |= ret;
|
||||
|
||||
segment_base += segment_size;
|
||||
size_remaining -= segment_size;
|
||||
|
||||
// Yield CPU to prevent soft lockups
|
||||
schedule();
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
NV_STATUS NV_API_CALL os_numa_remove_gpu_memory
|
||||
(
|
||||
void *handle,
|
||||
|
@ -26,6 +26,12 @@
|
||||
#include "os-interface.h"
|
||||
#include "nv-linux.h"
|
||||
|
||||
#if defined(NVCPU_FAMILY_X86) && defined(NV_FOLL_LONGTERM_PRESENT) && \
|
||||
(defined(NV_PIN_USER_PAGES_HAS_ARGS_VMAS) || \
|
||||
defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS))
|
||||
#define NV_NUM_PIN_PAGES_PER_ITERATION 0x80000
|
||||
#endif
|
||||
|
||||
static inline int nv_follow_pfn(struct vm_area_struct *vma,
|
||||
unsigned long address,
|
||||
unsigned long *pfn)
|
||||
@ -163,9 +169,15 @@ NV_STATUS NV_API_CALL os_lock_user_pages(
|
||||
NV_STATUS rmStatus;
|
||||
struct mm_struct *mm = current->mm;
|
||||
struct page **user_pages;
|
||||
NvU64 i, pinned;
|
||||
NvU64 i;
|
||||
NvU64 npages = page_count;
|
||||
NvU64 pinned = 0;
|
||||
unsigned int gup_flags = DRF_VAL(_LOCK_USER_PAGES, _FLAGS, _WRITE, flags) ? FOLL_WRITE : 0;
|
||||
int ret;
|
||||
long ret;
|
||||
|
||||
#if defined(NVCPU_FAMILY_X86) && defined(NV_FOLL_LONGTERM_PRESENT)
|
||||
gup_flags |= FOLL_LONGTERM;
|
||||
#endif
|
||||
|
||||
if (!NV_MAY_SLEEP())
|
||||
{
|
||||
@ -185,16 +197,51 @@ NV_STATUS NV_API_CALL os_lock_user_pages(
|
||||
|
||||
nv_mmap_read_lock(mm);
|
||||
ret = NV_PIN_USER_PAGES((unsigned long)address,
|
||||
page_count, gup_flags, user_pages, NULL);
|
||||
nv_mmap_read_unlock(mm);
|
||||
pinned = ret;
|
||||
|
||||
if (ret < 0)
|
||||
npages, gup_flags, user_pages, NULL);
|
||||
if (ret > 0)
|
||||
{
|
||||
os_free_mem(user_pages);
|
||||
return NV_ERR_INVALID_ADDRESS;
|
||||
pinned = ret;
|
||||
}
|
||||
else if (pinned < page_count)
|
||||
#if defined(NVCPU_FAMILY_X86) && defined(NV_FOLL_LONGTERM_PRESENT) && \
|
||||
(defined(NV_PIN_USER_PAGES_HAS_ARGS_VMAS) || \
|
||||
defined(NV_GET_USER_PAGES_HAS_ARGS_FLAGS_VMAS))
|
||||
//
|
||||
// NV_PIN_USER_PAGES() passes in NULL for the vmas parameter (if required)
|
||||
// in pin_user_pages() (or get_user_pages() if pin_user_pages() does not
|
||||
// exist). For kernels which do not contain the commit 52650c8b466b
|
||||
// (mm/gup: remove the vma allocation from gup_longterm_locked()), if
|
||||
// FOLL_LONGTERM is passed in, this results in the kernel trying to kcalloc
|
||||
// the vmas array, and since the limit for kcalloc is 4 MB, it results in
|
||||
// NV_PIN_USER_PAGES() failing with ENOMEM if more than
|
||||
// NV_NUM_PIN_PAGES_PER_ITERATION pages are requested on 64-bit systems.
|
||||
//
|
||||
// As a workaround, if we requested more than
|
||||
// NV_NUM_PIN_PAGES_PER_ITERATION pages and failed with ENOMEM, try again
|
||||
// with multiple calls of NV_NUM_PIN_PAGES_PER_ITERATION pages at a time.
|
||||
//
|
||||
else if ((ret == -ENOMEM) &&
|
||||
(page_count > NV_NUM_PIN_PAGES_PER_ITERATION))
|
||||
{
|
||||
for (pinned = 0; pinned < page_count; pinned += ret)
|
||||
{
|
||||
npages = page_count - pinned;
|
||||
if (npages > NV_NUM_PIN_PAGES_PER_ITERATION)
|
||||
{
|
||||
npages = NV_NUM_PIN_PAGES_PER_ITERATION;
|
||||
}
|
||||
|
||||
ret = NV_PIN_USER_PAGES(((unsigned long) address) + (pinned * PAGE_SIZE),
|
||||
npages, gup_flags, &user_pages[pinned], NULL);
|
||||
if (ret <= 0)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
nv_mmap_read_unlock(mm);
|
||||
|
||||
if (pinned < page_count)
|
||||
{
|
||||
for (i = 0; i < pinned; i++)
|
||||
NV_UNPIN_USER_PAGE(user_pages[i]);
|
||||
|
@ -36,25 +36,25 @@
|
||||
// and then checked back in. You cannot make changes to these sections without
|
||||
// corresponding changes to the buildmeister script
|
||||
#ifndef NV_BUILD_BRANCH
|
||||
#define NV_BUILD_BRANCH r538_27
|
||||
#define NV_BUILD_BRANCH r538_49
|
||||
#endif
|
||||
#ifndef NV_PUBLIC_BRANCH
|
||||
#define NV_PUBLIC_BRANCH r538_27
|
||||
#define NV_PUBLIC_BRANCH r538_49
|
||||
#endif
|
||||
|
||||
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
|
||||
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r538_27-451"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33992350)
|
||||
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r535/r538_49-495"
|
||||
#define NV_BUILD_CHANGELIST_NUM (34058561)
|
||||
#define NV_BUILD_TYPE "Official"
|
||||
#define NV_BUILD_NAME "rel/gpu_drv/r535/r538_27-451"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33992350)
|
||||
#define NV_BUILD_NAME "rel/gpu_drv/r535/r538_49-495"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34058561)
|
||||
|
||||
#else /* Windows builds */
|
||||
#define NV_BUILD_BRANCH_VERSION "r538_27-6"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33992350)
|
||||
#define NV_BUILD_BRANCH_VERSION "r538_49-2"
|
||||
#define NV_BUILD_CHANGELIST_NUM (34058561)
|
||||
#define NV_BUILD_TYPE "Official"
|
||||
#define NV_BUILD_NAME "538.46"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33992350)
|
||||
#define NV_BUILD_NAME "538.52"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (34058561)
|
||||
#define NV_BUILD_BRANCH_BASE_VERSION R535
|
||||
#endif
|
||||
// End buildmeister python edited section
|
||||
|
@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)

#define NV_VERSION_STRING "535.161.08"
#define NV_VERSION_STRING "535.171.04"

#else

@ -95,6 +95,7 @@ endif
|
||||
ifeq ($(TARGET_ARCH),aarch64)
|
||||
CFLAGS += -mgeneral-regs-only
|
||||
CFLAGS += -march=armv8-a
|
||||
CFLAGS += -ffixed-x18
|
||||
CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mno-outline-atomics)
|
||||
endif
|
||||
|
||||
|
@ -38,7 +38,7 @@ void nvUpdateHdmiInfoFrames(const NVDispEvoRec *pDispEvo,
|
||||
|
||||
void nvDpyUpdateHdmiPreModesetEvo(NVDpyEvoPtr pDpyEvo);
|
||||
void nvDpyUpdateHdmiVRRCaps(NVDpyEvoPtr pDpyEvo);
|
||||
void nvUpdateHdmiCaps(NVDpyEvoPtr pDpyEvo);
|
||||
void nvSendHdmiCapsToRm(NVDpyEvoPtr pDpyEvo);
|
||||
|
||||
void nvLogEdidCea861InfoEvo(NVDpyEvoPtr pDpyEvo,
|
||||
NVEvoInfoStringPtr pInfoString);
|
||||
|
@ -71,7 +71,7 @@ static NvBool ValidateEdid (const NVDpyEvoRec *pDpyEvo,
|
||||
const NvBool ignoreEdidChecksum);
|
||||
static void LogEdid (NVDpyEvoPtr pDpyEvo,
|
||||
NVEvoInfoStringPtr pInfoString);
|
||||
static void ClearEdid (NVDpyEvoPtr pDpyEvo);
|
||||
static void ClearEdid (NVDpyEvoPtr pDpyEvo, const NvBool bSendHdmiCapsToRm);
|
||||
static void ClearCustomEdid (const NVDpyEvoRec *pDpyEvo);
|
||||
static void WriteEdidToResman (const NVDpyEvoRec *pDpyEvo,
|
||||
const NVEdidRec *pEdid);
|
||||
@ -90,14 +90,14 @@ static void AssignDpyEvoName (NVDpyEvoPtr pDpyEvo);
|
||||
static NvBool IsConnectorTMDS (NVConnectorEvoPtr);
|
||||
|
||||
|
||||
static void DpyDisconnectEvo(NVDpyEvoPtr pDpyEvo)
|
||||
static void DpyDisconnectEvo(NVDpyEvoPtr pDpyEvo, const NvBool bSendHdmiCapsToRm)
|
||||
{
|
||||
NVDispEvoPtr pDispEvo = pDpyEvo->pDispEvo;
|
||||
|
||||
pDispEvo->connectedDisplays =
|
||||
nvDpyIdListMinusDpyId(pDispEvo->connectedDisplays, pDpyEvo->id);
|
||||
|
||||
ClearEdid(pDpyEvo);
|
||||
ClearEdid(pDpyEvo, bSendHdmiCapsToRm);
|
||||
}
|
||||
|
||||
static NvBool DpyConnectEvo(
|
||||
@ -351,6 +351,7 @@ static void ApplyNewEdid(
|
||||
NVDpyEvoPtr pDpyEvo,
|
||||
const NVEdidRec *pEdid,
|
||||
const NVParsedEdidEvoRec *pParsedEdid,
|
||||
const NvBool bSendHdmiCapsToRm,
|
||||
NVEvoInfoStringPtr pInfoString)
|
||||
{
|
||||
if (pDpyEvo->edid.buffer != NULL) {
|
||||
@ -392,7 +393,9 @@ static void ApplyNewEdid(
|
||||
DpyAssignColorSpaceCaps(pDpyEvo, pInfoString);
|
||||
}
|
||||
|
||||
nvUpdateHdmiCaps(pDpyEvo);
|
||||
if (bSendHdmiCapsToRm) {
|
||||
nvSendHdmiCapsToRm(pDpyEvo);
|
||||
}
|
||||
|
||||
nvDpyProbeMaxPixelClock(pDpyEvo);
|
||||
|
||||
@ -574,7 +577,8 @@ static void ReadAndApplyEdidEvo(
|
||||
* worrying that this request has different parameters (like CustomEdid
|
||||
* or mode validation overrides).
|
||||
*/
|
||||
ApplyNewEdid(pDpyEvo, &edid, pParsedEdid, &infoString);
|
||||
ApplyNewEdid(pDpyEvo, &edid, pParsedEdid, TRUE /* bSendHdmiCapsToRm */,
|
||||
&infoString);
|
||||
} else {
|
||||
nvFree(edid.buffer);
|
||||
}
|
||||
@ -1844,14 +1848,15 @@ static void LogEdid(NVDpyEvoPtr pDpyEvo, NVEvoInfoStringPtr pInfoString)
* structure.
*/

static void ClearEdid(NVDpyEvoPtr pDpyEvo)
static void ClearEdid(NVDpyEvoPtr pDpyEvo, const NvBool bSendHdmiCapsToRm)
{
NVEdidRec edid = { };
NVEvoInfoStringRec infoString;
nvInitInfoString(&infoString, NULL, 0);

if (EdidHasChanged(pDpyEvo, &edid, NULL)) {
ApplyNewEdid(pDpyEvo, &edid, NULL, &infoString);
ApplyNewEdid(pDpyEvo, &edid, NULL,
bSendHdmiCapsToRm, &infoString);
}
}

@ -2283,7 +2288,7 @@ NVDpyEvoPtr nvAllocDpyEvo(NVDispEvoPtr pDispEvo,

void nvFreeDpyEvo(NVDispEvoPtr pDispEvo, NVDpyEvoPtr pDpyEvo)
{
DpyDisconnectEvo(pDpyEvo);
DpyDisconnectEvo(pDpyEvo, FALSE /* bSendHdmiCapsToRm */);

// Let the DP library host implementation handle deleting a pDpy as if the
// library had notified it of a lost device.

@ -2826,7 +2831,7 @@ NvBool nvDpyGetDynamicData(
return FALSE;
}
} else {
DpyDisconnectEvo(pDpyEvo);
DpyDisconnectEvo(pDpyEvo, TRUE /* bSendHdmiCapsToRm */);
}

if (nvConnectorUsesDPLib(pConnectorEvo)) {

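The hunks above thread a new bSendHdmiCapsToRm flag through DpyDisconnectEvo(), ClearEdid(), and ApplyNewEdid(): local HDMI capability state is always recomputed via nvUpdateHdmiCaps(), while the RM notification (nvSendHdmiCapsToRm()) is issued only when the caller requests it; nvFreeDpyEvo() passes FALSE and nvDpyGetDynamicData() passes TRUE. A minimal standalone sketch of that pattern, using stand-in types rather than the driver's real structures:

    #include <stdbool.h>
    #include <stdio.h>

    typedef struct { bool capsValid; } Dpy;   /* stand-in for NVDpyEvoRec */

    static void updateLocalHdmiCaps(Dpy *d) { d->capsValid = true; }
    static void sendHdmiCapsToRm(Dpy *d)    { printf("notify RM (caps=%d)\n", d->capsValid); }

    /* Mirrors the ApplyNewEdid() change: refresh local caps unconditionally,
     * notify RM only when requested. */
    static void applyNewEdid(Dpy *d, bool bSendHdmiCapsToRm)
    {
        updateLocalHdmiCaps(d);
        if (bSendHdmiCapsToRm) {
            sendHdmiCapsToRm(d);
        }
    }

    int main(void)
    {
        Dpy dpy = { false };
        applyNewEdid(&dpy, true);    /* hotplug path, as in nvDpyGetDynamicData() */
        applyNewEdid(&dpy, false);   /* teardown path, as in nvFreeDpyEvo() */
        return 0;
    }
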
@ -8602,6 +8602,7 @@ void nvEvoEnableMergeModePreModeset(NVDispEvoRec *pDispEvo,
pHC->serverLock = NV_EVO_RASTER_LOCK;
pHC->serverLockPin = NV_EVO_LOCK_PIN_INTERNAL(primaryHead);
pHC->setLockOffsetX = TRUE;
pHC->crashLockUnstallMode = FALSE;
} else {
pHC->clientLock = NV_EVO_RASTER_LOCK;
pHC->clientLockPin = NV_EVO_LOCK_PIN_INTERNAL(primaryHead);

@ -8612,11 +8613,10 @@ void nvEvoEnableMergeModePreModeset(NVDispEvoRec *pDispEvo,
} else {
pHC->clientLockoutWindow = 2;
}
pHC->crashLockUnstallMode =
(pTimings->vrr.type != NVKMS_DPY_VRR_TYPE_NONE);
}

if (pTimings->vrr.type != NVKMS_DPY_VRR_TYPE_NONE) {
pHC->crashLockUnstallMode = TRUE;
}
pHC->stereoLocked = FALSE;

EvoUpdateHeadParams(pDispEvo, head, pUpdateState);

@ -6639,11 +6639,18 @@ static void EvoSetStallLockC3(NVDispEvoPtr pDispEvo, const int head,
NVEvoChannelPtr pChannel = pDevEvo->core;
NVEvoSubDevPtr pEvoSubDev = &pDevEvo->gpus[pDispEvo->displayOwner];
NVEvoHeadControlPtr pHC = &pEvoSubDev->headControl[head];
NvU32 data = 0x0;

nvUpdateUpdateState(pDevEvo, updateState, pChannel);

if (pHC->crashLockUnstallMode) {
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _UNSTALL_MODE, _CRASH_LOCK);
} else {
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _UNSTALL_MODE, _LINE_LOCK);
}

if (enable) {
NvU32 data = DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _ENABLE, _TRUE) |
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _ENABLE, _TRUE) |
DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _MODE, _ONE_SHOT);

if (!pHC->useStallLockPin) {

@ -6657,20 +6664,12 @@ static void EvoSetStallLockC3(NVDispEvoPtr pDispEvo, const int head,
data |= DRF_NUM(C37D, _HEAD_SET_STALL_LOCK, _LOCK_PIN,
NVC37D_HEAD_SET_STALL_LOCK_LOCK_PIN_LOCK_PIN(pin));
}

if (pHC->crashLockUnstallMode) {
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _UNSTALL_MODE, _CRASH_LOCK);
} else {
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _UNSTALL_MODE, _LINE_LOCK);
data |= DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _ENABLE, _FALSE);
}

nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_STALL_LOCK(head), 1);
nvDmaSetEvoMethodData(pChannel, data);
} else {
nvDmaSetStartEvoMethod(pChannel, NVC37D_HEAD_SET_STALL_LOCK(head), 1);
nvDmaSetEvoMethodData(pChannel,
DRF_DEF(C37D, _HEAD_SET_STALL_LOCK, _ENABLE, _FALSE));
}
}

static NvBool GetChannelState(NVDevEvoPtr pDevEvo,

@ -203,7 +203,7 @@ NvBool nvDpyIsHdmiEvo(const NVDpyEvoRec *pDpyEvo)
/*!
* Updates the display's HDMI 2.0 capabilities to the RM.
*/
void nvUpdateHdmiCaps(NVDpyEvoPtr pDpyEvo)
void nvSendHdmiCapsToRm(NVDpyEvoPtr pDpyEvo)
{
NV0073_CTRL_SPECIFIC_SET_HDMI_SINK_CAPS_PARAMS params = { 0 };
NVParsedEdidEvoPtr pParsedEdid = &pDpyEvo->parsedEdid;

@ -221,7 +221,7 @@ void nvUpdateHdmiCaps(NVDpyEvoPtr pDpyEvo)
params.caps = 0;

/*
* nvUpdateHdmiCaps() gets called on dpy's connect/disconnect events
* nvSendHdmiCapsToRm() gets called on dpy's connect/disconnect events
* to set/clear capabilities, clear capabilities if parsed edid
* is not valid.
*/

@ -91,6 +91,7 @@ ifeq ($(TARGET_ARCH),aarch64)
CFLAGS += -mgeneral-regs-only
CFLAGS += -march=armv8-a
CFLAGS += -mstrict-align
CFLAGS += -ffixed-x18
CONDITIONAL_CFLAGS += $(call TEST_CC_ARG, -mno-outline-atomics)
endif

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2021-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2021-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

@ -77,6 +77,9 @@
#define NV_CTRL_INTR_GPU_VECTOR_TO_SUBTREE(i) \
((NV_CTRL_INTR_GPU_VECTOR_TO_LEAF_REG(i)) / 2)

// First index of doorbell which is controlled by VF
#define NV_CTRL_INTR_GPU_DOORBELL_INDEX_VF_START 2048

// The max number of leaf registers we expect
#define NV_MAX_INTR_LEAVES 16

@ -1042,6 +1042,8 @@ static const CHIPS_RELEASED sChipsReleased[] = {
{ 0x28A0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Laptop GPU" },
{ 0x28A1, 0x0000, 0x0000, "NVIDIA GeForce RTX 4050 Laptop GPU" },
{ 0x28B8, 0x0000, 0x0000, "NVIDIA RTX 2000 Ada Generation Laptop GPU" },
{ 0x28B9, 0x0000, 0x0000, "NVIDIA RTX 1000 Ada Generation Laptop GPU" },
{ 0x28BB, 0x0000, 0x0000, "NVIDIA RTX 500 Ada Generation Laptop GPU" },
{ 0x28E0, 0x0000, 0x0000, "NVIDIA GeForce RTX 4060 Laptop GPU" },
{ 0x28E1, 0x0000, 0x0000, "NVIDIA GeForce RTX 4050 Laptop GPU" },
{ 0x28F8, 0x0000, 0x0000, "NVIDIA RTX 2000 Ada Generation Embedded GPU" },

@ -7,7 +7,7 @@ extern "C" {
#endif

/*
* SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1993-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

@ -103,4 +103,24 @@ typedef struct MESSAGE_QUEUE_COLLECTION
#define GSP_MSG_QUEUE_HEADER_SIZE RM_PAGE_SIZE
#define GSP_MSG_QUEUE_HEADER_ALIGN 4 // 2 ^ 4 = 16

/*!
* Calculate 32-bit checksum
*
* This routine assumes that the data is padded out with zeros to the next
* 8-byte alignment, and it is OK to read past the end to the 8-byte alignment.
*/
static NV_INLINE NvU32 _checkSum32(void *pData, NvU32 uLen)
{
NvU64 *p = (NvU64 *)pData;
NvU64 *pEnd = (NvU64 *)((NvUPtr)pData + uLen);
NvU64 checkSum = 0;

NV_ASSERT_CHECKED(uLen > 0);

while (p < pEnd)
checkSum ^= *p++;

return NvU64_HI32(checkSum) ^ NvU64_LO32(checkSum);
}

#endif // _MESSAGE_QUEUE_PRIV_H_

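The `_checkSum32()` helper above XOR-folds the buffer as 64-bit words and then XORs the high and low 32-bit halves together. One property the send and receive paths later in this commit rely on: if the 32-bit checkSum field inside the buffer is zeroed before the fold is computed and the result is stored back into that field, a second pass over the same range folds to exactly zero, which is why the receiver only checks for a nonzero result. A small self-contained sketch of that fill-then-verify pattern (hypothetical element layout, standard C types instead of NvU32/NvU64):

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Same folding scheme as _checkSum32(): XOR 64-bit words, then fold halves. */
    static uint32_t checksum32(const void *data, size_t len /* multiple of 8 */)
    {
        const uint64_t *p   = (const uint64_t *)data;
        const uint64_t *end = (const uint64_t *)((const uint8_t *)data + len);
        uint64_t acc = 0;

        while (p < end)
            acc ^= *p++;

        return (uint32_t)(acc >> 32) ^ (uint32_t)acc;
    }

    /* Hypothetical queue element: 8-byte aligned and padded to an 8-byte multiple. */
    struct element {
        uint32_t checkSum;
        uint32_t seqNum;
        uint64_t payload[6];
    };

    int main(void)
    {
        struct element e;
        memset(&e, 0, sizeof(e));
        e.seqNum = 7;

        e.checkSum = 0;                          /* the field is part of the checksummed range */
        e.checkSum = checksum32(&e, sizeof(e));  /* sender: store the fold */

        /* receiver: re-folding the same range yields 0 if nothing was corrupted */
        printf("verify = %u\n", checksum32(&e, sizeof(e)));
        return 0;
    }
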
@ -244,32 +244,50 @@ kfspPollForQueueEmpty_IMPL
KernelFsp *pKernelFsp
)
{
NV_STATUS status = NV_OK;
RMTIMEOUT timeout;

gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout, GPU_TIMEOUT_FLAGS_OSTIMER | GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);
gpuSetTimeout(pGpu, GPU_TIMEOUT_DEFAULT, &timeout,
GPU_TIMEOUT_FLAGS_OSTIMER |
GPU_TIMEOUT_FLAGS_BYPASS_THREAD_STATE);

while (!kfspIsQueueEmpty(pGpu, pKernelFsp))
{
//
// For now we assume that any response from FSP before RM message send is complete
// indicates an error and we should abort.
// For now we assume that any response from FSP before RM message
// send is complete indicates an error and we should abort.
//
// Ongoing discussion on usefulness of this check. Bug to be filed.
//
if (!kfspIsMsgQueueEmpty(pGpu, pKernelFsp))
{
kfspReadMessage(pGpu, pKernelFsp, NULL, 0);
NV_PRINTF(LEVEL_ERROR, "Received error message from FSP while waiting for CMDQ to be empty.\n");
return NV_ERR_GENERIC;
NV_PRINTF(LEVEL_ERROR,
"Received error message from FSP while waiting for CMDQ to be empty.\n");
status = NV_ERR_GENERIC;
break;
}

if (gpuCheckTimeout(pGpu, &timeout) == NV_ERR_TIMEOUT)
{
NV_PRINTF(LEVEL_ERROR, "Timed out waiting for FSP command queue to be empty.\n");
return NV_ERR_TIMEOUT;
}
osSpinLoop();

status = gpuCheckTimeout(pGpu, &timeout);
if (status != NV_OK)
{
if ((status == NV_ERR_TIMEOUT) &&
kfspIsQueueEmpty(pGpu, pKernelFsp))
{
status = NV_OK;
}
else
{
NV_PRINTF(LEVEL_ERROR,
"Timed out waiting for FSP command queue to be empty.\n");
}
break;
}
}

return NV_OK;
return status;
}

/*!

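One behavioral detail of the reworked loop above is worth calling out: when gpuCheckTimeout() reports NV_ERR_TIMEOUT, the queue is sampled one more time and a late success is still returned as NV_OK, so a completion that races with the timeout is not reported as a failure. A generic, self-contained sketch of that poll-with-final-recheck idiom (stand-in names; the real code uses RMTIMEOUT, gpuCheckTimeout(), and osSpinLoop()):

    #include <stdbool.h>
    #include <time.h>

    typedef bool (*done_fn)(void *ctx);

    /* Returns 0 on success, -1 on timeout.  The condition is re-checked once
     * after the deadline passes, mirroring the kfspPollForQueueEmpty() change. */
    static int poll_until(done_fn done, void *ctx, double timeout_sec)
    {
        struct timespec start, now;
        clock_gettime(CLOCK_MONOTONIC, &start);

        while (!done(ctx)) {
            clock_gettime(CLOCK_MONOTONIC, &now);
            double elapsed = (now.tv_sec - start.tv_sec) +
                             (now.tv_nsec - start.tv_nsec) / 1e9;
            if (elapsed >= timeout_sec) {
                return done(ctx) ? 0 : -1;   /* final re-check before declaring timeout */
            }
            /* a real implementation would spin or yield here (osSpinLoop()) */
        }
        return 0;
    }

    static bool queue_empty_stub(void *ctx) { return *(int *)ctx == 0; }

    int main(void)
    {
        int pending = 0;   /* pretend the queue is already empty */
        return poll_until(queue_empty_stub, &pending, 0.5);
    }
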
@ -476,24 +476,6 @@ void GspMsgQueuesCleanup(MESSAGE_QUEUE_COLLECTION **ppMQCollection)
*ppMQCollection = NULL;
}

/*!
* Calculate 32-bit checksum
*
* This routine assumes that the data is padded out with zeros to the next
* 8-byte alignment, and it is OK to read past the end to the 8-byte alignment.
*/
static NV_INLINE NvU32 _checkSum32(void *pData, NvU32 uLen)
{
NvU64 *p = (NvU64 *)pData;
NvU64 *pEnd = (NvU64 *)((NvUPtr)pData + uLen);
NvU64 checkSum = 0;

while (p < pEnd)
checkSum ^= *p++;

return NvU64_HI32(checkSum) ^ NvU64_LO32(checkSum);
}

/*!
* GspMsgQueueSendCommand
*

@ -533,7 +515,7 @@ NV_STATUS GspMsgQueueSendCommand(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)

pCQE->seqNum = pMQI->txSeqNum;
pCQE->elemCount = GSP_MSG_QUEUE_BYTES_TO_ELEMENTS(uElementSize);
pCQE->checkSum = 0;
pCQE->checkSum = 0; // The checkSum field is included in the checksum calculation, so zero it.

ConfidentialCompute *pCC = GPU_GET_CONF_COMPUTE(pGpu);
if (pCC != NULL && pCC->getProperty(pCC, PDB_PROP_CONFCOMPUTE_ENCRYPT_ENABLED))

@ -660,7 +642,8 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
NvU32 nRetries;
NvU32 nMaxRetries = 3;
NvU32 nElements = 1; // Assume record fits in one queue element for now.
NvU32 uElementSize = 0;
NvU32 uElementSize;
NvU32 checkSum;
NvU32 seqMismatchDiff = NV_U32_MAX;
NV_STATUS nvStatus = NV_OK;
ConfidentialCompute *pCC = NULL;

@ -713,15 +696,23 @@ NV_STATUS GspMsgQueueReceiveStatus(MESSAGE_QUEUE_INFO *pMQI, OBJGPU *pGpu)
pCC = GPU_GET_CONF_COMPUTE(pGpu);
if (pCC != NULL && pCC->getProperty(pCC, PDB_PROP_CONFCOMPUTE_ENCRYPT_READY))
{
// In Confidential Compute scenario, checksum includes complete element range.
if (_checkSum32(pMQI->pCmdQueueElement, (nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN)) != 0)
{
NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n");
nvStatus = NV_ERR_INVALID_DATA;
continue;
}
//
// In the Confidential Compute scenario, the actual message length
// is inside the encrypted payload, and we can't access it before
// decryption, therefore the checksum encompasses the whole element
// range. This makes checksum verification significantly slower
// because messages are typically much smaller than element size.
//
checkSum = _checkSum32(pMQI->pCmdQueueElement,
(nElements * GSP_MSG_QUEUE_ELEMENT_SIZE_MIN));
} else
if (_checkSum32(pMQI->pCmdQueueElement, uElementSize) != 0)
{
checkSum = _checkSum32(pMQI->pCmdQueueElement,
(GSP_MSG_QUEUE_ELEMENT_HDR_SIZE +
pMQI->pCmdQueueElement->rpc.length));
}

if (checkSum != 0)
{
NV_PRINTF(LEVEL_ERROR, "Bad checksum.\n");
nvStatus = NV_ERR_INVALID_DATA;

@ -1587,6 +1587,7 @@ memdescFree
}

if (pMemDesc->_addressSpace != ADDR_FBMEM &&
pMemDesc->_addressSpace != ADDR_EGM &&
pMemDesc->_addressSpace != ADDR_SYSMEM)
{
return;

@ -1991,6 +1992,7 @@ memdescUnmap
switch (pMemDesc->_addressSpace)
{
case ADDR_SYSMEM:
case ADDR_EGM:
{
osUnmapSystemMemory(pMemDesc, Kernel, ProcessId, Address, Priv);
break;

@ -733,8 +733,9 @@ memUnmap_IMPL
//
}
// System Memory case
else if ((pGpu == NULL) || ((memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM) &&
FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags)))
else if ((pGpu == NULL) || (((memdescGetAddressSpace(pMemDesc) == ADDR_SYSMEM)
|| (memdescGetAddressSpace(pMemDesc) == ADDR_EGM)
) && FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags)))
{
if (FLD_TEST_DRF(OS33, _FLAGS, _MAPPING, _DIRECT, pCpuMapping->flags))
{

@ -1,4 +1,4 @@
NVIDIA_VERSION = 535.161.08
NVIDIA_VERSION = 535.171.04

# This file.
VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))