open-gpu-kernel-modules/kernel-open/nvidia-uvm/uvm_channel.c

/*******************************************************************************
    Copyright (c) 2015-2022 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
    deal in the Software without restriction, including without limitation the
    rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
    sell copies of the Software, and to permit persons to whom the Software is
    furnished to do so, subject to the following conditions:

        The above copyright notice and this permission notice shall be
        included in all copies or substantial portions of the Software.

    THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
    THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
    FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
    DEALINGS IN THE SOFTWARE.

*******************************************************************************/

#include "uvm_channel.h"

#include "uvm_api.h"
#include "uvm_common.h"
#include "uvm_global.h"
#include "uvm_hal.h"
#include "uvm_procfs.h"
#include "uvm_push.h"
#include "uvm_gpu_semaphore.h"
#include "uvm_lock.h"
#include "uvm_kvmalloc.h"

#include "nv_uvm_interface.h"
#include "clb06f.h"

static unsigned uvm_channel_num_gpfifo_entries = UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT;

#define UVM_CHANNEL_GPFIFO_LOC_DEFAULT "auto"

static char *uvm_channel_gpfifo_loc = UVM_CHANNEL_GPFIFO_LOC_DEFAULT;

#define UVM_CHANNEL_GPPUT_LOC_DEFAULT "auto"

static char *uvm_channel_gpput_loc = UVM_CHANNEL_GPPUT_LOC_DEFAULT;

#define UVM_CHANNEL_PUSHBUFFER_LOC_DEFAULT "auto"

static char *uvm_channel_pushbuffer_loc = UVM_CHANNEL_PUSHBUFFER_LOC_DEFAULT;

module_param(uvm_channel_num_gpfifo_entries, uint, S_IRUGO);
module_param(uvm_channel_gpfifo_loc, charp, S_IRUGO);
module_param(uvm_channel_gpput_loc, charp, S_IRUGO);
module_param(uvm_channel_pushbuffer_loc, charp, S_IRUGO);

static NV_STATUS manager_create_procfs_dirs(uvm_channel_manager_t *manager);
static NV_STATUS manager_create_procfs(uvm_channel_manager_t *manager);
static NV_STATUS channel_create_procfs(uvm_channel_t *channel);

typedef enum
{
    // Only remove completed GPFIFO entries from the pushbuffer
    UVM_CHANNEL_UPDATE_MODE_COMPLETED,

    // Remove all remaining GPFIFO entries from the pushbuffer, regardless of
    // whether they're actually done yet.
    UVM_CHANNEL_UPDATE_MODE_FORCE_ALL
} uvm_channel_update_mode_t;

static void channel_pool_lock_init(uvm_channel_pool_t *pool)
{
    if (uvm_channel_pool_is_proxy(pool))
        uvm_mutex_init(&pool->mutex, UVM_LOCK_ORDER_CHANNEL);
    else
        uvm_spin_lock_init(&pool->spinlock, UVM_LOCK_ORDER_CHANNEL);
}

void uvm_channel_pool_lock(uvm_channel_pool_t *pool)
{
    if (uvm_channel_pool_is_proxy(pool))
        uvm_mutex_lock(&pool->mutex);
    else
        uvm_spin_lock(&pool->spinlock);
}

void uvm_channel_pool_unlock(uvm_channel_pool_t *pool)
{
    if (uvm_channel_pool_is_proxy(pool))
        uvm_mutex_unlock(&pool->mutex);
    else
        uvm_spin_unlock(&pool->spinlock);
}

void uvm_channel_pool_assert_locked(uvm_channel_pool_t *pool)
{
    if (uvm_channel_pool_is_proxy(pool))
        uvm_assert_mutex_locked(&pool->mutex);
    else
        uvm_assert_spinlock_locked(&pool->spinlock);
}

// Update channel progress, completing up to max_to_complete entries
static NvU32 uvm_channel_update_progress_with_max(uvm_channel_t *channel,
                                                  NvU32 max_to_complete,
                                                  uvm_channel_update_mode_t mode)
{
    NvU32 gpu_get;
    NvU32 cpu_put;
    NvU32 completed_count = 0;
    NvU32 pending_gpfifos;

    NvU64 completed_value = uvm_channel_update_completed_value(channel);

    uvm_channel_pool_lock(channel->pool);

    // Completed value should never exceed the queued value
    UVM_ASSERT_MSG_RELEASE(completed_value <= channel->tracking_sem.queued_value,
                           "GPU %s channel %s unexpected completed_value 0x%llx > queued_value 0x%llx\n",
                           channel->pool->manager->gpu->parent->name, channel->name, completed_value,
                           channel->tracking_sem.queued_value);

    cpu_put = channel->cpu_put;
    gpu_get = channel->gpu_get;

    while (gpu_get != cpu_put && completed_count < max_to_complete) {
        uvm_gpfifo_entry_t *entry = &channel->gpfifo_entries[gpu_get];

        if (mode == UVM_CHANNEL_UPDATE_MODE_COMPLETED && entry->tracking_semaphore_value > completed_value)
            break;

        if (entry->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL) {
            uvm_pushbuffer_mark_completed(channel->pool->manager->pushbuffer, entry);
            list_add_tail(&entry->push_info->available_list_node, &channel->available_push_infos);
        }

        gpu_get = (gpu_get + 1) % channel->num_gpfifo_entries;
        ++completed_count;
    }

    channel->gpu_get = gpu_get;

    uvm_channel_pool_unlock(channel->pool);

    if (cpu_put >= gpu_get)
        pending_gpfifos = cpu_put - gpu_get;
    else
        pending_gpfifos = channel->num_gpfifo_entries - gpu_get + cpu_put;

    return pending_gpfifos;
}

NvU32 uvm_channel_update_progress(uvm_channel_t *channel)
{
    // By default, don't complete too many entries at a time to spread the cost
    // of doing so across callers and avoid holding a spin lock for too long.
    return uvm_channel_update_progress_with_max(channel, 8, UVM_CHANNEL_UPDATE_MODE_COMPLETED);
}

// Update progress for all pending GPFIFO entries. This might take a longer time
// and should be only used in exceptional circumstances like when a channel
// error is encountered. Otherwise, uvm_chanel_update_progress() should be used.
static NvU32 channel_update_progress_all(uvm_channel_t *channel, uvm_channel_update_mode_t mode)
{
    return uvm_channel_update_progress_with_max(channel, channel->num_gpfifo_entries, mode);
}

NvU32 uvm_channel_update_progress_all(uvm_channel_t *channel)
{
    return channel_update_progress_all(channel, UVM_CHANNEL_UPDATE_MODE_COMPLETED);
}

NvU32 uvm_channel_manager_update_progress(uvm_channel_manager_t *channel_manager)
{
    NvU32 pending_gpfifos = 0;
    uvm_channel_pool_t *pool;

    uvm_for_each_pool(pool, channel_manager) {
        uvm_channel_t *channel;

        uvm_for_each_channel_in_pool(channel, pool)
            pending_gpfifos += uvm_channel_update_progress(channel);
    }

    return pending_gpfifos;
}

static bool channel_is_available(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
    NvU32 pending_entries;

    uvm_channel_pool_assert_locked(channel->pool);

    if (channel->cpu_put >= channel->gpu_get)
        pending_entries = channel->cpu_put - channel->gpu_get;
    else
        pending_entries = channel->cpu_put + channel->num_gpfifo_entries - channel->gpu_get;

    return (pending_entries + channel->current_gpfifo_count + num_gpfifo_entries < channel->num_gpfifo_entries);
}

static bool try_claim_channel(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
    bool claimed = false;

    UVM_ASSERT(num_gpfifo_entries > 0);
    UVM_ASSERT(num_gpfifo_entries < channel->num_gpfifo_entries);

    uvm_channel_pool_lock(channel->pool);

    if (channel_is_available(channel, num_gpfifo_entries)) {
        channel->current_gpfifo_count += num_gpfifo_entries;
        claimed = true;
    }

    uvm_channel_pool_unlock(channel->pool);

    return claimed;
}

static void lock_push(uvm_channel_t *channel)
{
}

static void unlock_push(uvm_channel_t *channel)
{
}

static bool trylock_push(uvm_channel_t *channel)
{
    return true;
}

// Reserve a channel in the specified pool
static NV_STATUS channel_reserve_in_pool(uvm_channel_pool_t *pool, uvm_channel_t **channel_out)
{
    uvm_channel_t *channel;
    uvm_spin_loop_t spin;

    UVM_ASSERT(pool);

    uvm_for_each_channel_in_pool(channel, pool) {
        // TODO: Bug 1764953: Prefer idle/less busy channels
        if (trylock_push(channel)) {
            if (try_claim_channel(channel, 1)) {
                *channel_out = channel;
                return NV_OK;
            }
            else {
                unlock_push(channel);
            }
        }
    }

    uvm_spin_loop_init(&spin);
    while (1) {
        uvm_for_each_channel_in_pool(channel, pool) {
            NV_STATUS status;

            uvm_channel_update_progress(channel);

            if (try_claim_channel(channel, 1)) {
                lock_push(channel);
                *channel_out = channel;

                return NV_OK;
            }

            status = uvm_channel_check_errors(channel);
            if (status != NV_OK)
                return status;

            UVM_SPIN_LOOP(&spin);
        }
    }

    UVM_ASSERT_MSG(0, "Cannot get here?!\n");
    return NV_ERR_GENERIC;
}

NV_STATUS uvm_channel_reserve_type(uvm_channel_manager_t *manager, uvm_channel_type_t type, uvm_channel_t **channel_out)
{
    UVM_ASSERT(type < UVM_CHANNEL_TYPE_COUNT);

    return channel_reserve_in_pool(manager->pool_to_use.default_for_type[type], channel_out);
}

NV_STATUS uvm_channel_reserve_gpu_to_gpu(uvm_channel_manager_t *manager,
                                         uvm_gpu_t *dst_gpu,
                                         uvm_channel_t **channel_out)
{
    const NvU32 dst_gpu_index = uvm_id_gpu_index(dst_gpu->id);
    uvm_channel_pool_t *pool = manager->pool_to_use.gpu_to_gpu[dst_gpu_index];

    // If there is no recommended pool for the given GPU pair, use default
    if (pool == NULL)
        pool = manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU];

    UVM_ASSERT(pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);

    return channel_reserve_in_pool(pool, channel_out);
}

NV_STATUS uvm_channel_manager_wait(uvm_channel_manager_t *manager)
{
    NV_STATUS status = NV_OK;
    uvm_spin_loop_t spin;

    if (uvm_channel_manager_update_progress(manager) == 0)
        return uvm_channel_manager_check_errors(manager);

    uvm_spin_loop_init(&spin);
    while (uvm_channel_manager_update_progress(manager) > 0 && status == NV_OK) {
        UVM_SPIN_LOOP(&spin);
        status = uvm_channel_manager_check_errors(manager);
    }

    return status;
}

static NvU32 channel_get_available_push_info_index(uvm_channel_t *channel)
{
    uvm_push_info_t *push_info;

    uvm_channel_pool_lock(channel->pool);

    push_info = list_first_entry_or_null(&channel->available_push_infos, uvm_push_info_t, available_list_node);
    UVM_ASSERT(push_info != NULL);
    UVM_ASSERT(push_info->on_complete == NULL && push_info->on_complete_data == NULL);
    list_del(&push_info->available_list_node);

    uvm_channel_pool_unlock(channel->pool);

    return push_info - channel->push_infos;
}

NV_STATUS uvm_channel_begin_push(uvm_channel_t *channel, uvm_push_t *push)
{
    NV_STATUS status;
    uvm_channel_manager_t *manager;

    UVM_ASSERT(channel);
    UVM_ASSERT(push);

    manager = channel->pool->manager;

    status = uvm_pushbuffer_begin_push(manager->pushbuffer, push);
    if (status != NV_OK)
        return status;

    push->channel = channel;
    push->channel_tracking_value = 0;
    push->push_info_index = channel_get_available_push_info_index(channel);

    return NV_OK;
}

static void internal_channel_submit_work(uvm_push_t *push, NvU32 push_size, NvU32 new_gpu_put)
{
    NvU64 *gpfifo_entry;
    NvU64 pushbuffer_va;
    uvm_channel_t *channel = push->channel;
    uvm_channel_manager_t *channel_manager = channel->pool->manager;
    uvm_pushbuffer_t *pushbuffer = channel_manager->pushbuffer;
    uvm_gpu_t *gpu = channel_manager->gpu;

    BUILD_BUG_ON(sizeof(*gpfifo_entry) != NVB06F_GP_ENTRY__SIZE);
    UVM_ASSERT(!uvm_channel_is_proxy(channel));

    gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + channel->cpu_put;
    pushbuffer_va = uvm_pushbuffer_get_gpu_va_for_push(pushbuffer, push);

    gpu->parent->host_hal->set_gpfifo_entry(gpfifo_entry, pushbuffer_va, push_size);

    // Need to make sure all the pushbuffer and the GPFIFO entries writes
    // complete before updating GPPUT. We also don't want any reads to be moved
    // after the GPPut write as the GPU might modify the data they read as soon
    // as the GPPut write happens.
    mb();

    gpu->parent->host_hal->write_gpu_put(channel, new_gpu_put);
}

static void proxy_channel_submit_work(uvm_push_t *push, NvU32 push_size)
{
    NV_STATUS status;
    uvm_channel_t *channel = push->channel;

    UVM_ASSERT(uvm_channel_is_proxy(channel));

    status = nvUvmInterfacePagingChannelPushStream(channel->proxy.handle, (char *) push->begin, push_size);

    if (status != NV_OK) {
        uvm_push_info_t *push_info = uvm_push_info_from_push(push);

        // If the RM invocation fails, there is no clean recovery available
        // (for example, the vGPU plugin may have crashed), so swallow the error
        // but print a descriptive message about the failed push.
        UVM_ASSERT_MSG(status == NV_OK,
                       "nvUvmInterfacePagingChannelPushStream() failed: %s, GPU %s, push '%s' started at %s:%d in %s()\n",
                       nvstatusToString(status),
                       uvm_gpu_name(uvm_channel_get_gpu(channel)),
                       push_info->description,
                       push_info->filename,
                       push_info->line,
                       push_info->function);
    }
}

static void uvm_channel_semaphore_release(uvm_push_t *push, NvU64 semaphore_va, NvU32 new_payload)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    // We used to skip the membar or use membar GPU for the semaphore release
    // for a few pushes, but that doesn't provide sufficient ordering guarantees
    // in some cases (e.g. ga100 with an LCE with PCEs from both HSHUBs) for the
    // semaphore writes. To be safe, just always uses a membar sys for now.
    // TODO bug 3770539: Optimize membars used by end of push semaphore releases
    (void)uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_GPU);
    (void)uvm_push_get_and_reset_flag(push, UVM_PUSH_FLAG_NEXT_MEMBAR_NONE);

    if (uvm_channel_is_ce(push->channel))
        gpu->parent->ce_hal->semaphore_release(push, semaphore_va, new_payload);
    else
        UVM_ASSERT_MSG(0, "Semaphore release on an unsupported channel.\n");
}

void uvm_channel_end_push(uvm_push_t *push)
{
    uvm_channel_t *channel = push->channel;
    uvm_channel_manager_t *channel_manager = channel->pool->manager;
    uvm_pushbuffer_t *pushbuffer = channel_manager->pushbuffer;
    uvm_gpfifo_entry_t *entry;
    NvU64 semaphore_va;
    NvU64 new_tracking_value;
    NvU32 new_payload;
    NvU32 push_size;
    NvU32 cpu_put;
    NvU32 new_cpu_put;

    uvm_channel_pool_lock(channel->pool);

    new_tracking_value = ++channel->tracking_sem.queued_value;
    new_payload = (NvU32)new_tracking_value;

    semaphore_va = uvm_channel_tracking_semaphore_get_gpu_va(channel);
    uvm_channel_semaphore_release(push, semaphore_va, new_payload);

    push_size = uvm_push_get_size(push);
    UVM_ASSERT_MSG(push_size <= UVM_MAX_PUSH_SIZE, "push size %u\n", push_size);

    cpu_put = channel->cpu_put;
    new_cpu_put = (cpu_put + 1) % channel->num_gpfifo_entries;

    entry = &channel->gpfifo_entries[cpu_put];
    entry->tracking_semaphore_value = new_tracking_value;
    entry->pushbuffer_offset = uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
    entry->pushbuffer_size = push_size;
    entry->push_info = &channel->push_infos[push->push_info_index];
    entry->type = UVM_GPFIFO_ENTRY_TYPE_NORMAL;

    UVM_ASSERT(channel->current_gpfifo_count > 0);
    --channel->current_gpfifo_count;

    if (uvm_channel_is_proxy(channel))
        proxy_channel_submit_work(push, push_size);
    else
        internal_channel_submit_work(push, push_size, new_cpu_put);

    channel->cpu_put = new_cpu_put;

    uvm_pushbuffer_end_push(pushbuffer, push, entry);

    // The moment the channel is unlocked uvm_channel_update_progress_with_max()
    // may notice the GPU work to be completed and hence all state tracking the
    // push must be updated before that. Notably uvm_pushbuffer_end_push() has
    // to be called first.
    uvm_channel_pool_unlock(channel->pool);
    unlock_push(channel);

    // This memory barrier is borrowed from CUDA, as it supposedly fixes perf
    // issues on some systems. Comment from CUDA: "fixes throughput-related
    // performance problems, e.g. bugs 626179, 593841. This may be related to
    // bug 124888, which GL works around by doing a clflush"
    wmb();

    push->push_info_index = channel->num_gpfifo_entries;
    push->channel_tracking_value = new_tracking_value;
}

// The caller must submit a normal GPFIFO entry with a semaphore release
// following the control GPFIFO, refer to uvm_channel_write_ctrl_gpfifo() for an
// example.
static void write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_value)
{
    uvm_gpfifo_entry_t *entry;
    NvU64 *gpfifo_entry;
    NvU32 cpu_put;
    NvU32 new_cpu_put;
    uvm_gpu_t *gpu = channel->pool->manager->gpu;

    uvm_channel_pool_lock(channel->pool);

    cpu_put = channel->cpu_put;
    new_cpu_put = (cpu_put + 1) % channel->num_gpfifo_entries;

    entry = &channel->gpfifo_entries[cpu_put];
    memset(entry, 0, sizeof(*entry));
    entry->type = UVM_GPFIFO_ENTRY_TYPE_CONTROL;

    // Control GPFIFO entries are followed by a semaphore_release push in UVM.
    // We add the tracking semaphore value of the next GPFIFO entry,
    // potentially the associated semaphore release push. Even if a different
    // GPFIFO entry sneaks in, the purposes of signaling that this control
    // GPFIFO entry has been processed is accomplished.
    entry->tracking_semaphore_value = channel->tracking_sem.queued_value + 1;

    UVM_ASSERT(channel->current_gpfifo_count > 1);
    --channel->current_gpfifo_count;

    gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + cpu_put;
    *gpfifo_entry = ctrl_fifo_entry_value;

    // Need to make sure all the GPFIFO entries writes complete before updating
    // GPPUT. We also don't want any reads to be moved after the GPPut write as
    // the GPU might modify the data they read as soon as the GPPut write
    // happens.
    mb();

    gpu->parent->host_hal->write_gpu_put(channel, new_cpu_put);

    channel->cpu_put = new_cpu_put;

    // The moment the channel is unlocked uvm_channel_update_progress_with_max()
    // may notice the GPU work to be completed and hence all state tracking the
    // push must be updated before that.
    uvm_channel_pool_unlock(channel->pool);
    unlock_push(channel);

    // This memory barrier is borrowed from CUDA, as it supposedly fixes perf
    // issues on some systems. Comment from CUDA: "fixes throughput-related
    // performance problems, e.g. bugs 626179, 593841. This may be related to
    // bug 124888, which GL works around by doing a clflush"
    wmb();
}

NV_STATUS uvm_channel_write_ctrl_gpfifo(uvm_channel_t *channel, NvU64 ctrl_fifo_entry_value)
{
    NV_STATUS status;
    uvm_gpu_t *gpu = channel->pool->manager->gpu;
    uvm_push_t push;

    UVM_ASSERT(!uvm_channel_is_proxy(channel));

    // We reserve two GPFIFO entries, i.e., the control GPFIFO entry and the
    // subsequent semaphore_release push. There is, however, a potential case
    // for GPFIFO control submission starvation. This may happen because a
    // GPFIFO control submission requires two available GPFIFO entries. If you
    // have a full GPFIFO ring buffer that frees one entry at a time, while
    // there is another thread consuming this recently released entry at the
    // same rate, a concurrent thread trying to reserve two entries for a GPFIFO
    // control submission may starve. We could handle this by imposing minimal
    // release entries in uvm_channel.c:uvm_channel_update_progress(). Instead,
    // we don't deal with this potential starvation case because:
    // - Control GPFIFO are rarely used.
    // - By default, we release up to 8 GPFIFO entries at a time, except if the
    //   release rate is constrained by lengthy pushbuffers -- another rare
    //   situation.
    // - It would add unnecessary complexity to channel_update_progress().
    status = uvm_channel_reserve(channel, 2);
    if (status != NV_OK)
        return status;

    write_ctrl_gpfifo(channel, ctrl_fifo_entry_value);

    status = uvm_push_begin_on_reserved_channel(channel, &push, "write_ctrl_GPFIFO");
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to begin push on channel: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
        return status;
    }

    // This is an empty push, the push's embedded semaphore_release signals that
    // the GPFIFO control entry has been processed.
    uvm_push_end(&push);

    return NV_OK;
}

NV_STATUS uvm_channel_reserve(uvm_channel_t *channel, NvU32 num_gpfifo_entries)
{
    NV_STATUS status = NV_OK;
    uvm_spin_loop_t spin;

    if (try_claim_channel(channel, num_gpfifo_entries))
        goto out;

    uvm_channel_update_progress(channel);

    uvm_spin_loop_init(&spin);
    while (!try_claim_channel(channel, num_gpfifo_entries) && status == NV_OK) {
        UVM_SPIN_LOOP(&spin);
        status = uvm_channel_check_errors(channel);
        uvm_channel_update_progress(channel);
    }

out:
    if (status == NV_OK)
        lock_push(channel);

    return status;
}

// Get the first pending GPFIFO entry, if any.
// This doesn't stop the entry from being reused.
static uvm_gpfifo_entry_t *uvm_channel_get_first_pending_entry(uvm_channel_t *channel)
{
    uvm_gpfifo_entry_t *entry = NULL;
    NvU32 pending_count = channel_update_progress_all(channel, UVM_CHANNEL_UPDATE_MODE_COMPLETED);

    if (pending_count == 0)
        return NULL;

    uvm_channel_pool_lock(channel->pool);

    if (channel->gpu_get != channel->cpu_put)
        entry = &channel->gpfifo_entries[channel->gpu_get];

    uvm_channel_pool_unlock(channel->pool);

    return entry;
}

NV_STATUS uvm_channel_get_status(uvm_channel_t *channel)
{
    uvm_gpu_t *gpu;
    NvNotification *errorNotifier;

    if (uvm_channel_is_proxy(channel))
        errorNotifier = channel->proxy.channel_info.shadowErrorNotifier;
    else
        errorNotifier = channel->channel_info.errorNotifier;

    if (errorNotifier->status == 0)
        return NV_OK;

    // In case we hit a channel error, check the ECC error notifier as well so
    // that a more precise ECC error can be returned in case there is indeed an
    // ECC error.
    //
    // Notably this might be racy depending on the ordering of the notifications,
    // but we can't always call RM to service interrupts from this context.
    gpu = uvm_channel_get_gpu(channel);
    if (gpu->ecc.enabled && *gpu->ecc.error_notifier)
        return NV_ERR_ECC_ERROR;

    return NV_ERR_RC_ERROR;
}

uvm_gpfifo_entry_t *uvm_channel_get_fatal_entry(uvm_channel_t *channel)
{
    UVM_ASSERT(uvm_channel_get_status(channel) != NV_OK);

    return uvm_channel_get_first_pending_entry(channel);
}

NV_STATUS uvm_channel_check_errors(uvm_channel_t *channel)
{
    uvm_gpfifo_entry_t *fatal_entry;
    NV_STATUS status = uvm_channel_get_status(channel);

    if (status == NV_OK)
        return NV_OK;

    UVM_ERR_PRINT("Detected a channel error, channel %s GPU %s\n",
                  channel->name,
                  uvm_gpu_name(uvm_channel_get_gpu(channel)));

    fatal_entry = uvm_channel_get_fatal_entry(channel);
    if (fatal_entry != NULL) {
        if (fatal_entry->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL) {
            uvm_push_info_t *push_info = fatal_entry->push_info;
            UVM_ERR_PRINT("Channel error likely caused by push '%s' started at %s:%d in %s()\n",
                          push_info->description,
                          push_info->filename,
                          push_info->line,
                          push_info->function);
        }
        else {
            NvU64 *gpfifo_entry;

            UVM_ASSERT(!uvm_channel_is_proxy(channel));

            gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + (fatal_entry - channel->gpfifo_entries);
            UVM_ERR_PRINT("Channel error likely caused by GPFIFO control entry, data: 0x%llx, gpu_get: %d\n",
                          *gpfifo_entry,
                          channel->gpu_get);
        }
    }

    uvm_global_set_fatal_error(status);

    return status;
}

NV_STATUS uvm_channel_manager_check_errors(uvm_channel_manager_t *channel_manager)
{
    uvm_channel_pool_t *pool;
    NV_STATUS status = uvm_global_get_status();

    if (status != NV_OK)
        return status;

    uvm_for_each_pool(pool, channel_manager) {
        uvm_channel_t *channel;

        uvm_for_each_channel_in_pool(channel, pool) {
            status = uvm_channel_check_errors(channel);
            if (status != NV_OK)
                return status;
        }
    }

    return status;
}

bool uvm_channel_is_value_completed(uvm_channel_t *channel, NvU64 value)
{
    return uvm_gpu_tracking_semaphore_is_value_completed(&channel->tracking_sem, value);
}

NvU64 uvm_channel_update_completed_value(uvm_channel_t *channel)
{
    return uvm_gpu_tracking_semaphore_update_completed_value(&channel->tracking_sem);
}

static void channel_destroy(uvm_channel_pool_t *pool, uvm_channel_t *channel)
{
    UVM_ASSERT(pool->num_channels > 0);

    if (channel->tracking_sem.queued_value > 0) {
        // The channel should have been idled before being destroyed, unless an
        // error was triggered. We need to check both error cases (global and
        // channel) to handle the UVM_TEST_CHANNEL_SANITY unit test.
        if (uvm_global_get_status() == NV_OK && uvm_channel_get_status(channel) == NV_OK)
            UVM_ASSERT(uvm_gpu_tracking_semaphore_is_completed(&channel->tracking_sem));

        // Remove all remaining GPFIFOs from their pushbuffer chunk, since the
        // pushbuffer has a longer lifetime.
        channel_update_progress_all(channel, UVM_CHANNEL_UPDATE_MODE_FORCE_ALL);
    }

    proc_remove(channel->procfs.pushes);
    proc_remove(channel->procfs.info);
    proc_remove(channel->procfs.dir);

    uvm_kvfree(channel->push_acquire_infos);
    uvm_kvfree(channel->push_infos);

    uvm_kvfree(channel->gpfifo_entries);

    if (uvm_channel_is_proxy(channel))
        uvm_rm_locked_call_void(nvUvmInterfacePagingChannelDestroy(channel->proxy.handle));
    else
        uvm_rm_locked_call_void(nvUvmInterfaceChannelDestroy(channel->handle));

    uvm_gpu_tracking_semaphore_free(&channel->tracking_sem);

    UVM_ASSERT(list_empty(&channel->tools.channel_list_node));
    UVM_ASSERT(channel->tools.pending_event_count == 0);

    pool->num_channels--;
}

static NV_STATUS internal_channel_create(uvm_channel_t *channel, unsigned engine_index)
{
    NV_STATUS status;
    UvmGpuChannelAllocParams channel_alloc_params;
    UvmGpuChannelInfo *channel_info = &channel->channel_info;
    uvm_channel_manager_t *manager = channel->pool->manager;
    uvm_gpu_t *gpu = manager->gpu;

    if (uvm_channel_is_ce(channel)) {
        UVM_ASSERT(channel->pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
    }

    memset(&channel_alloc_params, 0, sizeof(channel_alloc_params));
    channel_alloc_params.numGpFifoEntries = manager->conf.num_gpfifo_entries;
    channel_alloc_params.gpFifoLoc = manager->conf.gpfifo_loc;
    channel_alloc_params.gpPutLoc = manager->conf.gpput_loc;
    channel_alloc_params.engineIndex = engine_index;

    if (uvm_channel_is_ce(channel))
        channel_alloc_params.engineType = UVM_GPU_CHANNEL_ENGINE_TYPE_CE;

    status = uvm_rm_locked_call(nvUvmInterfaceChannelAllocate(gpu->rm_address_space,
                                                              &channel_alloc_params,
                                                              &channel->handle,
                                                              channel_info));
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfaceChannelAllocate() failed: %s, GPU %s\n",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu));
        return status;
    }

    snprintf(channel->name,
             sizeof(channel->name),
             "ID %u:%u (0x%x:0x%x) %s %u",
             channel_info->hwRunlistId,
             channel_info->hwChannelId,
             channel_info->hwRunlistId,
             channel_info->hwChannelId,
             "CE",
             engine_index);

    return NV_OK;
}

static NV_STATUS proxy_channel_create(uvm_channel_t *channel, unsigned ce_index)
{
    NV_STATUS status;
    unsigned proxy_index;
    UvmGpuPagingChannelAllocParams channel_alloc_params;
    uvm_channel_manager_t *manager = channel->pool->manager;
    uvm_gpu_t *gpu = manager->gpu;

    UVM_ASSERT(uvm_channel_is_proxy(channel));

    memset(&channel_alloc_params, 0, sizeof(channel_alloc_params));
    channel_alloc_params.engineIndex = ce_index;

    status = uvm_rm_locked_call(nvUvmInterfacePagingChannelAllocate(uvm_gpu_device_handle(gpu),
                                                                    &channel_alloc_params,
                                                                    &channel->proxy.handle,
                                                                    &channel->proxy.channel_info));
    if (status != NV_OK) {
        UVM_ERR_PRINT("nvUvmInterfacePagingChannelAllocate() failed: %s, GPU %s\n",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu));
        return status;
    }

    proxy_index = uvm_channel_index_in_pool(channel);
    snprintf(channel->name, sizeof(channel->name), "Proxy %u CE %u", proxy_index, ce_index);

    return NV_OK;
}

static NV_STATUS channel_create(uvm_channel_pool_t *pool, uvm_channel_t *channel)
{
    NV_STATUS status;
    uvm_channel_manager_t *manager = pool->manager;
    uvm_gpu_t *gpu = manager->gpu;
    unsigned int i;

    UVM_ASSERT(channel != NULL);

    channel->pool = pool;
    pool->num_channels++;
    INIT_LIST_HEAD(&channel->available_push_infos);
    channel->tools.pending_event_count = 0;
    INIT_LIST_HEAD(&channel->tools.channel_list_node);

    status = uvm_gpu_tracking_semaphore_alloc(gpu->semaphore_pool, &channel->tracking_sem);
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_gpu_tracking_semaphore_alloc() failed: %s, GPU %s\n",
                      nvstatusToString(status),
                      uvm_gpu_name(gpu));
        goto error;
    }

     if (uvm_channel_is_proxy(channel))
         status = proxy_channel_create(channel, pool->engine_index);
     else
         status = internal_channel_create(channel, pool->engine_index);

     if (status != NV_OK)
         goto error;

    channel->num_gpfifo_entries = manager->conf.num_gpfifo_entries;
    channel->gpfifo_entries = uvm_kvmalloc_zero(sizeof(*channel->gpfifo_entries) * channel->num_gpfifo_entries);
    if (channel->gpfifo_entries == NULL) {
        status = NV_ERR_NO_MEMORY;
        goto error;
    }

    channel->push_infos = uvm_kvmalloc_zero(sizeof(*channel->push_infos) * channel->num_gpfifo_entries);
    if (channel->push_infos == NULL) {
        status = NV_ERR_NO_MEMORY;
        goto error;
    }

    if (uvm_push_info_is_tracking_acquires()) {
        channel->push_acquire_infos = uvm_kvmalloc_zero(sizeof(*channel->push_acquire_infos) * channel->num_gpfifo_entries);
        if (channel->push_acquire_infos == NULL) {
            status = NV_ERR_NO_MEMORY;
            goto error;
        }
    }

    for (i = 0; i < channel->num_gpfifo_entries; i++)
        list_add_tail(&channel->push_infos[i].available_list_node, &channel->available_push_infos);

    status = channel_create_procfs(channel);
    if (status != NV_OK)
        goto error;

    return NV_OK;

error:
    channel_destroy(pool, channel);

    return status;
}

NvU64 uvm_channel_tracking_semaphore_get_gpu_va_in_channel(uvm_channel_t *semaphore_channel,
                                                           uvm_channel_t *access_channel)
{
    uvm_gpu_semaphore_t *semaphore = &semaphore_channel->tracking_sem.semaphore;
    uvm_gpu_t *gpu = uvm_channel_get_gpu(access_channel);

    return uvm_gpu_semaphore_get_gpu_va(semaphore, gpu, uvm_channel_is_proxy(access_channel));
}

static NV_STATUS init_channel(uvm_channel_t *channel)
{
    uvm_push_t push;
    uvm_gpu_t *gpu = uvm_channel_get_gpu(channel);
    NV_STATUS status;
    NvU32 num_entries = 1;

    if (uvm_gpu_has_pushbuffer_segments(gpu))
        num_entries++;

    status = uvm_channel_reserve(channel, num_entries);
    if (status != NV_OK)
        return status;

    if (uvm_gpu_has_pushbuffer_segments(gpu)) {
        NvU64 gpfifo_entry;
        uvm_pushbuffer_t *pushbuffer = channel->pool->manager->pushbuffer;
        gpu->parent->host_hal->set_gpfifo_pushbuffer_segment_base(&gpfifo_entry,
                                                                  uvm_pushbuffer_get_gpu_va_base(pushbuffer));
        write_ctrl_gpfifo(channel, gpfifo_entry);
    }

    status = uvm_push_begin_on_reserved_channel(channel, &push, "Init channel");
    if (status != NV_OK) {
        UVM_ERR_PRINT("Failed to begin push on channel: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
        return status;
    }

    if (uvm_channel_is_ce(channel))
        gpu->parent->ce_hal->init(&push);

    gpu->parent->host_hal->init(&push);

    status = uvm_push_end_and_wait(&push);
    if (status != NV_OK)
        UVM_ERR_PRINT("Channel init failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));

    return status;
}

static bool channel_manager_uses_proxy_pool(uvm_channel_manager_t *manager)
{
    return uvm_gpu_is_virt_mode_sriov_heavy(manager->gpu);
}

// Number of channels to create in a pool of the given type.
//
// TODO: Bug 1764958: Tweak this function after benchmarking real workloads.
static unsigned channel_pool_type_num_channels(uvm_channel_pool_type_t pool_type)
{
    // TODO: Bug 3387454: The vGPU plugin implementation supports a single
    // proxy channel per GPU
    if (pool_type == UVM_CHANNEL_POOL_TYPE_CE_PROXY)
        return 1;

    return 2;
}

static void channel_pool_destroy(uvm_channel_pool_t *pool)
{
    UVM_ASSERT(pool->manager->num_channel_pools > 0);

    while (pool->num_channels > 0)
        channel_destroy(pool, pool->channels + pool->num_channels - 1);

    uvm_kvfree(pool->channels);
    pool->manager->num_channel_pools--;
}

static NV_STATUS channel_pool_add(uvm_channel_manager_t *channel_manager,
                                  uvm_channel_pool_type_t pool_type,
                                  unsigned engine_index,
                                  uvm_channel_pool_t **pool_out)
{
    NV_STATUS status;
    unsigned i;
    unsigned num_channels;
    uvm_channel_pool_t *pool;

    pool = channel_manager->channel_pools + channel_manager->num_channel_pools;
    pool->manager = channel_manager;
    pool->engine_index = engine_index;
    pool->pool_type = pool_type;

    channel_pool_lock_init(pool);

    num_channels = channel_pool_type_num_channels(pool_type);

    pool->channels = uvm_kvmalloc_zero(sizeof(*pool->channels) * num_channels);
    if (!pool->channels)
        return NV_ERR_NO_MEMORY;

    channel_manager->num_channel_pools++;

    for (i = 0; i < num_channels; i++) {
        uvm_channel_t *channel = pool->channels + i;

        status = channel_create(pool, channel);
        if (status != NV_OK)
            goto error;

        status = init_channel(channel);
        if (status != NV_OK)
            goto error;
    }

    *pool_out = pool;
    return NV_OK;

 error:
    channel_pool_destroy(pool);
    return status;
}

static bool ce_usable_for_channel_type(uvm_channel_type_t type, const UvmGpuCopyEngineCaps *cap)
{
    if (!cap->supported || cap->grce)
        return false;

    switch (type) {
        case UVM_CHANNEL_TYPE_CPU_TO_GPU:
        case UVM_CHANNEL_TYPE_GPU_TO_CPU:
            return cap->sysmem;
        case UVM_CHANNEL_TYPE_GPU_INTERNAL:
        case UVM_CHANNEL_TYPE_MEMOPS:
            return true;
        case UVM_CHANNEL_TYPE_GPU_TO_GPU:
            return cap->p2p;
        default:
            UVM_ASSERT_MSG(false, "Unexpected channel type 0x%x\n", type);
            return false;
    }
}

static unsigned ce_usage_count(NvU32 ce, const unsigned *preferred_ce)
{
    unsigned i;
    unsigned count = 0;

    UVM_ASSERT(ce < UVM_COPY_ENGINE_COUNT_MAX);

    for (i = 0; i < UVM_CHANNEL_TYPE_CE_COUNT; i++) {
        if (ce == preferred_ce[i])
            count++;
    }

    return count;
}

// Returns negative if the first CE should be considered better than the second
static int compare_ce_for_channel_type(const UvmGpuCopyEngineCaps *ce_caps,
                                       uvm_channel_type_t type,
                                       NvU32 ce_index0,
                                       NvU32 ce_index1,
                                       NvU32 *preferred_ce)
{
    unsigned ce0_usage, ce1_usage;
    const UvmGpuCopyEngineCaps *cap0 = ce_caps + ce_index0;
    const UvmGpuCopyEngineCaps *cap1 = ce_caps + ce_index1;

    UVM_ASSERT(ce_usable_for_channel_type(type, cap0));
    UVM_ASSERT(ce_usable_for_channel_type(type, cap1));
    UVM_ASSERT(ce_index0 < UVM_COPY_ENGINE_COUNT_MAX);
    UVM_ASSERT(ce_index1 < UVM_COPY_ENGINE_COUNT_MAX);
    UVM_ASSERT(ce_index0 != ce_index1);

    switch (type) {
        case UVM_CHANNEL_TYPE_CPU_TO_GPU:
            // For CPU to GPU fast sysmem read is the most important
            if (cap0->sysmemRead != cap1->sysmemRead)
                return cap1->sysmemRead - cap0->sysmemRead;

            // Prefer not to take up the CEs for nvlink P2P
            if (cap0->nvlinkP2p != cap1->nvlinkP2p)
                return cap0->nvlinkP2p - cap1->nvlinkP2p;

            break;

        case UVM_CHANNEL_TYPE_GPU_TO_CPU:
            // For GPU to CPU fast sysmem write is the most important
            if (cap0->sysmemWrite != cap1->sysmemWrite)
                return cap1->sysmemWrite - cap0->sysmemWrite;

            // Prefer not to take up the CEs for nvlink P2P
            if (cap0->nvlinkP2p != cap1->nvlinkP2p)
                return cap0->nvlinkP2p - cap1->nvlinkP2p;

            break;

        case UVM_CHANNEL_TYPE_GPU_TO_GPU:
            // Prefer the LCE with the most PCEs
            {
                int pce_diff = (int)hweight32(cap1->cePceMask) - (int)hweight32(cap0->cePceMask);

                if (pce_diff != 0)
                    return pce_diff;
            }

            break;

        case UVM_CHANNEL_TYPE_GPU_INTERNAL:
            // We want the max possible bandwidth for CEs used for GPU_INTERNAL,
            // for now assume that the number of PCEs is a good measure.
            // TODO: Bug 1735254: Add a direct CE query for local FB bandwidth
            {
                int pce_diff = (int)hweight32(cap1->cePceMask) - (int)hweight32(cap0->cePceMask);

                if (pce_diff != 0)
                    return pce_diff;
            }

            // Leave P2P CEs to the GPU_TO_GPU channel type, when possible
            if (cap0->nvlinkP2p != cap1->nvlinkP2p)
                return cap0->nvlinkP2p - cap1->nvlinkP2p;

            break;

        case UVM_CHANNEL_TYPE_MEMOPS:
            // For MEMOPS we mostly care about latency which should be better
            // with less used CEs (although we only know about our own usage and
            // not system-wide) so just break out to get the default ordering
            // which prioritizes usage count.
            break;

        default:
            UVM_ASSERT_MSG(false, "Unexpected channel type 0x%x\n", type);
            return 0;
    }

    // By default, prefer less used CEs (within the UVM driver at least)
    ce0_usage = ce_usage_count(ce_index0, preferred_ce);
    ce1_usage = ce_usage_count(ce_index1, preferred_ce);

    if (ce0_usage != ce1_usage)
        return ce0_usage - ce1_usage;

    // And CEs that don't share PCEs
    if (cap0->shared != cap1->shared)
        return cap0->shared - cap1->shared;

    // Last resort, just order by index
    return ce_index0 - ce_index1;
}

// Identify usable CEs, and select the preferred CE for a given channel type.
static NV_STATUS pick_ce_for_channel_type(uvm_channel_manager_t *manager,
                                          const UvmGpuCopyEngineCaps *ce_caps,
                                          uvm_channel_type_t type,
                                          unsigned *preferred_ce)
{
    NvU32 i;
    NvU32 best_ce = UVM_COPY_ENGINE_COUNT_MAX;

    UVM_ASSERT(type < UVM_CHANNEL_TYPE_CE_COUNT);

    for (i = 0; i < UVM_COPY_ENGINE_COUNT_MAX; ++i) {
        const UvmGpuCopyEngineCaps *cap = ce_caps + i;

        if (!ce_usable_for_channel_type(type, cap))
            continue;

        __set_bit(i, manager->ce_mask);

        if (best_ce == UVM_COPY_ENGINE_COUNT_MAX) {
            best_ce = i;
            continue;
        }

        if (compare_ce_for_channel_type(ce_caps, type, i, best_ce, preferred_ce) < 0)
            best_ce = i;
    }

    if (best_ce == UVM_COPY_ENGINE_COUNT_MAX) {
        UVM_ERR_PRINT("Failed to find a suitable CE for channel type %s\n", uvm_channel_type_to_string(type));
        return NV_ERR_NOT_SUPPORTED;
    }

    preferred_ce[type] = best_ce;
    return NV_OK;
}

static NV_STATUS channel_manager_pick_copy_engines(uvm_channel_manager_t *manager, unsigned *preferred_ce)
{
    NV_STATUS status;
    unsigned i;
    UvmGpuCopyEnginesCaps ces_caps;
    uvm_channel_type_t types[] = {UVM_CHANNEL_TYPE_CPU_TO_GPU,
                                  UVM_CHANNEL_TYPE_GPU_TO_CPU,
                                  UVM_CHANNEL_TYPE_GPU_INTERNAL,
                                  UVM_CHANNEL_TYPE_GPU_TO_GPU,
                                  UVM_CHANNEL_TYPE_MEMOPS};

    memset(&ces_caps, 0, sizeof(ces_caps));
    status = uvm_rm_locked_call(nvUvmInterfaceQueryCopyEnginesCaps(uvm_gpu_device_handle(manager->gpu), &ces_caps));
    if (status != NV_OK)
        return status;

   // The order of picking CEs for each type matters as it's affected by the
   // usage count of each CE and it increases every time a CE is selected.
   // MEMOPS has the least priority as it only cares about low usage of the
   // CE to improve latency
    for (i = 0; i < ARRAY_SIZE(types); ++i) {
        status = pick_ce_for_channel_type(manager, ces_caps.copyEngineCaps, types[i], preferred_ce);
        if (status != NV_OK)
            return status;
    }

    return NV_OK;
}

// Return the pool corresponding to the given CE index
//
// This function cannot be used to access the proxy pool in SR-IOV heavy.
static uvm_channel_pool_t *channel_manager_ce_pool(uvm_channel_manager_t *manager, NvU32 ce)
{
    uvm_channel_pool_t *pool;

    UVM_ASSERT(test_bit(ce, manager->ce_mask));

    // The index of the pool associated with 'ce' is the number of usable CEs
    // in [0, ce)
    pool = manager->channel_pools + bitmap_weight(manager->ce_mask, ce);

    UVM_ASSERT(pool->pool_type == UVM_CHANNEL_POOL_TYPE_CE);
    UVM_ASSERT(pool->engine_index == ce);

    return pool;
}

void uvm_channel_manager_set_p2p_ce(uvm_channel_manager_t *manager, uvm_gpu_t *peer, NvU32 optimal_ce)
{
    const NvU32 peer_gpu_index = uvm_id_gpu_index(peer->id);

    UVM_ASSERT(manager->gpu != peer);
    UVM_ASSERT(optimal_ce < UVM_COPY_ENGINE_COUNT_MAX);

    manager->pool_to_use.gpu_to_gpu[peer_gpu_index] = channel_manager_ce_pool(manager, optimal_ce);
}

static bool is_string_valid_location(const char *loc)
{
    return strcmp(uvm_channel_gpfifo_loc, "sys") == 0 ||
           strcmp(uvm_channel_gpfifo_loc, "vid") == 0 ||
           strcmp(uvm_channel_gpfifo_loc, "auto") == 0;
}

static UVM_BUFFER_LOCATION string_to_buffer_location(const char *loc)
{
    UVM_ASSERT(is_string_valid_location(loc));

    if (strcmp(loc, "sys") == 0)
        return UVM_BUFFER_LOCATION_SYS;
    else if (strcmp(loc, "vid") == 0)
        return UVM_BUFFER_LOCATION_VID;
    else
        return UVM_BUFFER_LOCATION_DEFAULT;
}

static const char *buffer_location_to_string(UVM_BUFFER_LOCATION loc)
{
    if (loc == UVM_BUFFER_LOCATION_SYS)
        return "sys";
    else if (loc == UVM_BUFFER_LOCATION_VID)
        return "vid";
    else if (loc == UVM_BUFFER_LOCATION_DEFAULT)
        return "auto";

    UVM_ASSERT_MSG(false, "Invalid buffer locationvalue %d\n", loc);
    return NULL;
}

static void init_channel_manager_conf(uvm_channel_manager_t *manager)
{
    const char *gpfifo_loc_value;
    const char *gpput_loc_value;
    const char *pushbuffer_loc_value;

    uvm_gpu_t *gpu = manager->gpu;

    // 1- Number of GPFIFO entries
    manager->conf.num_gpfifo_entries = uvm_channel_num_gpfifo_entries;

    if (uvm_channel_num_gpfifo_entries < UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MIN)
        manager->conf.num_gpfifo_entries = UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MIN;
    else if (uvm_channel_num_gpfifo_entries > UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX)
        manager->conf.num_gpfifo_entries = UVM_CHANNEL_NUM_GPFIFO_ENTRIES_MAX;

    if (!is_power_of_2(manager->conf.num_gpfifo_entries))
        manager->conf.num_gpfifo_entries = UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT;

    if (manager->conf.num_gpfifo_entries != uvm_channel_num_gpfifo_entries) {
        pr_info("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
                uvm_channel_num_gpfifo_entries,
                manager->conf.num_gpfifo_entries);
    }

    // 2- Allocation locations

    // Override if the GPU doesn't have memory
    if (gpu->mem_info.size == 0) {
        manager->conf.pushbuffer_loc = UVM_BUFFER_LOCATION_SYS;
        manager->conf.gpfifo_loc     = UVM_BUFFER_LOCATION_SYS;
        manager->conf.gpput_loc      = UVM_BUFFER_LOCATION_SYS;
        return;
    }

    manager->conf.pushbuffer_loc = UVM_BUFFER_LOCATION_SYS;

    pushbuffer_loc_value = uvm_channel_pushbuffer_loc;
    if (!is_string_valid_location(pushbuffer_loc_value)) {
        pushbuffer_loc_value = UVM_CHANNEL_PUSHBUFFER_LOC_DEFAULT;
        pr_info("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
                uvm_channel_pushbuffer_loc,
                pushbuffer_loc_value);
    }

    // Override the default value if requested by the user
    if (strcmp(pushbuffer_loc_value, "vid") == 0) {
        // aarch64 requires memset_io/memcpy_io instead of memset/memcpy for
        // mapped GPU memory. The existing push paths only use memset/memcpy,
        // so force the location to sys for now.
        // TODO: Bug 2904133: Remove the following "if" after the bug is fixed.
        if (NVCPU_IS_AARCH64) {
            pr_info("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
                    pushbuffer_loc_value);
            manager->conf.pushbuffer_loc = UVM_BUFFER_LOCATION_SYS;
        }
        else {
            manager->conf.pushbuffer_loc = UVM_BUFFER_LOCATION_VID;
        }
    }

    // 3- GPFIFO/GPPut location
    // Only support the knobs for GPFIFO/GPPut on Volta+
    if (!gpu->parent->gpfifo_in_vidmem_supported) {
        if (manager->conf.gpput_loc == UVM_BUFFER_LOCATION_SYS) {
            pr_info("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s instead\n",
                    buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
        }

        manager->conf.gpfifo_loc = UVM_BUFFER_LOCATION_DEFAULT;
        manager->conf.gpput_loc = UVM_BUFFER_LOCATION_DEFAULT;

        return;
    }

    gpfifo_loc_value = uvm_channel_gpfifo_loc;
    if (!is_string_valid_location(gpfifo_loc_value)) {
        gpfifo_loc_value = UVM_CHANNEL_GPFIFO_LOC_DEFAULT;
        pr_info("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
                uvm_channel_gpfifo_loc,
                gpfifo_loc_value);
    }

    gpput_loc_value = uvm_channel_gpput_loc;
    if (!is_string_valid_location(gpput_loc_value)) {
        gpput_loc_value = UVM_CHANNEL_GPPUT_LOC_DEFAULT;
        pr_info("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
                uvm_channel_gpput_loc,
                gpput_loc_value);
    }

    // By default we place GPFIFO and GPPUT on vidmem as it potentially has
    // lower latency.
    manager->conf.gpfifo_loc = UVM_BUFFER_LOCATION_VID;
    manager->conf.gpput_loc = UVM_BUFFER_LOCATION_VID;

    // TODO: Bug 1766129: However, this will likely be different on P9 systems.
    // Leaving GPFIFO on sysmem for now. GPPut on sysmem is not supported in
    // production, so we keep it on vidmem, too.
    if (gpu->parent->sysmem_link >= UVM_GPU_LINK_NVLINK_2)
        manager->conf.gpfifo_loc = UVM_BUFFER_LOCATION_SYS;

    // Override defaults
    if (string_to_buffer_location(gpfifo_loc_value) != UVM_BUFFER_LOCATION_DEFAULT)
        manager->conf.gpfifo_loc = string_to_buffer_location(gpfifo_loc_value);

    if (string_to_buffer_location(gpput_loc_value) != UVM_BUFFER_LOCATION_DEFAULT)
        manager->conf.gpput_loc = string_to_buffer_location(gpput_loc_value);
}

// A pool is created for each usable CE, even if it has not been selected as the
// preferred CE for any type, because as more information is discovered (for
// example, a pair of peer GPUs is added) we may start using the previously idle
// channels.
static NV_STATUS channel_manager_create_pools(uvm_channel_manager_t *manager)
{
    NV_STATUS status;
    unsigned ce, type;
    unsigned num_channel_pools;
    unsigned preferred_ce[UVM_CHANNEL_TYPE_CE_COUNT];
    uvm_channel_pool_t *pool = NULL;

    for (type = 0; type < ARRAY_SIZE(preferred_ce); type++)
        preferred_ce[type] = UVM_COPY_ENGINE_COUNT_MAX;

    status = channel_manager_pick_copy_engines(manager, preferred_ce);
    if (status != NV_OK)
        return status;

    // CE channel pools
    num_channel_pools = bitmap_weight(manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX);

    // CE proxy channel pool.
    if (uvm_gpu_uses_proxy_channel_pool(manager->gpu))
        num_channel_pools++;

    manager->channel_pools = uvm_kvmalloc_zero(sizeof(*manager->channel_pools) * num_channel_pools);
    if (!manager->channel_pools)
        return NV_ERR_NO_MEMORY;

    for_each_set_bit(ce, manager->ce_mask, UVM_COPY_ENGINE_COUNT_MAX) {
        status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_CE, ce, &pool);
        if (status != NV_OK)
            return status;
    }

    // Assign channel types to pools
    for (type = 0; type < ARRAY_SIZE(preferred_ce); type++) {
        unsigned ce = preferred_ce[type];

        UVM_ASSERT(test_bit(ce, manager->ce_mask));

        manager->pool_to_use.default_for_type[type] = channel_manager_ce_pool(manager, ce);
    }

    // In SR-IOV heavy, add an additional, single-channel, pool that is
    // dedicated to the MEMOPS type.
    if (uvm_gpu_uses_proxy_channel_pool(manager->gpu)) {
        uvm_channel_type_t channel_type = uvm_channel_proxy_channel_type();
        status = channel_pool_add(manager, UVM_CHANNEL_POOL_TYPE_CE_PROXY, preferred_ce[channel_type], &pool);
        if (status != NV_OK)
            return status;

        manager->pool_to_use.default_for_type[channel_type] = pool;
    }

    return NV_OK;
}

NV_STATUS uvm_channel_manager_create(uvm_gpu_t *gpu, uvm_channel_manager_t **channel_manager_out)
{
    NV_STATUS status = NV_OK;
    uvm_channel_manager_t *channel_manager;

    channel_manager = uvm_kvmalloc_zero(sizeof(*channel_manager));
    if (!channel_manager)
        return NV_ERR_NO_MEMORY;

    channel_manager->gpu = gpu;
    init_channel_manager_conf(channel_manager);
    status = uvm_pushbuffer_create(channel_manager, &channel_manager->pushbuffer);
    if (status != NV_OK)
        goto error;

    status = manager_create_procfs_dirs(channel_manager);
    if (status != NV_OK)
        goto error;

    status = channel_manager_create_pools(channel_manager);
    if (status != NV_OK)
        goto error;

    status = manager_create_procfs(channel_manager);
    if (status != NV_OK)
        goto error;

    *channel_manager_out = channel_manager;

    return status;

error:
    uvm_channel_manager_destroy(channel_manager);
    return status;
}

static void channel_manager_destroy_pools(uvm_channel_manager_t *manager)
{
    while (manager->num_channel_pools > 0)
        channel_pool_destroy(manager->channel_pools + manager->num_channel_pools - 1);

    uvm_kvfree(manager->channel_pools);
}

void uvm_channel_manager_destroy(uvm_channel_manager_t *channel_manager)
{
    if (channel_manager == NULL)
        return;

    proc_remove(channel_manager->procfs.pending_pushes);

    channel_manager_destroy_pools(channel_manager);

    proc_remove(channel_manager->procfs.channels_dir);

    uvm_pushbuffer_destroy(channel_manager->pushbuffer);

    uvm_kvfree(channel_manager);
}

bool uvm_channel_is_privileged(uvm_channel_t *channel)
{
    if (uvm_gpu_is_virt_mode_sriov_heavy(uvm_channel_get_gpu(channel)))
        return uvm_channel_is_proxy(channel);

    return true;
}

// Return the first channel pool of the given type(s) starting at begin_pool
// (included).
//
// The pool type mask must be a non empty mask of uvm_channel_pool_type_t
// values.
static uvm_channel_pool_t *channel_pool_first_from(uvm_channel_manager_t *manager,
                                                   uvm_channel_pool_t *begin_pool,
                                                   NvU32 pool_type_mask)
{
    uvm_channel_pool_t *curr_pool, *end_pool;

    UVM_ASSERT(manager->channel_pools != NULL);
    UVM_ASSERT(begin_pool != NULL);
    UVM_ASSERT(begin_pool >= manager->channel_pools);
    UVM_ASSERT(pool_type_mask > 0);
    UVM_ASSERT(pool_type_mask <= UVM_CHANNEL_POOL_TYPE_MASK);

    end_pool = manager->channel_pools + manager->num_channel_pools;
    UVM_ASSERT(begin_pool <= end_pool);

    for (curr_pool = begin_pool; curr_pool != end_pool; curr_pool++) {
        if (curr_pool->pool_type & pool_type_mask)
            return curr_pool;
    }

    return NULL;
}

uvm_channel_pool_t *uvm_channel_pool_first(uvm_channel_manager_t *manager, NvU32 pool_type_mask)
{
    return channel_pool_first_from(manager, manager->channel_pools, pool_type_mask);
}

uvm_channel_pool_t *uvm_channel_pool_next(uvm_channel_manager_t *manager,
                                          uvm_channel_pool_t *pool,
                                          NvU32 pool_type_mask)
{
    return channel_pool_first_from(manager, pool + 1, pool_type_mask);
}

uvm_channel_t *uvm_channel_any_of_type(uvm_channel_manager_t *manager, NvU32 pool_type_mask)
{
    uvm_channel_pool_t *pool = uvm_channel_pool_first(manager, pool_type_mask);

    if (pool == NULL)
        return NULL;

    UVM_ASSERT(pool->channels);

    return pool->channels;
}

const char *uvm_channel_type_to_string(uvm_channel_type_t channel_type)
{
    BUILD_BUG_ON(UVM_CHANNEL_TYPE_COUNT != 5);

    switch (channel_type) {
        UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_CPU_TO_GPU);
        UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_GPU_TO_CPU);
        UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_GPU_INTERNAL);
        UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_MEMOPS);
        UVM_ENUM_STRING_CASE(UVM_CHANNEL_TYPE_GPU_TO_GPU);
        UVM_ENUM_STRING_DEFAULT();
    }
}

const char *uvm_channel_pool_type_to_string(uvm_channel_pool_type_t channel_pool_type)
{
    BUILD_BUG_ON(UVM_CHANNEL_POOL_TYPE_COUNT != 2);

    switch (channel_pool_type) {
        UVM_ENUM_STRING_CASE(UVM_CHANNEL_POOL_TYPE_CE);
        UVM_ENUM_STRING_CASE(UVM_CHANNEL_POOL_TYPE_CE_PROXY);
        UVM_ENUM_STRING_DEFAULT();
    }
}

static void uvm_channel_print_info(uvm_channel_t *channel, struct seq_file *s)
{
    uvm_channel_manager_t *manager = channel->pool->manager;
    UVM_SEQ_OR_DBG_PRINT(s, "Channel %s\n", channel->name);

    uvm_channel_pool_lock(channel->pool);

    UVM_SEQ_OR_DBG_PRINT(s, "completed          %llu\n", uvm_channel_update_completed_value(channel));
    UVM_SEQ_OR_DBG_PRINT(s, "queued             %llu\n", channel->tracking_sem.queued_value);
    UVM_SEQ_OR_DBG_PRINT(s, "GPFIFO count       %u\n", channel->num_gpfifo_entries);
    UVM_SEQ_OR_DBG_PRINT(s, "GPFIFO location    %s\n", buffer_location_to_string(manager->conf.gpfifo_loc));
    UVM_SEQ_OR_DBG_PRINT(s, "GPPUT location     %s\n", buffer_location_to_string(manager->conf.gpput_loc));
    UVM_SEQ_OR_DBG_PRINT(s, "get                %u\n", channel->gpu_get);
    UVM_SEQ_OR_DBG_PRINT(s, "put                %u\n", channel->cpu_put);
    UVM_SEQ_OR_DBG_PRINT(s, "Semaphore GPU VA   0x%llx\n", uvm_channel_tracking_semaphore_get_gpu_va(channel));
    UVM_SEQ_OR_DBG_PRINT(s, "Semaphore CPU VA   0x%llx\n", (NvU64)(uintptr_t)channel->tracking_sem.semaphore.payload);

    uvm_channel_pool_unlock(channel->pool);
}

static void channel_print_push_acquires(uvm_push_acquire_info_t *push_acquire_info, struct seq_file *seq)
{
    NvU32 i;
    NvU32 valid_entries;

    UVM_ASSERT(uvm_push_info_is_tracking_acquires());
    UVM_ASSERT(push_acquire_info);

    if (push_acquire_info->num_values == 0)
        return;

    valid_entries = min(push_acquire_info->num_values, (NvU32)UVM_PUSH_ACQUIRE_INFO_MAX_ENTRIES);

    for (i = 0; i < valid_entries; ++i) {
        bool is_proxy = push_acquire_info->values[i].is_proxy;

        UVM_SEQ_OR_DBG_PRINT(seq,
                             "%s (gpu %u, channel %d:%u, value %llu)",
                             i == 0? " acquiring values" : "",
                             uvm_id_value(push_acquire_info->values[i].gpu_id),
                             is_proxy? -1 : push_acquire_info->values[i].runlist_id,
                             is_proxy? push_acquire_info->values[i].proxy.pool_index :
                                       push_acquire_info->values[i].channel_id,
                             push_acquire_info->values[i].value);
    }

    if (push_acquire_info->num_values > valid_entries)
        UVM_SEQ_OR_DBG_PRINT(seq, " (missing %u entries)", push_acquire_info->num_values - valid_entries);

    UVM_SEQ_OR_DBG_PRINT(seq, "\n");
}

// Print all pending pushes and up to finished_pushes_count completed if their
// GPFIFO entries haven't been reused yet.
static void channel_print_pushes(uvm_channel_t *channel, NvU32 finished_pushes_count, struct seq_file *seq)
{
    NvU32 gpu_get;
    NvU32 cpu_put;

    NvU64 completed_value = uvm_channel_update_completed_value(channel);

    uvm_channel_pool_lock(channel->pool);

    cpu_put = channel->cpu_put;

    for (gpu_get = channel->gpu_get; gpu_get != cpu_put; gpu_get = (gpu_get + 1) % channel->num_gpfifo_entries) {
        uvm_gpfifo_entry_t *entry = &channel->gpfifo_entries[gpu_get];
        uvm_push_info_t *push_info = entry->push_info;
        uvm_push_acquire_info_t *push_acquire_info = NULL;

        if (entry->tracking_semaphore_value + finished_pushes_count <= completed_value)
            continue;

        if (entry->type == UVM_GPFIFO_ENTRY_TYPE_CONTROL) {
            NvU64 *gpfifo_entry;

            UVM_ASSERT(!uvm_channel_is_proxy(channel));

            gpfifo_entry = (NvU64*)channel->channel_info.gpFifoEntries + gpu_get;
            UVM_SEQ_OR_DBG_PRINT(seq,
                                 " control GPFIFO entry - data: 0x%llx, gpu_get: %d\n",
                                 *gpfifo_entry,
                                 gpu_get);
        }
        else {

            // Obtain the value acquire tracking information from the push_info
            // index
            if (uvm_push_info_is_tracking_acquires()) {
                NvU32 push_info_index = push_info - channel->push_infos;
                UVM_ASSERT(push_info_index < channel->num_gpfifo_entries);

                push_acquire_info = &channel->push_acquire_infos[push_info_index];
            }

            UVM_SEQ_OR_DBG_PRINT(seq,
                                 " %s push '%s' started at %s:%d in %s() releasing value %llu%s",
                                 entry->tracking_semaphore_value <= completed_value ? "finished" : "pending",
                                 push_info->description,
                                 push_info->filename,
                                 push_info->line,
                                 push_info->function,
                                 entry->tracking_semaphore_value,
                                 !push_acquire_info || push_acquire_info->num_values == 0 ? "\n" : "");

            if (push_acquire_info)
                channel_print_push_acquires(push_acquire_info, seq);
        }
    }
    uvm_channel_pool_unlock(channel->pool);
}

void uvm_channel_print_pending_pushes(uvm_channel_t *channel)
{
    channel_print_pushes(channel, 0, NULL);
}

static void channel_manager_print_pending_pushes(uvm_channel_manager_t *manager, struct seq_file *seq)
{
    uvm_channel_pool_t *pool;

    uvm_for_each_pool(pool, manager) {
        uvm_channel_t *channel;

        uvm_for_each_channel_in_pool(channel, pool) {
            UVM_SEQ_OR_DBG_PRINT(seq, "Channel %s, pending pushes:\n", channel->name);

            channel_print_pushes(channel, 0, seq);
        }
    }
}

static NV_STATUS manager_create_procfs_dirs(uvm_channel_manager_t *manager)
{
    uvm_gpu_t *gpu = manager->gpu;

    // The channel manager procfs files are debug only
    if (!uvm_procfs_is_debug_enabled())
        return NV_OK;

    manager->procfs.channels_dir = NV_CREATE_PROC_DIR("channels", gpu->procfs.dir);
    if (manager->procfs.channels_dir == NULL)
        return NV_ERR_OPERATING_SYSTEM;

    return NV_OK;
}

static int nv_procfs_read_manager_pending_pushes(struct seq_file *s, void *v)
{
    uvm_channel_manager_t *manager = (uvm_channel_manager_t *)s->private;

    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
        return -EAGAIN;

    channel_manager_print_pending_pushes(manager, s);

    uvm_up_read(&g_uvm_global.pm.lock);

    return 0;
}

static int nv_procfs_read_manager_pending_pushes_entry(struct seq_file *s, void *v)
{
    UVM_ENTRY_RET(nv_procfs_read_manager_pending_pushes(s, v));
}

UVM_DEFINE_SINGLE_PROCFS_FILE(manager_pending_pushes_entry);

static NV_STATUS manager_create_procfs(uvm_channel_manager_t *manager)
{
    uvm_gpu_t *gpu = manager->gpu;

    // The channel manager procfs files are debug only
    if (!uvm_procfs_is_debug_enabled())
        return NV_OK;

    manager->procfs.pending_pushes = NV_CREATE_PROC_FILE("pending_pushes",
                                                         gpu->procfs.dir,
                                                         manager_pending_pushes_entry,
                                                         manager);
    if (manager->procfs.pending_pushes == NULL)
        return NV_ERR_OPERATING_SYSTEM;

    return NV_OK;
}

static int nv_procfs_read_channel_info(struct seq_file *s, void *v)
{
    uvm_channel_t *channel = (uvm_channel_t *)s->private;

    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
        return -EAGAIN;

    uvm_channel_print_info(channel, s);

    uvm_up_read(&g_uvm_global.pm.lock);

    return 0;
}

static int nv_procfs_read_channel_info_entry(struct seq_file *s, void *v)
{
    UVM_ENTRY_RET(nv_procfs_read_channel_info(s, v));
}

UVM_DEFINE_SINGLE_PROCFS_FILE(channel_info_entry);

static int nv_procfs_read_channel_pushes(struct seq_file *s, void *v)
{
    uvm_channel_t *channel = (uvm_channel_t *)s->private;

    if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
        return -EAGAIN;

    // Include up to 5 finished pushes for some context
    channel_print_pushes(channel, 5, s);

    uvm_up_read(&g_uvm_global.pm.lock);

    return 0;
}

static int nv_procfs_read_channel_pushes_entry(struct seq_file *s, void *v)
{
    UVM_ENTRY_RET(nv_procfs_read_channel_pushes(s, v));
}

UVM_DEFINE_SINGLE_PROCFS_FILE(channel_pushes_entry);

static NV_STATUS channel_create_procfs(uvm_channel_t *channel)
{
    char dirname[16];
    uvm_channel_manager_t *manager = channel->pool->manager;

    // The channel procfs files are debug only
    if (!uvm_procfs_is_debug_enabled())
        return NV_OK;

    // For internal channels, the directory name contains the HW IDs. Those are
    // not available for proxy channels, so use -1:<channel index> instead.
    if (uvm_channel_is_proxy(channel))
        snprintf(dirname, sizeof(dirname), "-1:%u", uvm_channel_index_in_pool(channel));
    else
        snprintf(dirname, sizeof(dirname), "%u:%u", channel->channel_info.hwRunlistId, channel->channel_info.hwChannelId);

    channel->procfs.dir = NV_CREATE_PROC_DIR(dirname, manager->procfs.channels_dir);
    if (channel->procfs.dir == NULL)
        return NV_ERR_OPERATING_SYSTEM;

    channel->procfs.info = NV_CREATE_PROC_FILE("info", channel->procfs.dir, channel_info_entry, channel);
    if (channel->procfs.info == NULL)
        return NV_ERR_OPERATING_SYSTEM;

    channel->procfs.pushes = NV_CREATE_PROC_FILE("pushes", channel->procfs.dir, channel_pushes_entry, channel);
    if (channel->procfs.pushes == NULL)
        return NV_ERR_OPERATING_SYSTEM;

    return NV_OK;
}