mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2024-12-02 09:24:21 +01:00
642 lines
22 KiB
C
642 lines
22 KiB
C
/*******************************************************************************
|
|
Copyright (c) 2015-2022 NVIDIA Corporation
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to
|
|
deal in the Software without restriction, including without limitation the
|
|
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
sell copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be
|
|
included in all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
DEALINGS IN THE SOFTWARE.
|
|
|
|
*******************************************************************************/
|
|
|
|
#include "uvm_api.h"
|
|
#include "uvm_pushbuffer.h"
|
|
#include "uvm_channel.h"
|
|
#include "uvm_global.h"
|
|
#include "uvm_lock.h"
|
|
#include "uvm_procfs.h"
|
|
#include "uvm_push.h"
|
|
#include "uvm_kvmalloc.h"
|
|
#include "uvm_gpu.h"
|
|
#include "uvm_common.h"
|
|
#include "uvm_linux.h"
|
|
#include "uvm_conf_computing.h"
|
|
|
|
// Print pushbuffer state into a seq_file if provided or with UVM_DBG_PRINT() if not.
|
|
// Forward declaration; the definition appears further down in this file.
// uvm_pushbuffer_print() calls this with s == NULL, while the procfs read
// handler passes the seq_file it was given.
static void uvm_pushbuffer_print_common(uvm_pushbuffer_t *pushbuffer, struct seq_file *s);
|
|
|
|
static int nv_procfs_read_pushbuffer_info(struct seq_file *s, void *v)
|
|
{
|
|
uvm_pushbuffer_t *pushbuffer = (uvm_pushbuffer_t *)s->private;
|
|
|
|
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
|
|
return -EAGAIN;
|
|
|
|
uvm_pushbuffer_print_common(pushbuffer, s);
|
|
|
|
uvm_up_read(&g_uvm_global.pm.lock);
|
|
|
|
return 0;
|
|
}
|
|
|
|
// Entry-point wrapper around nv_procfs_read_pushbuffer_info(): the call is
// routed through UVM_ENTRY_RET(), which is also responsible for producing the
// function's return value.
static int nv_procfs_read_pushbuffer_info_entry(struct seq_file *s, void *v)
{
    UVM_ENTRY_RET(nv_procfs_read_pushbuffer_info(s, v));
}
|
|
|
|
// Instantiate the procfs file definition for pushbuffer_info_entry. The same
// identifier is handed to NV_CREATE_PROC_FILE() in create_procfs().
// NOTE(review): presumably this ties the file to
// nv_procfs_read_pushbuffer_info_entry() by name - confirm against the macro
// definition.
UVM_DEFINE_SINGLE_PROCFS_FILE(pushbuffer_info_entry);
|
|
|
|
// Create the "pushbuffer" procfs file under the owning GPU's procfs directory.
//
// The file is debug-only: when procfs debug is disabled nothing is created and
// NV_OK is returned. Returns NV_ERR_OPERATING_SYSTEM if the file could not be
// created.
static NV_STATUS create_procfs(uvm_pushbuffer_t *pushbuffer)
{
    uvm_gpu_t *owner_gpu = pushbuffer->channel_manager->gpu;
    NV_STATUS result = NV_OK;

    if (uvm_procfs_is_debug_enabled()) {
        pushbuffer->procfs.info_file = NV_CREATE_PROC_FILE("pushbuffer",
                                                           owner_gpu->procfs.dir,
                                                           pushbuffer_info_entry,
                                                           pushbuffer);
        if (pushbuffer->procfs.info_file == NULL)
            result = NV_ERR_OPERATING_SYSTEM;
    }

    return result;
}
|
|
|
|
// Allocate and initialize the pushbuffer of a channel manager.
//
// On success the new pushbuffer is stored in *pushbuffer_out and NV_OK is
// returned. On failure everything set up so far is released through
// uvm_pushbuffer_destroy(), *pushbuffer_out is left untouched and the error
// status is returned.
NV_STATUS uvm_pushbuffer_create(uvm_channel_manager_t *channel_manager, uvm_pushbuffer_t **pushbuffer_out)
{
    NV_STATUS status;
    int chunk_index;
    uvm_gpu_t *gpu = channel_manager->gpu;
    NvU64 alignment;
    uvm_pushbuffer_t *pushbuffer = uvm_kvmalloc_zero(sizeof(*pushbuffer));

    if (pushbuffer == NULL)
        return NV_ERR_NO_MEMORY;

    pushbuffer->channel_manager = channel_manager;

    uvm_spin_lock_init(&pushbuffer->lock, UVM_LOCK_ORDER_LEAF);

    // At most UVM_PUSHBUFFER_CHUNKS pushes can be in progress concurrently.
    uvm_sema_init(&pushbuffer->concurrent_pushes_sema, UVM_PUSHBUFFER_CHUNKS, UVM_LOCK_ORDER_PUSH);

    UVM_ASSERT(channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS ||
               channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_VID);

    // The allocation is aligned to UVM_PUSHBUFFER_SIZE, which is a power of 2
    // smaller than 1TB (2^40). Together these guarantee the whole pushbuffer
    // lives inside a single 1TB segment, so the Esched/PBDMA segment base can
    // be programmed once at channel initialization and never change for the
    // lifetime of the channels.
    BUILD_BUG_ON_NOT_POWER_OF_2(UVM_PUSHBUFFER_SIZE);
    BUILD_BUG_ON(UVM_PUSHBUFFER_SIZE >= (1ull << 40));

    // Test knob: a 1TB alignment is requested instead of the natural one.
    alignment = gpu->uvm_test_force_upper_pushbuffer_segment ? (1ull << 40) : UVM_PUSHBUFFER_SIZE;

    status = uvm_rm_mem_alloc_and_map_cpu(gpu,
                                          (channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS) ?
                                              UVM_RM_MEM_TYPE_SYS:
                                              UVM_RM_MEM_TYPE_GPU,
                                          UVM_PUSHBUFFER_SIZE,
                                          alignment,
                                          &pushbuffer->memory);
    if (status != NV_OK)
        goto error;

    if (uvm_conf_computing_mode_enabled(gpu)) {
        UVM_ASSERT(channel_manager->conf.pushbuffer_loc == UVM_BUFFER_LOCATION_SYS);

        // The allocation made above becomes the unprotected sysmem backing.
        pushbuffer->memory_unprotected_sysmem = pushbuffer->memory;
        pushbuffer->memory = NULL;

        // Over-allocate by 4KB so that the base can be aligned to at least
        // 4KB. Pushes can include inline buffers with specific alignment
        // requirements, and a differently aligned base between the backing
        // memory locations would change that.
        pushbuffer->memory_protected_sysmem = uvm_kvmalloc_zero(UVM_PUSHBUFFER_SIZE + UVM_PAGE_SIZE_4K);
        if (!pushbuffer->memory_protected_sysmem) {
            status = NV_ERR_NO_MEMORY;
            goto error;
        }

        status = uvm_rm_mem_alloc(gpu,
                                  UVM_RM_MEM_TYPE_GPU,
                                  UVM_PUSHBUFFER_SIZE,
                                  alignment,
                                  &pushbuffer->memory);
        if (status == NV_OK)
            status = uvm_rm_mem_map_gpu(pushbuffer->memory_unprotected_sysmem, gpu, alignment);

        if (status != NV_OK)
            goto error;
    }

    // Verify the GPU can access the pushbuffer.
    UVM_ASSERT((uvm_pushbuffer_get_gpu_va_base(pushbuffer) + UVM_PUSHBUFFER_SIZE - 1) < gpu->parent->max_host_va);

    // Every chunk starts out both idle and available, with nothing pending.
    bitmap_fill(pushbuffer->idle_chunks, UVM_PUSHBUFFER_CHUNKS);
    bitmap_fill(pushbuffer->available_chunks, UVM_PUSHBUFFER_CHUNKS);

    for (chunk_index = 0; chunk_index < UVM_PUSHBUFFER_CHUNKS; chunk_index++)
        INIT_LIST_HEAD(&pushbuffer->chunks[chunk_index].pending_gpfifos);

    status = create_procfs(pushbuffer);
    if (status != NV_OK)
        goto error;

    *pushbuffer_out = pushbuffer;

    return NV_OK;

error:
    uvm_pushbuffer_destroy(pushbuffer);
    return status;
}
|
|
|
|
// Return the chunk matching the lowest bit set in mask, or NULL when none of
// the first UVM_PUSHBUFFER_CHUNKS bits is set.
//
// The pushbuffer lock must be held.
static uvm_pushbuffer_chunk_t *get_chunk_in_mask(uvm_pushbuffer_t *pushbuffer, unsigned long *mask)
{
    NvU32 first_set = find_first_bit(mask, UVM_PUSHBUFFER_CHUNKS);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (first_set != UVM_PUSHBUFFER_CHUNKS)
        return &pushbuffer->chunks[first_set];

    return NULL;
}
|
|
|
|
// Return the first chunk flagged in the available_chunks bitmap, or NULL if
// there is none.
static uvm_pushbuffer_chunk_t *get_available_chunk(uvm_pushbuffer_t *pushbuffer)
{
    unsigned long *available_mask = pushbuffer->available_chunks;

    return get_chunk_in_mask(pushbuffer, available_mask);
}
|
|
|
|
// Return the first chunk flagged in the idle_chunks bitmap, or NULL if there
// is none.
static uvm_pushbuffer_chunk_t *get_idle_chunk(uvm_pushbuffer_t *pushbuffer)
{
    unsigned long *idle_mask = pushbuffer->idle_chunks;

    return get_chunk_in_mask(pushbuffer, idle_mask);
}
|
|
|
|
// Translate a chunk pointer into its position within pushbuffer->chunks.
// Asserts that the position is below UVM_PUSHBUFFER_CHUNKS.
static NvU32 chunk_get_index(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    uvm_pushbuffer_chunk_t *first_chunk = pushbuffer->chunks;
    NvU32 index = chunk - first_chunk;

    UVM_ASSERT(index < UVM_PUSHBUFFER_CHUNKS);

    return index;
}
|
|
|
|
// Byte offset of the start of the chunk from the start of the pushbuffer:
// the chunk's index multiplied by UVM_PUSHBUFFER_CHUNK_SIZE.
static NvU32 chunk_get_offset(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    NvU32 chunk_position = chunk_get_index(pushbuffer, chunk);

    return chunk_position * UVM_PUSHBUFFER_CHUNK_SIZE;
}
|
|
|
|
// Set the chunk's bit in the given chunk bitmap. The non-atomic __set_bit() is
// used, so the pushbuffer lock must be held.
static void set_chunk(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk, unsigned long *mask)
{
    NvU32 bit = chunk_get_index(pushbuffer, chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);
    __set_bit(bit, mask);
}
|
|
|
|
// Clear the chunk's bit in the given chunk bitmap. The non-atomic
// __clear_bit() is used, so the pushbuffer lock must be held.
static void clear_chunk(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk, unsigned long *mask)
{
    NvU32 bit = chunk_get_index(pushbuffer, chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);
    __clear_bit(bit, mask);
}
|
|
|
|
// Choose a chunk for a new push: an idle chunk is preferred, and an available
// one is used as the fallback. Returns NULL when neither exists.
//
// The pushbuffer lock must be held.
static uvm_pushbuffer_chunk_t *pick_chunk(uvm_pushbuffer_t *pushbuffer)
{
    uvm_pushbuffer_chunk_t *candidate = get_idle_chunk(pushbuffer);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (candidate != NULL)
        return candidate;

    return get_available_chunk(pushbuffer);
}
|
|
|
|
// Make a single attempt to claim a chunk for push.
//
// Under the pushbuffer lock, a chunk is picked, recorded as being used by push
// and removed from both the idle and the available bitmaps. *chunk_out always
// receives the outcome (the claimed chunk, or NULL). Returns true iff a chunk
// was claimed.
static bool try_claim_chunk(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm_pushbuffer_chunk_t **chunk_out)
{
    uvm_pushbuffer_chunk_t *claimed;

    uvm_spin_lock(&pushbuffer->lock);

    claimed = pick_chunk(pushbuffer);
    if (claimed != NULL) {
        claimed->current_push = push;
        clear_chunk(pushbuffer, claimed, pushbuffer->idle_chunks);
        clear_chunk(pushbuffer, claimed, pushbuffer->available_chunks);
    }

    uvm_spin_unlock(&pushbuffer->lock);

    *chunk_out = claimed;

    return claimed != NULL;
}
|
|
|
|
// CPU address at which pushes are assembled.
//
// Outside of Confidential Computing this is the CPU mapping of
// pushbuffer->memory. With Confidential Computing enabled, pushes are
// assembled in protected sysmem and then either safely (through
// encrypt/decrypt) moved to protected vidmem, or signed and moved to
// unprotected sysmem.
static char *get_base_cpu_va(uvm_pushbuffer_t *pushbuffer)
{
    uvm_gpu_t *owner_gpu = pushbuffer->channel_manager->gpu;

    if (!uvm_conf_computing_mode_enabled(owner_gpu))
        return (char *)uvm_rm_mem_get_cpu_va(pushbuffer->memory);

    // The protected sysmem base is rounded up to 4kB, which should be enough
    // to give inline buffers the same alignment behaviour as with the other
    // two backing memory locations.
    return (char*)(UVM_ALIGN_UP((uintptr_t)pushbuffer->memory_protected_sysmem, UVM_PAGE_SIZE_4K));
}
|
|
|
|
// CPU address where the next push in the chunk begins: the pushbuffer CPU
// base, plus the chunk's offset, plus the chunk's next_push_start. The result
// is asserted to be NvU32-aligned.
static NvU32 *chunk_get_next_push_start_addr(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    char *cpu_base = get_base_cpu_va(pushbuffer);
    char *push_start = cpu_base + chunk_get_offset(pushbuffer, chunk) + chunk->next_push_start;

    UVM_ASSERT(((NvU64)push_start) % sizeof(NvU32) == 0);

    return (NvU32*)push_start;
}
|
|
|
|
// Claim a chunk for push, spinning until one can be claimed or a channel error
// is detected.
//
// A first attempt is made right away. If it fails, channel progress is updated
// and the claim is retried in a spin loop; each iteration checks the channel
// manager for errors and updates progress again.
//
// Returns NV_OK with the claimed chunk in *chunk_out, or the error reported by
// uvm_channel_manager_check_errors(). On error no chunk is claimed and
// *chunk_out is NULL, as left by the last failed try_claim_chunk().
static NV_STATUS claim_chunk(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm_pushbuffer_chunk_t **chunk_out)
{
    NV_STATUS status = NV_OK;
    uvm_channel_manager_t *channel_manager = pushbuffer->channel_manager;
    uvm_spin_loop_t spin;

    if (try_claim_chunk(pushbuffer, push, chunk_out))
        return NV_OK;

    uvm_channel_manager_update_progress(channel_manager);

    uvm_spin_loop_init(&spin);

    // The status must be tested before attempting another claim. Otherwise a
    // claim could succeed right after an error was detected, and the function
    // would return the error while leaving the chunk claimed (current_push set
    // and its idle/available bits cleared) with nobody to release it.
    while (status == NV_OK && !try_claim_chunk(pushbuffer, push, chunk_out)) {
        UVM_SPIN_LOOP(&spin);
        status = uvm_channel_manager_check_errors(channel_manager);
        uvm_channel_manager_update_progress(channel_manager);
    }

    return status;
}
|
|
|
|
// Reserve pushbuffer space for a new push and set push->begin and push->next
// to the start of that space.
//
// Pushes on WLC channels use the channel's static pushbuffer and return
// immediately. All other pushes take a slot in the concurrent pushes semaphore
// and claim a chunk. If claiming fails, the semaphore slot is given back and
// the error is returned.
NV_STATUS uvm_pushbuffer_begin_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    uvm_pushbuffer_chunk_t *chunk;
    NV_STATUS status;

    UVM_ASSERT(pushbuffer);
    UVM_ASSERT(push);
    UVM_ASSERT(push->channel);

    if (uvm_channel_is_wlc(push->channel)) {
        // WLC pushes use static PB and don't count against max concurrent
        // pushes.
        push->begin = (void*)UVM_ALIGN_UP((uintptr_t)push->channel->conf_computing.static_pb_protected_sysmem,
                                          UVM_PAGE_SIZE_4K);
        push->next = push->begin;
        return NV_OK;
    }

    // On success, the matching release of this semaphore happens in
    // uvm_pushbuffer_end_push().
    uvm_down(&pushbuffer->concurrent_pushes_sema);

    status = claim_chunk(pushbuffer, push, &chunk);
    if (status == NV_OK) {
        UVM_ASSERT(chunk);

        push->begin = chunk_get_next_push_start_addr(pushbuffer, chunk);
        push->next = push->begin;
    }
    else {
        uvm_up(&pushbuffer->concurrent_pushes_sema);
    }

    return status;
}
|
|
|
|
// First entry of the chunk's pending GPFIFO list, or NULL if the list is
// empty.
static uvm_gpfifo_entry_t *chunk_get_first_gpfifo(uvm_pushbuffer_chunk_t *chunk)
{
    struct list_head *pending = &chunk->pending_gpfifos;

    return list_first_entry_or_null(pending, uvm_gpfifo_entry_t, pending_list_node);
}
|
|
|
|
// Last entry of the chunk's pending GPFIFO list, or NULL if the list is empty.
static uvm_gpfifo_entry_t *chunk_get_last_gpfifo(uvm_pushbuffer_chunk_t *chunk)
{
    struct list_head *pending = &chunk->pending_gpfifos;

    return list_last_entry_or_null(pending, uvm_gpfifo_entry_t, pending_list_node);
}
|
|
|
|
// Get the cpu put within the chunk (in range [0, UVM_PUSHBUFFER_CHUNK_SIZE])
|
|
static NvU32 chunk_get_cpu_put(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
|
|
{
|
|
uvm_gpfifo_entry_t *gpfifo = chunk_get_last_gpfifo(chunk);
|
|
|
|
uvm_assert_spinlock_locked(&pushbuffer->lock);
|
|
|
|
if (gpfifo != NULL)
|
|
return gpfifo->pushbuffer_offset + gpfifo->pushbuffer_size - chunk_get_offset(pushbuffer, chunk);
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
// Get the gpu get within the chunk (in range [0, UVM_PUSHBUFFER_CHUNK_SIZE))
|
|
static NvU32 chunk_get_gpu_get(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
|
|
{
|
|
uvm_gpfifo_entry_t *gpfifo = chunk_get_first_gpfifo(chunk);
|
|
|
|
uvm_assert_spinlock_locked(&pushbuffer->lock);
|
|
|
|
if (gpfifo != NULL)
|
|
return gpfifo->pushbuffer_offset - chunk_get_offset(pushbuffer, chunk);
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
// Re-evaluate whether the chunk can host another push, based on its current
// gpu get and cpu put, and update the idle/available bitmaps and
// next_push_start accordingly.
//
// The chunk is only ever marked here; clearing the bits happens when the chunk
// is claimed. The pushbuffer lock must be held.
static void update_chunk(uvm_pushbuffer_t *pushbuffer, uvm_pushbuffer_chunk_t *chunk)
{
    NvU32 gpu_get = chunk_get_gpu_get(pushbuffer, chunk);
    NvU32 cpu_put = chunk_get_cpu_put(pushbuffer, chunk);

    uvm_assert_spinlock_locked(&pushbuffer->lock);

    if (gpu_get == cpu_put) {
        // Equal put and get happen both for a full and for an empty chunk.
        // The pending GPFIFOs list tells the two apart: a non-empty list means
        // the chunk is full and nothing can be done with it.
        if (list_empty(&chunk->pending_gpfifos)) {
            // Chunk completely idle
            set_chunk(pushbuffer, chunk, pushbuffer->idle_chunks);
            set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
            UVM_ASSERT_MSG(cpu_put == 0, "cpu put %u\n", cpu_put);

            // A completely idle chunk always restarts from its very beginning,
            // which helps avoid the waste that can happen at the very end of
            // the chunk described at the top of uvm_pushbuffer.h.
            chunk->next_push_start = 0;
        }

        return;
    }

    if (gpu_get > cpu_put) {
        // Put is behind get: the only usable space is the gap between them.
        if (gpu_get - cpu_put >= UVM_MAX_PUSH_SIZE) {
            set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
            chunk->next_push_start = cpu_put;
        }

        return;
    }

    // From here on get is behind put.
    if (UVM_PUSHBUFFER_CHUNK_SIZE >= cpu_put + UVM_MAX_PUSH_SIZE) {
        UVM_ASSERT_MSG(gpu_get < cpu_put, "gpu_get %u cpu_put %u\n", gpu_get, cpu_put);

        // Enough space at the end
        set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
        chunk->next_push_start = cpu_put;

        return;
    }

    if (gpu_get >= UVM_MAX_PUSH_SIZE) {
        UVM_ASSERT_MSG(gpu_get < cpu_put, "gpu_get %u cpu_put %u\n", gpu_get, cpu_put);

        // Enough space at the beginning
        set_chunk(pushbuffer, chunk, pushbuffer->available_chunks);
        chunk->next_push_start = 0;
    }
}
|
|
|
|
// Tear down a pushbuffer: remove its procfs file, free its backing memory and
// then the pushbuffer structure itself. A NULL pushbuffer is a no-op.
//
// uvm_pushbuffer_create() calls this on a partially initialized pushbuffer
// from its error path, so some of the members may still be NULL here.
void uvm_pushbuffer_destroy(uvm_pushbuffer_t *pushbuffer)
{
    if (!pushbuffer)
        return;

    proc_remove(pushbuffer->procfs.info_file);

    // Backing memory
    uvm_rm_mem_free(pushbuffer->memory_unprotected_sysmem);
    uvm_kvfree(pushbuffer->memory_protected_sysmem);
    uvm_rm_mem_free(pushbuffer->memory);

    uvm_kvfree(pushbuffer);
}
|
|
|
|
// Map a byte offset within the pushbuffer to the chunk containing it. The
// offset is asserted to be below UVM_PUSHBUFFER_SIZE.
static uvm_pushbuffer_chunk_t *offset_to_chunk(uvm_pushbuffer_t *pushbuffer, NvU32 offset)
{
    UVM_ASSERT(offset < UVM_PUSHBUFFER_SIZE);

    return pushbuffer->chunks + (offset / UVM_PUSHBUFFER_CHUNK_SIZE);
}
|
|
|
|
// Chunk containing the pushbuffer range of a GPFIFO entry. The chunk is looked
// up from the entry's first byte, and the entry's last byte is asserted to
// fall within the same chunk.
static uvm_pushbuffer_chunk_t *gpfifo_to_chunk(uvm_pushbuffer_t *pushbuffer, uvm_gpfifo_entry_t *gpfifo)
{
    uvm_pushbuffer_chunk_t *chunk;

    chunk = offset_to_chunk(pushbuffer, gpfifo->pushbuffer_offset);

    UVM_ASSERT(offset_to_chunk(pushbuffer, gpfifo->pushbuffer_offset + gpfifo->pushbuffer_size - 1) == chunk);

    return chunk;
}
|
|
|
|
// Mark a GPFIFO entry as completed.
//
// The push's on_complete callback, if any, is invoked (before the pushbuffer
// lock is taken) and then reset together with its data. The entry is then
// removed from its pending list, and the chunk state is refreshed when the
// entry was the first or the last pending one in its chunk.
void uvm_pushbuffer_mark_completed(uvm_pushbuffer_t *pushbuffer, uvm_gpfifo_entry_t *gpfifo)
{
    uvm_push_info_t *info = gpfifo->push_info;
    uvm_pushbuffer_chunk_t *owner_chunk;
    bool at_list_boundary;

    UVM_ASSERT(gpfifo->type == UVM_GPFIFO_ENTRY_TYPE_NORMAL);

    owner_chunk = gpfifo_to_chunk(pushbuffer, gpfifo);

    if (info->on_complete != NULL)
        info->on_complete(info->on_complete_data);

    info->on_complete = NULL;
    info->on_complete_data = NULL;

    uvm_spin_lock(&pushbuffer->lock);

    // This has to be evaluated before the entry is unlinked. Only removing the
    // first or the last pending entry moves the chunk's get or put.
    at_list_boundary = (gpfifo == chunk_get_first_gpfifo(owner_chunk)) ||
                       (gpfifo == chunk_get_last_gpfifo(owner_chunk));

    list_del(&gpfifo->pending_list_node);

    // While a push is in progress in the chunk (current_push != NULL), the
    // update is left to uvm_pushbuffer_end_push() for that push.
    if (at_list_boundary && owner_chunk->current_push == NULL)
        update_chunk(pushbuffer, owner_chunk);

    uvm_spin_unlock(&pushbuffer->lock);
}
|
|
|
|
// Offset of the push's beginning from the pushbuffer's CPU base address. The
// offset is asserted to be NvU32-aligned. WLC pushes always report 0.
NvU32 uvm_pushbuffer_get_offset_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    char *cpu_base;
    char *push_begin;
    NvU32 offset;

    // WLC channels use a private static PB and their gpfifo entries are not
    // added to any chunk's list, so all that is needed here is a legal offset.
    // Completion cleanup will not find WLC gpfifo entries as either the first
    // or the last entry of any chunk.
    if (uvm_channel_is_wlc(push->channel))
        return 0;

    push_begin = (char*)push->begin;
    cpu_base = get_base_cpu_va(pushbuffer);
    offset = push_begin - cpu_base;

    UVM_ASSERT(((NvU64)offset) % sizeof(NvU32) == 0);

    return offset;
}
|
|
|
|
// GPU virtual address of the beginning of a push.
//
// WLC and LCIC channels return the address of their static protected vidmem
// pushbuffer. SEC2 channels use the unprotected sysmem base. Every other
// channel uses the GPU VA of pushbuffer->memory (the proxy mapping for proxy
// channels). In the last two cases the push's offset is added to the base.
NvU64 uvm_pushbuffer_get_gpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    uvm_gpu_t *push_gpu = uvm_push_get_gpu(push);
    bool proxy = uvm_channel_is_proxy(push->channel);
    NvU64 base_va = uvm_rm_mem_get_gpu_va(pushbuffer->memory, push_gpu, proxy).address;

    // The same static PB locations as the fixed schedule have to be used,
    // because that's what the channels are initialized to use.
    if (uvm_channel_is_wlc(push->channel) || uvm_channel_is_lcic(push->channel))
        return uvm_rm_mem_get_gpu_uvm_va(push->channel->conf_computing.static_pb_protected_vidmem, push_gpu);

    // SEC2 PBs are in unprotected sysmem
    if (uvm_channel_is_sec2(push->channel))
        base_va = uvm_pushbuffer_get_sec2_gpu_va_base(pushbuffer);

    return base_va + uvm_pushbuffer_get_offset_for_push(pushbuffer, push);
}
|
|
|
|
// CPU address of the push's location in unprotected sysmem: the CPU mapping of
// memory_unprotected_sysmem plus the push's offset.
//
// For WLC channels the channel's static unprotected sysmem pushbuffer is
// returned instead; this is asserted to happen only while WLC is not ready.
void *uvm_pushbuffer_get_unprotected_cpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    char *unprotected_base;
    NvU32 push_offset;

    if (uvm_channel_is_wlc(push->channel)) {
        // Reuse existing WLC static pb for initialization
        UVM_ASSERT(!uvm_channel_manager_is_wlc_ready(push->channel->pool->manager));
        return push->channel->conf_computing.static_pb_unprotected_sysmem_cpu;
    }

    unprotected_base = uvm_rm_mem_get_cpu_va(pushbuffer->memory_unprotected_sysmem);
    push_offset = uvm_pushbuffer_get_offset_for_push(pushbuffer, push);

    return unprotected_base + push_offset;
}
|
|
|
|
// GPU virtual address of the push's location in unprotected sysmem: the GPU VA
// of memory_unprotected_sysmem plus the push's offset.
//
// For WLC channels the GPU VA of the channel's static unprotected sysmem
// pushbuffer is returned instead; this is asserted to happen only while WLC is
// not ready.
NvU64 uvm_pushbuffer_get_unprotected_gpu_va_for_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push)
{
    NvU64 unprotected_base;
    NvU32 push_offset;

    if (uvm_channel_is_wlc(push->channel)) {
        // Reuse existing WLC static pb for initialization
        UVM_ASSERT(!uvm_channel_manager_is_wlc_ready(push->channel->pool->manager));
        return uvm_rm_mem_get_gpu_uvm_va(push->channel->conf_computing.static_pb_unprotected_sysmem,
                                         uvm_push_get_gpu(push));
    }

    unprotected_base = uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory_unprotected_sysmem, uvm_push_get_gpu(push));
    push_offset = uvm_pushbuffer_get_offset_for_push(pushbuffer, push);

    return unprotected_base + push_offset;
}
|
|
|
|
// Finish a push: queue its GPFIFO entry on the owning chunk's pending list,
// refresh the chunk state, release the chunk from the push and give back the
// concurrent pushes semaphore slot taken in uvm_pushbuffer_begin_push().
//
// The channel pool lock is asserted to be held.
void uvm_pushbuffer_end_push(uvm_pushbuffer_t *pushbuffer, uvm_push_t *push, uvm_gpfifo_entry_t *gpfifo)
{
    uvm_pushbuffer_chunk_t *chunk;

    if (uvm_channel_is_wlc(push->channel)) {
        // WLC channels use a static pushbuffer and don't count towards max
        // concurrent pushes. The list node is initialized as a list head so
        // that the deletion in "uvm_pushbuffer_mark_completed" doesn't crash.
        INIT_LIST_HEAD(&gpfifo->pending_list_node);
        return;
    }

    chunk = gpfifo_to_chunk(pushbuffer, gpfifo);

    uvm_channel_pool_assert_locked(push->channel->pool);

    uvm_spin_lock(&pushbuffer->lock);

    list_add_tail(&gpfifo->pending_list_node, &chunk->pending_gpfifos);
    update_chunk(pushbuffer, chunk);

    // The chunk no longer has a push in progress.
    UVM_ASSERT(chunk->current_push == push);
    chunk->current_push = NULL;

    uvm_spin_unlock(&pushbuffer->lock);

    // This function has to be called with the channel lock held, while the
    // concurrent pushes sema has a higher lock order. To keep the code
    // structure simple, the sema is simply released out of order here.
    uvm_up_out_of_order(&pushbuffer->concurrent_pushes_sema);
}
|
|
|
|
// Report whether a chunk could currently be picked for a new push. The check
// takes and releases the pushbuffer lock and claims nothing, so the answer is
// only a snapshot.
bool uvm_pushbuffer_has_space(uvm_pushbuffer_t *pushbuffer)
{
    uvm_pushbuffer_chunk_t *candidate;

    uvm_spin_lock(&pushbuffer->lock);
    candidate = pick_chunk(pushbuffer);
    uvm_spin_unlock(&pushbuffer->lock);

    return candidate != NULL;
}
|
|
|
|
// Print the pushbuffer state through UVM_SEQ_OR_DBG_PRINT(): a header with the
// GPU name and whether the pushbuffer has space, followed by one line per
// chunk.
//
// The "has space" value is obtained before the pushbuffer lock is taken here,
// because uvm_pushbuffer_has_space() acquires that lock itself. The per-chunk
// lines are printed with the lock held.
void uvm_pushbuffer_print_common(uvm_pushbuffer_t *pushbuffer, struct seq_file *s)
{
    NvU32 chunk_index;

    UVM_SEQ_OR_DBG_PRINT(s, "Pushbuffer for GPU %s\n", uvm_gpu_name(pushbuffer->channel_manager->gpu));
    UVM_SEQ_OR_DBG_PRINT(s, " has space: %d\n", uvm_pushbuffer_has_space(pushbuffer));

    uvm_spin_lock(&pushbuffer->lock);

    for (chunk_index = 0; chunk_index < UVM_PUSHBUFFER_CHUNKS; chunk_index++) {
        uvm_pushbuffer_chunk_t *current_chunk = pushbuffer->chunks + chunk_index;
        NvU32 put = chunk_get_cpu_put(pushbuffer, current_chunk);
        NvU32 get = chunk_get_gpu_get(pushbuffer, current_chunk);
        int is_available = test_bit(chunk_index, pushbuffer->available_chunks) ? 1 : 0;
        int is_idle = test_bit(chunk_index, pushbuffer->idle_chunks) ? 1 : 0;

        UVM_SEQ_OR_DBG_PRINT(s, " chunk %u put %u get %u next %u available %d idle %d\n",
                             chunk_index,
                             put,
                             get,
                             current_chunk->next_push_start,
                             is_available,
                             is_idle);
    }

    uvm_spin_unlock(&pushbuffer->lock);
}
|
|
|
|
// Print the pushbuffer state without a seq_file: the common printer is called
// with a NULL seq_file.
void uvm_pushbuffer_print(uvm_pushbuffer_t *pushbuffer)
{
    // No "return <expr>;" here: ISO C does not allow a return statement with
    // an expression in a function returning void, even if the expression
    // itself has type void.
    uvm_pushbuffer_print_common(pushbuffer, NULL);
}
|
|
|
|
// GPU virtual address of pushbuffer->memory, as seen by the GPU owning the
// pushbuffer's channel manager.
NvU64 uvm_pushbuffer_get_gpu_va_base(uvm_pushbuffer_t *pushbuffer)
{
    uvm_gpu_t *owner_gpu = pushbuffer->channel_manager->gpu;

    return uvm_rm_mem_get_gpu_uvm_va(pushbuffer->memory, owner_gpu);
}
|
|
|
|
// GPU virtual address of the unprotected sysmem pushbuffer, which is the base
// used for SEC2 pushes. Asserts that Confidential Computing is enabled on the
// pushbuffer's GPU.
NvU64 uvm_pushbuffer_get_sec2_gpu_va_base(uvm_pushbuffer_t *pushbuffer)
{
    uvm_rm_mem_t *unprotected;

    UVM_ASSERT(uvm_conf_computing_mode_enabled(pushbuffer->channel_manager->gpu));

    unprotected = pushbuffer->memory_unprotected_sysmem;

    return uvm_rm_mem_get_gpu_uvm_va(unprotected, pushbuffer->channel_manager->gpu);
}
|