mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2025-01-06 01:46:05 +01:00
204 lines
8.6 KiB
C
204 lines
8.6 KiB
C
/*******************************************************************************
|
|
Copyright (c) 2015 NVIDIA Corporation
|
|
|
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
of this software and associated documentation files (the "Software"), to
|
|
deal in the Software without restriction, including without limitation the
|
|
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
|
sell copies of the Software, and to permit persons to whom the Software is
|
|
furnished to do so, subject to the following conditions:
|
|
|
|
The above copyright notice and this permission notice shall be
|
|
included in all copies or substantial portions of the Software.
|
|
|
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
|
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
|
DEALINGS IN THE SOFTWARE.
|
|
|
|
*******************************************************************************/
|
|
|
|
#ifndef __UVM_GPU_SEMAPHORE_H__
|
|
#define __UVM_GPU_SEMAPHORE_H__
|
|
|
|
#include "uvm_forward_decl.h"
|
|
#include "uvm_lock.h"
|
|
#include "uvm_rm_mem.h"
|
|
#include "uvm_linux.h"
|
|
|
|
// A GPU semaphore is a memory location accessible by the GPUs and the CPU
|
|
// that's used for synchronization among them.
|
|
// The GPU has primitives to acquire (wait for) and release (set) 4-byte memory
|
|
// locations. The same memory can be accessed by multiple GPUs and the CPU
|
|
// allowing for different synchronization schemes.
|
|
//
|
|
// The UVM driver maintains a per-GPU semaphore pool that grows on demand as
|
|
// semaphores are allocated out of it.
|
|
//
|
|
// TODO: Bug 200194638: Add support for timestamps (the GPU also supports
|
|
// releasing 16-byte semaphores that include an 8-byte timestamp).
|
|
struct uvm_gpu_semaphore_struct
|
|
{
|
|
// The semaphore pool page the semaphore came from
|
|
uvm_gpu_semaphore_pool_page_t *page;
|
|
|
|
// Pointer to the memory location
|
|
NvU32 *payload;
|
|
struct {
|
|
NvU16 index;
|
|
NvU32 cached_payload;
|
|
uvm_rm_mem_t *encrypted_payload;
|
|
uvm_rm_mem_t *notifier;
|
|
uvm_rm_mem_t *auth_tag;
|
|
UvmCslIv *ivs;
|
|
NvU32 last_pushed_notifier;
|
|
NvU32 last_observed_notifier;
|
|
} conf_computing;
|
|
};
|
|
|
|
// A primitive used for tracking progress of the GPU
|
|
// Whenever a stream of GPU operations needs to be synchronized it increments
|
|
// the semaphore's payload as the last step so that other processors
|
|
// can acquire (wait for) it.
|
|
// The primitive maintains a 64-bit counter on top of the 32-bit GPU semaphore
|
|
// to support 2^64 synchronization points instead of just 2^32. The logic relies
|
|
// on being able to notice every time the 32-bit counter wraps around (see
|
|
// update_completed_value()).
|
|
struct uvm_gpu_tracking_semaphore_struct
|
|
{
|
|
uvm_gpu_semaphore_t semaphore;
|
|
|
|
// Last completed value
|
|
// The bottom 32-bits will always match the latest semaphore payload seen in
|
|
// update_completed_value_locked().
|
|
atomic64_t completed_value;
|
|
|
|
// Lock protecting updates to the completed_value
|
|
union {
|
|
uvm_spinlock_t s_lock;
|
|
uvm_mutex_t m_lock;
|
|
};
|
|
|
|
// Last queued value
|
|
// All accesses to the queued value should be handled by the user of the GPU
|
|
// tracking semaphore.
|
|
NvU64 queued_value;
|
|
};
|
|
|
|
// Create a semaphore pool for a GPU.
|
|
NV_STATUS uvm_gpu_semaphore_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out);
|
|
|
|
// When the Confidential Computing feature is enabled, semaphore pools
|
|
// associated with CE channels are allocated in the CPR of vidmem and as such
|
|
// have all the associated access restrictions. Because of this, they're called
|
|
// secure pools and secure semaphores are allocated out of said secure pools.
|
|
NV_STATUS uvm_gpu_semaphore_secure_pool_create(uvm_gpu_t *gpu, uvm_gpu_semaphore_pool_t **pool_out);
|
|
|
|
// Destroy a semaphore pool
|
|
// Locking:
|
|
// - Global lock needs to be held in read mode (for unmapping from all GPUs)
|
|
// - Internally acquires:
|
|
// - GPU semaphore pool lock
|
|
// - RM API lock
|
|
// - RM GPUs lock
|
|
void uvm_gpu_semaphore_pool_destroy(uvm_gpu_semaphore_pool_t *pool);
|
|
|
|
// Allocate a semaphore from the pool.
|
|
// The semaphore will be mapped on all GPUs currently registered with the UVM
|
|
// driver, and on all new GPUs which will be registered in the future.
|
|
// Unless the Confidential Computing feature is enabled and the pool is a
|
|
// secure pool. In this case, it is only mapped to the GPU that holds the
|
|
// allocation.
|
|
// The mappings are added to UVM's internal address space, and (in SR-IOV heavy)
|
|
// to the proxy address space.
|
|
//
|
|
// The semaphore's payload will be initially set to 0.
|
|
//
|
|
// Locking:
|
|
// - Global lock needs to be held in read mode (for mapping on all GPUs)
|
|
// - Internally synchronized and hence safe to be called from multiple threads
|
|
// - Internally acquires:
|
|
// - GPU semaphore pool lock
|
|
// - RM API lock
|
|
// - RM GPUs lock
|
|
NV_STATUS uvm_gpu_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_semaphore_t *semaphore);
|
|
|
|
// Free a semaphore
|
|
// Locking:
|
|
// - Internally synchronized and hence safe to be called from multiple threads
|
|
void uvm_gpu_semaphore_free(uvm_gpu_semaphore_t *semaphore);
|
|
|
|
// Map all the semaphores from the pool on a GPU
|
|
//
|
|
// The mappings are added to UVM's internal address space, and (in SR-IOV heavy)
|
|
// to the proxy address space.
|
|
NV_STATUS uvm_gpu_semaphore_pool_map_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu);
|
|
|
|
// Unmap all the semaphores from the pool from a GPU
|
|
//
|
|
// The unmapping affects all the VA spaces where the semaphores are currently
|
|
// mapped.
|
|
void uvm_gpu_semaphore_pool_unmap_gpu(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_t *gpu);
|
|
|
|
// Get the GPU VA of a semaphore in UVM's internal address space.
|
|
NvU64 uvm_gpu_semaphore_get_gpu_uvm_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu);
|
|
|
|
// Get the GPU VA of a semaphore in the proxy address space.
|
|
NvU64 uvm_gpu_semaphore_get_gpu_proxy_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu);
|
|
|
|
NvU64 uvm_gpu_semaphore_get_gpu_va(uvm_gpu_semaphore_t *semaphore, uvm_gpu_t *gpu, bool is_proxy_va_space);
|
|
|
|
// Read the 32-bit payload of the semaphore
|
|
// Notably doesn't provide any memory ordering guarantees and needs to be used with
|
|
// care. For an example of what needs to be considered see
|
|
// uvm_gpu_tracking_semaphore_update_completed_value().
|
|
NvU32 uvm_gpu_semaphore_get_payload(uvm_gpu_semaphore_t *semaphore);
|
|
|
|
// Set the 32-bit payload of the semaphore
|
|
// Guarantees that all memory accesses preceding setting the payload won't be
|
|
// moved past it.
|
|
void uvm_gpu_semaphore_set_payload(uvm_gpu_semaphore_t *semaphore, NvU32 payload);
|
|
|
|
// Allocate a GPU tracking semaphore from the pool
|
|
// Locking same as uvm_gpu_semaphore_alloc()
|
|
NV_STATUS uvm_gpu_tracking_semaphore_alloc(uvm_gpu_semaphore_pool_t *pool, uvm_gpu_tracking_semaphore_t *tracking_sem);
|
|
|
|
// Free a GPU tracking semaphore
|
|
// Locking same as uvm_gpu_semaphore_free()
|
|
void uvm_gpu_tracking_semaphore_free(uvm_gpu_tracking_semaphore_t *tracking_sem);
|
|
|
|
// Check whether a specific value has been completed
|
|
//
|
|
// If true is returned, guarantees that all operations ordered prior to a
|
|
// processor (commonly a GPU) completing the specific value will be visible to
|
|
// the caller.
|
|
//
|
|
// In case a GPU is supposed to complete a value, care needs to be taken for all GPU
|
|
// operations to be ordered correctly with the semaphore release that sets the value.
|
|
// In case it's the CPU completing the value, uvm_gpu_semaphore_set_payload()
|
|
// should be used that provides the necessary ordering guarantees.
|
|
//
|
|
// Locking: this operation is internally synchronized and hence safe to be
|
|
// called from multiple threads.
|
|
bool uvm_gpu_tracking_semaphore_is_value_completed(uvm_gpu_tracking_semaphore_t *tracking_sem, NvU64 value);
|
|
|
|
// Update and return the completed value
|
|
//
|
|
// Provides the same guarantees as if uvm_gpu_tracking_semaphore_is_value_completed()
|
|
// returned true for the returned completed value.
|
|
//
|
|
// Locking: this operation is internally synchronized and hence safe to be
|
|
// called from multiple threads.
|
|
NvU64 uvm_gpu_tracking_semaphore_update_completed_value(uvm_gpu_tracking_semaphore_t *tracking_sem);
|
|
|
|
// See the comments for uvm_gpu_tracking_semaphore_is_value_completed
|
|
static bool uvm_gpu_tracking_semaphore_is_completed(uvm_gpu_tracking_semaphore_t *tracking_sem)
|
|
{
|
|
return uvm_gpu_tracking_semaphore_is_value_completed(tracking_sem, tracking_sem->queued_value);
|
|
}
|
|
|
|
#endif // __UVM_GPU_SEMAPHORE_H__
|