mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2025-01-20 15:52:10 +01:00
231 lines
8.8 KiB
C
231 lines
8.8 KiB
C
|
/*******************************************************************************
|
||
|
Copyright (c) 2018-2022 NVIDIA Corporation
|
||
|
|
||
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||
|
of this software and associated documentation files (the "Software"), to
|
||
|
deal in the Software without restriction, including without limitation the
|
||
|
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||
|
sell copies of the Software, and to permit persons to whom the Software is
|
||
|
furnished to do so, subject to the following conditions:
|
||
|
|
||
|
The above copyright notice and this permission notice shall be
|
||
|
included in all copies or substantial portions of the Software.
|
||
|
|
||
|
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||
|
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||
|
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||
|
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||
|
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||
|
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||
|
DEALINGS IN THE SOFTWARE.
|
||
|
|
||
|
*******************************************************************************/
|
||
|
|
||
|
#include "uvm_hal.h"
|
||
|
#include "uvm_hal_types.h"
|
||
|
#include "clc6b5.h"
|
||
|
#include "clc7b5.h"
|
||
|
#include "clc56f.h" // Needed because HAL ce_init pushes SET_OBJECT
|
||
|
|
||
|
// Decide whether a method may be pushed on the channel that owns the push.
//
// Pushes on non-proxy channels are never restricted, so any method is
// accepted. On proxy channels only the methods enumerated in the switch below
// are accepted; any other method address is logged and rejected.
//
// method_data is not inspected.
//
// Returns true if the method is accepted, false otherwise.
bool uvm_hal_ampere_ce_method_validate_c6b5(uvm_push_t *push, NvU32 method_address, NvU32 method_data)
{
    bool is_supported;

    if (!uvm_channel_is_proxy(push->channel))
        return true;

    switch (method_address) {
        case NVC56F_SET_OBJECT:
        case NVC6B5_SET_SEMAPHORE_A:
        case NVC6B5_SET_SEMAPHORE_B:
        case NVC6B5_SET_SEMAPHORE_PAYLOAD:
        case NVC6B5_SET_SRC_PHYS_MODE:
        case NVC6B5_SET_DST_PHYS_MODE:
        case NVC6B5_LAUNCH_DMA:
        case NVC6B5_OFFSET_IN_UPPER:
        case NVC6B5_OFFSET_IN_LOWER:
        case NVC6B5_OFFSET_OUT_UPPER:
        case NVC6B5_OFFSET_OUT_LOWER:
        case NVC6B5_LINE_LENGTH_IN:
        case NVC6B5_SET_REMAP_CONST_A:
        case NVC6B5_SET_REMAP_CONST_B:
        case NVC6B5_SET_REMAP_COMPONENTS:
            is_supported = true;
            break;
        default:
            is_supported = false;
            break;
    }

    if (!is_supported)
        UVM_ERR_PRINT("Unsupported CE method 0x%x\n", method_address);

    return is_supported;
}
|
||
|
|
||
|
// Translate a UVM aperture into a SET_SRC_PHYS_MODE method value.
//
// UVM_APERTURE_SYS maps to the COHERENT_SYSMEM target and UVM_APERTURE_VID to
// the LOCAL_FB target. Every other aperture is handled as peer memory: the
// PEERMEM target is combined with FLA set to 0 and the PEER_ID obtained from
// UVM_APERTURE_PEER_ID().
//
// The compile-time checks guarantee that the TARGET encodings of
// SET_SRC_PHYS_MODE and SET_DST_PHYS_MODE are identical for the three targets
// used here.
static NvU32 ce_aperture(uvm_aperture_t aperture)
{
    NvU32 peer_phys_mode;

    BUILD_BUG_ON(HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB) !=
                 HWCONST(C6B5, SET_DST_PHYS_MODE, TARGET, LOCAL_FB));
    BUILD_BUG_ON(HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM) !=
                 HWCONST(C6B5, SET_DST_PHYS_MODE, TARGET, COHERENT_SYSMEM));
    BUILD_BUG_ON(HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM) !=
                 HWCONST(C6B5, SET_DST_PHYS_MODE, TARGET, PEERMEM));

    if (aperture == UVM_APERTURE_SYS)
        return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, COHERENT_SYSMEM);

    if (aperture == UVM_APERTURE_VID)
        return HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, LOCAL_FB);

    // Neither sysmem nor local vidmem: encode the aperture as a peer.
    peer_phys_mode = HWCONST(C6B5, SET_SRC_PHYS_MODE, TARGET, PEERMEM);
    peer_phys_mode |= HWVALUE(C6B5, SET_SRC_PHYS_MODE, FLA, 0);
    peer_phys_mode |= HWVALUE(C6B5, SET_SRC_PHYS_MODE, PEER_ID, UVM_APERTURE_PEER_ID(aperture));

    return peer_phys_mode;
}
|
||
|
|
||
|
// Push SET_{SRC,DST}_PHYS mode if needed and return LAUNCH_DMA_{SRC,DST}_TYPE
|
||
|
// flags
|
||
|
NvU32 uvm_hal_ampere_ce_phys_mode(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
|
||
|
{
|
||
|
NvU32 launch_dma_src_dst_type = 0;
|
||
|
|
||
|
if (src.is_virtual)
|
||
|
launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, SRC_TYPE, VIRTUAL);
|
||
|
else
|
||
|
launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, SRC_TYPE, PHYSICAL);
|
||
|
|
||
|
if (dst.is_virtual)
|
||
|
launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, DST_TYPE, VIRTUAL);
|
||
|
else
|
||
|
launch_dma_src_dst_type |= HWCONST(C6B5, LAUNCH_DMA, DST_TYPE, PHYSICAL);
|
||
|
|
||
|
if (!src.is_virtual && !dst.is_virtual) {
|
||
|
NV_PUSH_2U(C6B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture),
|
||
|
SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
|
||
|
}
|
||
|
else if (!src.is_virtual) {
|
||
|
NV_PUSH_1U(C6B5, SET_SRC_PHYS_MODE, ce_aperture(src.aperture));
|
||
|
}
|
||
|
else if (!dst.is_virtual) {
|
||
|
NV_PUSH_1U(C6B5, SET_DST_PHYS_MODE, ce_aperture(dst.aperture));
|
||
|
}
|
||
|
|
||
|
return launch_dma_src_dst_type;
|
||
|
}
|
||
|
|
||
|
NvU32 uvm_hal_ampere_ce_plc_mode_c7b5(void)
|
||
|
{
|
||
|
return HWCONST(C7B5, LAUNCH_DMA, DISABLE_PLC, TRUE);
|
||
|
}
|
||
|
|
||
|
// Validate the addresses of a memcopy about to be pushed.
//
// The checks only apply when the GPU owning the push is in SR-IOV heavy mode;
// in any other mode the memcopy is always accepted.
//
// On proxy channels:
//  - dst must be a physical, 8-byte aligned address in vidmem
//    (UVM_APERTURE_VID).
//  - src must be a virtual address that falls within the pushbuffer range
//    occupied by the push, i.e. [push begin VA, push begin VA + push size).
//
// On all other channels, dst and src must each be either virtual, or physical
// with a peer aperture.
//
// Returns true if the memcopy is accepted. Otherwise an error describing the
// first failed check is logged and false is returned.
bool uvm_hal_ampere_ce_memcopy_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, uvm_gpu_address_t src)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu))
        return true;

    if (uvm_channel_is_proxy(push->channel)) {
        NvU64 push_begin_gpu_va;

        if (dst.is_virtual) {
            UVM_ERR_PRINT("Destination address of memcopy must be physical, not virtual\n");
            return false;
        }

        if (dst.aperture != UVM_APERTURE_VID) {
            UVM_ERR_PRINT("Destination address of memcopy must be in vidmem\n");
            return false;
        }

        // Only the destination alignment is checked. The source address is
        // irrelevant, since it is a pushbuffer offset.
        if (!IS_ALIGNED(dst.address, 8)) {
            // The message is newline-terminated like every other error in this
            // file, so that it is not merged with the next log line.
            UVM_ERR_PRINT("Destination address of memcopy is not 8-byte aligned\n");
            return false;
        }

        if (!src.is_virtual) {
            UVM_ERR_PRINT("Source address of memcopy must be virtual\n");
            return false;
        }

        push_begin_gpu_va = uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);

        // The source must lie inside the pushbuffer region used by this push.
        if ((src.address < push_begin_gpu_va) || (src.address >= push_begin_gpu_va + uvm_push_get_size(push))) {
            UVM_ERR_PRINT("Source address of memcopy must point to pushbuffer\n");
            return false;
        }
    }
    else {
        // TODO: Bug 3429418: When in SR-IOV heavy, a memcopy/memset pushed to a
        // UVM internal channel cannot use peer physical addresses.
        if (!dst.is_virtual && !uvm_aperture_is_peer(dst.aperture)) {
            UVM_ERR_PRINT("Destination address of memcopy must be virtual, not physical (aperture: %s)\n",
                          uvm_gpu_address_aperture_string(dst));
            return false;
        }

        if (!src.is_virtual && !uvm_aperture_is_peer(src.aperture)) {
            UVM_ERR_PRINT("Source address of memcopy must be virtual, not physical (aperture: %s)\n",
                          uvm_gpu_address_aperture_string(src));
            return false;
        }
    }

    return true;
}
|
||
|
|
||
|
// In SR-IOV heavy (GA100 only), the UVM driver is expected to push a patched
|
||
|
// version of an inlined memcopy to the proxy channels. The patching consists in
|
||
|
// passing the offset of the inlined data within the push as the source virtual
|
||
|
// address, instead of passing its GPU VA.
|
||
|
//
|
||
|
// Copies pushed to internal channels use the GPU VA of the inlined data,
|
||
|
// irrespective of the virtualization mode.
|
||
|
void uvm_hal_ampere_ce_memcopy_patch_src_c6b5(uvm_push_t *push, uvm_gpu_address_t *src)
|
||
|
{
|
||
|
if (!uvm_channel_is_proxy(push->channel))
|
||
|
return;
|
||
|
|
||
|
src->address -= uvm_pushbuffer_get_gpu_va_for_push(push->channel->pool->manager->pushbuffer, push);
|
||
|
}
|
||
|
|
||
|
// Validate the destination address and element size of a memset about to be
// pushed.
//
// The checks only apply when the GPU owning the push is in SR-IOV heavy mode;
// in any other mode the memset is always accepted.
//
// On proxy channels, dst must be a physical, 8-byte aligned address in vidmem
// (UVM_APERTURE_VID), and element_size must be exactly 8 bytes.
//
// On all other channels, dst must be either virtual, or physical with a peer
// aperture; element_size is not checked.
//
// Returns true if the memset is accepted. Otherwise an error describing the
// first failed check is logged and false is returned.
bool uvm_hal_ampere_ce_memset_validate_c6b5(uvm_push_t *push, uvm_gpu_address_t dst, size_t element_size)
{
    uvm_gpu_t *gpu = uvm_push_get_gpu(push);

    if (!uvm_gpu_is_virt_mode_sriov_heavy(gpu))
        return true;

    if (uvm_channel_is_proxy(push->channel)) {
        if (dst.is_virtual) {
            UVM_ERR_PRINT("Destination address of memset must be physical, not virtual\n");
            return false;
        }

        if (dst.aperture != UVM_APERTURE_VID) {
            UVM_ERR_PRINT("Destination address of memset must be in vidmem\n");
            return false;
        }

        if (!IS_ALIGNED(dst.address, 8)) {
            // The message is newline-terminated like every other error in this
            // file, so that it is not merged with the next log line.
            UVM_ERR_PRINT("Destination address of memset is not 8-byte aligned\n");
            return false;
        }

        // Disallow memsets that don't match the page table/directory entry
        // size. PDE0 entries are 16 bytes wide, but those are written using a
        // memcopy.
        //
        // The memset size is not checked to be a multiple of the element size
        // because the check is not exclusive of SR-IOV heavy, and it is already
        // present in the uvm_hal_*_memset_* functions.
        if (element_size != 8) {
            UVM_ERR_PRINT("Memset data must be 8 bytes wide, but found %zu instead\n", element_size);
            return false;
        }
    }
    // TODO: Bug 3429418: When in SR-IOV heavy, a memcopy/memset pushed to a
    // UVM internal channel cannot use peer physical addresses.
    else if (!dst.is_virtual && !uvm_aperture_is_peer(dst.aperture)) {
        UVM_ERR_PRINT("Destination address of memset must be virtual, not physical (aperture: %s)\n",
                      uvm_gpu_address_aperture_string(dst));
        return false;
    }

    return true;
}
|