mirror of
https://github.com/NVIDIA/open-gpu-kernel-modules.git
synced 2025-02-01 06:52:11 +01:00
545.29.03
This commit is contained in:
parent
b5bf85a8e3
commit
f364378a65
@ -2,6 +2,8 @@
|
||||
|
||||
## Release 545 Entries
|
||||
|
||||
### [545.29.03] 2023-10-31
|
||||
|
||||
### [545.23.06] 2023-10-17
|
||||
|
||||
#### Fixed
|
||||
@ -62,6 +64,10 @@
|
||||
|
||||
## Release 525 Entries
|
||||
|
||||
#### Fixed
|
||||
|
||||
- Fix nvidia_p2p_get_pages(): Fix double-free in register-callback error path, [#557](https://github.com/NVIDIA/open-gpu-kernel-modules/pull/557) by @BrendanCunningham
|
||||
|
||||
### [525.116.04] 2023-05-09
|
||||
|
||||
### [525.116.03] 2023-04-25
|
||||
|
10
README.md
10
README.md
@ -1,7 +1,7 @@
|
||||
# NVIDIA Linux Open GPU Kernel Module Source
|
||||
|
||||
This is the source release of the NVIDIA Linux open GPU kernel modules,
|
||||
version 545.23.06.
|
||||
version 545.29.03.
|
||||
|
||||
|
||||
## How to Build
|
||||
@ -17,7 +17,7 @@ as root:
|
||||
|
||||
Note that the kernel modules built here must be used with GSP
|
||||
firmware and user-space NVIDIA GPU driver components from a corresponding
|
||||
545.23.06 driver release. This can be achieved by installing
|
||||
545.29.03 driver release. This can be achieved by installing
|
||||
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
|
||||
option. E.g.,
|
||||
|
||||
@ -188,7 +188,7 @@ encountered specific to them.
|
||||
For details on feature support and limitations, see the NVIDIA GPU driver
|
||||
end user README here:
|
||||
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/545.23.06/README/kernel_open.html
|
||||
https://us.download.nvidia.com/XFree86/Linux-x86_64/545.29.03/README/kernel_open.html
|
||||
|
||||
In the below table, if three IDs are listed, the first is the PCI Device
|
||||
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
|
||||
@ -658,6 +658,7 @@ Subsystem Device ID.
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 147F |
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1622 |
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1623 |
|
||||
| NVIDIA PG509-210 | 20B2 10DE 1625 |
|
||||
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A7 |
|
||||
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A8 |
|
||||
| NVIDIA A100 80GB PCIe | 20B5 10DE 1533 |
|
||||
@ -665,6 +666,7 @@ Subsystem Device ID.
|
||||
| NVIDIA PG506-232 | 20B6 10DE 1492 |
|
||||
| NVIDIA A30 | 20B7 10DE 1532 |
|
||||
| NVIDIA A30 | 20B7 10DE 1804 |
|
||||
| NVIDIA A30 | 20B7 10DE 1852 |
|
||||
| NVIDIA A800-SXM4-40GB | 20BD 10DE 17F4 |
|
||||
| NVIDIA A100-PCIE-40GB | 20F1 10DE 145F |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B |
|
||||
@ -748,6 +750,8 @@ Subsystem Device ID.
|
||||
| NVIDIA H100 PCIe | 2331 10DE 1626 |
|
||||
| NVIDIA H100 | 2339 10DE 17FC |
|
||||
| NVIDIA H800 NVL | 233A 10DE 183A |
|
||||
| GH200 120GB | 2342 10DE 16EB |
|
||||
| GH200 480GB | 2342 10DE 1809 |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 2414 |
|
||||
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
|
||||
| NVIDIA RTX A5500 Laptop GPU | 2438 |
|
||||
|
@ -72,7 +72,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
|
||||
EXTRA_CFLAGS += -I$(src)
|
||||
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-error -Wno-format-extra-args
|
||||
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"545.23.06\"
|
||||
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"545.29.03\"
|
||||
|
||||
ifneq ($(SYSSRCHOST1X),)
|
||||
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)
|
||||
|
@ -729,6 +729,7 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
|
||||
|
||||
params->gpu_id = nv_dev->gpu_info.gpu_id;
|
||||
params->primary_index = dev->primary->index;
|
||||
params->supports_alloc = false;
|
||||
params->generic_page_kind = 0;
|
||||
params->page_kind_generation = 0;
|
||||
params->sector_layout = 0;
|
||||
@ -736,15 +737,20 @@ static int nv_drm_get_dev_info_ioctl(struct drm_device *dev,
|
||||
params->supports_semsurf = false;
|
||||
|
||||
#if defined(NV_DRM_ATOMIC_MODESET_AVAILABLE)
|
||||
params->generic_page_kind = nv_dev->genericPageKind;
|
||||
params->page_kind_generation = nv_dev->pageKindGeneration;
|
||||
params->sector_layout = nv_dev->sectorLayout;
|
||||
/* Semaphore surfaces are only supported if the modeset = 1 parameter is set */
|
||||
if ((nv_dev->pDevice) != NULL && (nv_dev->semsurf_stride != 0)) {
|
||||
params->supports_semsurf = true;
|
||||
/* Memory allocation and semaphore surfaces are only supported
|
||||
* if the modeset = 1 parameter is set */
|
||||
if (nv_dev->pDevice != NULL) {
|
||||
params->supports_alloc = true;
|
||||
params->generic_page_kind = nv_dev->genericPageKind;
|
||||
params->page_kind_generation = nv_dev->pageKindGeneration;
|
||||
params->sector_layout = nv_dev->sectorLayout;
|
||||
|
||||
if (nv_dev->semsurf_stride != 0) {
|
||||
params->supports_semsurf = true;
|
||||
#if defined(NV_SYNC_FILE_GET_FENCE_PRESENT)
|
||||
params->supports_sync_fd = true;
|
||||
params->supports_sync_fd = true;
|
||||
#endif /* defined(NV_SYNC_FILE_GET_FENCE_PRESENT) */
|
||||
}
|
||||
}
|
||||
#endif /* defined(NV_DRM_ATOMIC_MODESET_AVAILABLE) */
|
||||
|
||||
|
@ -178,7 +178,10 @@ struct drm_nvidia_get_dev_info_params {
|
||||
uint32_t gpu_id; /* OUT */
|
||||
uint32_t primary_index; /* OUT; the "card%d" value */
|
||||
|
||||
/* See DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D definitions of these */
|
||||
uint32_t supports_alloc; /* OUT */
|
||||
/* The generic_page_kind, page_kind_generation, and sector_layout
|
||||
* fields are only valid if supports_alloc is true.
|
||||
* See DRM_FORMAT_MOD_NVIDIA_BLOCK_LINEAR_2D definitions of these. */
|
||||
uint32_t generic_page_kind; /* OUT */
|
||||
uint32_t page_kind_generation; /* OUT */
|
||||
uint32_t sector_layout; /* OUT */
|
||||
|
@ -156,7 +156,7 @@ NvS32 NV_API_CALL nv_request_msix_irq(nv_linux_state_t *nvl)
|
||||
{
|
||||
for( j = 0; j < i; j++)
|
||||
{
|
||||
free_irq(nvl->msix_entries[i].vector, (void *)nvl);
|
||||
free_irq(nvl->msix_entries[j].vector, (void *)nvl);
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -506,8 +506,13 @@ static int nv_p2p_get_pages(
|
||||
(*page_table)->page_size = page_size_index;
|
||||
|
||||
os_free_mem(physical_addresses);
|
||||
physical_addresses = NULL;
|
||||
|
||||
os_free_mem(wreqmb_h);
|
||||
wreqmb_h = NULL;
|
||||
|
||||
os_free_mem(rreqmb_h);
|
||||
rreqmb_h = NULL;
|
||||
|
||||
if (free_callback != NULL)
|
||||
{
|
||||
|
@ -2068,11 +2068,13 @@ void DeviceImpl::setDscDecompressionDevice(bool bDscCapBasedOnParent)
|
||||
}
|
||||
}
|
||||
}
|
||||
else if (this->parent && this->parent->isDSCDecompressionSupported())
|
||||
else if (this->parent && this->parent->isDSCDecompressionSupported() &&
|
||||
!(this->isLogical()))
|
||||
{
|
||||
//
|
||||
// This condition takes care of sink devices not capable of DSC
|
||||
// but parent is capable of DSC decompression.
|
||||
// but parent is capable of DSC decompression. We need to skip this
|
||||
// if sink is at logical port.
|
||||
//
|
||||
this->bDSCPossible = true;
|
||||
this->devDoingDscDecompression = this->parent;
|
||||
|
@ -36,25 +36,25 @@
|
||||
// and then checked back in. You cannot make changes to these sections without
|
||||
// corresponding changes to the buildmeister script
|
||||
#ifndef NV_BUILD_BRANCH
|
||||
#define NV_BUILD_BRANCH r545_74
|
||||
#define NV_BUILD_BRANCH r545_96
|
||||
#endif
|
||||
#ifndef NV_PUBLIC_BRANCH
|
||||
#define NV_PUBLIC_BRANCH r545_74
|
||||
#define NV_PUBLIC_BRANCH r545_96
|
||||
#endif
|
||||
|
||||
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
|
||||
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r545/r545_74-96"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33409679)
|
||||
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r545/r545_96-121"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33470109)
|
||||
#define NV_BUILD_TYPE "Official"
|
||||
#define NV_BUILD_NAME "rel/gpu_drv/r545/r545_74-96"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33409679)
|
||||
#define NV_BUILD_NAME "rel/gpu_drv/r545/r545_96-121"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33470109)
|
||||
|
||||
#else /* Windows builds */
|
||||
#define NV_BUILD_BRANCH_VERSION "r545_74-8"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33409679)
|
||||
#define NV_BUILD_BRANCH_VERSION "r545_96-3"
|
||||
#define NV_BUILD_CHANGELIST_NUM (33467912)
|
||||
#define NV_BUILD_TYPE "Official"
|
||||
#define NV_BUILD_NAME "545.87"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33409679)
|
||||
#define NV_BUILD_NAME "546.04"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (33467912)
|
||||
#define NV_BUILD_BRANCH_BASE_VERSION R545
|
||||
#endif
|
||||
// End buildmeister python edited section
|
||||
|
@ -4,7 +4,7 @@
|
||||
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
|
||||
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
|
||||
|
||||
#define NV_VERSION_STRING "545.23.06"
|
||||
#define NV_VERSION_STRING "545.29.03"
|
||||
|
||||
#else
|
||||
|
||||
|
@ -887,10 +887,6 @@ typedef struct NV30F1_CTRL_GSYNC_GET_CONTROL_SWAP_LOCK_WINDOW_PARAMS {
|
||||
* This parameter is set by the client to indicate the
|
||||
* gpuId of the GPU to which the display to be optimized
|
||||
* is attached.
|
||||
* display
|
||||
* This parameter is not used by RM currently.
|
||||
* Clients can ignore this parameter. Note that this
|
||||
* parameter will be removed in future.
|
||||
* output
|
||||
* This parameter is set by the client to indicate the
|
||||
* output resource type of the display to be optimized.
|
||||
@ -1033,6 +1029,12 @@ typedef struct NV30F1_CTRL_GSYNC_GET_CONTROL_SWAP_LOCK_WINDOW_PARAMS {
|
||||
* optimal pixel clock to use with the adjusted mode,
|
||||
* in units of Hz.
|
||||
*
|
||||
*
|
||||
* bOptimized[out]
|
||||
* This is set to NV_TRUE if the timings were successfully optimized, and
|
||||
* NV_FALSE otherwise.
|
||||
*
|
||||
*
|
||||
* Progressive Raster Structure
|
||||
*
|
||||
* hSyncEnd hTotal
|
||||
@ -1145,28 +1147,29 @@ typedef struct NV30F1_CTRL_GSYNC_GET_CONTROL_SWAP_LOCK_WINDOW_PARAMS {
|
||||
#define NV30F1_CTRL_GSYNC_GET_OPTIMIZED_TIMING_PARAMS_MESSAGE_ID (0x60U)
|
||||
|
||||
typedef struct NV30F1_CTRL_GSYNC_GET_OPTIMIZED_TIMING_PARAMS {
|
||||
NvU32 gpuId;
|
||||
NvU32 display;
|
||||
NvU32 output;
|
||||
NvU32 protocol;
|
||||
NvU32 structure;
|
||||
NvU32 adjust;
|
||||
NvU32 hDeltaStep;
|
||||
NvU32 hDeltaMax;
|
||||
NvU32 vDeltaStep;
|
||||
NvU32 vDeltaMax;
|
||||
NvU32 hSyncEnd;
|
||||
NvU32 hBlankEnd;
|
||||
NvU32 hBlankStart;
|
||||
NvU32 hTotal;
|
||||
NvU32 vSyncEnd;
|
||||
NvU32 vBlankEnd;
|
||||
NvU32 vBlankStart;
|
||||
NvU32 vInterlacedBlankEnd;
|
||||
NvU32 vInterlacedBlankStart;
|
||||
NvU32 vTotal;
|
||||
NvU32 refreshX10K;
|
||||
NvU32 pixelClockHz;
|
||||
NvU32 gpuId;
|
||||
NvU32 output;
|
||||
NvU32 protocol;
|
||||
NvU32 structure;
|
||||
NvU32 adjust;
|
||||
NvU32 hDeltaStep;
|
||||
NvU32 hDeltaMax;
|
||||
NvU32 vDeltaStep;
|
||||
NvU32 vDeltaMax;
|
||||
NvU32 hSyncEnd;
|
||||
NvU32 hBlankEnd;
|
||||
NvU32 hBlankStart;
|
||||
NvU32 hTotal;
|
||||
NvU32 vSyncEnd;
|
||||
NvU32 vBlankEnd;
|
||||
NvU32 vBlankStart;
|
||||
NvU32 vInterlacedBlankEnd;
|
||||
NvU32 vInterlacedBlankStart;
|
||||
NvU32 vTotal;
|
||||
NvU32 refreshX10K;
|
||||
NvU32 pixelClockHz;
|
||||
|
||||
NvBool bOptimized;
|
||||
} NV30F1_CTRL_GSYNC_GET_OPTIMIZED_TIMING_PARAMS;
|
||||
|
||||
/* output values */
|
||||
|
@ -786,6 +786,9 @@ static void TweakTimingsForGsync(const NVDpyEvoRec *pDpyEvo,
|
||||
return;
|
||||
}
|
||||
|
||||
nvEvoLogInfoString(pInfoString,
|
||||
"Adjusting Mode Timings for Quadro Sync Compatibility");
|
||||
|
||||
ret = nvRmApiControl(nvEvoGlobal.clientHandle,
|
||||
pDispEvo->pFrameLockEvo->device,
|
||||
NV30F1_CTRL_CMD_GSYNC_GET_OPTIMIZED_TIMING,
|
||||
@ -796,12 +799,13 @@ static void TweakTimingsForGsync(const NVDpyEvoRec *pDpyEvo,
|
||||
nvAssert(!"Failed to convert to Quadro Sync safe timing");
|
||||
/* do not apply the timings returned by RM if the call failed */
|
||||
return;
|
||||
} else if (!gsyncOptTimingParams.bOptimized) {
|
||||
nvEvoLogInfoString(pInfoString, " Timings Unchanged.");
|
||||
return;
|
||||
}
|
||||
|
||||
nvConstructNvModeTimingsFromHwModeTimings(pTimings, &modeTimings);
|
||||
|
||||
nvEvoLogInfoString(pInfoString,
|
||||
"Adjusting Mode Timings for Quadro Sync Compatibility");
|
||||
nvEvoLogInfoString(pInfoString, " Old Timings:");
|
||||
nvEvoLogModeValidationModeTimings(pInfoString, &modeTimings);
|
||||
|
||||
@ -5923,13 +5927,6 @@ NvBool nvConstructHwModeTimingsImpCheckEvo(
|
||||
NVKMS_MODE_VALIDATION_REQUIRE_BOOT_CLOCKS);
|
||||
NvU32 ret;
|
||||
|
||||
/* bypass this checking if the user disabled IMP */
|
||||
|
||||
if ((pParams->overrides &
|
||||
NVKMS_MODE_VALIDATION_NO_EXTENDED_GPU_CAPABILITIES_CHECK) != 0) {
|
||||
return TRUE;
|
||||
}
|
||||
|
||||
activeRmId = nvRmAllocDisplayId(pConnectorEvo->pDispEvo,
|
||||
nvAddDpyIdToEmptyDpyIdList(pConnectorEvo->displayId));
|
||||
if (activeRmId == 0x0) {
|
||||
@ -5954,11 +5951,18 @@ NvBool nvConstructHwModeTimingsImpCheckEvo(
|
||||
timingsParams[head].pUsage = &timings[head].viewPort.guaranteedUsage;
|
||||
}
|
||||
|
||||
ret = nvValidateImpOneDispDowngrade(pConnectorEvo->pDispEvo, timingsParams,
|
||||
requireBootClocks,
|
||||
NV_EVO_REALLOCATE_BANDWIDTH_MODE_NONE,
|
||||
/* downgradePossibleHeadsBitMask */
|
||||
(NVBIT(NVKMS_MAX_HEADS_PER_DISP) - 1UL));
|
||||
/* bypass this checking if the user disabled IMP */
|
||||
if ((pParams->overrides &
|
||||
NVKMS_MODE_VALIDATION_NO_EXTENDED_GPU_CAPABILITIES_CHECK) != 0) {
|
||||
ret = TRUE;
|
||||
} else {
|
||||
ret = nvValidateImpOneDispDowngrade(pConnectorEvo->pDispEvo, timingsParams,
|
||||
requireBootClocks,
|
||||
NV_EVO_REALLOCATE_BANDWIDTH_MODE_NONE,
|
||||
/* downgradePossibleHeadsBitMask */
|
||||
(NVBIT(NVKMS_MAX_HEADS_PER_DISP) - 1UL));
|
||||
}
|
||||
|
||||
if (ret) {
|
||||
*pNumHeads = numHeads;
|
||||
} else {
|
||||
|
@ -808,6 +808,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
|
||||
{ 0x20B2, 0x147f, 0x10de, "NVIDIA A100-SXM4-80GB" },
|
||||
{ 0x20B2, 0x1622, 0x10de, "NVIDIA A100-SXM4-80GB" },
|
||||
{ 0x20B2, 0x1623, 0x10de, "NVIDIA A100-SXM4-80GB" },
|
||||
{ 0x20B2, 0x1625, 0x10de, "NVIDIA PG509-210" },
|
||||
{ 0x20B3, 0x14a7, 0x10de, "NVIDIA A100-SXM-64GB" },
|
||||
{ 0x20B3, 0x14a8, 0x10de, "NVIDIA A100-SXM-64GB" },
|
||||
{ 0x20B5, 0x1533, 0x10de, "NVIDIA A100 80GB PCIe" },
|
||||
@ -815,6 +816,7 @@ static const CHIPS_RELEASED sChipsReleased[] = {
|
||||
{ 0x20B6, 0x1492, 0x10de, "NVIDIA PG506-232" },
|
||||
{ 0x20B7, 0x1532, 0x10de, "NVIDIA A30" },
|
||||
{ 0x20B7, 0x1804, 0x10de, "NVIDIA A30" },
|
||||
{ 0x20B7, 0x1852, 0x10de, "NVIDIA A30" },
|
||||
{ 0x20BD, 0x17f4, 0x10de, "NVIDIA A800-SXM4-40GB" },
|
||||
{ 0x20F1, 0x145f, 0x10de, "NVIDIA A100-PCIE-40GB" },
|
||||
{ 0x20F3, 0x179b, 0x10de, "NVIDIA A800-SXM4-80GB" },
|
||||
@ -899,6 +901,8 @@ static const CHIPS_RELEASED sChipsReleased[] = {
|
||||
{ 0x2331, 0x1626, 0x10de, "NVIDIA H100 PCIe" },
|
||||
{ 0x2339, 0x17fc, 0x10de, "NVIDIA H100" },
|
||||
{ 0x233A, 0x183a, 0x10de, "NVIDIA H800 NVL" },
|
||||
{ 0x2342, 0x16eb, 0x10de, "GH200 120GB" },
|
||||
{ 0x2342, 0x1809, 0x10de, "GH200 480GB" },
|
||||
{ 0x2414, 0x0000, 0x0000, "NVIDIA GeForce RTX 3060 Ti" },
|
||||
{ 0x2420, 0x0000, 0x0000, "NVIDIA GeForce RTX 3080 Ti Laptop GPU" },
|
||||
{ 0x2438, 0x0000, 0x0000, "NVIDIA RTX A5500 Laptop GPU" },
|
||||
|
@ -151,11 +151,28 @@ void pmaAddrtreePrintTree(void *pMap, const char* str);
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
void pmaAddrtreeChangeState(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState);
|
||||
void pmaAddrtreeChangeStateAttrib(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState, NvBool writeAttrib);
|
||||
void pmaAddrtreeChangeStateAttribEx(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState,PMA_PAGESTATUS newStateMask);
|
||||
void pmaAddrtreeChangePageStateAttrib(void * pMap, NvU64 startFrame, NvU64 pageSize,
|
||||
PMA_PAGESTATUS newState, NvBool writeAttrib);
|
||||
void pmaAddrtreeChangeStateAttribEx(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
|
||||
void pmaAddrtreeChangePageStateAttribEx(void * pMap, NvU64 startFrame, NvU64 pageSize,
|
||||
PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
|
||||
|
||||
/*!
|
||||
* @brief Changes the state & attrib bits specified by mask
|
||||
*
|
||||
* Changes the state of the bits given the physical frame number
|
||||
* and the number of frames to change
|
||||
*
|
||||
* @param[in] pMap The addrtree to change
|
||||
* @param[in] frameNum The frame number to change
|
||||
* @param[in] numFrames The number of frames to change
|
||||
* @param[in] newState The new state to change to
|
||||
* @param[in] newStateMask Specific bits to write
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
void pmaAddrtreeChangeBlockStateAttrib(void *pMap, NvU64 frameNum,
|
||||
NvU64 numFrames,
|
||||
PMA_PAGESTATUS newState,
|
||||
PMA_PAGESTATUS newStateMask);
|
||||
|
||||
/*!
|
||||
* @brief Read the page state & attrib bits
|
||||
|
@ -178,10 +178,9 @@ typedef NV_STATUS (*pmaEvictRangeCb_t)(void *ctxPtr, NvU64 physBegin, NvU64 phys
|
||||
*/
|
||||
typedef void *(*pmaMapInit_t)(NvU64 numFrames, NvU64 addrBase, PMA_STATS *pPmaStats, NvBool bProtected);
|
||||
typedef void (*pmaMapDestroy_t)(void *pMap);
|
||||
typedef void (*pmaMapChangeState_t)(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState);
|
||||
typedef void (*pmaMapChangeStateAttrib_t)(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState, NvBool writeAttrib);
|
||||
typedef void (*pmaMapChangeStateAttribEx_t)(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
|
||||
typedef void (*pmaMapChangePageStateAttrib_t)(void *pMap, NvU64 startFrame, NvU64 pageSize, PMA_PAGESTATUS newState, NvBool writeAttrib);
|
||||
typedef void (*pmaMapChangePageStateAttribEx_t)(void *pMap, NvU64 startFrame, NvU64 pageSize, PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
|
||||
typedef void (*pmaMapChangeBlockStateAttrib_t)(void *pMap, NvU64 frameNum, NvU64 numFrames, PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
|
||||
typedef PMA_PAGESTATUS (*pmaMapRead_t)(void *pMap, NvU64 frameNum, NvBool readAttrib);
|
||||
typedef NV_STATUS (*pmaMapScanContiguous_t)(void *pMap, NvU64 addrBase, NvU64 rangeStart, NvU64 rangeEnd,
|
||||
NvU64 numPages, NvU64 *freelist, NvU64 pageSize, NvU64 alignment,
|
||||
@ -201,10 +200,9 @@ struct _PMA_MAP_INFO
|
||||
NvU32 mode;
|
||||
pmaMapInit_t pmaMapInit;
|
||||
pmaMapDestroy_t pmaMapDestroy;
|
||||
pmaMapChangeState_t pmaMapChangeState;
|
||||
pmaMapChangeStateAttrib_t pmaMapChangeStateAttrib;
|
||||
pmaMapChangeStateAttribEx_t pmaMapChangeStateAttribEx;
|
||||
pmaMapChangePageStateAttrib_t pmaMapChangePageStateAttrib;
|
||||
pmaMapChangePageStateAttribEx_t pmaMapChangePageStateAttribEx;
|
||||
pmaMapChangeBlockStateAttrib_t pmaMapChangeBlockStateAttrib;
|
||||
pmaMapRead_t pmaMapRead;
|
||||
pmaMapScanContiguous_t pmaMapScanContiguous;
|
||||
pmaMapScanDiscontiguous_t pmaMapScanDiscontiguous;
|
||||
|
@ -89,34 +89,6 @@ void pmaRegmapDestroy(void *pMap);
|
||||
NvU64 pmaRegmapGetEvictingFrames(void *pMap);
|
||||
void pmaRegmapSetEvictingFrames(void *pMap, NvU64 frameEvictionsInProcess);
|
||||
|
||||
/*!
|
||||
* @brief Changes the recorded state bits
|
||||
*
|
||||
* Changes the state of the bits given the physical frame number
|
||||
*
|
||||
* @param[in] pMap The regmap to change
|
||||
* @param[in] frameNum The frame number to change
|
||||
* @param[in] newState The new state to change to
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
void pmaRegmapChangeState(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState);
|
||||
|
||||
/*!
|
||||
* @brief Changes the recorded state & attrib bits
|
||||
*
|
||||
* Changes the state of the bits given the physical frame number
|
||||
*
|
||||
* @param[in] pMap The regmap to change
|
||||
* @param[in] frameNum The frame number to change
|
||||
* @param[in] newState The new state to change to
|
||||
* @param[in] writeAttrib Write attribute bits as well
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
void pmaRegmapChangeStateAttrib(void *pMap, NvU64 frameNum,
|
||||
PMA_PAGESTATUS newState, NvBool writeAttrib);
|
||||
|
||||
/*!
|
||||
* @brief Changes the recorded state & attrib bits for an entire page
|
||||
*
|
||||
@ -131,8 +103,8 @@ void pmaRegmapChangeStateAttrib(void *pMap, NvU64 frameNum,
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
void pmaRegmapChangePageStateAttrib(void * pMap, NvU64 frameNumStart, NvU64 pageSize,
|
||||
PMA_PAGESTATUS newState, NvBool writeAttrib);
|
||||
void pmaRegmapChangePageStateAttribEx(void * pMap, NvU64 frameNumStart, NvU64 pageSize,
|
||||
PMA_PAGESTATUS newState, PMA_PAGESTATUS newStateMask);
|
||||
|
||||
/*!
|
||||
* @brief Changes the state & attrib bits specified by mask
|
||||
@ -150,6 +122,25 @@ void pmaRegmapChangeStateAttribEx(void *pMap, NvU64 frameNum,
|
||||
PMA_PAGESTATUS newState,
|
||||
PMA_PAGESTATUS newStateMask);
|
||||
|
||||
/*!
|
||||
* @brief Changes the state & attrib bits specified by mask
|
||||
*
|
||||
* Changes the state of the bits given the physical frame number
|
||||
* and the number of frames to change
|
||||
*
|
||||
* @param[in] pMap The regmap to change
|
||||
* @param[in] frameNum The frame number to change
|
||||
* @param[in] numFrames The number of frames to change
|
||||
* @param[in] newState The new state to change to
|
||||
* @param[in] newStateMask Specific bits to write
|
||||
*
|
||||
* @return void
|
||||
*/
|
||||
void pmaRegmapChangeBlockStateAttrib(void *pMap, NvU64 frameNum,
|
||||
NvU64 numFrames,
|
||||
PMA_PAGESTATUS newState,
|
||||
PMA_PAGESTATUS newStateMask);
|
||||
|
||||
/*!
|
||||
* @brief Read the page state & attrib bits
|
||||
*
|
||||
|
@ -2404,6 +2404,8 @@ kbusUpdateRmAperture_GM107
|
||||
{
|
||||
pFmt = pKernelBus->bar2[gfid].pFmt;
|
||||
|
||||
NV_CHECK_OR_RETURN(LEVEL_ERROR, pFmt != NULL, NV_ERR_INVALID_ARGUMENT);
|
||||
|
||||
// MMU_MAP_CTX
|
||||
mapTarget.pLevelFmt = mmuFmtFindLevelWithPageShift(pFmt->pRoot,
|
||||
BIT_IDX_64(pageSize));
|
||||
|
@ -2776,6 +2776,10 @@ kchannelCtrlCmdGetClassEngineid_IMPL
|
||||
return NV_ERR_OBJECT_NOT_FOUND;
|
||||
}
|
||||
|
||||
NV_CHECK_OR_RETURN(LEVEL_ERROR,
|
||||
pParams->hObject != RES_GET_CLIENT_HANDLE(pKernelChannel),
|
||||
NV_ERR_INVALID_ARGUMENT);
|
||||
|
||||
if (IS_VIRTUAL_WITHOUT_SRIOV(pGpu) ||
|
||||
(IS_VIRTUAL_WITH_SRIOV(pGpu) && gpuIsWarBug200577889SriovHeavyEnabled(pGpu)))
|
||||
{
|
||||
|
@ -1738,8 +1738,9 @@ kgraphicsCreateGoldenImageChannel_IMPL
|
||||
NvU32 classNum;
|
||||
MIG_INSTANCE_REF ref;
|
||||
NvU32 objectType;
|
||||
NvU32 primarySliSubDeviceInstance;
|
||||
|
||||
// XXX This should be removed when braodcast SLI support is deprecated
|
||||
// XXX This should be removed when broadcast SLI support is deprecated
|
||||
if (!gpumgrIsParentGPU(pGpu))
|
||||
{
|
||||
return NV_OK;
|
||||
@ -1750,6 +1751,8 @@ kgraphicsCreateGoldenImageChannel_IMPL
|
||||
// FIXME these allocations corrupt BC state
|
||||
NV_ASSERT_OK_OR_RETURN(
|
||||
rmapiutilAllocClientAndDeviceHandles(pRmApi, pGpu, &hClientId, &hDeviceId, &hSubdeviceId));
|
||||
// rmapiutilAllocClientAndDeviceHandles allocates a subdevice object for this subDeviceInstance
|
||||
primarySliSubDeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
|
||||
|
||||
NV_ASSERT_OK_OR_RETURN(serverGetClientUnderLock(&g_resServ, hClientId, &pClientId));
|
||||
|
||||
@ -1765,6 +1768,11 @@ kgraphicsCreateGoldenImageChannel_IMPL
|
||||
{
|
||||
NvHandle hSecondary;
|
||||
NV2080_ALLOC_PARAMETERS nv2080AllocParams;
|
||||
NvU32 thisSubDeviceInstance = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
|
||||
|
||||
// Skip if already allocated by rmapiutilAllocClientAndDeviceHandles()
|
||||
if (thisSubDeviceInstance == primarySliSubDeviceInstance)
|
||||
SLI_LOOP_CONTINUE;
|
||||
|
||||
// Allocate a subDevice
|
||||
NV_CHECK_OK_OR_GOTO(status, LEVEL_ERROR,
|
||||
@ -1772,7 +1780,7 @@ kgraphicsCreateGoldenImageChannel_IMPL
|
||||
cleanup);
|
||||
|
||||
portMemSet(&nv2080AllocParams, 0, sizeof(nv2080AllocParams));
|
||||
nv2080AllocParams.subDeviceId = gpumgrGetSubDeviceInstanceFromGpu(pGpu);
|
||||
nv2080AllocParams.subDeviceId = thisSubDeviceInstance;
|
||||
|
||||
NV_CHECK_OK(status, LEVEL_SILENT,
|
||||
pRmApi->AllocWithHandle(pRmApi,
|
||||
|
@ -390,8 +390,8 @@ memmgrInitInternalChannels_IMPL
|
||||
IS_MIG_ENABLED(pGpu) ||
|
||||
gpuIsCCorApmFeatureEnabled(pGpu) ||
|
||||
IsSLIEnabled(pGpu) ||
|
||||
RMCFG_FEATURE_ARCH_PPC64LE ||
|
||||
RMCFG_FEATURE_ARCH_AARCH64)
|
||||
NVCPU_IS_PPC64LE ||
|
||||
NVCPU_IS_AARCH64)
|
||||
{
|
||||
// BUG 4167899: Temporarily skip CeUtils creation on platforms where it fails
|
||||
NV_PRINTF(LEVEL_INFO, "Skipping global CeUtils creation\n");
|
||||
|
@ -1890,8 +1890,8 @@ __pmaAddrtreeChangePageStateAttribEx
|
||||
// This function wraps the real __pmaAddrtreeChangePageStateAttribEx
|
||||
// to allow addrtree to set 128KB page size
|
||||
//
|
||||
static void
|
||||
_pmaAddrtreeChangePageStateAttribEx
|
||||
void
|
||||
pmaAddrtreeChangePageStateAttribEx
|
||||
(
|
||||
void *pMap,
|
||||
NvU64 frameNumStart,
|
||||
@ -1939,35 +1939,24 @@ pmaAddrtreeChangeStateAttribEx
|
||||
PMA_PAGESTATUS newStateMask
|
||||
)
|
||||
{
|
||||
_pmaAddrtreeChangePageStateAttribEx(pMap, frameNum, _PMA_64KB, newState, newStateMask);
|
||||
pmaAddrtreeChangePageStateAttribEx(pMap, frameNum, _PMA_64KB, newState, newStateMask);
|
||||
}
|
||||
|
||||
// TODO: merge this on PMA level
|
||||
void pmaAddrtreeChangeState(void *pTree, NvU64 frameNum, PMA_PAGESTATUS newState)
|
||||
{
|
||||
pmaAddrtreeChangeStateAttribEx(pTree, frameNum, newState, STATE_MASK);
|
||||
}
|
||||
|
||||
// TODO: merge this on PMA level
|
||||
void pmaAddrtreeChangeStateAttrib(void *pTree, NvU64 frameNum, PMA_PAGESTATUS newState, NvBool writeAttrib)
|
||||
{
|
||||
PMA_PAGESTATUS mask = writeAttrib ? MAP_MASK : STATE_MASK;
|
||||
pmaAddrtreeChangeStateAttribEx(pTree, frameNum, newState, mask);
|
||||
}
|
||||
|
||||
// TODO: merge this on PMA level
|
||||
void
|
||||
pmaAddrtreeChangePageStateAttrib
|
||||
pmaAddrtreeChangeBlockStateAttrib
|
||||
(
|
||||
void * pTree,
|
||||
NvU64 frameNumStart,
|
||||
NvU64 pageSize,
|
||||
void *pMap,
|
||||
NvU64 frame,
|
||||
NvU64 len,
|
||||
PMA_PAGESTATUS newState,
|
||||
NvBool writeAttrib
|
||||
PMA_PAGESTATUS writeMask
|
||||
)
|
||||
{
|
||||
PMA_PAGESTATUS mask = writeAttrib ? MAP_MASK : STATE_MASK;
|
||||
_pmaAddrtreeChangePageStateAttribEx(pTree, frameNumStart, pageSize, newState, mask);
|
||||
while (len != 0)
|
||||
{
|
||||
len--;
|
||||
pmaAddrtreeChangeStateAttribEx(pMap, frame + len, newState, writeMask);
|
||||
}
|
||||
}
|
||||
|
||||
PMA_PAGESTATUS pmaAddrtreeRead
|
||||
|
@ -651,7 +651,7 @@ NV_STATUS pmaNumaAllocate
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
break;
|
||||
}
|
||||
pPma->pMapInfo->pmaMapChangeStateAttrib(pMap, frameOffset, allocOption, NV_TRUE);
|
||||
pPma->pMapInfo->pmaMapChangeStateAttribEx(pMap, frameOffset, allocOption, MAP_MASK);
|
||||
}
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
|
@ -97,7 +97,7 @@ _pmaRollback
|
||||
|
||||
for (j = 0; j < framesPerPage; j++)
|
||||
{
|
||||
pPma->pMapInfo->pmaMapChangeState(pPma->pRegions[regId], (frameNum + j), oldState);
|
||||
pPma->pMapInfo->pmaMapChangeStateAttribEx(pPma->pRegions[regId], (frameNum + j), oldState, STATE_MASK);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -110,7 +110,7 @@ _pmaRollback
|
||||
frameNum = PMA_ADDR2FRAME(pPages[failCount], addrBase);
|
||||
for(i = 0; i < failFrame; i++)
|
||||
{
|
||||
pPma->pMapInfo->pmaMapChangeState(pPma->pRegions[regId], (frameNum + i), oldState);
|
||||
pPma->pMapInfo->pmaMapChangeStateAttribEx(pPma->pRegions[regId], (frameNum + i), oldState, STATE_MASK);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -208,10 +208,9 @@ pmaInitialize(PMA *pPma, NvU32 initFlags)
|
||||
//
|
||||
pMapInfo->pmaMapInit = pmaRegmapInit;
|
||||
pMapInfo->pmaMapDestroy = pmaRegmapDestroy;
|
||||
pMapInfo->pmaMapChangeState = pmaRegmapChangeState;
|
||||
pMapInfo->pmaMapChangeStateAttrib = pmaRegmapChangeStateAttrib;
|
||||
pMapInfo->pmaMapChangeStateAttribEx = pmaRegmapChangeStateAttribEx;
|
||||
pMapInfo->pmaMapChangePageStateAttrib = pmaRegmapChangePageStateAttrib;
|
||||
pMapInfo->pmaMapChangePageStateAttribEx = pmaRegmapChangePageStateAttribEx;
|
||||
pMapInfo->pmaMapChangeBlockStateAttrib = pmaRegmapChangeBlockStateAttrib;
|
||||
pMapInfo->pmaMapRead = pmaRegmapRead;
|
||||
pMapInfo->pmaMapScanContiguous = pmaRegmapScanContiguous;
|
||||
pMapInfo->pmaMapScanDiscontiguous = pmaRegmapScanDiscontiguous;
|
||||
@ -246,10 +245,9 @@ pmaInitialize(PMA *pPma, NvU32 initFlags)
|
||||
{
|
||||
pMapInfo->pmaMapInit = pmaAddrtreeInit;
|
||||
pMapInfo->pmaMapDestroy = pmaAddrtreeDestroy;
|
||||
pMapInfo->pmaMapChangeState = pmaAddrtreeChangeState;
|
||||
pMapInfo->pmaMapChangeStateAttrib = pmaAddrtreeChangeStateAttrib;
|
||||
pMapInfo->pmaMapChangeStateAttribEx = pmaAddrtreeChangeStateAttribEx;
|
||||
pMapInfo->pmaMapChangePageStateAttrib = pmaAddrtreeChangePageStateAttrib;
|
||||
pMapInfo->pmaMapChangePageStateAttribEx = pmaAddrtreeChangePageStateAttribEx;
|
||||
pMapInfo->pmaMapChangeBlockStateAttrib = pmaAddrtreeChangeBlockStateAttrib;
|
||||
pMapInfo->pmaMapRead = pmaAddrtreeRead;
|
||||
pMapInfo->pmaMapScanContiguous = pmaAddrtreeScanContiguous;
|
||||
pMapInfo->pmaMapScanDiscontiguous = pmaAddrtreeScanDiscontiguous;
|
||||
@ -1084,11 +1082,8 @@ pmaAllocatePages_retry:
|
||||
frameBase,
|
||||
frameBase + numFramesAllocated - 1);
|
||||
|
||||
for (i = 0; i < numPagesAllocatedSoFar; i++)
|
||||
{
|
||||
pPma->pMapInfo->pmaMapChangePageStateAttrib(pMap, frameBase + (i * framesPerPage),
|
||||
pageSize, pinOption, NV_TRUE);
|
||||
}
|
||||
pPma->pMapInfo->pmaMapChangeBlockStateAttrib(pMap, frameBase, numPagesAllocatedSoFar * framesPerPage,
|
||||
pinOption, MAP_MASK);
|
||||
|
||||
if (blacklistOffFlag && blacklistOffPerRegion[regId])
|
||||
{
|
||||
@ -1134,8 +1129,8 @@ pmaAllocatePages_retry:
|
||||
}
|
||||
lastFrameRangeEnd = frameBase + framesPerPage - 1;
|
||||
|
||||
pPma->pMapInfo->pmaMapChangePageStateAttrib(pMap, PMA_ADDR2FRAME(pPages[i], addrBase),
|
||||
pageSize, pinOption, NV_TRUE);
|
||||
pPma->pMapInfo->pmaMapChangePageStateAttribEx(pMap, PMA_ADDR2FRAME(pPages[i], addrBase),
|
||||
pageSize, pinOption, MAP_MASK);
|
||||
|
||||
}
|
||||
NV_PRINTF(LEVEL_INFO, "0x%llx through 0x%llx \n",
|
||||
@ -1267,7 +1262,7 @@ pmaPinPages
|
||||
}
|
||||
else
|
||||
{
|
||||
pPma->pMapInfo->pmaMapChangeState(pPma->pRegions[regId], (frameNum + j), STATE_PIN);
|
||||
pPma->pMapInfo->pmaMapChangeStateAttribEx(pPma->pRegions[regId], (frameNum + j), STATE_PIN, STATE_MASK);
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -1320,7 +1315,7 @@ pmaUnpinPages
|
||||
}
|
||||
else
|
||||
{
|
||||
pPma->pMapInfo->pmaMapChangeState(pPma->pRegions[regId], (frameNum + j), STATE_UNPIN);
|
||||
pPma->pMapInfo->pmaMapChangeStateAttribEx(pPma->pRegions[regId], (frameNum + j), STATE_UNPIN, STATE_MASK);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -35,12 +35,13 @@
|
||||
#include "nvport/nvport.h"
|
||||
#include "nvmisc.h"
|
||||
|
||||
#define _UINT_SIZE 64
|
||||
#define _UINT_SHIFT 6
|
||||
#define FRAME_TO_U64_SHIFT 6
|
||||
#define FRAME_TO_U64_SIZE (1llu << FRAME_TO_U64_SHIFT)
|
||||
#define FRAME_TO_U64_MASK (FRAME_TO_U64_SIZE - 1llu)
|
||||
|
||||
#define PAGE_BITIDX(n) ((n) & (_UINT_SIZE - 1))
|
||||
#define PAGE_MAPIDX(n) ((n) >> _UINT_SHIFT)
|
||||
#define MAKE_BITMASK(n) ((NvU64)0x1 << (n))
|
||||
#define PAGE_BITIDX(n) ((n) & (FRAME_TO_U64_SIZE - 1llu))
|
||||
#define PAGE_MAPIDX(n) ((n) >> FRAME_TO_U64_SHIFT)
|
||||
#define MAKE_BITMASK(n) (1llu << (n))
|
||||
|
||||
#define SETBITS(bits, mask, newVal) ((bits & (~mask)) | (mask & newVal))
|
||||
|
||||
@ -105,12 +106,12 @@ _checkOne(NvU64 *bits, NvU64 start, NvU64 end)
|
||||
if (bits[mapIdx] != 0)
|
||||
{
|
||||
firstSetBit = portUtilCountTrailingZeros64(bits[mapIdx]);
|
||||
return ((mapIdx << _UINT_SHIFT) + firstSetBit);
|
||||
return ((mapIdx << FRAME_TO_U64_SHIFT) + firstSetBit);
|
||||
}
|
||||
}
|
||||
|
||||
// handle edge case
|
||||
endMask = (NV_U64_MAX >> (_UINT_SIZE - endBitIdx - 1));
|
||||
endMask = (NV_U64_MAX >> (FRAME_TO_U64_SIZE - endBitIdx - 1));
|
||||
|
||||
if ((bits[endMapIdx] & endMask) == 0)
|
||||
{
|
||||
@ -140,7 +141,7 @@ _checkOne(NvU64 *bits, NvU64 start, NvU64 end)
|
||||
NV_ASSERT(startMapIdx == endMapIdx);
|
||||
|
||||
startMask = (NV_U64_MAX << startBitIdx);
|
||||
endMask = (NV_U64_MAX >> (_UINT_SIZE - endBitIdx - 1));
|
||||
endMask = (NV_U64_MAX >> (FRAME_TO_U64_SIZE - endBitIdx - 1));
|
||||
|
||||
handle = (startMask & endMask);
|
||||
if ((handle & bits[startMapIdx]) == 0)
|
||||
@ -159,43 +160,11 @@ _checkOne(NvU64 *bits, NvU64 start, NvU64 end)
|
||||
|
||||
static NvU64 alignUpToMod(NvU64 frame, NvU64 alignment, NvU64 mod)
|
||||
{
|
||||
if ((frame & (alignment - 1)) <= mod)
|
||||
return NV_ALIGN_DOWN(frame, alignment) + mod;
|
||||
else
|
||||
return NV_ALIGN_UP(frame, alignment) + mod;
|
||||
return ((frame - mod + alignment - 1ll) & ~(alignment - 1ll)) + mod;
|
||||
}
|
||||
|
||||
//
|
||||
// Determine if all frames in the 2MB range is not allocated
|
||||
// They could be in scrubbing or eviction state.
|
||||
//
|
||||
static NvBool _pmaRegmapAllFree2mb(PMA_REGMAP *pRegmap, NvU64 frameNum)
|
||||
static NvU64 alignDownToMod(NvU64 frame, NvU64 alignment, NvU64 mod)
|
||||
{
|
||||
NvU64 baseFrame = (NV_ALIGN_DOWN((frameNum << PMA_PAGE_SHIFT), _PMA_2MB)) >> PMA_PAGE_SHIFT;
|
||||
NvU32 numFrames = _PMA_2MB >> PMA_PAGE_SHIFT;
|
||||
|
||||
// Always return false if the last 2MB range is incomplete
|
||||
if ((baseFrame + numFrames) >= pRegmap->totalFrames)
|
||||
{
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
//
|
||||
// We only care about STATE_PIN and STATE_UNPIN because:
|
||||
// Even if the page is marked as SCRUBBING for example, we should not report OOM and prevent
|
||||
// the clients from scanning the bitmap.
|
||||
//
|
||||
if (_checkOne(pRegmap->map[MAP_IDX_ALLOC_PIN], baseFrame, (baseFrame + numFrames - 1)) != -1)
|
||||
{
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
if (_checkOne(pRegmap->map[MAP_IDX_ALLOC_UNPIN], baseFrame, (baseFrame + numFrames - 1)) != -1)
|
||||
{
|
||||
return NV_FALSE;
|
||||
}
|
||||
|
||||
return NV_TRUE;
|
||||
return ((frame - mod) & ~(alignment - 1ll)) + mod;
|
||||
}
|
||||
|
||||
//
|
||||
@ -235,7 +204,7 @@ _pmaRegmapScanNumaUnevictable
|
||||
|
||||
if (mapIter == endMapIdx)
|
||||
{
|
||||
mask = (mask >> (_UINT_SIZE - endBitIdx - 1));
|
||||
mask = (mask >> (FRAME_TO_U64_SIZE - endBitIdx - 1));
|
||||
}
|
||||
|
||||
if (mapIter == startMapIdx)
|
||||
@ -246,7 +215,7 @@ _pmaRegmapScanNumaUnevictable
|
||||
#ifdef DEBUG_VERBOSE
|
||||
|
||||
NV_PRINTF(LEVEL_INFO, "mapIter %llx frame %llx mask %llx unpinbitmap %llx pinbitmap %llx evictbitmap %llx",
|
||||
mapIter, (mapIter << _UINT_SHIFT), mask, unpinBitmap[mapIter], pinBitmap[mapIter], evictBitmap[mapIter]);
|
||||
mapIter, (mapIter << FRAME_TO_U64_SHIFT), mask, unpinBitmap[mapIter], pinBitmap[mapIter], evictBitmap[mapIter]);
|
||||
#endif
|
||||
// start from the end
|
||||
if ((unpinBitmap[mapIter] & mask) == mask)
|
||||
@ -259,15 +228,15 @@ _pmaRegmapScanNumaUnevictable
|
||||
if (mapIter == endMapIdx)
|
||||
unevictableFrameIndex = frameEnd;
|
||||
else
|
||||
unevictableFrameIndex = (mapIter << _UINT_SHIFT) + (_UINT_SIZE - 1);
|
||||
unevictableFrameIndex = (mapIter << FRAME_TO_U64_SHIFT) + (FRAME_TO_U64_SIZE - 1);
|
||||
break;
|
||||
}
|
||||
#ifdef DEBUG_VERBOSE
|
||||
NV_PRINTF(LEVEL_INFO, "Check leading zero of %llx", ~(unpinBitmap[mapIter] & mask));
|
||||
#endif
|
||||
|
||||
unevictableIndex = _UINT_SIZE - portUtilCountLeadingZeros64((~unpinBitmap[mapIter]) & mask) - 1;
|
||||
unevictableFrameIndex = (mapIter << _UINT_SHIFT) + unevictableIndex;
|
||||
unevictableIndex = FRAME_TO_U64_SIZE - portUtilCountLeadingZeros64((~unpinBitmap[mapIter]) & mask) - 1;
|
||||
unevictableFrameIndex = (mapIter << FRAME_TO_U64_SHIFT) + unevictableIndex;
|
||||
break;
|
||||
}
|
||||
|
||||
@ -384,119 +353,6 @@ pmaRegMapScanContiguousNumaEviction
|
||||
|
||||
return status;
|
||||
}
|
||||
//
|
||||
// Check whether the specified frame range is available for allocation or
|
||||
// eviction.
|
||||
//
|
||||
// Returns:
|
||||
// - NV_OK if the whole range is available and leaves frameIndex unset.
|
||||
//
|
||||
// - NV_ERR_IN_USE if some frames would need to be evicted, and sets frameIndex
|
||||
// to the first one.
|
||||
//
|
||||
// - NV_ERR_NO_MEMORY if some frames are unavailable, and sets frameIndex to
|
||||
// the first one.
|
||||
//
|
||||
// TODO: Would it be better to return the last frame index instead, given how the
|
||||
// search skips over right past it?
|
||||
//
|
||||
static NV_STATUS
|
||||
_pmaRegmapStatus(PMA_REGMAP *pRegmap, NvU64 start, NvU64 end, NvU64 *frameIndex)
|
||||
{
|
||||
NvS64 diff;
|
||||
|
||||
if ((diff = _checkOne(pRegmap->map[MAP_IDX_ALLOC_PIN], start, end)) != -1)
|
||||
{
|
||||
*frameIndex = diff;
|
||||
return NV_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
if (pRegmap->frameEvictionsInProcess > 0)
|
||||
{
|
||||
//
|
||||
// Pages that are being evicted may be in the free state so we need to
|
||||
// check for eviction on all frames as long as any eviction is happening
|
||||
// in the region.
|
||||
//
|
||||
if ((diff = _checkOne(pRegmap->map[MAP_IDX_EVICTING], start, end)) != -1)
|
||||
{
|
||||
*frameIndex = diff;
|
||||
return NV_ERR_NO_MEMORY;
|
||||
}
|
||||
}
|
||||
|
||||
//
|
||||
// Check SCRUBBING
|
||||
// TODO: Skip this check if scrubbing has been completed for all frames.
|
||||
//
|
||||
if ((diff = _checkOne(pRegmap->map[MAP_IDX_SCRUBBING], start, end)) != -1)
|
||||
{
|
||||
*frameIndex = diff;
|
||||
return NV_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
if ((diff = _checkOne(pRegmap->map[MAP_IDX_NUMA_REUSE], start, end)) != -1)
|
||||
{
|
||||
*frameIndex = diff;
|
||||
return NV_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
if ((diff = _checkOne(pRegmap->map[MAP_IDX_ALLOC_UNPIN], start, end)) != -1)
|
||||
{
|
||||
*frameIndex = diff;
|
||||
return NV_ERR_IN_USE;
|
||||
}
|
||||
|
||||
if ((diff = _checkOne(pRegmap->map[MAP_IDX_BLACKLIST], start, end)) != -1)
|
||||
{
|
||||
*frameIndex = diff;
|
||||
return NV_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
//
|
||||
// Return ALL_FREE if all frames in the [start, end] range are available for
|
||||
// allocation or the first frame index that isn't.
|
||||
//
|
||||
static NvS64
|
||||
_pmaRegmapAvailable(PMA_REGMAP *pRegmap, NvU64 start, NvU64 end)
|
||||
{
|
||||
NvU64 unavailableFrameIndex;
|
||||
NV_STATUS frameStatus = _pmaRegmapStatus(pRegmap, start, end, &unavailableFrameIndex);
|
||||
|
||||
if (frameStatus == NV_OK)
|
||||
return ALL_FREE;
|
||||
|
||||
NV_ASSERT(unavailableFrameIndex >= start);
|
||||
NV_ASSERT(unavailableFrameIndex <= end);
|
||||
|
||||
return unavailableFrameIndex;
|
||||
}
|
||||
|
||||
//
|
||||
// Return ALL_FREE if all frames in the [start, end] range are available for
|
||||
// allocation, EVICTABLE if some of them would need to be evicted, or the first
|
||||
// frame index that isn't free nor evictable.
|
||||
//
|
||||
static NvS64
|
||||
_pmaRegmapEvictable(PMA_REGMAP *pRegmap, NvU64 start, NvU64 end)
|
||||
{
|
||||
NvU64 unavailableFrameIndex;
|
||||
NvS64 frameStatus = _pmaRegmapStatus(pRegmap, start, end, &unavailableFrameIndex);
|
||||
|
||||
if (frameStatus == NV_OK)
|
||||
return ALL_FREE;
|
||||
|
||||
NV_ASSERT(unavailableFrameIndex >= start);
|
||||
NV_ASSERT(unavailableFrameIndex <= end);
|
||||
|
||||
if (frameStatus == NV_ERR_IN_USE)
|
||||
return EVICTABLE;
|
||||
|
||||
return unavailableFrameIndex;
|
||||
}
|
||||
|
||||
void *
|
||||
pmaRegmapInit
|
||||
@ -552,6 +408,16 @@ pmaRegmapInit
|
||||
}
|
||||
portMemSet(newMap->map[i], 0, (NvLength) (newMap->mapLength * sizeof(NvU64)));
|
||||
}
|
||||
{
|
||||
//
|
||||
// Simplify logic for 2M tracking. Set the last few nonaligned bits as pinned
|
||||
// so that the XOR logic for delta 2M tracking is never true for an incomplete final page
|
||||
//
|
||||
NvU64 endOffs = (numFrames - 1llu) >> FRAME_TO_U64_SHIFT;
|
||||
NvU64 endBit = (numFrames - 1llu) & FRAME_TO_U64_MASK;
|
||||
NvU64 endMask = endBit == FRAME_TO_U64_MASK ? 0llu : ~(NV_U64_MAX >> (FRAME_TO_U64_MASK - endBit));
|
||||
newMap->map[MAP_IDX_ALLOC_PIN][endOffs] |= endMask;
|
||||
}
|
||||
|
||||
return (void *)newMap;
|
||||
}
|
||||
@ -596,7 +462,6 @@ pmaRegmapDestroy(void *pMap)
|
||||
// Masks:
|
||||
// STATE_MASK, ATTRIB_MASK
|
||||
//
|
||||
|
||||
void
|
||||
pmaRegmapChangeStateAttribEx
|
||||
(
|
||||
@ -606,175 +471,178 @@ pmaRegmapChangeStateAttribEx
|
||||
PMA_PAGESTATUS newStateMask
|
||||
)
|
||||
{
|
||||
NvU64 mapIndex, mapOffset, bits, newVal, mask;
|
||||
NvU32 i, bitWriteCount;
|
||||
PMA_PAGESTATUS oldState, updatedState;
|
||||
NvBool bUpdate2mbTracking = NV_FALSE;
|
||||
PMA_REGMAP *pRegmap = (PMA_REGMAP *)pMap;
|
||||
|
||||
mapIndex = PAGE_MAPIDX(frameNum);
|
||||
mapOffset = PAGE_BITIDX(frameNum);
|
||||
|
||||
NV_ASSERT(pRegmap != NULL); // possible error code return
|
||||
NV_ASSERT(mapIndex < pRegmap->mapLength);
|
||||
|
||||
bitWriteCount = PMA_STATE_BITS_PER_PAGE + PMA_ATTRIB_BITS_PER_PAGE;
|
||||
|
||||
mask = (NvU64)MAKE_BITMASK(mapOffset);
|
||||
|
||||
oldState = pmaRegmapRead(pRegmap, frameNum, NV_TRUE);
|
||||
|
||||
//
|
||||
// If we are going to allocate the 2MB page, we need bookkeeping
|
||||
// before the bitmap is changed
|
||||
//
|
||||
if (((newState & STATE_MASK) != STATE_FREE) && _pmaRegmapAllFree2mb(pRegmap, frameNum))
|
||||
{
|
||||
bUpdate2mbTracking = NV_TRUE;
|
||||
}
|
||||
|
||||
for (i = 0; i < bitWriteCount; i++)
|
||||
{
|
||||
if (NVBIT(i) & newStateMask)
|
||||
{
|
||||
newVal = ((NvU64) (newState & (1 << i)) >> i) << mapOffset;
|
||||
bits = pRegmap->map[i][mapIndex];
|
||||
pRegmap->map[i][mapIndex] = (NvU64) SETBITS(bits, mask, newVal);
|
||||
}
|
||||
}
|
||||
|
||||
// Update some stats for optimization
|
||||
updatedState = pmaRegmapRead(pRegmap, frameNum, NV_TRUE);
|
||||
|
||||
pmaStatsUpdateState(&pRegmap->pPmaStats->numFreeFrames, 1,
|
||||
oldState, updatedState);
|
||||
|
||||
if (pRegmap->bProtected)
|
||||
{
|
||||
pmaStatsUpdateState(&pRegmap->pPmaStats->numFreeFramesProtected, 1,
|
||||
oldState, updatedState);
|
||||
}
|
||||
|
||||
//
|
||||
// If we are freeing a frame, we should check if we need to update the 2MB
|
||||
// page tracking
|
||||
//
|
||||
if (bUpdate2mbTracking ||
|
||||
(((oldState & STATE_MASK) != STATE_FREE) && _pmaRegmapAllFree2mb(pRegmap, frameNum)))
|
||||
{
|
||||
pmaStatsUpdateState(&pRegmap->pPmaStats->numFree2mbPages, 1,
|
||||
oldState, updatedState);
|
||||
|
||||
if (pRegmap->bProtected)
|
||||
{
|
||||
pmaStatsUpdateState(&pRegmap->pPmaStats->numFree2mbPagesProtected, 1,
|
||||
oldState, updatedState);
|
||||
}
|
||||
}
|
||||
pmaRegmapChangeBlockStateAttrib(pMap, frameNum, 1, newState, newStateMask);
|
||||
}
|
||||
|
||||
void
|
||||
pmaRegmapChangeStateAttrib
|
||||
(
|
||||
void *pMap,
|
||||
NvU64 frameNum,
|
||||
PMA_PAGESTATUS newState,
|
||||
NvBool writeAttrib
|
||||
)
|
||||
{
|
||||
NvU64 mapIndex, mapOffset, bits, newVal, mask;
|
||||
NvU32 i;
|
||||
NvU32 bitWriteCount;
|
||||
PMA_PAGESTATUS oldState;
|
||||
NvBool bUpdate2mbTracking = NV_FALSE;
|
||||
PMA_REGMAP *pRegmap = (PMA_REGMAP *)pMap;
|
||||
|
||||
mapIndex = PAGE_MAPIDX(frameNum);
|
||||
mapOffset = PAGE_BITIDX(frameNum);
|
||||
|
||||
NV_ASSERT(pRegmap != NULL); // possible error code return
|
||||
NV_ASSERT(mapIndex < pRegmap->mapLength);
|
||||
|
||||
bitWriteCount = (writeAttrib ?
|
||||
(PMA_STATE_BITS_PER_PAGE + PMA_ATTRIB_BITS_PER_PAGE) :
|
||||
PMA_STATE_BITS_PER_PAGE);
|
||||
|
||||
mask = (NvU64)MAKE_BITMASK(mapOffset);
|
||||
|
||||
oldState = pmaRegmapRead(pRegmap, frameNum, NV_TRUE);
|
||||
|
||||
//
|
||||
// If we are going to allocate the 2MB page, we need bookkeeping
|
||||
// before the bitmap is changed
|
||||
//
|
||||
if (((newState & STATE_MASK) != STATE_FREE) && _pmaRegmapAllFree2mb(pRegmap, frameNum))
|
||||
{
|
||||
bUpdate2mbTracking = NV_TRUE;
|
||||
}
|
||||
|
||||
for (i = 0; i < bitWriteCount; i++)
|
||||
{
|
||||
newVal = ((NvU64) (newState & (1 << i)) >> i) << mapOffset;
|
||||
bits = pRegmap->map[i][mapIndex];
|
||||
pRegmap->map[i][mapIndex] = (NvU64) SETBITS(bits, mask, newVal);
|
||||
}
|
||||
|
||||
NV_ASSERT(pmaRegmapRead(pRegmap, frameNum, writeAttrib) == newState);
|
||||
|
||||
// Update some stats for optimization
|
||||
pmaStatsUpdateState(&pRegmap->pPmaStats->numFreeFrames, 1,
|
||||
oldState, newState);
|
||||
|
||||
if (pRegmap->bProtected)
|
||||
{
|
||||
pmaStatsUpdateState(&pRegmap->pPmaStats->numFreeFramesProtected, 1,
|
||||
oldState, newState);
|
||||
}
|
||||
|
||||
//
|
||||
// If we are freeing a frame, we should check if we need to update the 2MB
|
||||
// page tracking
|
||||
//
|
||||
if (bUpdate2mbTracking ||
|
||||
(((oldState & STATE_MASK) != STATE_FREE) && _pmaRegmapAllFree2mb(pRegmap, frameNum)))
|
||||
{
|
||||
pmaStatsUpdateState(&pRegmap->pPmaStats->numFree2mbPages, 1,
|
||||
oldState, newState);
|
||||
|
||||
if (pRegmap->bProtected)
|
||||
{
|
||||
pmaStatsUpdateState(&pRegmap->pPmaStats->numFree2mbPagesProtected, 1,
|
||||
oldState, newState);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
pmaRegmapChangeState(void *pMap, NvU64 frameNum, PMA_PAGESTATUS newState)
|
||||
{
|
||||
NV_ASSERT(newState <= STATE_PIN);
|
||||
// Write state bits, but not attrib bits
|
||||
pmaRegmapChangeStateAttrib((PMA_REGMAP *)pMap, frameNum, newState, NV_FALSE);
|
||||
}
|
||||
|
||||
void
|
||||
pmaRegmapChangePageStateAttrib
|
||||
pmaRegmapChangePageStateAttribEx
|
||||
(
|
||||
void * pMap,
|
||||
NvU64 startFrame,
|
||||
NvU64 pageSize,
|
||||
PMA_PAGESTATUS newState,
|
||||
NvBool writeAttrib
|
||||
PMA_PAGESTATUS newStateMask
|
||||
)
|
||||
{
|
||||
NvU32 framesPerPage = (NvU32)(pageSize >> PMA_PAGE_SHIFT);
|
||||
NvU64 frame;
|
||||
for (frame = startFrame; frame < startFrame + framesPerPage; frame++)
|
||||
{
|
||||
pmaRegmapChangeStateAttrib((PMA_REGMAP *)pMap, frame, newState, writeAttrib);
|
||||
}
|
||||
pmaRegmapChangeBlockStateAttrib(pMap, startFrame, pageSize / _PMA_64KB, newState, newStateMask);
|
||||
}
|
||||
|
||||
|
||||
static NV_FORCEINLINE
|
||||
void
|
||||
_pmaRegmapDoSingleStateChange
|
||||
(
|
||||
PMA_REGMAP *pRegmap,
|
||||
NvU64 idx,
|
||||
NvU32 newState,
|
||||
NvU32 writeMask,
|
||||
NvU64 bitMask,
|
||||
NvU64 *delta2m,
|
||||
NvU64 *delta64k
|
||||
)
|
||||
{
|
||||
// get bits from map
|
||||
NvU64 pinIn = pRegmap->map[MAP_IDX_ALLOC_PIN][idx];
|
||||
NvU64 unpinIn = pRegmap->map[MAP_IDX_ALLOC_UNPIN][idx];
|
||||
// Or of state for delta-tracking purposes
|
||||
NvU64 initialState = pinIn | unpinIn;
|
||||
// Mask out bits that are being updated
|
||||
NvU64 maskedPin = pinIn & ~bitMask;
|
||||
NvU64 maskedUnpin = unpinIn & ~bitMask;
|
||||
// Update bits in new with bitMask
|
||||
NvU64 pinRes = ((newState & (1llu << MAP_IDX_ALLOC_PIN)) ? bitMask : 0llu);
|
||||
NvU64 unpinRes = ((newState & (1llu << MAP_IDX_ALLOC_UNPIN)) ? bitMask : 0llu);
|
||||
// Output state based on whether writeMask is set
|
||||
NvU64 pinOut = (writeMask & (1llu << MAP_IDX_ALLOC_PIN)) ? (maskedPin | pinRes) : pinIn;
|
||||
NvU64 unpinOut = (writeMask & (1llu << MAP_IDX_ALLOC_UNPIN)) ? (maskedUnpin | unpinRes) : unpinIn;
|
||||
// Or of final state for delta-tracking purposes
|
||||
NvU64 finalState = pinOut | unpinOut;
|
||||
NvU64 xored = initialState ^ finalState;
|
||||
|
||||
// Write out new bits
|
||||
pRegmap->map[MAP_IDX_ALLOC_PIN][idx] = pinOut;
|
||||
pRegmap->map[MAP_IDX_ALLOC_UNPIN][idx] = unpinOut;
|
||||
|
||||
// Update deltas
|
||||
(*delta64k) += nvPopCount64(xored);
|
||||
// Each 2M page is 32 64K pages, so we check each half of a 64-bit qword and xor them
|
||||
(*delta2m) += ((((NvU32)finalState) == 0) != (((NvU32)initialState) == 0)) +
|
||||
((((NvU32)(finalState >> 32)) == 0) != (((NvU32)(initialState >> 32)) == 0));
|
||||
}
|
||||
|
||||
void
|
||||
pmaRegmapChangeBlockStateAttrib
|
||||
(
|
||||
void *pMap,
|
||||
NvU64 frame,
|
||||
NvU64 len,
|
||||
PMA_PAGESTATUS newState,
|
||||
PMA_PAGESTATUS writeMask
|
||||
)
|
||||
{
|
||||
NvU64 initialIdx = PAGE_MAPIDX(frame);
|
||||
NvU64 finalIdx = PAGE_MAPIDX(frame + len - 1llu);
|
||||
NvU64 initialOffs = PAGE_BITIDX(frame);
|
||||
NvU64 finalOffs = PAGE_BITIDX(frame + len - 1llu);
|
||||
NvU64 initialMask = NV_U64_MAX << initialOffs;
|
||||
NvU64 finalMask = NV_U64_MAX >> (FRAME_TO_U64_MASK - finalOffs);
|
||||
PMA_REGMAP *pRegmap = (PMA_REGMAP *)pMap;
|
||||
NvU64 i;
|
||||
NvU64 delta2m = 0, delta64k = 0;
|
||||
|
||||
NV_ASSERT(pRegmap != NULL);
|
||||
NV_ASSERT(frame + len <= pRegmap->totalFrames);
|
||||
|
||||
// Update non-state attributes first in a tight loop.
|
||||
for (i = PMA_STATE_BITS_PER_PAGE; i < PMA_BITS_PER_PAGE; i++)
|
||||
{
|
||||
NvU64 j;
|
||||
NvU64 toWrite = (newState & (1u << i)) ? NV_U64_MAX : 0llu;
|
||||
if (!((1u << i) & writeMask))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
if (initialIdx == finalIdx)
|
||||
{
|
||||
pRegmap->map[i][initialIdx] &= ~(initialMask & finalMask);
|
||||
pRegmap->map[i][initialIdx] |= toWrite & (initialMask & finalMask);
|
||||
continue;
|
||||
}
|
||||
|
||||
pRegmap->map[i][initialIdx] &= ~initialMask;
|
||||
pRegmap->map[i][initialIdx] |= toWrite & initialMask;
|
||||
|
||||
for (j = initialIdx + 1; j < finalIdx; j++)
|
||||
{
|
||||
pRegmap->map[i][j] = toWrite;
|
||||
|
||||
}
|
||||
pRegmap->map[i][finalIdx] &= ~finalMask;
|
||||
pRegmap->map[i][finalIdx] |= toWrite & finalMask;
|
||||
|
||||
}
|
||||
|
||||
if (!(writeMask & STATE_MASK))
|
||||
{
|
||||
return;
|
||||
}
|
||||
|
||||
// Entire state is in one NvU64, so exit immediately after
|
||||
if (initialIdx == finalIdx)
|
||||
{
|
||||
_pmaRegmapDoSingleStateChange(pRegmap, initialIdx, newState, writeMask, initialMask & finalMask, &delta2m, &delta64k);
|
||||
goto set_regs;
|
||||
}
|
||||
|
||||
// Checks for 64-aligned start/end so we don't have to deal with partial coverage in the main loop
|
||||
if (initialOffs != 0)
|
||||
{
|
||||
// Do first state update with partial NvU64 coverage
|
||||
_pmaRegmapDoSingleStateChange(pRegmap, initialIdx, newState, writeMask, initialMask, &delta2m, &delta64k);
|
||||
initialIdx++;
|
||||
}
|
||||
if (finalOffs != FRAME_TO_U64_MASK)
|
||||
{
|
||||
// Update last partial NvU64
|
||||
_pmaRegmapDoSingleStateChange(pRegmap, finalIdx, newState, writeMask, finalMask, &delta2m, &delta64k);
|
||||
finalIdx--;
|
||||
}
|
||||
|
||||
// Update all full-size
|
||||
for (i = initialIdx; i <= finalIdx; i++)
|
||||
{
|
||||
_pmaRegmapDoSingleStateChange(pRegmap, i, newState, writeMask, NV_U64_MAX, &delta2m, &delta64k);
|
||||
}
|
||||
|
||||
set_regs:
|
||||
if ((newState & writeMask & STATE_MASK) != 0)
|
||||
{
|
||||
pRegmap->pPmaStats->numFreeFrames -= delta64k;
|
||||
pRegmap->pPmaStats->numFree2mbPages -= delta2m;
|
||||
}
|
||||
else
|
||||
{
|
||||
pRegmap->pPmaStats->numFreeFrames += delta64k;
|
||||
pRegmap->pPmaStats->numFree2mbPages += delta2m;
|
||||
}
|
||||
if (!pRegmap->bProtected)
|
||||
{
|
||||
return;
|
||||
}
|
||||
if ((writeMask & newState & STATE_MASK) != 0)
|
||||
{
|
||||
pRegmap->pPmaStats->numFreeFramesProtected -= delta64k;
|
||||
pRegmap->pPmaStats->numFree2mbPagesProtected -= delta2m;
|
||||
}
|
||||
else
|
||||
{
|
||||
pRegmap->pPmaStats->numFreeFramesProtected += delta64k;
|
||||
pRegmap->pPmaStats->numFree2mbPagesProtected += delta2m;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
PMA_PAGESTATUS
|
||||
pmaRegmapRead(void *pMap, NvU64 frameNum, NvBool readAttrib)
|
||||
{
|
||||
@ -811,71 +679,92 @@ static NvS64 _scanContiguousSearchLoop
|
||||
NvBool bSearchEvictable
|
||||
)
|
||||
{
|
||||
NvU64 freeStart;
|
||||
PMA_PAGESTATUS startStatus, endStatus, state;
|
||||
NvS64 checkDiff;
|
||||
NvS64 (*useFunc)(PMA_REGMAP *, NvU64, NvU64);
|
||||
|
||||
if (!bSearchEvictable)
|
||||
NvU64 frameBaseIdx = alignUpToMod(localStart, frameAlignment, frameAlignmentPadding);
|
||||
//
|
||||
// latestFree stores the highest '0' seen in the given map array in the current run
|
||||
// ie we have the needed pages if frameBaseIdx + numPages == latestFree. Initialize to first aligned frame
|
||||
//
|
||||
NvU64 latestFree[PMA_BITS_PER_PAGE];
|
||||
NvU64 i;
|
||||
for (i = 0; i < PMA_BITS_PER_PAGE; i++)
|
||||
{
|
||||
// Look for available frames
|
||||
state = STATE_FREE;
|
||||
checkDiff = ALL_FREE;
|
||||
useFunc = _pmaRegmapAvailable;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Look for evictable frames
|
||||
state = STATE_UNPIN;
|
||||
checkDiff = EVICTABLE;
|
||||
useFunc = _pmaRegmapEvictable;
|
||||
latestFree[i] = frameBaseIdx;
|
||||
}
|
||||
|
||||
freeStart = localStart;
|
||||
while ((freeStart + numFrames - 1) <= localEnd)
|
||||
loop_begin:
|
||||
//
|
||||
// Always start a loop iteration with an updated frameBaseIdx by ensuring that latestFree is always >= frameBaseIdx
|
||||
// frameBaseIdx == latestFree[i] means that there are no observed 0s so far in the current run
|
||||
//
|
||||
for (i = 0; i < PMA_BITS_PER_PAGE; i++)
|
||||
{
|
||||
startStatus = pmaRegmapRead(pRegmap, freeStart, NV_TRUE);
|
||||
endStatus = pmaRegmapRead(pRegmap, (freeStart + numFrames - 1), NV_TRUE);
|
||||
|
||||
if (endStatus == STATE_FREE || endStatus == state)
|
||||
if (latestFree[i] < frameBaseIdx)
|
||||
{
|
||||
if (startStatus == STATE_FREE || startStatus == state)
|
||||
{
|
||||
NvS64 diff = (*useFunc)(pRegmap, freeStart, (freeStart + numFrames - 1));
|
||||
if (diff == checkDiff)
|
||||
{
|
||||
return (NvS64)freeStart;
|
||||
}
|
||||
else
|
||||
{
|
||||
//
|
||||
// Find the next aligned free frame and set it as the start
|
||||
// frame for next iteration's scan.
|
||||
//
|
||||
NV_ASSERT(diff >= 0);
|
||||
|
||||
freeStart = alignUpToMod(diff + 1, frameAlignment, frameAlignmentPadding);
|
||||
|
||||
NV_ASSERT(freeStart != 0);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Start point isn't free, so bump to check the next aligned frame
|
||||
freeStart += frameAlignment;
|
||||
}
|
||||
latestFree[i] = frameBaseIdx;
|
||||
}
|
||||
else
|
||||
}
|
||||
// At the end of memory, pages not available
|
||||
if ((frameBaseIdx + numFrames - 1llu) > localEnd)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
for (i = 0; i < PMA_BITS_PER_PAGE; i++)
|
||||
{
|
||||
// TODO, merge logic so we don't need multiple calls for unpin
|
||||
if (i == MAP_IDX_ALLOC_UNPIN && bSearchEvictable)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
while (latestFree[i] < (frameBaseIdx + numFrames))
|
||||
{
|
||||
//
|
||||
// End point isn't usable, so jump to after the end to check again
|
||||
// However, align the new start point properly before next iteration.
|
||||
// All this logic looks complicated, but essentially all it is doing is getting the NvU64 from
|
||||
// the correct index in the array and shifting and masking so that the first bit is latestFree[i].
|
||||
// endOffs is set then to the length of the run of zeros at the beginning
|
||||
//
|
||||
freeStart += NV_ALIGN_UP(numFrames, frameAlignment);
|
||||
NvU64 curMapIdx = PAGE_MAPIDX(latestFree[i]);
|
||||
NvU64 beginOffs = PAGE_BITIDX(latestFree[i]);
|
||||
NvU64 mask = beginOffs == 0 ? 0 : NV_U64_MAX << (FRAME_TO_U64_SIZE - beginOffs);
|
||||
NvU64 curWithOffs = (pRegmap->map[i][curMapIdx] >> beginOffs) | mask;
|
||||
NvU64 endOffs = portUtilCountTrailingZeros64(curWithOffs);
|
||||
//
|
||||
// If no more are free, we have not hit the needed number of pages. Following loop finds
|
||||
// the next free page
|
||||
//
|
||||
if (endOffs == 0)
|
||||
{
|
||||
mask = beginOffs == 0 ? 0 : NV_U64_MAX >> (FRAME_TO_U64_SIZE - beginOffs);
|
||||
NvU64 curMap = pRegmap->map[i][curMapIdx] | mask;
|
||||
frameBaseIdx = latestFree[i] - beginOffs;
|
||||
if (curMap != NV_U64_MAX)
|
||||
{
|
||||
goto free_found;
|
||||
}
|
||||
curMapIdx++;
|
||||
frameBaseIdx += FRAME_TO_U64_SIZE;
|
||||
while (frameBaseIdx <= localEnd)
|
||||
{
|
||||
curMap = pRegmap->map[i][curMapIdx];
|
||||
if(curMap != NV_U64_MAX)
|
||||
{
|
||||
goto free_found;
|
||||
}
|
||||
frameBaseIdx += FRAME_TO_U64_SIZE;
|
||||
curMapIdx++;
|
||||
}
|
||||
// No more free pages, exit
|
||||
return -1;
|
||||
free_found:
|
||||
// Found a free page, set frameBaseIdx and go back to the beginning of the loop
|
||||
frameBaseIdx += portUtilCountTrailingZeros64(~curMap);
|
||||
frameBaseIdx = alignUpToMod(frameBaseIdx, frameAlignment, frameAlignmentPadding);
|
||||
goto loop_begin;
|
||||
}
|
||||
latestFree[i] += endOffs;
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
return frameBaseIdx;
|
||||
}
|
||||
|
||||
static NvS64 _scanContiguousSearchLoopReverse
|
||||
@ -889,70 +778,405 @@ static NvS64 _scanContiguousSearchLoopReverse
|
||||
NvBool bSearchEvictable
|
||||
)
|
||||
{
|
||||
NvU64 freeStart;
|
||||
PMA_PAGESTATUS startStatus, endStatus, state;
|
||||
NvS64 checkDiff;
|
||||
NvS64 (*useFunc)(PMA_REGMAP *, NvU64, NvU64);
|
||||
|
||||
if (!bSearchEvictable)
|
||||
NvU64 realAlign = (frameAlignmentPadding + numFrames) & (frameAlignment - 1ll);
|
||||
NvU64 frameBaseIdx = alignDownToMod(localEnd + 1llu, frameAlignment, realAlign);
|
||||
//
|
||||
// latestFree stores the lowest '0' seen in the given map array in the current run
|
||||
// ie we have the needed pages if frameBaseIdx - numPages == latestFree. Initialize to last aligned frame
|
||||
//
|
||||
NvU64 latestFree[PMA_BITS_PER_PAGE];
|
||||
NvU64 i;
|
||||
for (i = 0; i < PMA_BITS_PER_PAGE; i++)
|
||||
{
|
||||
// Look for available frames
|
||||
state = STATE_FREE;
|
||||
checkDiff = ALL_FREE;
|
||||
useFunc = _pmaRegmapAvailable;
|
||||
latestFree[i] = frameBaseIdx;
|
||||
}
|
||||
else
|
||||
loop_begin:
|
||||
//
|
||||
// Always start a loop iteration with an updated frameBaseIdx by ensuring that latestFree is always <= frameBaseIdx
|
||||
// frameBaseIdx == latestFree[i] means that there are no observed 0s so far in the current run
|
||||
//
|
||||
for (i = 0; i < PMA_BITS_PER_PAGE; i++)
|
||||
{
|
||||
// Look for evictable frames
|
||||
state = STATE_UNPIN;
|
||||
checkDiff = EVICTABLE;
|
||||
useFunc = _pmaRegmapEvictable;
|
||||
}
|
||||
|
||||
// First frame from end able to accommodate num_frames allocation.
|
||||
freeStart = localEnd + 1 - numFrames;
|
||||
freeStart -= (freeStart - localStart) % frameAlignment;
|
||||
|
||||
while (freeStart >= localStart && (NvS64)freeStart >= 0)
|
||||
{
|
||||
startStatus = pmaRegmapRead(pRegmap, freeStart, NV_TRUE);
|
||||
endStatus = pmaRegmapRead(pRegmap, (freeStart + numFrames - 1), NV_TRUE);
|
||||
|
||||
if (startStatus == STATE_FREE || startStatus == state)
|
||||
if (latestFree[i] > frameBaseIdx)
|
||||
{
|
||||
if (endStatus == STATE_FREE || endStatus == state)
|
||||
latestFree[i] = frameBaseIdx;
|
||||
}
|
||||
}
|
||||
// At the beginning of memory, pages not available
|
||||
if ((localStart + numFrames) > frameBaseIdx)
|
||||
{
|
||||
return -1;
|
||||
}
|
||||
for (i = 0; i < PMA_BITS_PER_PAGE; i++)
|
||||
{
|
||||
// TODO, merge logic so we don't need multiple calls for unpin
|
||||
if (i == MAP_IDX_ALLOC_UNPIN && bSearchEvictable)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
while (latestFree[i] > (frameBaseIdx - numFrames))
|
||||
{
|
||||
//
|
||||
// All this logic looks complicated, but essentially all it is doing is getting the NvU64 from
|
||||
// the correct index in the array and shifting and masking so that the last bit is latestFree[i].
|
||||
// endOffs is set then to the length of the run of zeros at the end
|
||||
//
|
||||
NvU64 curId = latestFree[i] - 1llu;
|
||||
NvU64 curMapIdx = PAGE_MAPIDX(curId);
|
||||
NvU64 beginOffs = PAGE_BITIDX(curId);
|
||||
NvU64 mask = beginOffs == FRAME_TO_U64_MASK ? 0 : NV_U64_MAX >> (1llu + beginOffs);
|
||||
NvU64 curWithOffs = (pRegmap->map[i][curMapIdx] << (FRAME_TO_U64_MASK - beginOffs)) | mask;
|
||||
NvU64 endOffs = portUtilCountLeadingZeros64(curWithOffs);
|
||||
//
|
||||
// If no more are free, we have not hit the needed number of pages. Following loop finds
|
||||
// the next free page
|
||||
//
|
||||
if (endOffs == 0)
|
||||
{
|
||||
NvS64 diff = (*useFunc)(pRegmap, freeStart, (freeStart + numFrames - 1));
|
||||
if (diff == checkDiff)
|
||||
mask = beginOffs == FRAME_TO_U64_MASK ? 0 : NV_U64_MAX << (1llu + beginOffs);
|
||||
NvU64 curMap = (pRegmap->map[i][curMapIdx]) | mask;
|
||||
frameBaseIdx = latestFree[i] + FRAME_TO_U64_MASK - beginOffs;
|
||||
if (curMap != NV_U64_MAX)
|
||||
{
|
||||
return (NvS64)freeStart;
|
||||
goto free_found;
|
||||
}
|
||||
else
|
||||
curMapIdx--;
|
||||
frameBaseIdx -= FRAME_TO_U64_SIZE;
|
||||
while (frameBaseIdx > localStart)
|
||||
{
|
||||
NV_ASSERT(diff >= 0);
|
||||
|
||||
// Set end point to one frame before the first unavailable frame found
|
||||
freeStart = diff - numFrames;
|
||||
freeStart -= (freeStart - localStart) % frameAlignment;
|
||||
curMap = pRegmap->map[i][curMapIdx];
|
||||
if(curMap != NV_U64_MAX)
|
||||
{
|
||||
goto free_found;
|
||||
}
|
||||
frameBaseIdx -= FRAME_TO_U64_SIZE;
|
||||
curMapIdx--;
|
||||
}
|
||||
// No more free pages, exit
|
||||
return -1;
|
||||
free_found:
|
||||
// Found a free page, set frameBaseIdx and go back to the beginning of the loop
|
||||
frameBaseIdx -= portUtilCountLeadingZeros64(~curMap);
|
||||
frameBaseIdx = alignDownToMod(frameBaseIdx, frameAlignment, realAlign);
|
||||
goto loop_begin;
|
||||
}
|
||||
else
|
||||
latestFree[i] -= endOffs;
|
||||
}
|
||||
}
|
||||
|
||||
return frameBaseIdx - numFrames;
|
||||
}
|
||||
|
||||
static NV_FORCEINLINE
|
||||
NvU64
|
||||
_scanDiscontiguousSearchLoop
|
||||
(
|
||||
PMA_REGMAP *pRegmap,
|
||||
NvU64 numPages,
|
||||
NvU64 framesPerPage,
|
||||
NvU64 localStart,
|
||||
NvU64 localEnd,
|
||||
NvU64 frameAlignment,
|
||||
NvU64 frameAlignmentPadding,
|
||||
NvU64 *pPages,
|
||||
NvU64 *pNumEvictablePages
|
||||
)
|
||||
{
|
||||
NvU64 frameBaseIdx = alignUpToMod(localStart, frameAlignment, frameAlignmentPadding);
|
||||
|
||||
//
|
||||
// latestFree stores the highest '0' seen in the given map array in the current run
|
||||
// ie we have the needed frames for a page if frameBaseIdx + framesPerPage == latestFree. Initialize to first aligned frame
|
||||
//
|
||||
NvU64 latestFree[PMA_BITS_PER_PAGE];
|
||||
NvU64 totalFound = 0;
|
||||
|
||||
// Evictable pages count down from end of array
|
||||
NvU64 curEvictPage = numPages;
|
||||
NvBool bEvictablePage = NV_FALSE;
|
||||
NvU64 i;
|
||||
|
||||
for (i = 0; i < PMA_BITS_PER_PAGE; i++)
|
||||
{
|
||||
latestFree[i] = frameBaseIdx;
|
||||
}
|
||||
loop_begin:
|
||||
//
|
||||
// Always start a loop iteration with an updated frameBaseIdx by ensuring that latestFree is always >= frameBaseIdx
|
||||
// frameBaseIdx == latestFree[i] means that there are no observed 0s so far in the current run
|
||||
//
|
||||
for (i = 0; i < PMA_BITS_PER_PAGE; i++)
|
||||
{
|
||||
if (latestFree[i] < frameBaseIdx)
|
||||
{
|
||||
latestFree[i] = frameBaseIdx;
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize to standard free page state
|
||||
bEvictablePage = NV_FALSE;
|
||||
|
||||
// At the end of memory, pages not available
|
||||
if ((frameBaseIdx + framesPerPage - 1llu) > localEnd)
|
||||
{
|
||||
*pNumEvictablePages = numPages - curEvictPage;
|
||||
return totalFound;
|
||||
}
|
||||
|
||||
for (i = 0; i < PMA_BITS_PER_PAGE; i++)
|
||||
{
|
||||
// If array is not already full of evictable and free pages, go to evictable loop
|
||||
if ((i != MAP_IDX_ALLOC_UNPIN) || (curEvictPage <= totalFound))
|
||||
{
|
||||
while (latestFree[i] < (frameBaseIdx + framesPerPage))
|
||||
{
|
||||
// Start point isn't free, so bump to check the next aligned frame
|
||||
freeStart -= frameAlignment;
|
||||
//
|
||||
// All this logic looks complicated, but essentially all it is doing is getting the NvU64 from
|
||||
// the correct index in the array and shifting and masking so that the first bit is latestFree[i].
|
||||
// endOffs is set then to the length of the run of zeros at the beginning
|
||||
//
|
||||
NvU64 curMapIdx = PAGE_MAPIDX(latestFree[i]);
|
||||
NvU64 beginOffs = PAGE_BITIDX(latestFree[i]);
|
||||
NvU64 mask = beginOffs == 0 ? 0 : NV_U64_MAX << (FRAME_TO_U64_SIZE - beginOffs);
|
||||
NvU64 curWithOffs = (pRegmap->map[i][curMapIdx] >> beginOffs) | mask;
|
||||
NvU64 endOffs = portUtilCountTrailingZeros64(curWithOffs);
|
||||
//
|
||||
// If no more are free, we have not hit the needed number of pages. Following loop finds
|
||||
// the next free page
|
||||
//
|
||||
if (endOffs == 0)
|
||||
{
|
||||
mask = beginOffs == 0 ? 0 : NV_U64_MAX >> (FRAME_TO_U64_SIZE - beginOffs);
|
||||
NvU64 curMap = pRegmap->map[i][curMapIdx] | mask;
|
||||
frameBaseIdx = latestFree[i] - beginOffs;
|
||||
if (curMap != NV_U64_MAX)
|
||||
{
|
||||
goto free_found;
|
||||
}
|
||||
curMapIdx++;
|
||||
frameBaseIdx += FRAME_TO_U64_SIZE;
|
||||
while (frameBaseIdx <= localEnd)
|
||||
{
|
||||
curMap = pRegmap->map[i][curMapIdx];
|
||||
if(curMap != NV_U64_MAX)
|
||||
{
|
||||
goto free_found;
|
||||
}
|
||||
frameBaseIdx += FRAME_TO_U64_SIZE;
|
||||
curMapIdx++;
|
||||
}
|
||||
// No more free pages, exit
|
||||
*pNumEvictablePages = numPages - curEvictPage;
|
||||
return totalFound;
|
||||
free_found:
|
||||
// Found a free page, set frameBaseIdx and go back to the beginning of the loop
|
||||
frameBaseIdx += portUtilCountTrailingZeros64(~curMap);
|
||||
frameBaseIdx = alignUpToMod(frameBaseIdx, frameAlignment, frameAlignmentPadding);
|
||||
goto loop_begin;
|
||||
}
|
||||
latestFree[i] += endOffs;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
//
|
||||
// End point isn't usable, so jump to after the end to check again
|
||||
// However, align the new start point properly before next iteration.
|
||||
//
|
||||
freeStart -= NV_ALIGN_UP(numFrames, frameAlignment);
|
||||
// Loop to check if current range has an unpinned page, then it gets stored in the evictable area
|
||||
while (latestFree[i] < (frameBaseIdx + framesPerPage))
|
||||
{
|
||||
// Basically same as above loop, just not exiting if 0 not found, instead setting bEvictablePage
|
||||
NvU64 curMapIdx = PAGE_MAPIDX(latestFree[i]);
|
||||
NvU64 beginOffs = PAGE_BITIDX(latestFree[i]);
|
||||
NvU64 mask = beginOffs == 0 ? 0 : NV_U64_MAX << (FRAME_TO_U64_SIZE - beginOffs);
|
||||
NvU64 curWithOffs = (pRegmap->map[i][curMapIdx] >> beginOffs) | mask;
|
||||
NvU64 endOffs = portUtilCountTrailingZeros64(curWithOffs);
|
||||
latestFree[i] += endOffs;
|
||||
if (endOffs == 0)
|
||||
{
|
||||
bEvictablePage = NV_TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return -1;
|
||||
// Store evictable pages at end of array to not interfere with free pages
|
||||
if (bEvictablePage)
|
||||
{
|
||||
curEvictPage--;
|
||||
pPages[curEvictPage] = frameBaseIdx;
|
||||
frameBaseIdx += framesPerPage;
|
||||
goto loop_begin;
|
||||
}
|
||||
|
||||
pPages[totalFound] = frameBaseIdx;
|
||||
totalFound++;
|
||||
frameBaseIdx += framesPerPage;
|
||||
|
||||
// Found all needed pages (all free and not STATE_UNPIN)
|
||||
if (totalFound == numPages)
|
||||
{
|
||||
*pNumEvictablePages = 0;
|
||||
return numPages;
|
||||
}
|
||||
goto loop_begin;
|
||||
}
|
||||
|
||||
static NV_FORCEINLINE
|
||||
NvU64
|
||||
_scanDiscontiguousSearchLoopReverse
|
||||
(
|
||||
PMA_REGMAP *pRegmap,
|
||||
NvU64 numPages,
|
||||
NvU64 framesPerPage,
|
||||
NvU64 localStart,
|
||||
NvU64 localEnd,
|
||||
NvU64 frameAlignment,
|
||||
NvU64 frameAlignmentPadding,
|
||||
NvU64 *pPages,
|
||||
NvU64 *pNumEvictablePages
|
||||
)
|
||||
{
|
||||
NvU64 realAlign = (frameAlignmentPadding + framesPerPage) & (frameAlignment - 1ll);
|
||||
NvU64 frameBaseIdx = alignDownToMod(localEnd+1llu, frameAlignment, realAlign);
|
||||
|
||||
//
|
||||
// latestFree stores the lowest '0' seen in the given map array in the current run
|
||||
// ie we have the needed pages if frameBaseIdx - numPages == latestFree. Initialize to last aligned frame
|
||||
//
|
||||
NvU64 latestFree[PMA_BITS_PER_PAGE];
|
||||
NvU64 totalFound = 0;
|
||||
|
||||
// Evictable pages count down from end of array
|
||||
NvU64 curEvictPage = numPages;
|
||||
NvBool bEvictablePage = NV_FALSE;
|
||||
NvU64 i;
|
||||
|
||||
for (i = 0; i < PMA_BITS_PER_PAGE; i++)
|
||||
{
|
||||
latestFree[i] = frameBaseIdx;
|
||||
}
|
||||
loop_begin:
|
||||
//
|
||||
// Always start a loop iteration with an updated frameBaseIdx by ensuring that latestFree is always <= frameBaseIdx
|
||||
// frameBaseIdx == latestFree[i] means that there are no observed 0s so far in the current run
|
||||
//
|
||||
for (i = 0; i < PMA_BITS_PER_PAGE; i++)
|
||||
{
|
||||
if (latestFree[i] > frameBaseIdx)
|
||||
{
|
||||
latestFree[i] = frameBaseIdx;
|
||||
}
|
||||
}
|
||||
|
||||
// Initialize to standard free page state
|
||||
bEvictablePage = NV_FALSE;
|
||||
|
||||
// At the beginning of memory, pages not available
|
||||
if ((localStart + framesPerPage) > frameBaseIdx)
|
||||
{
|
||||
*pNumEvictablePages = numPages - curEvictPage;
|
||||
return totalFound;
|
||||
}
|
||||
|
||||
for (i = 0; i < PMA_BITS_PER_PAGE; i++)
|
||||
{
|
||||
// If array is not already full of evictable and free pages, go to evictable loop
|
||||
if ((i != MAP_IDX_ALLOC_UNPIN) || (curEvictPage <= totalFound))
|
||||
{
|
||||
while (latestFree[i] > (frameBaseIdx - framesPerPage))
|
||||
{
|
||||
//
|
||||
// All this logic looks complicated, but essentially all it is doing is getting the NvU64 from
|
||||
// the correct index in the array and shifting and masking so that the last bit is latestFree[i].
|
||||
// endOffs is set then to the length of the run of zeros at the end
|
||||
//
|
||||
NvU64 curId = latestFree[i] - 1llu;
|
||||
NvU64 curMapIdx = PAGE_MAPIDX(curId);
|
||||
NvU64 beginOffs = PAGE_BITIDX(curId);
|
||||
NvU64 mask = beginOffs == FRAME_TO_U64_MASK ? 0 : NV_U64_MAX >> (1llu + beginOffs);
|
||||
NvU64 curWithOffs = (pRegmap->map[i][curMapIdx] << (FRAME_TO_U64_MASK - beginOffs)) | mask;
|
||||
NvU64 endOffs = portUtilCountLeadingZeros64(curWithOffs);
|
||||
|
||||
//
|
||||
// If no more are free, we have not hit the needed number of pages. Following loop finds
|
||||
// the next free page
|
||||
//
|
||||
if (endOffs == 0)
|
||||
{
|
||||
mask = beginOffs == FRAME_TO_U64_MASK ? 0 : NV_U64_MAX << (1llu + beginOffs);
|
||||
NvU64 curMap = pRegmap->map[i][curMapIdx] | mask;
|
||||
frameBaseIdx = latestFree[i] + FRAME_TO_U64_MASK - beginOffs;
|
||||
if (curMap != NV_U64_MAX)
|
||||
{
|
||||
goto free_found;
|
||||
}
|
||||
curMapIdx--;
|
||||
frameBaseIdx -= 64;
|
||||
while (frameBaseIdx > localStart)
|
||||
{
|
||||
curMap = pRegmap->map[i][curMapIdx];
|
||||
if(curMap != NV_U64_MAX)
|
||||
{
|
||||
goto free_found;
|
||||
}
|
||||
frameBaseIdx -= 64;
|
||||
curMapIdx--;
|
||||
}
|
||||
|
||||
// No more free pages, exit
|
||||
*pNumEvictablePages = numPages - curEvictPage;
|
||||
return totalFound;
|
||||
free_found:
|
||||
// Found a free page, set frameBaseIdx and go back to the beginning of the loop
|
||||
frameBaseIdx -= portUtilCountLeadingZeros64(~curMap);
|
||||
frameBaseIdx = alignDownToMod(frameBaseIdx, frameAlignment, realAlign);
|
||||
goto loop_begin;
|
||||
}
|
||||
latestFree[i] -= endOffs;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Loop to check if current range has an unpinned page, then it gets stored in the evictable area
|
||||
while (latestFree[i] > (frameBaseIdx - framesPerPage))
|
||||
{
|
||||
// Basically same as above loop, just not exiting if 0 not found, instead setting bEvictablePage
|
||||
NvU64 curId = latestFree[i] - 1llu;
|
||||
NvU64 curMapIdx = PAGE_MAPIDX(curId);
|
||||
NvU64 beginOffs = PAGE_BITIDX(curId);
|
||||
NvU64 mask = beginOffs == FRAME_TO_U64_MASK ? 0 : NV_U64_MAX >> (1llu + beginOffs);
|
||||
NvU64 curWithOffs = (pRegmap->map[i][curMapIdx] << (FRAME_TO_U64_MASK - beginOffs)) | mask;
|
||||
NvU64 endOffs = portUtilCountLeadingZeros64(curWithOffs);
|
||||
latestFree[i] -= endOffs;
|
||||
if (endOffs == 0)
|
||||
{
|
||||
bEvictablePage = NV_TRUE;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
frameBaseIdx -= framesPerPage;
|
||||
|
||||
// Store evictable pages at end of array to not interfere with free pages
|
||||
if (bEvictablePage)
|
||||
{
|
||||
curEvictPage--;
|
||||
pPages[curEvictPage] = frameBaseIdx;
|
||||
goto loop_begin;
|
||||
}
|
||||
|
||||
pPages[totalFound] = frameBaseIdx;
|
||||
totalFound++;
|
||||
|
||||
// Found all needed pages (all free and not STATE_UNPIN)
|
||||
if (totalFound == numPages)
|
||||
{
|
||||
*pNumEvictablePages = 0;
|
||||
return numPages;
|
||||
}
|
||||
|
||||
goto loop_begin;
|
||||
}
|
||||
|
||||
//
|
||||
@ -1004,10 +1228,6 @@ pmaRegmapScanContiguous
|
||||
}
|
||||
localStart = alignUpToMod(localStart, frameAlignment, frameAlignmentPadding);
|
||||
|
||||
NV_PRINTF(LEVEL_INFO,
|
||||
"Scanning with addrBase 0x%llx in frame range 0x%llx..0x%llx, pages to allocate 0x%llx\n",
|
||||
addrBase, localStart, localEnd, numPages);
|
||||
|
||||
if (!bReverseAlloc)
|
||||
{
|
||||
frameFound = _scanContiguousSearchLoop(pRegmap, numFrames, localStart, localEnd,
|
||||
@ -1069,10 +1289,12 @@ pmaRegmapScanDiscontiguous
|
||||
NvBool bReverseAlloc
|
||||
)
|
||||
{
|
||||
NvU64 freeStart, found, framesPerPage, localStart, localEnd;
|
||||
NvU64 alignedAddrBase, frameAlignmentPadding;
|
||||
PMA_PAGESTATUS startStatus, endStatus;
|
||||
PMA_REGMAP *pRegmap = (PMA_REGMAP *)pMap;
|
||||
PMA_REGMAP *pRegmap = (PMA_REGMAP*) pMap;
|
||||
NvU64 localStart, localEnd, framesPerPage, alignedAddrBase, frameAlignmentPadding;
|
||||
NvU64 freeFound = 0, evictFound = 0;
|
||||
NvU64 totalFound = 0;
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU64 i;
|
||||
|
||||
NV_ASSERT(alignment == pageSize);
|
||||
|
||||
@ -1100,101 +1322,69 @@ pmaRegmapScanDiscontiguous
|
||||
else
|
||||
{
|
||||
localStart = 0;
|
||||
localEnd = pRegmap->totalFrames-1;
|
||||
localEnd = pRegmap->totalFrames - 1;
|
||||
}
|
||||
|
||||
localStart = alignUpToMod(localStart, framesPerPage, frameAlignmentPadding);
|
||||
found = 0;
|
||||
//
|
||||
// Do the actual scanning here. The scanning functions return free pages at the beginning of
|
||||
// the array, and evictable pages in reverse order at the end of the array
|
||||
//
|
||||
if (!bReverseAlloc)
|
||||
{
|
||||
freeStart = localStart;
|
||||
freeFound = _scanDiscontiguousSearchLoop(pRegmap, numPages, framesPerPage,
|
||||
localStart, localEnd, alignment >> PMA_PAGE_SHIFT,
|
||||
frameAlignmentPadding, freeList, &evictFound);
|
||||
}
|
||||
else
|
||||
{
|
||||
// First frame from end able to accommodate page allocation.
|
||||
freeStart = localEnd + 1 - framesPerPage;
|
||||
freeStart -= (freeStart - localStart) % framesPerPage;
|
||||
freeFound = _scanDiscontiguousSearchLoopReverse(pRegmap, numPages, framesPerPage,
|
||||
localStart, localEnd, alignment >> PMA_PAGE_SHIFT,
|
||||
frameAlignmentPadding, freeList, &evictFound);
|
||||
}
|
||||
|
||||
NV_PRINTF(LEVEL_INFO,
|
||||
"Scanning with addrBase 0x%llx in frame range 0x%llx..0x%llx, pages to allocate 0x%llx\n",
|
||||
addrBase, localStart, localEnd, numPages);
|
||||
*numPagesAlloc = freeFound;
|
||||
|
||||
// scan for allocatable pages
|
||||
// two-pass algorithm
|
||||
while (found != numPages)
|
||||
// Scanning implementations don't actually decrement evictFound, so adjust appropriately here
|
||||
evictFound = freeFound + evictFound > numPages ? numPages - freeFound : evictFound;
|
||||
|
||||
// Not enough pages
|
||||
if (((freeFound + evictFound) != numPages) ||
|
||||
(bSkipEvict && (freeFound != numPages)))
|
||||
{
|
||||
if (!bReverseAlloc)
|
||||
{
|
||||
if ((freeStart + framesPerPage - 1) > localEnd) break;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (freeStart < localStart || (NvS64)freeStart < 0) break;
|
||||
}
|
||||
|
||||
startStatus = pmaRegmapRead(pRegmap, freeStart, NV_TRUE);
|
||||
endStatus = pmaRegmapRead(pRegmap, (freeStart + framesPerPage - 1), NV_TRUE);
|
||||
|
||||
if (startStatus == STATE_FREE)
|
||||
{
|
||||
if(endStatus == STATE_FREE)
|
||||
{
|
||||
NvS64 diff = _pmaRegmapAvailable(pRegmap, freeStart, (freeStart + framesPerPage - 1));
|
||||
if (diff == ALL_FREE)
|
||||
{
|
||||
freeList[found++] = addrBase + (freeStart << PMA_PAGE_SHIFT);
|
||||
}
|
||||
}
|
||||
}
|
||||
freeStart = !bReverseAlloc ? (freeStart + framesPerPage) : (freeStart - framesPerPage);
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
*numPagesAlloc = found;
|
||||
if(found == numPages) return NV_OK;
|
||||
if(bSkipEvict) return NV_ERR_NO_MEMORY;
|
||||
|
||||
if (!bReverseAlloc)
|
||||
else if (evictFound != 0)
|
||||
{
|
||||
freeStart = localStart;
|
||||
status = NV_ERR_IN_USE;
|
||||
}
|
||||
else
|
||||
|
||||
// Set totalFound appropriately to shift pages at the end of the function
|
||||
totalFound = freeFound + evictFound;
|
||||
if (bSkipEvict)
|
||||
{
|
||||
// First frame from end able to accommodate page allocation.
|
||||
freeStart = localEnd + 1 - framesPerPage;
|
||||
freeStart -= (freeStart - localStart) % framesPerPage;
|
||||
totalFound = freeFound;
|
||||
goto alignAndReturn;
|
||||
}
|
||||
while (found != numPages)
|
||||
|
||||
// End of list contains the evictable pages, swap elements from beginning of range to end
|
||||
for (i = (numPages - freeFound) >> 1; i != 0; i--)
|
||||
{
|
||||
if (!bReverseAlloc)
|
||||
{
|
||||
if ((freeStart + framesPerPage - 1) > localEnd) return NV_ERR_NO_MEMORY;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (freeStart < localStart || (NvS64)freeStart < 0) return NV_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
startStatus = pmaRegmapRead(pRegmap, freeStart, NV_TRUE);
|
||||
endStatus = pmaRegmapRead(pRegmap, (freeStart + framesPerPage - 1), NV_TRUE);
|
||||
|
||||
if (startStatus == STATE_FREE || startStatus == STATE_UNPIN)
|
||||
{
|
||||
if(endStatus == STATE_FREE || endStatus == STATE_UNPIN)
|
||||
{
|
||||
NvS64 diff = _pmaRegmapEvictable(pRegmap, freeStart, (freeStart + framesPerPage - 1));
|
||||
if (diff == EVICTABLE)
|
||||
{
|
||||
freeList[found++] = addrBase + (freeStart << PMA_PAGE_SHIFT);
|
||||
}
|
||||
}
|
||||
}
|
||||
freeStart = !bReverseAlloc ? (freeStart + framesPerPage) : (freeStart - framesPerPage);
|
||||
NvU64 temp = freeList[freeFound + i - 1llu];
|
||||
freeList[freeFound + i - 1llu] = freeList[numPages - i];
|
||||
freeList[numPages - i] = temp;
|
||||
}
|
||||
|
||||
return NV_ERR_IN_USE;
|
||||
alignAndReturn:
|
||||
while (totalFound != 0)
|
||||
{
|
||||
totalFound--;
|
||||
freeList[totalFound] <<= PMA_PAGE_SHIFT;
|
||||
freeList[totalFound] += addrBase;
|
||||
}
|
||||
return status;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
pmaRegmapGetSize
|
||||
(
|
||||
@ -1232,9 +1422,9 @@ pmaRegmapGetLargestFree
|
||||
bitmap |= (~0ULL) << PAGE_BITIDX(pRegmap->totalFrames);
|
||||
}
|
||||
|
||||
if (maxZerosGet(bitmap) == _UINT_SIZE)
|
||||
if (maxZerosGet(bitmap) == FRAME_TO_U64_SIZE)
|
||||
{
|
||||
mapTrailZeros += _UINT_SIZE;
|
||||
mapTrailZeros += FRAME_TO_U64_SIZE;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1993-2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
|
@ -177,6 +177,7 @@ mmuWalkSetUserCtx
|
||||
MMU_WALK_USER_CTX *pUserCtx
|
||||
)
|
||||
{
|
||||
NV_ASSERT_OR_RETURN(NULL != pWalk, NV_ERR_INVALID_STATE);
|
||||
|
||||
pWalk->pUserCtx = pUserCtx;
|
||||
return NV_OK;
|
||||
@ -223,9 +224,12 @@ mmuWalkFindLevel
|
||||
)
|
||||
{
|
||||
const MMU_WALK_LEVEL *pLevel = &pWalk->root;
|
||||
while (pLevel->pFmt != pLevelFmt)
|
||||
while (pLevel != NULL && pLevel->pFmt != pLevelFmt)
|
||||
{
|
||||
NvU32 subLevel;
|
||||
|
||||
NV_ASSERT_OR_RETURN(pLevel->pFmt != NULL, NULL);
|
||||
|
||||
// Single sub-level always continues.
|
||||
if (1 == pLevel->pFmt->numSubLevels)
|
||||
{
|
||||
|
8
utils.mk
8
utils.mk
@ -575,8 +575,14 @@ LD_TARGET_EMULATION_FLAG_SunOS_x86_64 = elf_x86_64_sol2
|
||||
LD_TARGET_EMULATION_FLAG_FreeBSD_x86 = elf_i386_fbsd
|
||||
LD_TARGET_EMULATION_FLAG_FreeBSD_x86_64 = elf_x86_64_fbsd
|
||||
|
||||
# Different linkers (GNU ld versus ld.lld versus ld.gold) expect different
|
||||
# target architecture values for '-m'. Empirically, only ld.lld appears to
|
||||
# actually need it, so only add the option when linking with ld.lld. Example
|
||||
# `ld.lld -v` output: "LLD 15.0.7 (compatible with GNU linkers)".
|
||||
LD_IS_LLD := $(if $(filter LLD,$(shell $(LD) -v)),1)
|
||||
|
||||
ifdef LD_TARGET_EMULATION_FLAG_$(TARGET_OS)_$(TARGET_ARCH)
|
||||
LD_TARGET_EMULATION_FLAG = -m $(LD_TARGET_EMULATION_FLAG_$(TARGET_OS)_$(TARGET_ARCH))
|
||||
LD_TARGET_EMULATION_FLAG = $(if $(LD_IS_LLD), -m $(LD_TARGET_EMULATION_FLAG_$(TARGET_OS)_$(TARGET_ARCH)))
|
||||
endif
|
||||
|
||||
define READ_ONLY_OBJECT_FROM_FILE_RULE
|
||||
|
@ -1,4 +1,4 @@
|
||||
NVIDIA_VERSION = 545.23.06
|
||||
NVIDIA_VERSION = 545.29.03
|
||||
|
||||
# This file.
|
||||
VERSION_MK_FILE := $(lastword $(MAKEFILE_LIST))
|
||||
|
Loading…
x
Reference in New Issue
Block a user