Mirror of https://github.com/NVIDIA/open-gpu-kernel-modules.git (synced 2025-03-21 13:29:11 +01:00)

570.133.07

This commit is contained in:
parent 25bef4626e
commit c5e439fea4

README.md (1537 changed lines)
@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source

This is the source release of the NVIDIA Linux open GPU kernel modules,
-version 570.124.06.
+version 570.133.07.


## How to Build
@@ -17,7 +17,7 @@ as root:

Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
-570.124.06 driver release. This can be achieved by installing
+570.133.07 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
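
A minimal sketch of that step, assuming the standard `.run` installer package for this release (the exact filename below is an assumption, not taken from this diff):

```sh
# Install only the user-space driver components from the .run package and
# skip its prebuilt kernel modules, so the modules built from this source
# tree are used instead. Run as root; the filename is illustrative.
sh ./NVIDIA-Linux-x86_64-570.133.07.run --no-kernel-modules
```
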
@@ -185,7 +185,7 @@ table below).
For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:

-https://us.download.nvidia.com/XFree86/Linux-x86_64/570.124.06/README/kernel_open.html
+https://us.download.nvidia.com/XFree86/Linux-x86_64/570.133.07/README/kernel_open.html

For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
Package for more details.
@@ -194,761 +194,776 @@ In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
Subsystem Device ID.

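A quick way to match an installed GPU against this table is to read the IDs with `lspci`; a small sketch (the filter `10de` is NVIDIA's PCI vendor ID):

```sh
# The bracketed [vendor:device] pair printed by -nn contains the PCI Device
# ID used as the first ID in the table below (vendor 10de = NVIDIA).
lspci -nn -d 10de:

# The verbose listing adds a "Subsystem:" line whose bracketed pair gives the
# PCI Subsystem Vendor ID and Subsystem Device ID.
lspci -vnn -d 10de: | grep -i subsystem
```
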
| Product Name | PCI ID |
|
||||
| ----------------------------------------------- | -------------- |
|
||||
| NVIDIA TITAN RTX | 1E02 |
|
||||
| NVIDIA GeForce RTX 2080 Ti | 1E04 |
|
||||
| NVIDIA GeForce RTX 2080 Ti | 1E07 |
|
||||
| NVIDIA CMP 50HX | 1E09 |
|
||||
| Quadro RTX 6000 | 1E30 |
|
||||
| Quadro RTX 8000 | 1E30 1028 129E |
|
||||
| Quadro RTX 8000 | 1E30 103C 129E |
|
||||
| Quadro RTX 8000 | 1E30 10DE 129E |
|
||||
| Quadro RTX 6000 | 1E36 |
|
||||
| Quadro RTX 8000 | 1E78 10DE 13D8 |
|
||||
| Quadro RTX 6000 | 1E78 10DE 13D9 |
|
||||
| NVIDIA GeForce RTX 2080 SUPER | 1E81 |
|
||||
| NVIDIA GeForce RTX 2080 | 1E82 |
|
||||
| NVIDIA GeForce RTX 2070 SUPER | 1E84 |
|
||||
| NVIDIA GeForce RTX 2080 | 1E87 |
|
||||
| NVIDIA GeForce RTX 2060 | 1E89 |
|
||||
| NVIDIA GeForce RTX 2080 | 1E90 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1025 1375 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 08A1 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 08A2 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 08EA |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 08EB |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 08EC |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 08ED |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 08EE |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 08EF |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 093B |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 093C |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 103C 8572 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 103C 8573 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 103C 8602 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 103C 8606 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 103C 86C6 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 103C 86C7 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 103C 87A6 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 103C 87A7 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1043 131F |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1043 137F |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1043 141F |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1043 1751 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1458 1660 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1458 1661 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1458 1662 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1458 75A6 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1458 75A7 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1458 86A6 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1458 86A7 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1462 1274 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1462 1277 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 152D 1220 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1558 95E1 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1558 97E1 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1A58 2002 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1A58 2005 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1A58 2007 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1A58 3000 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1A58 3001 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1D05 1069 |
|
||||
| NVIDIA GeForce RTX 2070 Super | 1E91 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 103C 8607 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 103C 8736 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 103C 8738 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 103C 8772 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 103C 878A |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 103C 878B |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1043 1E61 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 1511 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 75B3 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 75B4 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 76B2 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 76B3 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 78A2 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 78A3 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 86B2 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 86B3 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1462 12AE |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1462 12B0 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1462 12C6 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 17AA 22C3 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 17AA 22C5 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1A58 2009 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1A58 200A |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1A58 3002 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 8086 3012 |
|
||||
| NVIDIA GeForce RTX 2080 Super | 1E93 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1025 1401 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1025 149C |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1028 09D2 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 103C 8607 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 103C 86C7 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 103C 8736 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 103C 8738 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 103C 8772 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 103C 87A6 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 103C 87A7 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1458 75B1 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1458 75B2 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1458 76B0 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1458 76B1 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1458 78A0 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1458 78A1 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1458 86B0 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1458 86B1 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1462 12AE |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1462 12B0 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1462 12B4 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1462 12C6 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1558 50D3 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1558 70D1 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 17AA 22C3 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 17AA 22C5 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1A58 2009 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1A58 200A |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1A58 3002 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1D05 1089 |
|
||||
| Quadro RTX 5000 | 1EB0 |
|
||||
| Quadro RTX 4000 | 1EB1 |
|
||||
| Quadro RTX 5000 | 1EB5 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 1025 1375 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 1025 1401 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 1025 149C |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 1028 09C3 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 103C 8736 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 103C 8738 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 103C 8772 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 103C 8780 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 103C 8782 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 103C 8783 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 103C 8785 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 1043 1DD1 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 1462 1274 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 1462 12B0 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 1462 12C6 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 17AA 22B8 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 17AA 22BA |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 1A58 2005 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 1A58 2007 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 1A58 2008 |
|
||||
| Quadro RTX 5000 with Max-Q Design | 1EB5 1A58 200A |
|
||||
| Quadro RTX 4000 | 1EB6 |
|
||||
| Quadro RTX 4000 with Max-Q Design | 1EB6 1028 09C3 |
|
||||
| Quadro RTX 4000 with Max-Q Design | 1EB6 103C 8736 |
|
||||
| Quadro RTX 4000 with Max-Q Design | 1EB6 103C 8738 |
|
||||
| Quadro RTX 4000 with Max-Q Design | 1EB6 103C 8772 |
|
||||
| Quadro RTX 4000 with Max-Q Design | 1EB6 103C 8780 |
|
||||
| Quadro RTX 4000 with Max-Q Design | 1EB6 103C 8782 |
|
||||
| Quadro RTX 4000 with Max-Q Design | 1EB6 103C 8783 |
|
||||
| Quadro RTX 4000 with Max-Q Design | 1EB6 103C 8785 |
|
||||
| Quadro RTX 4000 with Max-Q Design | 1EB6 1462 1274 |
|
||||
| Quadro RTX 4000 with Max-Q Design | 1EB6 1462 1277 |
|
||||
| Quadro RTX 4000 with Max-Q Design | 1EB6 1462 12B0 |
|
||||
| Quadro RTX 4000 with Max-Q Design | 1EB6 1462 12C6 |
|
||||
| Quadro RTX 4000 with Max-Q Design | 1EB6 17AA 22B8 |
|
||||
| Quadro RTX 4000 with Max-Q Design | 1EB6 17AA 22BA |
|
||||
| NVIDIA GeForce RTX 2070 SUPER | 1EC2 |
|
||||
| NVIDIA GeForce RTX 2070 SUPER | 1EC7 |
|
||||
| NVIDIA GeForce RTX 2080 | 1ED0 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 1025 132D |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 1028 08ED |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 1028 08EE |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 1028 08EF |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 103C 8572 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 103C 8573 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 103C 8600 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 103C 8605 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 1043 138F |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 1043 15C1 |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 17AA 3FEE |
|
||||
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 17AA 3FFE |
|
||||
| NVIDIA GeForce RTX 2070 Super | 1ED1 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1ED1 1025 1432 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1ED1 103C 8746 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1ED1 103C 878A |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1ED1 1043 165F |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1ED1 144D C192 |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1ED1 17AA 3FCE |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1ED1 17AA 3FCF |
|
||||
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1ED1 17AA 3FD0 |
|
||||
| NVIDIA GeForce RTX 2080 Super | 1ED3 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 1025 1432 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 1028 09D1 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 103C 8746 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 103C 878A |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 1043 1D61 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 1043 1E51 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 1043 1F01 |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 17AA 3FCE |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 17AA 3FCF |
|
||||
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 17AA 3FD0 |
|
||||
| Quadro RTX 5000 | 1EF5 |
|
||||
| NVIDIA GeForce RTX 2070 | 1F02 |
|
||||
| NVIDIA GeForce RTX 2060 | 1F03 |
|
||||
| NVIDIA GeForce RTX 2060 SUPER | 1F06 |
|
||||
| NVIDIA GeForce RTX 2070 | 1F07 |
|
||||
| NVIDIA GeForce RTX 2060 | 1F08 |
|
||||
| NVIDIA GeForce GTX 1650 | 1F0A |
|
||||
| NVIDIA CMP 40HX | 1F0B |
|
||||
| NVIDIA GeForce RTX 2070 | 1F10 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1025 132D |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1025 1342 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 08A1 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 08A2 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 08EA |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 08EB |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 08EC |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 08ED |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 08EE |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 08EF |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 093B |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 093C |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 103C 8572 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 103C 8573 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 103C 8602 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 103C 8606 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1043 132F |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1043 136F |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1043 1881 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1043 1E6E |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 1658 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 1663 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 1664 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 75A4 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 75A5 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 86A4 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 86A5 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1462 1274 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1462 1277 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1558 95E1 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1558 97E1 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1A58 2002 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1A58 2005 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1A58 2007 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1A58 3000 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1A58 3001 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1D05 105E |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1D05 1070 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1D05 2087 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 8086 2087 |
|
||||
| NVIDIA GeForce RTX 2060 | 1F11 |
|
||||
| NVIDIA GeForce RTX 2060 | 1F12 |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 1028 098F |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 103C 8741 |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 103C 8744 |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 103C 878E |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 103C 880E |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 1043 1E11 |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 1043 1F11 |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 1462 12D9 |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 17AA 3801 |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 17AA 3802 |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 17AA 3803 |
|
||||
| NVIDIA GeForce RTX 2070 | 1F14 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1025 1401 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1025 1432 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1025 1442 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1025 1446 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1025 147D |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1028 09E2 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1028 09F3 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 8607 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 86C6 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 86C7 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 8736 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 8738 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 8746 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 8772 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 878A |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 878B |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 87A6 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 87A7 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1043 174F |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 1512 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 75B5 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 75B6 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 76B4 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 76B5 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 78A4 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 78A5 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 86B4 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 86B5 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1462 12AE |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1462 12B0 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1462 12C6 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1558 50D3 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1558 70D1 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1A58 200C |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1A58 2011 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1A58 3002 |
|
||||
| NVIDIA GeForce RTX 2060 | 1F15 |
|
||||
| Quadro RTX 3000 | 1F36 |
|
||||
| Quadro RTX 3000 with Max-Q Design | 1F36 1028 0990 |
|
||||
| Quadro RTX 3000 with Max-Q Design | 1F36 103C 8736 |
|
||||
| Quadro RTX 3000 with Max-Q Design | 1F36 103C 8738 |
|
||||
| Quadro RTX 3000 with Max-Q Design | 1F36 103C 8772 |
|
||||
| Quadro RTX 3000 with Max-Q Design | 1F36 1043 13CF |
|
||||
| Quadro RTX 3000 with Max-Q Design | 1F36 1414 0032 |
|
||||
| NVIDIA GeForce RTX 2060 SUPER | 1F42 |
|
||||
| NVIDIA GeForce RTX 2060 SUPER | 1F47 |
|
||||
| NVIDIA GeForce RTX 2070 | 1F50 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 1028 08ED |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 1028 08EE |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 1028 08EF |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 103C 8572 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 103C 8573 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 103C 8574 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 103C 8600 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 103C 8605 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 17AA 3FEE |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 17AA 3FFE |
|
||||
| NVIDIA GeForce RTX 2060 | 1F51 |
|
||||
| NVIDIA GeForce RTX 2070 | 1F54 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F54 103C 878A |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F54 17AA 3FCE |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F54 17AA 3FCF |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F54 17AA 3FD0 |
|
||||
| NVIDIA GeForce RTX 2060 | 1F55 |
|
||||
| Quadro RTX 3000 | 1F76 |
|
||||
| Matrox D-Series D2450 | 1F76 102B 2800 |
|
||||
| Matrox D-Series D2480 | 1F76 102B 2900 |
|
||||
| NVIDIA GeForce GTX 1650 | 1F82 |
|
||||
| NVIDIA GeForce GTX 1630 | 1F83 |
|
||||
| NVIDIA GeForce GTX 1650 | 1F91 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 103C 863E |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 103C 86E7 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 103C 86E8 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1043 12CF |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1043 156F |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1414 0032 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 144D C822 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1462 127E |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1462 1281 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1462 1284 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1462 1285 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1462 129C |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 17AA 229F |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 17AA 3802 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 17AA 3806 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 17AA 3F1A |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1A58 1001 |
|
||||
| NVIDIA GeForce GTX 1650 Ti | 1F95 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1025 1479 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1025 147A |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1025 147B |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1025 147C |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 103C 86E7 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 103C 86E8 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 103C 8815 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1043 1DFF |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1043 1E1F |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 144D C838 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1462 12BD |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1462 12C5 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1462 12D2 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 17AA 22C0 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 17AA 22C1 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 17AA 3837 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 17AA 3F95 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1A58 1003 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1A58 1006 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1A58 1007 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1E83 3E30 |
|
||||
| NVIDIA GeForce GTX 1650 | 1F96 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F96 1462 1297 |
|
||||
| NVIDIA GeForce MX450 | 1F97 |
|
||||
| NVIDIA GeForce MX450 | 1F98 |
|
||||
| NVIDIA GeForce GTX 1650 | 1F99 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1025 1479 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1025 147A |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1025 147B |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1025 147C |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 103C 8815 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1043 13B2 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1043 1402 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1043 1902 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1462 12BD |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1462 12C5 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1462 12D2 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 17AA 22DA |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 17AA 3F93 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1E83 3E30 |
|
||||
| NVIDIA GeForce MX450 | 1F9C |
|
||||
| NVIDIA GeForce GTX 1650 | 1F9D |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 128D |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 130D |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 149C |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 185C |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 189C |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 12F4 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 1302 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 131B |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 1326 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 132A |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 132E |
|
||||
| NVIDIA GeForce MX550 | 1F9F |
|
||||
| NVIDIA GeForce MX550 | 1FA0 |
|
||||
| NVIDIA T1000 | 1FB0 1028 12DB |
|
||||
| NVIDIA T1000 | 1FB0 103C 12DB |
|
||||
| NVIDIA T1000 | 1FB0 103C 8A80 |
|
||||
| NVIDIA T1000 | 1FB0 10DE 12DB |
|
||||
| NVIDIA DGX Display | 1FB0 10DE 1485 |
|
||||
| NVIDIA T1000 | 1FB0 17AA 12DB |
|
||||
| NVIDIA T600 | 1FB1 1028 1488 |
|
||||
| NVIDIA T600 | 1FB1 103C 1488 |
|
||||
| NVIDIA T600 | 1FB1 103C 8A80 |
|
||||
| NVIDIA T600 | 1FB1 10DE 1488 |
|
||||
| NVIDIA T600 | 1FB1 17AA 1488 |
|
||||
| NVIDIA T400 | 1FB2 1028 1489 |
|
||||
| NVIDIA T400 | 1FB2 103C 1489 |
|
||||
| NVIDIA T400 | 1FB2 103C 8A80 |
|
||||
| NVIDIA T400 | 1FB2 10DE 1489 |
|
||||
| NVIDIA T400 | 1FB2 17AA 1489 |
|
||||
| NVIDIA T600 Laptop GPU | 1FB6 |
|
||||
| NVIDIA T550 Laptop GPU | 1FB7 |
|
||||
| Quadro T2000 | 1FB8 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 1028 097E |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 103C 8736 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 103C 8738 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 103C 8772 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 103C 8780 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 103C 8782 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 103C 8783 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 103C 8785 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 103C 87F0 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 1462 1281 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 1462 12BD |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 17AA 22C0 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 17AA 22C1 |
|
||||
| Quadro T1000 | 1FB9 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 1025 1479 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 1025 147A |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 1025 147B |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 1025 147C |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 103C 8736 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 103C 8738 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 103C 8772 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 103C 8780 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 103C 8782 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 103C 8783 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 103C 8785 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 103C 87F0 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 1462 12BD |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 17AA 22C0 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 17AA 22C1 |
|
||||
| NVIDIA T600 Laptop GPU | 1FBA |
|
||||
| NVIDIA T500 | 1FBB |
|
||||
| NVIDIA T1200 Laptop GPU | 1FBC |
|
||||
| NVIDIA GeForce GTX 1650 | 1FDD |
|
||||
| NVIDIA T1000 8GB | 1FF0 1028 1612 |
|
||||
| NVIDIA T1000 8GB | 1FF0 103C 1612 |
|
||||
| NVIDIA T1000 8GB | 1FF0 103C 8A80 |
|
||||
| NVIDIA T1000 8GB | 1FF0 10DE 1612 |
|
||||
| NVIDIA T1000 8GB | 1FF0 17AA 1612 |
|
||||
| NVIDIA T400 4GB | 1FF2 1028 1613 |
|
||||
| NVIDIA T400 4GB | 1FF2 103C 1613 |
|
||||
| NVIDIA T400E | 1FF2 103C 18FF |
|
||||
| NVIDIA T400 4GB | 1FF2 103C 8A80 |
|
||||
| NVIDIA T400 4GB | 1FF2 10DE 1613 |
|
||||
| NVIDIA T400E | 1FF2 10DE 18FF |
|
||||
| NVIDIA T400 4GB | 1FF2 17AA 1613 |
|
||||
| NVIDIA T400E | 1FF2 17AA 18FF |
|
||||
| Quadro T1000 | 1FF9 |
|
||||
| NVIDIA A100-SXM4-40GB | 20B0 |
|
||||
| NVIDIA A100-PG509-200 | 20B0 10DE 1450 |
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1463 |
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 147F |
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1622 |
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1623 |
|
||||
| NVIDIA PG509-210 | 20B2 10DE 1625 |
|
||||
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A7 |
|
||||
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A8 |
|
||||
| NVIDIA A100 80GB PCIe | 20B5 10DE 1533 |
|
||||
| NVIDIA A100 80GB PCIe | 20B5 10DE 1642 |
|
||||
| NVIDIA PG506-232 | 20B6 10DE 1492 |
|
||||
| NVIDIA A30 | 20B7 10DE 1532 |
|
||||
| NVIDIA A30 | 20B7 10DE 1804 |
|
||||
| NVIDIA A30 | 20B7 10DE 1852 |
|
||||
| NVIDIA A800-SXM4-40GB | 20BD 10DE 17F4 |
|
||||
| NVIDIA A100-PCIE-40GB | 20F1 10DE 145F |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179C |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179D |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179E |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179F |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A0 |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A1 |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A2 |
|
||||
| NVIDIA A800 80GB PCIe | 20F5 10DE 1799 |
|
||||
| NVIDIA A800 80GB PCIe LC | 20F5 10DE 179A |
|
||||
| NVIDIA A800 40GB Active | 20F6 1028 180A |
|
||||
| NVIDIA A800 40GB Active | 20F6 103C 180A |
|
||||
| NVIDIA A800 40GB Active | 20F6 10DE 180A |
|
||||
| NVIDIA A800 40GB Active | 20F6 17AA 180A |
|
||||
| NVIDIA AX800 | 20FD 10DE 17F8 |
|
||||
| NVIDIA GeForce GTX 1660 Ti | 2182 |
|
||||
| NVIDIA GeForce GTX 1660 | 2184 |
|
||||
| NVIDIA GeForce GTX 1650 SUPER | 2187 |
|
||||
| NVIDIA GeForce GTX 1650 | 2188 |
|
||||
| NVIDIA CMP 30HX | 2189 |
|
||||
| NVIDIA GeForce GTX 1660 Ti | 2191 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1028 0949 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 85FB |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 85FE |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 86D6 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 8741 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 8744 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 878D |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 87AF |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 87B3 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1043 171F |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1043 17EF |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1043 18D1 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1414 0032 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 128A |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 128B |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 12C6 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 12CB |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 12CC |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 12D9 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 17AA 380C |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 17AA 381D |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 17AA 381E |
|
||||
| NVIDIA GeForce GTX 1650 Ti | 2192 |
|
||||
| NVIDIA GeForce GTX 1660 SUPER | 21C4 |
|
||||
| NVIDIA GeForce GTX 1660 Ti | 21D1 |
|
||||
| NVIDIA GeForce RTX 3090 Ti | 2203 |
|
||||
| NVIDIA GeForce RTX 3090 | 2204 |
|
||||
| NVIDIA GeForce RTX 3080 | 2206 |
|
||||
| NVIDIA GeForce RTX 3070 Ti | 2207 |
|
||||
| NVIDIA GeForce RTX 3080 Ti | 2208 |
|
||||
| NVIDIA GeForce RTX 3080 | 220A |
|
||||
| NVIDIA CMP 90HX | 220D |
|
||||
| NVIDIA GeForce RTX 3080 | 2216 |
|
||||
| NVIDIA RTX A6000 | 2230 1028 1459 |
|
||||
| NVIDIA RTX A6000 | 2230 103C 1459 |
|
||||
| NVIDIA RTX A6000 | 2230 10DE 1459 |
|
||||
| NVIDIA RTX A6000 | 2230 17AA 1459 |
|
||||
| NVIDIA RTX A5000 | 2231 1028 147E |
|
||||
| NVIDIA RTX A5000 | 2231 103C 147E |
|
||||
| NVIDIA RTX A5000 | 2231 10DE 147E |
|
||||
| NVIDIA RTX A5000 | 2231 17AA 147E |
|
||||
| NVIDIA RTX A4500 | 2232 1028 163C |
|
||||
| NVIDIA RTX A4500 | 2232 103C 163C |
|
||||
| NVIDIA RTX A4500 | 2232 10DE 163C |
|
||||
| NVIDIA RTX A4500 | 2232 17AA 163C |
|
||||
| NVIDIA RTX A5500 | 2233 1028 165A |
|
||||
| NVIDIA RTX A5500 | 2233 103C 165A |
|
||||
| NVIDIA RTX A5500 | 2233 10DE 165A |
|
||||
| NVIDIA RTX A5500 | 2233 17AA 165A |
|
||||
| NVIDIA A40 | 2235 10DE 145A |
|
||||
| NVIDIA A10 | 2236 10DE 1482 |
|
||||
| NVIDIA A10G | 2237 10DE 152F |
|
||||
| NVIDIA A10M | 2238 10DE 1677 |
|
||||
| NVIDIA H100 NVL | 2321 10DE 1839 |
|
||||
| NVIDIA H800 PCIe | 2322 10DE 17A4 |
|
||||
| NVIDIA H800 | 2324 10DE 17A6 |
|
||||
| NVIDIA H800 | 2324 10DE 17A8 |
|
||||
| NVIDIA H20 | 2329 10DE 198B |
|
||||
| NVIDIA H20 | 2329 10DE 198C |
|
||||
| NVIDIA H20-3e | 232C 10DE 2063 |
|
||||
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
|
||||
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
|
||||
| NVIDIA H100 PCIe | 2331 10DE 1626 |
|
||||
| NVIDIA H200 | 2335 10DE 18BE |
|
||||
| NVIDIA H200 | 2335 10DE 18BF |
|
||||
| NVIDIA H100 | 2339 10DE 17FC |
|
||||
| NVIDIA H800 NVL | 233A 10DE 183A |
|
||||
| NVIDIA H200 NVL | 233B 10DE 1996 |
|
||||
| NVIDIA GH200 120GB | 2342 10DE 16EB |
|
||||
| NVIDIA GH200 120GB | 2342 10DE 1805 |
|
||||
| NVIDIA GH200 480GB | 2342 10DE 1809 |
|
||||
| NVIDIA GH200 144G HBM3e | 2348 10DE 18D2 |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 2414 |
|
||||
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
|
||||
| NVIDIA RTX A5500 Laptop GPU | 2438 |
|
||||
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2460 |
|
||||
| NVIDIA GeForce RTX 3070 Ti | 2482 |
|
||||
| NVIDIA GeForce RTX 3070 | 2484 |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 2486 |
|
||||
| NVIDIA GeForce RTX 3060 | 2487 |
|
||||
| NVIDIA GeForce RTX 3070 | 2488 |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 2489 |
|
||||
| NVIDIA CMP 70HX | 248A |
|
||||
| NVIDIA GeForce RTX 3080 Laptop GPU | 249C |
|
||||
| NVIDIA GeForce RTX 3060 Laptop GPU | 249C 1D05 1194 |
|
||||
| NVIDIA GeForce RTX 3070 Laptop GPU | 249D |
|
||||
| NVIDIA GeForce RTX 3070 Ti Laptop GPU | 24A0 |
|
||||
| NVIDIA GeForce RTX 3060 Laptop GPU | 24A0 1D05 1192 |
|
||||
| NVIDIA RTX A4000 | 24B0 1028 14AD |
|
||||
| NVIDIA RTX A4000 | 24B0 103C 14AD |
|
||||
| NVIDIA RTX A4000 | 24B0 10DE 14AD |
|
||||
| NVIDIA RTX A4000 | 24B0 17AA 14AD |
|
||||
| NVIDIA RTX A4000H | 24B1 10DE 1658 |
|
||||
| NVIDIA RTX A5000 Laptop GPU | 24B6 |
|
||||
| NVIDIA RTX A4000 Laptop GPU | 24B7 |
|
||||
| NVIDIA RTX A3000 Laptop GPU | 24B8 |
|
||||
| NVIDIA RTX A3000 12GB Laptop GPU | 24B9 |
|
||||
| NVIDIA RTX A4500 Laptop GPU | 24BA |
|
||||
| NVIDIA RTX A3000 12GB Laptop GPU | 24BB |
|
||||
| NVIDIA GeForce RTX 3060 | 24C7 |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 24C9 |
|
||||
| NVIDIA GeForce RTX 3080 Laptop GPU | 24DC |
|
||||
| NVIDIA GeForce RTX 3070 Laptop GPU | 24DD |
|
||||
| NVIDIA GeForce RTX 3070 Ti Laptop GPU | 24E0 |
|
||||
| NVIDIA RTX A4500 Embedded GPU | 24FA |
|
||||
| NVIDIA GeForce RTX 3060 | 2503 |
|
||||
| NVIDIA GeForce RTX 3060 | 2504 |
|
||||
| NVIDIA GeForce RTX 3050 | 2507 |
|
||||
| NVIDIA GeForce RTX 3050 OEM | 2508 |
|
||||
| NVIDIA GeForce RTX 3060 Laptop GPU | 2520 |
|
||||
| NVIDIA GeForce RTX 3060 Laptop GPU | 2521 |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2523 |
|
||||
| NVIDIA RTX A2000 | 2531 1028 151D |
|
||||
| NVIDIA RTX A2000 | 2531 103C 151D |
|
||||
| NVIDIA RTX A2000 | 2531 10DE 151D |
|
||||
| NVIDIA RTX A2000 | 2531 17AA 151D |
|
||||
| NVIDIA GeForce RTX 3060 | 2544 |
|
||||
| NVIDIA GeForce RTX 3060 Laptop GPU | 2560 |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2563 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 1028 1611 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 103C 1611 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 10DE 1611 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 17AA 1611 |
|
||||
| NVIDIA GeForce RTX 3050 | 2582 |
|
||||
| NVIDIA GeForce RTX 3050 | 2584 |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25A0 |
|
||||
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 8928 |
|
||||
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 89F9 |
|
||||
| NVIDIA GeForce RTX 3060 Laptop GPU | 25A0 1D05 1196 |
|
||||
| NVIDIA GeForce RTX 3050 Laptop GPU | 25A2 |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25A2 1028 0BAF |
|
||||
| NVIDIA GeForce RTX 3060 Laptop GPU | 25A2 1D05 1195 |
|
||||
| NVIDIA GeForce RTX 3050 Laptop GPU | 25A5 |
|
||||
| NVIDIA GeForce MX570 | 25A6 |
|
||||
| NVIDIA GeForce RTX 2050 | 25A7 |
|
||||
| NVIDIA GeForce RTX 2050 | 25A9 |
|
||||
| NVIDIA GeForce MX570 A | 25AA |
|
||||
| NVIDIA GeForce RTX 3050 4GB Laptop GPU | 25AB |
|
||||
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25AC |
|
||||
| NVIDIA GeForce RTX 2050 | 25AD |
|
||||
| NVIDIA RTX A1000 | 25B0 1028 1878 |
|
||||
| NVIDIA RTX A1000 | 25B0 103C 1878 |
|
||||
| NVIDIA RTX A1000 | 25B0 103C 8D96 |
|
||||
| NVIDIA RTX A1000 | 25B0 10DE 1878 |
|
||||
| NVIDIA RTX A1000 | 25B0 17AA 1878 |
|
||||
| NVIDIA RTX A400 | 25B2 1028 1879 |
|
||||
| NVIDIA RTX A400 | 25B2 103C 1879 |
|
||||
| NVIDIA RTX A400 | 25B2 103C 8D95 |
|
||||
| NVIDIA RTX A400 | 25B2 10DE 1879 |
|
||||
| NVIDIA RTX A400 | 25B2 17AA 1879 |
|
||||
| NVIDIA A16 | 25B6 10DE 14A9 |
|
||||
| NVIDIA A2 | 25B6 10DE 157E |
|
||||
| NVIDIA RTX A2000 Laptop GPU | 25B8 |
|
||||
| NVIDIA RTX A1000 Laptop GPU | 25B9 |
|
||||
| NVIDIA RTX A2000 8GB Laptop GPU | 25BA |
|
||||
| NVIDIA RTX A500 Laptop GPU | 25BB |
|
||||
| NVIDIA RTX A1000 6GB Laptop GPU | 25BC |
|
||||
| NVIDIA RTX A500 Laptop GPU | 25BD |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25E0 |
|
||||
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E2 |
|
||||
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E5 |
|
||||
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25EC |
|
||||
| NVIDIA GeForce RTX 2050 | 25ED |
|
||||
| NVIDIA RTX A1000 Embedded GPU | 25F9 |
|
||||
| NVIDIA RTX A2000 Embedded GPU | 25FA |
|
||||
| NVIDIA RTX A500 Embedded GPU | 25FB |
|
||||
| NVIDIA GeForce RTX 4090 | 2684 |
|
||||
| NVIDIA GeForce RTX 4090 D | 2685 |
|
||||
| NVIDIA GeForce RTX 4070 Ti SUPER | 2689 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 1028 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 17AA 16A1 |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 1028 17FA |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 103C 17FA |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 10DE 17FA |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 17AA 17FA |
|
||||
| NVIDIA RTX 5880 Ada Generation | 26B3 1028 1934 |
|
||||
| NVIDIA RTX 5880 Ada Generation | 26B3 103C 1934 |
|
||||
| NVIDIA RTX 5880 Ada Generation | 26B3 10DE 1934 |
|
||||
| NVIDIA RTX 5880 Ada Generation | 26B3 17AA 1934 |
|
||||
| NVIDIA L40 | 26B5 10DE 169D |
|
||||
| NVIDIA L40 | 26B5 10DE 17DA |
|
||||
| NVIDIA L40S | 26B9 10DE 1851 |
|
||||
| NVIDIA L40S | 26B9 10DE 18CF |
|
||||
| NVIDIA L20 | 26BA 10DE 1957 |
|
||||
| NVIDIA L20 | 26BA 10DE 1990 |
|
||||
| NVIDIA GeForce RTX 4080 SUPER | 2702 |
|
||||
| NVIDIA GeForce RTX 4080 | 2704 |
|
||||
| NVIDIA GeForce RTX 4070 Ti SUPER | 2705 |
|
||||
| NVIDIA GeForce RTX 4070 | 2709 |
|
||||
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
|
||||
| NVIDIA RTX 5000 Ada Generation Laptop GPU | 2730 |
|
||||
| NVIDIA GeForce RTX 4090 Laptop GPU | 2757 |
|
||||
| NVIDIA RTX 5000 Ada Generation Embedded GPU | 2770 |
|
||||
| NVIDIA GeForce RTX 4070 Ti | 2782 |
|
||||
| NVIDIA GeForce RTX 4070 SUPER | 2783 |
|
||||
| NVIDIA GeForce RTX 4070 | 2786 |
|
||||
| NVIDIA GeForce RTX 4060 Ti | 2788 |
|
||||
| NVIDIA GeForce RTX 4080 Laptop GPU | 27A0 |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 1028 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 103C 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 10DE 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 17AA 16FA |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 1028 180C |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 103C 180C |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 10DE 180C |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 17AA 180C |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 1028 181B |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 103C 181B |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 10DE 181B |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 17AA 181B |
|
||||
| NVIDIA L2 | 27B6 10DE 1933 |
|
||||
| NVIDIA L4 | 27B8 10DE 16CA |
|
||||
| NVIDIA L4 | 27B8 10DE 16EE |
|
||||
| NVIDIA RTX 4000 Ada Generation Laptop GPU | 27BA |
|
||||
| NVIDIA RTX 3500 Ada Generation Laptop GPU | 27BB |
|
||||
| NVIDIA GeForce RTX 4080 Laptop GPU | 27E0 |
|
||||
| NVIDIA RTX 3500 Ada Generation Embedded GPU | 27FB |
|
||||
| NVIDIA GeForce RTX 4060 Ti | 2803 |
|
||||
| NVIDIA GeForce RTX 4060 Ti | 2805 |
|
||||
| NVIDIA GeForce RTX 4060 | 2808 |
|
||||
| NVIDIA GeForce RTX 4070 Laptop GPU | 2820 |
|
||||
| NVIDIA GeForce RTX 3050 A Laptop GPU | 2822 |
|
||||
| NVIDIA RTX 3000 Ada Generation Laptop GPU | 2838 |
|
||||
| NVIDIA GeForce RTX 4070 Laptop GPU | 2860 |
|
||||
| NVIDIA GeForce RTX 4060 | 2882 |
|
||||
| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
|
||||
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
|
||||
| NVIDIA GeForce RTX 3050 A Laptop GPU | 28A3 |
|
||||
| NVIDIA RTX 2000 Ada Generation | 28B0 1028 1870 |
|
||||
| NVIDIA RTX 2000 Ada Generation | 28B0 103C 1870 |
|
||||
| NVIDIA RTX 2000E Ada Generation | 28B0 103C 1871 |
|
||||
| NVIDIA RTX 2000 Ada Generation | 28B0 10DE 1870 |
|
||||
| NVIDIA RTX 2000E Ada Generation | 28B0 10DE 1871 |
|
||||
| NVIDIA RTX 2000 Ada Generation | 28B0 17AA 1870 |
|
||||
| NVIDIA RTX 2000E Ada Generation | 28B0 17AA 1871 |
|
||||
| NVIDIA RTX 2000 Ada Generation Laptop GPU | 28B8 |
|
||||
| NVIDIA RTX 1000 Ada Generation Laptop GPU | 28B9 |
|
||||
| NVIDIA RTX 500 Ada Generation Laptop GPU | 28BA |
|
||||
| NVIDIA RTX 500 Ada Generation Laptop GPU | 28BB |
|
||||
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
|
||||
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
|
||||
| NVIDIA GeForce RTX 3050 A Laptop GPU | 28E3 |
|
||||
| NVIDIA RTX 2000 Ada Generation Embedded GPU | 28F8 |
|
||||
| NVIDIA B200 | 2901 10DE 1999 |
|
||||
| NVIDIA B200 | 2901 10DE 199B |
|
||||
| NVIDIA B200 | 2901 10DE 20DA |
|
||||
| HGX GB200 | 2941 10DE 2046 |
|
||||
| HGX GB200 | 2941 10DE 20CA |
|
||||
| HGX GB200 | 2941 10DE 20D5 |
|
||||
| HGX GB200 | 2941 10DE 21C9 |
|
||||
| HGX GB200 | 2941 10DE 21CA |
|
||||
| NVIDIA GeForce RTX 5090 | 2B85 |
|
||||
| NVIDIA GeForce RTX 5090 D | 2B87 |
|
||||
| NVIDIA GeForce RTX 5080 | 2C02 |
|
||||
| NVIDIA GeForce RTX 5070 Ti | 2C05 |
|
||||
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 75A4 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 75A5 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 86A4 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 86A5 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1462 1274 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1462 1277 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1558 95E1 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1558 97E1 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1A58 2002 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1A58 2005 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1A58 2007 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1A58 3000 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1A58 3001 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1D05 105E |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1D05 1070 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1D05 2087 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 8086 2087 |
|
||||
| NVIDIA GeForce RTX 2060 | 1F11 |
|
||||
| NVIDIA GeForce RTX 2060 | 1F12 |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 1028 098F |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 103C 8741 |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 103C 8744 |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 103C 878E |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 103C 880E |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 1043 1E11 |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 1043 1F11 |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 1462 12D9 |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 17AA 3801 |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 17AA 3802 |
|
||||
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 17AA 3803 |
|
||||
| NVIDIA GeForce RTX 2070 | 1F14 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1025 1401 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1025 1432 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1025 1442 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1025 1446 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1025 147D |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1028 09E2 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1028 09F3 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 8607 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 86C6 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 86C7 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 8736 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 8738 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 8746 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 8772 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 878A |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 878B |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 87A6 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 87A7 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1043 174F |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 1512 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 75B5 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 75B6 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 76B4 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 76B5 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 78A4 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 78A5 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 86B4 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 86B5 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1462 12AE |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1462 12B0 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1462 12C6 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1558 50D3 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1558 70D1 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1A58 200C |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1A58 2011 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1A58 3002 |
|
||||
| NVIDIA GeForce RTX 2060 | 1F15 |
|
||||
| Quadro RTX 3000 | 1F36 |
|
||||
| Quadro RTX 3000 with Max-Q Design | 1F36 1028 0990 |
|
||||
| Quadro RTX 3000 with Max-Q Design | 1F36 103C 8736 |
|
||||
| Quadro RTX 3000 with Max-Q Design | 1F36 103C 8738 |
|
||||
| Quadro RTX 3000 with Max-Q Design | 1F36 103C 8772 |
|
||||
| Quadro RTX 3000 with Max-Q Design | 1F36 1043 13CF |
|
||||
| Quadro RTX 3000 with Max-Q Design | 1F36 1414 0032 |
|
||||
| NVIDIA GeForce RTX 2060 SUPER | 1F42 |
|
||||
| NVIDIA GeForce RTX 2060 SUPER | 1F47 |
|
||||
| NVIDIA GeForce RTX 2070 | 1F50 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 1028 08ED |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 1028 08EE |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 1028 08EF |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 103C 8572 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 103C 8573 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 103C 8574 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 103C 8600 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 103C 8605 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 17AA 3FEE |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 17AA 3FFE |
|
||||
| NVIDIA GeForce RTX 2060 | 1F51 |
|
||||
| NVIDIA GeForce RTX 2070 | 1F54 |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F54 103C 878A |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F54 17AA 3FCE |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F54 17AA 3FCF |
|
||||
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F54 17AA 3FD0 |
|
||||
| NVIDIA GeForce RTX 2060 | 1F55 |
|
||||
| Quadro RTX 3000 | 1F76 |
|
||||
| Matrox D-Series D2450 | 1F76 102B 2800 |
|
||||
| Matrox D-Series D2480 | 1F76 102B 2900 |
|
||||
| NVIDIA GeForce GTX 1650 | 1F82 |
|
||||
| NVIDIA GeForce GTX 1630 | 1F83 |
|
||||
| NVIDIA GeForce GTX 1650 | 1F91 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 103C 863E |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 103C 86E7 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 103C 86E8 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1043 12CF |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1043 156F |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1414 0032 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 144D C822 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1462 127E |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1462 1281 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1462 1284 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1462 1285 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1462 129C |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 17AA 229F |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 17AA 3802 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 17AA 3806 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 17AA 3F1A |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1A58 1001 |
|
||||
| NVIDIA GeForce GTX 1650 Ti | 1F95 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1025 1479 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1025 147A |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1025 147B |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1025 147C |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 103C 86E7 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 103C 86E8 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 103C 8815 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1043 1DFF |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1043 1E1F |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 144D C838 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1462 12BD |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1462 12C5 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1462 12D2 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 17AA 22C0 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 17AA 22C1 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 17AA 3837 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 17AA 3F95 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1A58 1003 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1A58 1006 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1A58 1007 |
|
||||
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1E83 3E30 |
|
||||
| NVIDIA GeForce GTX 1650 | 1F96 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F96 1462 1297 |
|
||||
| NVIDIA GeForce MX450 | 1F97 |
|
||||
| NVIDIA GeForce MX450 | 1F98 |
|
||||
| NVIDIA GeForce GTX 1650 | 1F99 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1025 1479 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1025 147A |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1025 147B |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1025 147C |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 103C 8815 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1043 13B2 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1043 1402 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1043 1902 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1462 12BD |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1462 12C5 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1462 12D2 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 17AA 22DA |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 17AA 3F93 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1E83 3E30 |
|
||||
| NVIDIA GeForce MX450 | 1F9C |
|
||||
| NVIDIA GeForce GTX 1650 | 1F9D |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 128D |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 130D |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 149C |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 185C |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 189C |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 12F4 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 1302 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 131B |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 1326 |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 132A |
|
||||
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 132E |
|
||||
| NVIDIA GeForce MX550 | 1F9F |
|
||||
| NVIDIA GeForce MX550 | 1FA0 |
|
||||
| NVIDIA T1000 | 1FB0 1028 12DB |
|
||||
| NVIDIA T1000 | 1FB0 103C 12DB |
|
||||
| NVIDIA T1000 | 1FB0 103C 8A80 |
|
||||
| NVIDIA T1000 | 1FB0 10DE 12DB |
|
||||
| NVIDIA DGX Display | 1FB0 10DE 1485 |
|
||||
| NVIDIA T1000 | 1FB0 17AA 12DB |
|
||||
| NVIDIA T600 | 1FB1 1028 1488 |
|
||||
| NVIDIA T600 | 1FB1 103C 1488 |
|
||||
| NVIDIA T600 | 1FB1 103C 8A80 |
|
||||
| NVIDIA T600 | 1FB1 10DE 1488 |
|
||||
| NVIDIA T600 | 1FB1 17AA 1488 |
|
||||
| NVIDIA T400 | 1FB2 1028 1489 |
|
||||
| NVIDIA T400 | 1FB2 103C 1489 |
|
||||
| NVIDIA T400 | 1FB2 103C 8A80 |
|
||||
| NVIDIA T400 | 1FB2 10DE 1489 |
|
||||
| NVIDIA T400 | 1FB2 17AA 1489 |
|
||||
| NVIDIA T600 Laptop GPU | 1FB6 |
|
||||
| NVIDIA T550 Laptop GPU | 1FB7 |
|
||||
| Quadro T2000 | 1FB8 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 1028 097E |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 103C 8736 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 103C 8738 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 103C 8772 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 103C 8780 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 103C 8782 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 103C 8783 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 103C 8785 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 103C 87F0 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 1462 1281 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 1462 12BD |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 17AA 22C0 |
|
||||
| Quadro T2000 with Max-Q Design | 1FB8 17AA 22C1 |
|
||||
| Quadro T1000 | 1FB9 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 1025 1479 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 1025 147A |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 1025 147B |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 1025 147C |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 103C 8736 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 103C 8738 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 103C 8772 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 103C 8780 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 103C 8782 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 103C 8783 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 103C 8785 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 103C 87F0 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 1462 12BD |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 17AA 22C0 |
|
||||
| Quadro T1000 with Max-Q Design | 1FB9 17AA 22C1 |
|
||||
| NVIDIA T600 Laptop GPU | 1FBA |
|
||||
| NVIDIA T500 | 1FBB |
|
||||
| NVIDIA T1200 Laptop GPU | 1FBC |
|
||||
| NVIDIA GeForce GTX 1650 | 1FDD |
|
||||
| NVIDIA T1000 8GB | 1FF0 1028 1612 |
|
||||
| NVIDIA T1000 8GB | 1FF0 103C 1612 |
|
||||
| NVIDIA T1000 8GB | 1FF0 103C 8A80 |
|
||||
| NVIDIA T1000 8GB | 1FF0 10DE 1612 |
|
||||
| NVIDIA T1000 8GB | 1FF0 17AA 1612 |
|
||||
| NVIDIA T400 4GB | 1FF2 1028 1613 |
|
||||
| NVIDIA T400 4GB | 1FF2 103C 1613 |
|
||||
| NVIDIA T400E | 1FF2 103C 18FF |
|
||||
| NVIDIA T400 4GB | 1FF2 103C 8A80 |
|
||||
| NVIDIA T400 4GB | 1FF2 10DE 1613 |
|
||||
| NVIDIA T400E | 1FF2 10DE 18FF |
|
||||
| NVIDIA T400 4GB | 1FF2 17AA 1613 |
|
||||
| NVIDIA T400E | 1FF2 17AA 18FF |
|
||||
| Quadro T1000 | 1FF9 |
|
||||
| NVIDIA A100-SXM4-40GB | 20B0 |
|
||||
| NVIDIA A100-PG509-200 | 20B0 10DE 1450 |
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1463 |
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 147F |
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1622 |
|
||||
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1623 |
|
||||
| NVIDIA PG509-210 | 20B2 10DE 1625 |
|
||||
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A7 |
|
||||
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A8 |
|
||||
| NVIDIA A100 80GB PCIe | 20B5 10DE 1533 |
|
||||
| NVIDIA A100 80GB PCIe | 20B5 10DE 1642 |
|
||||
| NVIDIA PG506-232 | 20B6 10DE 1492 |
|
||||
| NVIDIA A30 | 20B7 10DE 1532 |
|
||||
| NVIDIA A30 | 20B7 10DE 1804 |
|
||||
| NVIDIA A30 | 20B7 10DE 1852 |
|
||||
| NVIDIA A800-SXM4-40GB | 20BD 10DE 17F4 |
|
||||
| NVIDIA A100-PCIE-40GB | 20F1 10DE 145F |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179C |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179D |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179E |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179F |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A0 |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A1 |
|
||||
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A2 |
|
||||
| NVIDIA A800 80GB PCIe | 20F5 10DE 1799 |
|
||||
| NVIDIA A800 80GB PCIe LC | 20F5 10DE 179A |
|
||||
| NVIDIA A800 40GB Active | 20F6 1028 180A |
|
||||
| NVIDIA A800 40GB Active | 20F6 103C 180A |
|
||||
| NVIDIA A800 40GB Active | 20F6 10DE 180A |
|
||||
| NVIDIA A800 40GB Active | 20F6 17AA 180A |
|
||||
| NVIDIA AX800 | 20FD 10DE 17F8 |
|
||||
| NVIDIA GeForce GTX 1660 Ti | 2182 |
|
||||
| NVIDIA GeForce GTX 1660 | 2184 |
|
||||
| NVIDIA GeForce GTX 1650 SUPER | 2187 |
|
||||
| NVIDIA GeForce GTX 1650 | 2188 |
|
||||
| NVIDIA CMP 30HX | 2189 |
|
||||
| NVIDIA GeForce GTX 1660 Ti | 2191 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1028 0949 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 85FB |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 85FE |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 86D6 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 8741 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 8744 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 878D |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 87AF |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 87B3 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1043 171F |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1043 17EF |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1043 18D1 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1414 0032 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 128A |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 128B |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 12C6 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 12CB |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 12CC |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 12D9 |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 17AA 380C |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 17AA 381D |
|
||||
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 17AA 381E |
|
||||
| NVIDIA GeForce GTX 1650 Ti | 2192 |
|
||||
| NVIDIA GeForce GTX 1660 SUPER | 21C4 |
|
||||
| NVIDIA GeForce GTX 1660 Ti | 21D1 |
|
||||
| NVIDIA GeForce RTX 3090 Ti | 2203 |
|
||||
| NVIDIA GeForce RTX 3090 | 2204 |
|
||||
| NVIDIA GeForce RTX 3080 | 2206 |
|
||||
| NVIDIA GeForce RTX 3070 Ti | 2207 |
|
||||
| NVIDIA GeForce RTX 3080 Ti | 2208 |
|
||||
| NVIDIA GeForce RTX 3080 | 220A |
|
||||
| NVIDIA CMP 90HX | 220D |
|
||||
| NVIDIA GeForce RTX 3080 | 2216 |
|
||||
| NVIDIA RTX A6000 | 2230 1028 1459 |
|
||||
| NVIDIA RTX A6000 | 2230 103C 1459 |
|
||||
| NVIDIA RTX A6000 | 2230 10DE 1459 |
|
||||
| NVIDIA RTX A6000 | 2230 17AA 1459 |
|
||||
| NVIDIA RTX A5000 | 2231 1028 147E |
|
||||
| NVIDIA RTX A5000 | 2231 103C 147E |
|
||||
| NVIDIA RTX A5000 | 2231 10DE 147E |
|
||||
| NVIDIA RTX A5000 | 2231 17AA 147E |
|
||||
| NVIDIA RTX A4500 | 2232 1028 163C |
|
||||
| NVIDIA RTX A4500 | 2232 103C 163C |
|
||||
| NVIDIA RTX A4500 | 2232 10DE 163C |
|
||||
| NVIDIA RTX A4500 | 2232 17AA 163C |
|
||||
| NVIDIA RTX A5500 | 2233 1028 165A |
|
||||
| NVIDIA RTX A5500 | 2233 103C 165A |
|
||||
| NVIDIA RTX A5500 | 2233 10DE 165A |
|
||||
| NVIDIA RTX A5500 | 2233 17AA 165A |
|
||||
| NVIDIA A40 | 2235 10DE 145A |
|
||||
| NVIDIA A10 | 2236 10DE 1482 |
|
||||
| NVIDIA A10G | 2237 10DE 152F |
|
||||
| NVIDIA A10M | 2238 10DE 1677 |
|
||||
| NVIDIA H100 NVL | 2321 10DE 1839 |
|
||||
| NVIDIA H800 PCIe | 2322 10DE 17A4 |
|
||||
| NVIDIA H800 | 2324 10DE 17A6 |
|
||||
| NVIDIA H800 | 2324 10DE 17A8 |
|
||||
| NVIDIA H20 | 2329 10DE 198B |
|
||||
| NVIDIA H20 | 2329 10DE 198C |
|
||||
| NVIDIA H20-3e | 232C 10DE 2063 |
|
||||
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
|
||||
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
|
||||
| NVIDIA H100 PCIe | 2331 10DE 1626 |
|
||||
| NVIDIA H200 | 2335 10DE 18BE |
|
||||
| NVIDIA H200 | 2335 10DE 18BF |
|
||||
| NVIDIA H100 | 2339 10DE 17FC |
|
||||
| NVIDIA H800 NVL | 233A 10DE 183A |
|
||||
| NVIDIA H200 NVL | 233B 10DE 1996 |
|
||||
| NVIDIA GH200 120GB | 2342 10DE 16EB |
|
||||
| NVIDIA GH200 120GB | 2342 10DE 1805 |
|
||||
| NVIDIA GH200 480GB | 2342 10DE 1809 |
|
||||
| NVIDIA GH200 144G HBM3e | 2348 10DE 18D2 |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 2414 |
|
||||
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
|
||||
| NVIDIA RTX A5500 Laptop GPU | 2438 |
|
||||
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2460 |
|
||||
| NVIDIA GeForce RTX 3070 Ti | 2482 |
|
||||
| NVIDIA GeForce RTX 3070 | 2484 |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 2486 |
|
||||
| NVIDIA GeForce RTX 3060 | 2487 |
|
||||
| NVIDIA GeForce RTX 3070 | 2488 |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 2489 |
|
||||
| NVIDIA CMP 70HX | 248A |
|
||||
| NVIDIA GeForce RTX 3080 Laptop GPU | 249C |
|
||||
| NVIDIA GeForce RTX 3060 Laptop GPU | 249C 1D05 1194 |
|
||||
| NVIDIA GeForce RTX 3070 Laptop GPU | 249D |
|
||||
| NVIDIA GeForce RTX 3070 Ti Laptop GPU | 24A0 |
|
||||
| NVIDIA GeForce RTX 3060 Laptop GPU | 24A0 1D05 1192 |
|
||||
| NVIDIA RTX A4000 | 24B0 1028 14AD |
|
||||
| NVIDIA RTX A4000 | 24B0 103C 14AD |
|
||||
| NVIDIA RTX A4000 | 24B0 10DE 14AD |
|
||||
| NVIDIA RTX A4000 | 24B0 17AA 14AD |
|
||||
| NVIDIA RTX A4000H | 24B1 10DE 1658 |
|
||||
| NVIDIA RTX A5000 Laptop GPU | 24B6 |
|
||||
| NVIDIA RTX A4000 Laptop GPU | 24B7 |
|
||||
| NVIDIA RTX A3000 Laptop GPU | 24B8 |
|
||||
| NVIDIA RTX A3000 12GB Laptop GPU | 24B9 |
|
||||
| NVIDIA RTX A4500 Laptop GPU | 24BA |
|
||||
| NVIDIA RTX A3000 12GB Laptop GPU | 24BB |
|
||||
| NVIDIA GeForce RTX 3060 | 24C7 |
|
||||
| NVIDIA GeForce RTX 3060 Ti | 24C9 |
|
||||
| NVIDIA GeForce RTX 3080 Laptop GPU | 24DC |
|
||||
| NVIDIA GeForce RTX 3070 Laptop GPU | 24DD |
|
||||
| NVIDIA GeForce RTX 3070 Ti Laptop GPU | 24E0 |
|
||||
| NVIDIA RTX A4500 Embedded GPU | 24FA |
|
||||
| NVIDIA GeForce RTX 3060 | 2503 |
|
||||
| NVIDIA GeForce RTX 3060 | 2504 |
|
||||
| NVIDIA GeForce RTX 3050 | 2507 |
|
||||
| NVIDIA GeForce RTX 3050 OEM | 2508 |
|
||||
| NVIDIA GeForce RTX 3060 Laptop GPU | 2520 |
|
||||
| NVIDIA GeForce RTX 3060 Laptop GPU | 2521 |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2523 |
|
||||
| NVIDIA RTX A2000 | 2531 1028 151D |
|
||||
| NVIDIA RTX A2000 | 2531 103C 151D |
|
||||
| NVIDIA RTX A2000 | 2531 10DE 151D |
|
||||
| NVIDIA RTX A2000 | 2531 17AA 151D |
|
||||
| NVIDIA GeForce RTX 3060 | 2544 |
|
||||
| NVIDIA GeForce RTX 3060 Laptop GPU | 2560 |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2563 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 1028 1611 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 103C 1611 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 10DE 1611 |
|
||||
| NVIDIA RTX A2000 12GB | 2571 17AA 1611 |
|
||||
| NVIDIA GeForce RTX 3050 | 2582 |
|
||||
| NVIDIA GeForce RTX 3050 | 2584 |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25A0 |
|
||||
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 8928 |
|
||||
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 89F9 |
|
||||
| NVIDIA GeForce RTX 3060 Laptop GPU | 25A0 1D05 1196 |
|
||||
| NVIDIA GeForce RTX 3050 Laptop GPU | 25A2 |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25A2 1028 0BAF |
|
||||
| NVIDIA GeForce RTX 3060 Laptop GPU | 25A2 1D05 1195 |
|
||||
| NVIDIA GeForce RTX 3050 Laptop GPU | 25A5 |
|
||||
| NVIDIA GeForce MX570 | 25A6 |
|
||||
| NVIDIA GeForce RTX 2050 | 25A7 |
|
||||
| NVIDIA GeForce RTX 2050 | 25A9 |
|
||||
| NVIDIA GeForce MX570 A | 25AA |
|
||||
| NVIDIA GeForce RTX 3050 4GB Laptop GPU | 25AB |
|
||||
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25AC |
|
||||
| NVIDIA GeForce RTX 2050 | 25AD |
|
||||
| NVIDIA RTX A1000 | 25B0 1028 1878 |
|
||||
| NVIDIA RTX A1000 | 25B0 103C 1878 |
|
||||
| NVIDIA RTX A1000 | 25B0 103C 8D96 |
|
||||
| NVIDIA RTX A1000 | 25B0 10DE 1878 |
|
||||
| NVIDIA RTX A1000 | 25B0 17AA 1878 |
|
||||
| NVIDIA RTX A400 | 25B2 1028 1879 |
|
||||
| NVIDIA RTX A400 | 25B2 103C 1879 |
|
||||
| NVIDIA RTX A400 | 25B2 103C 8D95 |
|
||||
| NVIDIA RTX A400 | 25B2 10DE 1879 |
|
||||
| NVIDIA RTX A400 | 25B2 17AA 1879 |
|
||||
| NVIDIA A16 | 25B6 10DE 14A9 |
|
||||
| NVIDIA A2 | 25B6 10DE 157E |
|
||||
| NVIDIA RTX A2000 Laptop GPU | 25B8 |
|
||||
| NVIDIA RTX A1000 Laptop GPU | 25B9 |
|
||||
| NVIDIA RTX A2000 8GB Laptop GPU | 25BA |
|
||||
| NVIDIA RTX A500 Laptop GPU | 25BB |
|
||||
| NVIDIA RTX A1000 6GB Laptop GPU | 25BC |
|
||||
| NVIDIA RTX A500 Laptop GPU | 25BD |
|
||||
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25E0 |
|
||||
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E2 |
|
||||
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E5 |
|
||||
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25EC |
|
||||
| NVIDIA GeForce RTX 2050 | 25ED |
|
||||
| NVIDIA RTX A1000 Embedded GPU | 25F9 |
|
||||
| NVIDIA RTX A2000 Embedded GPU | 25FA |
|
||||
| NVIDIA RTX A500 Embedded GPU | 25FB |
|
||||
| NVIDIA GeForce RTX 4090 | 2684 |
|
||||
| NVIDIA GeForce RTX 4090 D | 2685 |
|
||||
| NVIDIA GeForce RTX 4070 Ti SUPER | 2689 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 1028 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |
|
||||
| NVIDIA RTX 6000 Ada Generation | 26B1 17AA 16A1 |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 1028 17FA |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 103C 17FA |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 10DE 17FA |
|
||||
| NVIDIA RTX 5000 Ada Generation | 26B2 17AA 17FA |
|
||||
| NVIDIA RTX 5880 Ada Generation | 26B3 1028 1934 |
|
||||
| NVIDIA RTX 5880 Ada Generation | 26B3 103C 1934 |
|
||||
| NVIDIA RTX 5880 Ada Generation | 26B3 10DE 1934 |
|
||||
| NVIDIA RTX 5880 Ada Generation | 26B3 17AA 1934 |
|
||||
| NVIDIA L40 | 26B5 10DE 169D |
|
||||
| NVIDIA L40 | 26B5 10DE 17DA |
|
||||
| NVIDIA L40S | 26B9 10DE 1851 |
|
||||
| NVIDIA L40S | 26B9 10DE 18CF |
|
||||
| NVIDIA L20 | 26BA 10DE 1957 |
|
||||
| NVIDIA L20 | 26BA 10DE 1990 |
|
||||
| NVIDIA GeForce RTX 4080 SUPER | 2702 |
|
||||
| NVIDIA GeForce RTX 4080 | 2704 |
|
||||
| NVIDIA GeForce RTX 4070 Ti SUPER | 2705 |
|
||||
| NVIDIA GeForce RTX 4070 | 2709 |
|
||||
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
|
||||
| NVIDIA RTX 5000 Ada Generation Laptop GPU | 2730 |
|
||||
| NVIDIA GeForce RTX 4090 Laptop GPU | 2757 |
|
||||
| NVIDIA RTX 5000 Ada Generation Embedded GPU | 2770 |
|
||||
| NVIDIA GeForce RTX 4070 Ti | 2782 |
|
||||
| NVIDIA GeForce RTX 4070 SUPER | 2783 |
|
||||
| NVIDIA GeForce RTX 4070 | 2786 |
|
||||
| NVIDIA GeForce RTX 4060 Ti | 2788 |
|
||||
| NVIDIA GeForce RTX 4080 Laptop GPU | 27A0 |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 1028 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 103C 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 10DE 16FA |
|
||||
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 17AA 16FA |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 1028 180C |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 103C 180C |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 10DE 180C |
|
||||
| NVIDIA RTX 4500 Ada Generation | 27B1 17AA 180C |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 1028 181B |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 103C 181B |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 10DE 181B |
|
||||
| NVIDIA RTX 4000 Ada Generation | 27B2 17AA 181B |
|
||||
| NVIDIA L2 | 27B6 10DE 1933 |
|
||||
| NVIDIA L4 | 27B8 10DE 16CA |
|
||||
| NVIDIA L4 | 27B8 10DE 16EE |
|
||||
| NVIDIA RTX 4000 Ada Generation Laptop GPU | 27BA |
|
||||
| NVIDIA RTX 3500 Ada Generation Laptop GPU | 27BB |
|
||||
| NVIDIA GeForce RTX 4080 Laptop GPU | 27E0 |
|
||||
| NVIDIA RTX 3500 Ada Generation Embedded GPU | 27FB |
|
||||
| NVIDIA GeForce RTX 4060 Ti | 2803 |
|
||||
| NVIDIA GeForce RTX 4060 Ti | 2805 |
|
||||
| NVIDIA GeForce RTX 4060 | 2808 |
|
||||
| NVIDIA GeForce RTX 4070 Laptop GPU | 2820 |
|
||||
| NVIDIA GeForce RTX 3050 A Laptop GPU | 2822 |
|
||||
| NVIDIA RTX 3000 Ada Generation Laptop GPU | 2838 |
|
||||
| NVIDIA GeForce RTX 4070 Laptop GPU | 2860 |
|
||||
| NVIDIA GeForce RTX 4060 | 2882 |
|
||||
| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
|
||||
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
|
||||
| NVIDIA GeForce RTX 3050 A Laptop GPU | 28A3 |
|
||||
| NVIDIA RTX 2000 Ada Generation | 28B0 1028 1870 |
|
||||
| NVIDIA RTX 2000 Ada Generation | 28B0 103C 1870 |
|
||||
| NVIDIA RTX 2000E Ada Generation | 28B0 103C 1871 |
|
||||
| NVIDIA RTX 2000 Ada Generation | 28B0 10DE 1870 |
|
||||
| NVIDIA RTX 2000E Ada Generation | 28B0 10DE 1871 |
|
||||
| NVIDIA RTX 2000 Ada Generation | 28B0 17AA 1870 |
|
||||
| NVIDIA RTX 2000E Ada Generation | 28B0 17AA 1871 |
|
||||
| NVIDIA RTX 2000 Ada Generation Laptop GPU | 28B8 |
|
||||
| NVIDIA RTX 1000 Ada Generation Laptop GPU | 28B9 |
|
||||
| NVIDIA RTX 500 Ada Generation Laptop GPU | 28BA |
|
||||
| NVIDIA RTX 500 Ada Generation Laptop GPU | 28BB |
|
||||
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
|
||||
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
|
||||
| NVIDIA GeForce RTX 3050 A Laptop GPU | 28E3 |
|
||||
| NVIDIA RTX 2000 Ada Generation Embedded GPU | 28F8 |
|
||||
| NVIDIA B200 | 2901 10DE 1999 |
|
||||
| NVIDIA B200 | 2901 10DE 199B |
|
||||
| NVIDIA B200 | 2901 10DE 20DA |
|
||||
| NVIDIA HGX GB200 | 2941 10DE 2046 |
|
||||
| NVIDIA HGX GB200 | 2941 10DE 20CA |
|
||||
| NVIDIA HGX GB200 | 2941 10DE 20D5 |
|
||||
| NVIDIA HGX GB200 | 2941 10DE 21C9 |
|
||||
| NVIDIA HGX GB200 | 2941 10DE 21CA |
|
||||
| NVIDIA GeForce RTX 5090 | 2B85 |
|
||||
| NVIDIA GeForce RTX 5090 D | 2B87 |
|
||||
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 1028 204B |
|
||||
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 103C 204B |
|
||||
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 10DE 204B |
|
||||
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 17AA 204B |
|
||||
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 1028 204C |
|
||||
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 103C 204C |
|
||||
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 10DE 204C |
|
||||
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 17AA 204C |
|
||||
| NVIDIA GeForce RTX 5080 | 2C02 |
|
||||
| NVIDIA GeForce RTX 5070 Ti | 2C05 |
|
||||
| NVIDIA GeForce RTX 5090 Laptop GPU | 2C18 |
|
||||
| NVIDIA GeForce RTX 5080 Laptop GPU | 2C19 |
|
||||
| NVIDIA GeForce RTX 5090 Laptop GPU | 2C58 |
|
||||
| NVIDIA GeForce RTX 5080 Laptop GPU | 2C59 |
|
||||
| NVIDIA GeForce RTX 5070 | 2F04 |
|
||||
| NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F18 |
|
||||
| NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F58 |
@ -86,7 +86,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.124.06\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.133.07\"

ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)

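For context, a minimal hedged illustration of how a build define such as NV_VERSION_STRING is consumed by code compiled with the flags above; the function and message below are illustrative, not quoted from the tree.

/* Illustrative only: NV_VERSION_STRING is assumed to come from the
 * -DNV_VERSION_STRING=... flag above and expands to a string literal. */
#include <linux/kernel.h>

static void example_log_driver_version(void)
{
    printk(KERN_INFO "NVRM: loading NVIDIA open GPU kernel module, version " NV_VERSION_STRING "\n");
}
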
@ -498,6 +498,9 @@ typedef struct nv_state_t
NvU32 dispIsoStreamId;
NvU32 dispNisoStreamId;
} iommus;

/* Console is managed by drm drivers or NVKMS */
NvBool client_managed_console;
} nv_state_t;

#define NVFP_TYPE_NONE 0x0

@ -542,9 +545,9 @@ typedef struct UvmGpuNvlinkInfo_tag *nvgpuNvlinkInfo_t;
typedef struct UvmGpuEccInfo_tag *nvgpuEccInfo_t;
typedef struct UvmGpuFaultInfo_tag *nvgpuFaultInfo_t;
typedef struct UvmGpuAccessCntrInfo_tag *nvgpuAccessCntrInfo_t;
typedef struct UvmGpuAccessCntrConfig_tag *nvgpuAccessCntrConfig_t;
typedef struct UvmGpuInfo_tag nvgpuInfo_t;
typedef struct UvmGpuClientInfo_tag nvgpuClientInfo_t;
typedef struct UvmGpuAccessCntrConfig_tag nvgpuAccessCntrConfig_t;
typedef struct UvmGpuInfo_tag nvgpuInfo_t;
typedef struct UvmGpuClientInfo_tag nvgpuClientInfo_t;
typedef struct UvmPmaAllocationOptions_tag *nvgpuPmaAllocationOptions_t;
typedef struct UvmPmaStatistics_tag *nvgpuPmaStatistics_t;
typedef struct UvmGpuMemoryInfo_tag *nvgpuMemoryInfo_t;

@ -1056,7 +1056,7 @@ NV_STATUS nvUvmInterfaceDestroyAccessCntrInfo(uvmGpuDeviceHandle device,
*/
NV_STATUS nvUvmInterfaceEnableAccessCntr(uvmGpuDeviceHandle device,
UvmGpuAccessCntrInfo *pAccessCntrInfo,
UvmGpuAccessCntrConfig *pAccessCntrConfig);
const UvmGpuAccessCntrConfig *pAccessCntrConfig);

/*******************************************************************************
nvUvmInterfaceDisableAccessCntr

@ -1103,24 +1103,9 @@ typedef enum
UVM_ACCESS_COUNTER_GRANULARITY_16G = 4,
} UVM_ACCESS_COUNTER_GRANULARITY;

typedef enum
{
UVM_ACCESS_COUNTER_USE_LIMIT_NONE = 1,
UVM_ACCESS_COUNTER_USE_LIMIT_QTR = 2,
UVM_ACCESS_COUNTER_USE_LIMIT_HALF = 3,
UVM_ACCESS_COUNTER_USE_LIMIT_FULL = 4,
} UVM_ACCESS_COUNTER_USE_LIMIT;

typedef struct UvmGpuAccessCntrConfig_tag
{
NvU32 mimcGranularity;

NvU32 momcGranularity;

NvU32 mimcUseLimit;

NvU32 momcUseLimit;

NvU32 granularity;
NvU32 threshold;
} UvmGpuAccessCntrConfig;

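To make the reshaped access-counter configuration concrete, here is a minimal sketch that fills in the new granularity/threshold fields and hands the structure to the const-qualified nvUvmInterfaceEnableAccessCntr() shown above. The threshold value, the choice of granularity, and the assumption that the device and access-counter info handles come from the usual earlier setup calls are illustrative, not taken from the tree.

/* Sketch only: 'device' and 'accessCntrInfo' are assumed to come from the
 * usual init/ownership calls; error handling is omitted. */
static NV_STATUS example_enable_access_counters(uvmGpuDeviceHandle device,
                                                UvmGpuAccessCntrInfo *accessCntrInfo)
{
    UvmGpuAccessCntrConfig config = {0};

    /* Assumed to take one of the UVM_ACCESS_COUNTER_GRANULARITY_* values. */
    config.granularity = UVM_ACCESS_COUNTER_GRANULARITY_16G;
    config.threshold   = 256; /* hypothetical notification threshold */

    return nvUvmInterfaceEnableAccessCntr(device, accessCntrInfo, &config);
}
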
@ -159,6 +159,7 @@ NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_NOT_READY, 0x00000081, "Nvlink Fabri
NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_FAILURE, 0x00000082, "Nvlink Fabric Probe failed")
NV_STATUS_CODE(NV_ERR_GPU_MEMORY_ONLINING_FAILURE, 0x00000083, "GPU Memory Onlining failed")
NV_STATUS_CODE(NV_ERR_REDUCTION_MANAGER_NOT_AVAILABLE, 0x00000084, "Reduction Manager is not available")
NV_STATUS_CODE(NV_ERR_RESOURCE_RETIREMENT_ERROR, 0x00000086, "An error occurred while trying to retire a resource")

// Warnings:
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")

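These entries form an X-macro list: each consumer defines NV_STATUS_CODE before including the header and lets the preprocessor expand every entry into whatever shape it needs. A hedged sketch of the usual string-lookup expansion follows; the function name, include path, and default string are illustrative rather than the exact ones used in the tree.

/* Illustrative X-macro expansion of the status-code list. */
#define NV_STATUS_CODE(name, code, str) case name: return str;

static const char *example_status_to_string(NV_STATUS status)
{
    switch (status)
    {
        #include "nvstatuscodes.h" /* hypothetical include path */
        default:
            return "Unknown error code";
    }
}

#undef NV_STATUS_CODE
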
@ -81,7 +81,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_own_access_cntr_intr(nvidia_stack_t *, nvgpuSessionHandle_t, nvgpuAccessCntrInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, nvgpuAccessCntrConfig_t);
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, const nvgpuAccessCntrConfig_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_disable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_unset_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t);

@ -5289,6 +5289,45 @@ compile_test() {

compile_check_conftest "$CODE" "NV_FOLLOW_PFN_PRESENT" "" "functions"
;;

follow_pte_arg_vma)
#
# Determine if the first argument of follow_pte is
# mm_struct or vm_area_struct.
#
# The first argument was changed from mm_struct to vm_area_struct by
# commit 29ae7d96d166 ("mm: pass VMA instead of MM to follow_pte()")
#
CODE="
#include <linux/mm.h>

typeof(follow_pte) conftest_follow_pte_has_vma_arg;
int conftest_follow_pte_has_vma_arg(struct vm_area_struct *vma,
unsigned long address,
pte_t **ptep,
spinlock_t **ptl) {
return 0;
}"

compile_check_conftest "$CODE" "NV_FOLLOW_PTE_ARG1_VMA" "" "types"
;;

ptep_get)
#
# Determine if ptep_get() is present.
#
# ptep_get() was added by commit 481e980a7c19
# ("mm: Allow arches to provide ptep_get()")
#
CODE="
#include <linux/mm.h>
void conftest_ptep_get(void) {
ptep_get();
}"

compile_check_conftest "$CODE" "NV_PTEP_GET_PRESENT" "" "functions"
;;

drm_plane_atomic_check_has_atomic_state_arg)
#
# Determine if drm_plane_helper_funcs::atomic_check takes 'state'

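For reference, a hedged C sketch (separate from conftest.sh itself) of how defines generated by tests like the two above, NV_FOLLOW_PTE_ARG1_VMA and NV_PTEP_GET_PRESENT, are typically consumed; the wrapper name and the fallback are illustrative, not quotes from the tree.

/* Illustrative consumers of the conftest-generated defines. */
#include <linux/mm.h>

#if !defined(NV_PTEP_GET_PRESENT)
/* Hypothetical fallback for kernels that predate ptep_get(). */
static inline pte_t ptep_get(pte_t *ptep)
{
    return READ_ONCE(*ptep);
}
#endif

static int example_follow_pte(struct vm_area_struct *vma, unsigned long addr,
                              pte_t **ptep, spinlock_t **ptl)
{
#if defined(NV_FOLLOW_PTE_ARG1_VMA)
    return follow_pte(vma, addr, ptep, ptl);
#else
    return follow_pte(vma->vm_mm, addr, ptep, ptl);
#endif
}
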
@ -59,7 +59,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_fault_buffer.c

@ -240,7 +240,7 @@ static void uvm_release_deferred(void *data)
// Since this function is only scheduled to run when uvm_release() fails
// to trylock-acquire the pm.lock, the following acquisition attempt
// is expected to block this thread, and cause it to remain blocked until
// uvm_resume() releases the lock. As a result, the deferred release
// uvm_resume() releases the lock. As a result, the deferred release
// kthread queue may stall for long periods of time.
uvm_down_read(&g_uvm_global.pm.lock);

@ -292,14 +292,14 @@ static int uvm_release(struct inode *inode, struct file *filp)

// Because the kernel discards the status code returned from this release
// callback, early exit in case of a pm.lock acquisition failure is not
// an option. Instead, the teardown work normally performed synchronously
// an option. Instead, the teardown work normally performed synchronously
// needs to be scheduled to run after uvm_resume() releases the lock.
if (uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
uvm_va_space_destroy(va_space);
uvm_up_read(&g_uvm_global.pm.lock);
}
else {
// Remove references to this inode from the address_space. This isn't
// Remove references to this inode from the address_space. This isn't
// strictly necessary, as any CPU mappings of this file have already
// been destroyed, and va_space->mapping won't be used again. Still,
// the va_space survives the inode if its destruction is deferred, in

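The comments above describe a trylock-or-defer protocol around pm.lock. A generic, self-contained sketch of that idiom follows; the structure, field names, and use of a system workqueue are illustrative only, since the driver defers onto its own kthread queue rather than schedule_work().

#include <linux/rwsem.h>
#include <linux/workqueue.h>

struct example_state {
    struct rw_semaphore pm_lock;
    struct work_struct  deferred_teardown; /* assumed INIT_WORK()'d at creation */
};

static void example_teardown(struct example_state *state)
{
    /* Actual resource teardown would go here. */
}

static void example_deferred_teardown(struct work_struct *work)
{
    struct example_state *state = container_of(work, struct example_state, deferred_teardown);

    /* Blocks until the suspend/resume path drops pm_lock, then tears down. */
    down_read(&state->pm_lock);
    example_teardown(state);
    up_read(&state->pm_lock);
}

static void example_release(struct example_state *state)
{
    /* A release callback cannot report failure, so defer instead of
     * erroring out when the lock is contended. */
    if (down_read_trylock(&state->pm_lock)) {
        example_teardown(state);
        up_read(&state->pm_lock);
    }
    else {
        schedule_work(&state->deferred_teardown);
    }
}
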
@ -867,8 +867,8 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
}

// If the PM lock cannot be acquired, disable the VMA and report success
// to the caller. The caller is expected to determine whether the
// map operation succeeded via an ioctl() call. This is necessary to
// to the caller. The caller is expected to determine whether the
// map operation succeeded via an ioctl() call. This is necessary to
// safely handle MAP_FIXED, which needs to complete atomically to prevent
// the loss of the virtual address range.
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock)) {

@ -1233,19 +1233,8 @@ static int uvm_init(void)
goto error;
}

pr_info("Loaded the UVM driver, major device number %d.\n", MAJOR(g_uvm_base_dev));

if (uvm_enable_builtin_tests)
pr_info("Built-in UVM tests are enabled. This is a security risk.\n");

// After Open RM is released, both the enclosing "#if" and this comment
// block should be removed, because the uvm_hmm_is_enabled_system_wide()
// check is both necessary and sufficient for reporting functionality.
// Until that time, however, we need to avoid advertisting UVM's ability to
// enable HMM functionality.

if (uvm_hmm_is_enabled_system_wide())
UVM_INFO_PRINT("HMM (Heterogeneous Memory Management) is enabled in the UVM driver.\n");
UVM_INFO_PRINT("Built-in UVM tests are enabled. This is a security risk.\n");

return 0;

@ -1274,8 +1263,6 @@ static void uvm_exit(void)
uvm_global_exit();

uvm_test_unload_state_exit();

pr_info("Unloaded the UVM driver.\n");
}

static void __exit uvm_exit_entry(void)

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2024 NVIDIA Corporation
Copyright (c) 2021-2025 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to

@ -38,12 +38,10 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

parent_gpu->utlb_per_gpc_count = uvm_ada_get_utlbs_per_gpc(parent_gpu);

parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) *
8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}

// A single top level PDE on Ada covers 128 TB and that's the minimum size

@ -82,8 +80,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

parent_gpu->access_counters_supported = true;

parent_gpu->access_counters_can_use_physical_addresses = false;

parent_gpu->fault_cancel_va_supported = true;

parent_gpu->scoped_atomics_supported = true;

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2024 NVIDIA Corporation
Copyright (c) 2018-2025 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to

@ -38,12 +38,10 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

parent_gpu->utlb_per_gpc_count = uvm_ampere_get_utlbs_per_gpc(parent_gpu);

parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
(sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}

// A single top level PDE on Ampere covers 128 TB and that's the minimum

@ -86,8 +84,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

parent_gpu->access_counters_supported = true;

parent_gpu->access_counters_can_use_physical_addresses = false;

parent_gpu->fault_cancel_va_supported = true;

parent_gpu->scoped_atomics_supported = true;

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2023 NVIDIA Corporation
Copyright (c) 2024-2025 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to

@ -139,9 +139,9 @@ static void flush_tlb_va_region(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_invalidate_t *ats_invalidate;

if (client_type == UVM_FAULT_CLIENT_TYPE_GPC)
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.replayable.ats_invalidate;
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.replayable.ats_invalidate;
else
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.non_replayable.ats_invalidate;

if (!ats_invalidate->tlb_batch_pending) {
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->tlb_batch);

@ -38,12 +38,10 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

parent_gpu->utlb_per_gpc_count = uvm_blackwell_get_utlbs_per_gpc(parent_gpu);

parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
(sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}

// A single top level PDE on Blackwell covers 64 PB and that's the minimum

@ -85,8 +83,6 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

parent_gpu->access_counters_supported = true;

parent_gpu->access_counters_can_use_physical_addresses = false;

parent_gpu->fault_cancel_va_supported = true;

parent_gpu->scoped_atomics_supported = true;

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to

@ -110,16 +110,22 @@ typedef enum
bool uvm_channel_pool_is_p2p(uvm_channel_pool_t *pool)
{
uvm_channel_manager_t *manager = pool->manager;
uvm_gpu_t *gpu = manager->gpu;
uvm_gpu_id_t id;

if (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU] == pool)
return true;

for_each_gpu_id_in_mask(id, &manager->gpu->peer_info.peer_gpu_mask) {
if (manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(id)] == pool)
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
if (manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(id)] == pool) {
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return true;
}
}

uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);

return false;
}

@ -1974,6 +1980,7 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
{
uvm_channel_pool_t *pool;
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu = channel_manager->gpu;
uvm_gpu_id_t gpu_id;
DECLARE_BITMAP(suspended_pools, UVM_COPY_ENGINE_COUNT_MAX);

@ -1981,7 +1988,9 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
// Use bitmap to track which were suspended.
bitmap_zero(suspended_pools, channel_manager->num_channel_pools);

for_each_gpu_id_in_mask(gpu_id, &channel_manager->gpu->peer_info.peer_gpu_mask) {
uvm_assert_mutex_locked(&g_uvm_global.global_lock);

for_each_gpu_id_in_mask(gpu_id, &gpu->peer_info.peer_gpu_mask) {
pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
if (pool && !test_bit(uvm_channel_pool_index_in_channel_manager(pool), suspended_pools)) {
status = channel_pool_suspend_p2p(pool);

@ -2014,6 +2023,7 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager)
{
uvm_channel_pool_t *pool;
uvm_gpu_t *gpu = channel_manager->gpu;
uvm_gpu_id_t gpu_id;
DECLARE_BITMAP(resumed_pools, UVM_COPY_ENGINE_COUNT_MAX);

@ -2021,7 +2031,9 @@ void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager)
// Use bitmap to track which were suspended.
bitmap_zero(resumed_pools, channel_manager->num_channel_pools);

for_each_gpu_id_in_mask(gpu_id, &channel_manager->gpu->peer_info.peer_gpu_mask) {
uvm_assert_mutex_locked(&g_uvm_global.global_lock);

for_each_gpu_id_in_mask(gpu_id, &gpu->peer_info.peer_gpu_mask) {
pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
if (pool && !test_and_set_bit(uvm_channel_pool_index_in_channel_manager(pool), resumed_pools))
channel_pool_resume_p2p(pool);

@ -3243,9 +3255,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
manager->conf.num_gpfifo_entries = UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT;

if (manager->conf.num_gpfifo_entries != uvm_channel_num_gpfifo_entries) {
pr_info("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
uvm_channel_num_gpfifo_entries,
manager->conf.num_gpfifo_entries);
UVM_INFO_PRINT("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
uvm_channel_num_gpfifo_entries,
manager->conf.num_gpfifo_entries);
}

// 2- Allocation locations

@ -3285,9 +3297,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
pushbuffer_loc_value = uvm_channel_pushbuffer_loc;
if (!is_string_valid_location(pushbuffer_loc_value)) {
pushbuffer_loc_value = UVM_CHANNEL_PUSHBUFFER_LOC_DEFAULT;
pr_info("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
uvm_channel_pushbuffer_loc,
pushbuffer_loc_value);
UVM_INFO_PRINT("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
uvm_channel_pushbuffer_loc,
pushbuffer_loc_value);
}

// Override the default value if requested by the user

@ -3297,8 +3309,8 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
// so force the location to sys for now.
// TODO: Bug 2904133: Remove the following "if" after the bug is fixed.
if (NVCPU_IS_AARCH64) {
pr_info("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
pushbuffer_loc_value);
UVM_INFO_PRINT("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
pushbuffer_loc_value);
manager->conf.pushbuffer_loc = UVM_BUFFER_LOCATION_SYS;
}
else {

@ -3310,8 +3322,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
// Only support the knobs for GPFIFO/GPPut on Volta+
if (!gpu->parent->gpfifo_in_vidmem_supported) {
if (manager->conf.gpput_loc == UVM_BUFFER_LOCATION_SYS) {
pr_info("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s instead\n",
buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
UVM_INFO_PRINT("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s "
"instead\n",
buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
}

manager->conf.gpfifo_loc = UVM_BUFFER_LOCATION_DEFAULT;

@ -3323,17 +3336,17 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
gpfifo_loc_value = uvm_channel_gpfifo_loc;
if (!is_string_valid_location(gpfifo_loc_value)) {
gpfifo_loc_value = UVM_CHANNEL_GPFIFO_LOC_DEFAULT;
pr_info("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
uvm_channel_gpfifo_loc,
gpfifo_loc_value);
UVM_INFO_PRINT("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
uvm_channel_gpfifo_loc,
gpfifo_loc_value);
}

gpput_loc_value = uvm_channel_gpput_loc;
if (!is_string_valid_location(gpput_loc_value)) {
gpput_loc_value = UVM_CHANNEL_GPPUT_LOC_DEFAULT;
pr_info("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
uvm_channel_gpput_loc,
gpput_loc_value);
UVM_INFO_PRINT("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
uvm_channel_gpput_loc,
gpput_loc_value);
}

// On coherent platforms where the GPU does not cache sysmem but the CPU

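The knobs validated above are module parameters; a brief sketch of how such a string knob is typically declared follows. The default value, permissions, and description text are assumptions, not copied from the tree.

/* Illustrative module-parameter plumbing behind a knob such as
 * uvm_channel_gpfifo_loc; default, permissions, and description are assumed. */
#include <linux/module.h>

static char *uvm_channel_gpfifo_loc = "auto";
module_param(uvm_channel_gpfifo_loc, charp, S_IRUGO);
MODULE_PARM_DESC(uvm_channel_gpfifo_loc, "Location of the GPFIFO (sys, vid, auto)");
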
@ -57,6 +57,7 @@ enum {
// NULL.
void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);

// Long prefix - typically for debugging and tests.
#define UVM_PRINT_FUNC_PREFIX(func, prefix, fmt, ...) \
func(prefix "%s:%u %s[pid:%d]" fmt, \
kbasename(__FILE__), \

@ -65,10 +66,15 @@ void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);
|
||||
current->pid, \
|
||||
##__VA_ARGS__)
|
||||
|
||||
// Short prefix - typically for information.
|
||||
#define UVM_PRINT_FUNC_SHORT_PREFIX(func, prefix, fmt, ...) \
|
||||
func(prefix fmt, ##__VA_ARGS__)
|
||||
|
||||
// No prefix - used by kernel panic messages.
|
||||
#define UVM_PRINT_FUNC(func, fmt, ...) \
|
||||
UVM_PRINT_FUNC_PREFIX(func, "", fmt, ##__VA_ARGS__)
|
||||
|
||||
// Check whether UVM_{ERR,DBG,INFO)_PRINT* should be enabled
|
||||
// Check whether UVM_{ERR,DBG)_PRINT* should be enabled.
|
||||
bool uvm_debug_prints_enabled(void);
|
||||
|
||||
// A printing helper like UVM_PRINT_FUNC_PREFIX that only prints if
|
||||
@ -80,10 +86,10 @@ bool uvm_debug_prints_enabled(void);
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define UVM_ASSERT_PRINT(fmt, ...) \
|
||||
#define UVM_ERR_PRINT_ALWAYS(fmt, ...) \
|
||||
UVM_PRINT_FUNC_PREFIX(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
|
||||
|
||||
#define UVM_ASSERT_PRINT_RL(fmt, ...) \
|
||||
#define UVM_ERR_PRINT_ALWAYS_RL(fmt, ...) \
|
||||
UVM_PRINT_FUNC_PREFIX(printk_ratelimited, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
|
||||
|
||||
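The long-prefix versus short-prefix split above is what lets the informational macro drop the file/line/function/pid decoration while the error macros keep it. A small sketch of how the two macro shapes expand, assuming a made-up "demo:" prefix; it mirrors the structure of UVM_PRINT_FUNC_PREFIX and UVM_PRINT_FUNC_SHORT_PREFIX but is not the driver's code.

```c
#include <linux/kernel.h>
#include <linux/printk.h>
#include <linux/string.h>   /* kbasename() */
#include <linux/sched.h>    /* current */

/* Long prefix: adds file, line, function and pid, as the debug/error macros do. */
#define DEMO_PRINT_LONG(func, prefix, fmt, ...)                      \
    func(prefix "%s:%u %s[pid:%d] " fmt,                             \
         kbasename(__FILE__), __LINE__, __func__, current->pid,      \
         ##__VA_ARGS__)

/* Short prefix: just the module tag, as the reworked UVM_INFO_PRINT does. */
#define DEMO_PRINT_SHORT(func, prefix, fmt, ...)                     \
    func(prefix fmt, ##__VA_ARGS__)

static void demo(void)
{
    DEMO_PRINT_LONG(printk, KERN_ERR "demo: ", "bad state %d\n", 42);
    DEMO_PRINT_SHORT(printk, KERN_INFO "demo: ", "driver ready\n");
}
```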
#define UVM_ERR_PRINT(fmt, ...) \

@@ -95,13 +101,16 @@ bool uvm_debug_prints_enabled(void);
#define UVM_DBG_PRINT(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

#define UVM_DBG_PRINT_RL(fmt, ...) \
#define UVM_DBG_PRINT_RL(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX_CHECK(printk_ratelimited, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

// UVM_INFO_PRINT prints in all modes (including in the release mode.) It is
// used for relaying driver-level information, rather than detailed debugging
// information; therefore, it does not add the "pretty long prefix".
#define UVM_INFO_PRINT(fmt, ...) \
    UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
    UVM_PRINT_FUNC_SHORT_PREFIX(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)

#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
    UVM_ERR_PRINT("ERROR: %s : " msg "\n", nvstatusToString(rmStatus), ##__VA_ARGS__)

#define UVM_PANIC() UVM_PRINT_FUNC(panic, "\n")

@@ -134,13 +143,13 @@ void on_uvm_test_fail(void);
// Unlike on_uvm_test_fail it provides 'panic' coverity semantics
void on_uvm_assert(void);

#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
    do { \
        if (unlikely(!(expr))) { \
            UVM_ASSERT_PRINT("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
            dump_stack(); \
            on_uvm_assert(); \
        } \
#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
    do { \
        if (unlikely(!(expr))) { \
            UVM_ERR_PRINT_ALWAYS("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
            dump_stack(); \
            on_uvm_assert(); \
        } \
    } while (0)

// Prevent function calls in expr and the print argument list from being

@@ -151,7 +160,8 @@ void on_uvm_assert(void);
        UVM_NO_PRINT(fmt, ##__VA_ARGS__); \
    } while (0)

// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity builds
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity
// builds.
#if UVM_IS_DEBUG() || defined __COVERITY__
#define UVM_ASSERT_MSG(expr, fmt, ...) _UVM_ASSERT_MSG(expr, #expr, ": " fmt, ##__VA_ARGS__)
#define UVM_ASSERT(expr) _UVM_ASSERT_MSG(expr, #expr, "\n")

@@ -174,16 +184,16 @@ extern bool uvm_release_asserts_set_global_error_for_tests;
// Given these are enabled for release builds, we need to be more cautious than
// in UVM_ASSERT(). Use a ratelimited print and only dump the stack if a module
// param is enabled.
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
    do { \
        if (uvm_release_asserts && unlikely(!(expr))) { \
            UVM_ASSERT_PRINT_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
            if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
                uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
            if (uvm_release_asserts_dump_stack) \
                dump_stack(); \
            on_uvm_assert(); \
        } \
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
    do { \
        if (uvm_release_asserts && unlikely(!(expr))) { \
            UVM_ERR_PRINT_ALWAYS_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
            if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
                uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
            if (uvm_release_asserts_dump_stack) \
                dump_stack(); \
            on_uvm_assert(); \
        } \
    } while (0)

#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
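The hunks above rename UVM_ASSERT_PRINT(_RL) to UVM_ERR_PRINT_ALWAYS(_RL) and keep release asserts gated behind module parameters. A condensed sketch of that release-assert shape follows, with hypothetical knob names (`demo_release_asserts`, `demo_dump_stack`); it follows the same do/while(0) pattern but is not the driver's macro.

```c
#include <linux/kernel.h>
#include <linux/printk.h>   /* printk_ratelimited(), dump_stack() */

/* Hypothetical runtime knobs standing in for the uvm_release_asserts_* params. */
static bool demo_release_asserts = true;
static bool demo_dump_stack;

/* Ratelimited print plus an optional stack dump, only when the knob is enabled. */
#define DEMO_ASSERT_RELEASE(expr)                                             \
    do {                                                                      \
        if (demo_release_asserts && unlikely(!(expr))) {                      \
            printk_ratelimited(KERN_ERR "demo: assert failed, %s not true\n", \
                               #expr);                                        \
            if (demo_dump_stack)                                              \
                dump_stack();                                                 \
        }                                                                     \
    } while (0)

static void demo_check(int refcount)
{
    DEMO_ASSERT_RELEASE(refcount >= 0);
}
```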
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2021-2024 NVIDIA Corporation
    Copyright (c) 2021-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to

@@ -532,7 +532,7 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
{
    NV_STATUS status;
    NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
    UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
    UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;

    // There is no dedicated lock for the CSL context associated with replayable
    // faults. The mutual exclusion required by the RM CSL API is enforced by

@@ -571,7 +571,7 @@ void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu)
{
    NV_STATUS status;
    NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
    UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
    UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;

    // See comment in uvm_conf_computing_fault_decrypt
    UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
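The hunk above notes that there is no dedicated lock for the replayable-fault CSL context: mutual exclusion comes from the ISR service lock, which the code asserts is held. A tiny sketch of that "assert the caller already holds the lock" convention, using a mutex; `demo_fault_decrypt()` and its fields are hypothetical simplifications.

```c
#include <linux/kernel.h>
#include <linux/mutex.h>

/* Hypothetical per-buffer state; the real code keeps the CSL context inside
 * parent_gpu->fault_buffer and relies on the replayable-fault service lock. */
struct demo_fault_buffer {
    struct mutex service_lock;  /* held by the bottom half while servicing */
    void *csl_context;
};

/* Decryption itself takes no lock: the caller must already hold service_lock,
 * which the function documents by asserting it, as the UVM code does. */
static void demo_fault_decrypt(struct demo_fault_buffer *buf)
{
    WARN_ON_ONCE(!mutex_is_locked(&buf->service_lock));

    /* ... decrypt one fault entry using buf->csl_context ... */
}
```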
@@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2024 NVIDIA Corporation
    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to

@@ -93,11 +93,11 @@ typedef struct uvm_service_block_context_struct uvm_service_block_context_t;

typedef struct uvm_ats_fault_invalidate_struct uvm_ats_fault_invalidate_t;

typedef struct uvm_replayable_fault_buffer_info_struct uvm_replayable_fault_buffer_info_t;
typedef struct uvm_non_replayable_fault_buffer_info_struct uvm_non_replayable_fault_buffer_info_t;
typedef struct uvm_replayable_fault_buffer_struct uvm_replayable_fault_buffer_t;
typedef struct uvm_non_replayable_fault_buffer_struct uvm_non_replayable_fault_buffer_t;
typedef struct uvm_access_counter_buffer_entry_struct uvm_access_counter_buffer_entry_t;
typedef struct uvm_access_counter_buffer_struct uvm_access_counter_buffer_t;
typedef struct uvm_access_counter_service_batch_context_struct uvm_access_counter_service_batch_context_t;
typedef struct uvm_pmm_sysmem_mappings_struct uvm_pmm_sysmem_mappings_t;

typedef struct uvm_reverse_map_struct uvm_reverse_map_t;
@@ -194,6 +194,12 @@ NV_STATUS uvm_global_init(void)
        goto error;
    }

    status = uvm_access_counters_init();
    if (status != NV_OK) {
        UVM_ERR_PRINT("uvm_access_counters_init failed: %s\n", nvstatusToString(status));
        goto error;
    }

    // This sets up the ISR (interrupt service routine), by hooking into RM's
    // top-half ISR callback. As soon as this call completes, GPU interrupts
    // will start arriving, so it's important to be prepared to receive

@@ -224,8 +230,8 @@ void uvm_global_exit(void)
    nv_kthread_q_stop(&g_uvm_global.deferred_release_q);

    uvm_unregister_callbacks();
    uvm_access_counters_exit();
    uvm_service_block_context_exit();

    uvm_perf_heuristics_exit();
    uvm_perf_events_exit();
    uvm_migrate_exit();

@@ -287,7 +293,7 @@ static NV_STATUS uvm_suspend(void)
    // * Flush relevant kthread queues (bottom half, etc.)

    // Some locks acquired by this function, such as pm.lock, are released
    // by uvm_resume(). This is contrary to the lock tracking code's
    // by uvm_resume(). This is contrary to the lock tracking code's
    // expectations, so lock tracking is disabled.
    uvm_thread_context_lock_disable_tracking();

@@ -304,7 +310,7 @@ static NV_STATUS uvm_suspend(void)
        gpu = uvm_gpu_get(gpu_id);

        // Since fault buffer state may be lost across sleep cycles, UVM must
        // ensure any outstanding replayable faults are dismissed. The RM
        // ensure any outstanding replayable faults are dismissed. The RM
        // guarantees that all user channels have been preempted before
        // uvm_suspend() is called, which implies that no user channels can be
        // stalled on faults when this point is reached.

@@ -330,7 +336,7 @@ static NV_STATUS uvm_suspend(void)
    }

    // Acquire each VA space's lock in write mode to lock out VMA open and
    // release callbacks. These entry points do not have feasible early exit
    // release callbacks. These entry points do not have feasible early exit
    // options, and so aren't suitable for synchronization with pm.lock.
    uvm_mutex_lock(&g_uvm_global.va_spaces.lock);

@@ -360,7 +366,7 @@ static NV_STATUS uvm_resume(void)
    g_uvm_global.pm.is_suspended = false;

    // Some locks released by this function, such as pm.lock, were acquired
    // by uvm_suspend(). This is contrary to the lock tracking code's
    // by uvm_suspend(). This is contrary to the lock tracking code's
    // expectations, so lock tracking is disabled.
    uvm_thread_context_lock_disable_tracking();

@@ -392,7 +398,7 @@ static NV_STATUS uvm_resume(void)
    uvm_thread_context_lock_enable_tracking();

    // Force completion of any release callbacks successfully queued for
    // deferred completion while suspended. The deferred release
    // deferred completion while suspended. The deferred release
    // queue is not guaranteed to remain empty following this flush since
    // some threads that failed to acquire pm.lock in uvm_release() may
    // not have scheduled their handlers yet.

@@ -424,7 +430,8 @@ void uvm_global_set_fatal_error_impl(NV_STATUS error)
    }
    else {
        UVM_ERR_PRINT("Encountered a global fatal error: %s after a global error has been already set: %s\n",
                      nvstatusToString(error), nvstatusToString(previous_error));
                      nvstatusToString(error),
                      nvstatusToString(previous_error));
    }

    nvUvmInterfaceReportFatalError(error);
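uvm_global_init() gains a uvm_access_counters_init() step and uvm_global_exit() the matching uvm_access_counters_exit(), following the driver's usual "goto the error path on failure, tear down in reverse order on exit" pattern. A minimal sketch of that pattern with hypothetical subsystem names; it is not the driver's init code.

```c
#include <linux/kernel.h>
#include <linux/errno.h>

/* Hypothetical subsystems standing in for the UVM init/exit pairs. */
static int demo_events_init(void);
static void demo_events_exit(void);
static int demo_counters_init(void);
static void demo_counters_exit(void);

static int demo_global_init(void)
{
    int err;

    err = demo_events_init();
    if (err)
        goto error;

    /* New step slots in here; a failure unwinds everything set up so far. */
    err = demo_counters_init();
    if (err)
        goto error_events;

    return 0;

error_events:
    demo_events_exit();
error:
    return err;
}

static void demo_global_exit(void)
{
    /* Tear down in reverse order of initialization. */
    demo_counters_exit();
    demo_events_exit();
}
```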
@@ -538,7 +538,9 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
    NvU64 num_pages_in;
    NvU64 num_pages_out;
    NvU64 mapped_cpu_pages_size;
    NvU32 get, put;
    NvU32 get;
    NvU32 put;
    NvU32 i;
    unsigned int cpu;

    UVM_SEQ_OR_DBG_PRINT(s, "GPU %s\n", uvm_gpu_name(gpu));

@@ -608,19 +610,19 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
            gpu->parent->isr.replayable_faults.stats.cpu_exec_count[cpu]);
    }
    UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_buffer_entries %u\n",
                         gpu->parent->fault_buffer_info.replayable.max_faults);
                         gpu->parent->fault_buffer.replayable.max_faults);
    UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_cached_get %u\n",
                         gpu->parent->fault_buffer_info.replayable.cached_get);
                         gpu->parent->fault_buffer.replayable.cached_get);
    UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_cached_put %u\n",
                         gpu->parent->fault_buffer_info.replayable.cached_put);
                         gpu->parent->fault_buffer.replayable.cached_put);
    UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_get %u\n",
                         gpu->parent->fault_buffer_hal->read_get(gpu->parent));
    UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_put %u\n",
                         gpu->parent->fault_buffer_hal->read_put(gpu->parent));
    UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_fault_batch_size %u\n",
                         gpu->parent->fault_buffer_info.max_batch_size);
                         gpu->parent->fault_buffer.max_batch_size);
    UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_replay_policy %s\n",
                         uvm_perf_fault_replay_policy_string(gpu->parent->fault_buffer_info.replayable.replay_policy));
                         uvm_perf_fault_replay_policy_string(gpu->parent->fault_buffer.replayable.replay_policy));
    UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_num_faults %llu\n",
                         gpu->parent->stats.num_replayable_faults);
}

@@ -634,32 +636,35 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
            gpu->parent->isr.non_replayable_faults.stats.cpu_exec_count[cpu]);
    }
    UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults_buffer_entries %u\n",
                         gpu->parent->fault_buffer_info.non_replayable.max_faults);
                         gpu->parent->fault_buffer.non_replayable.max_faults);
    UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults_num_faults %llu\n",
                         gpu->parent->stats.num_non_replayable_faults);
}

if (gpu->parent->isr.access_counters.handling_ref_count > 0) {
    UVM_SEQ_OR_DBG_PRINT(s, "access_counters_bh %llu\n",
                         gpu->parent->isr.access_counters.stats.bottom_half_count);
    UVM_SEQ_OR_DBG_PRINT(s, "access_counters_bh/cpu\n");
    for_each_cpu(cpu, &gpu->parent->isr.access_counters.stats.cpus_used_mask) {
        UVM_SEQ_OR_DBG_PRINT(s, " cpu%02u %llu\n",
                             cpu,
                             gpu->parent->isr.access_counters.stats.cpu_exec_count[cpu]);
for (i = 0; i < gpu_info->accessCntrBufferCount; i++) {
    if (gpu->parent->access_counters_supported && gpu->parent->isr.access_counters[i].handling_ref_count > 0) {
        UVM_SEQ_OR_DBG_PRINT(s, "access_counters_notif_buffer_index %u\n", i);
        UVM_SEQ_OR_DBG_PRINT(s, " access_counters_bh %llu\n",
                             gpu->parent->isr.access_counters[i].stats.bottom_half_count);
        UVM_SEQ_OR_DBG_PRINT(s, " access_counters_bh/cpu\n");
        for_each_cpu(cpu, &gpu->parent->isr.access_counters[i].stats.cpus_used_mask) {
            UVM_SEQ_OR_DBG_PRINT(s, " cpu%02u %llu\n",
                                 cpu,
                                 gpu->parent->isr.access_counters[i].stats.cpu_exec_count[cpu]);
        }
        UVM_SEQ_OR_DBG_PRINT(s, " access_counters_buffer_entries %u\n",
                             gpu->parent->access_counter_buffer[i].max_notifications);
        UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_get %u\n",
                             gpu->parent->access_counter_buffer[i].cached_get);
        UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_put %u\n",
                             gpu->parent->access_counter_buffer[i].cached_put);

        get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferGet);
        put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferPut);

        UVM_SEQ_OR_DBG_PRINT(s, " access_counters_get %u\n", get);
        UVM_SEQ_OR_DBG_PRINT(s, " access_counters_put %u\n", put);
    }
    UVM_SEQ_OR_DBG_PRINT(s, "access_counters_buffer_entries %u\n",
                         gpu->parent->access_counter_buffer_info.max_notifications);
    UVM_SEQ_OR_DBG_PRINT(s, "access_counters_cached_get %u\n",
                         gpu->parent->access_counter_buffer_info.cached_get);
    UVM_SEQ_OR_DBG_PRINT(s, "access_counters_cached_put %u\n",
                         gpu->parent->access_counter_buffer_info.cached_put);

    get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer_info.rm_info.pAccessCntrBufferGet);
    put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer_info.rm_info.pAccessCntrBufferPut);

    UVM_SEQ_OR_DBG_PRINT(s, "access_counters_get %u\n", get);
    UVM_SEQ_OR_DBG_PRINT(s, "access_counters_put %u\n", put);
}

num_pages_out = atomic64_read(&gpu->parent->stats.num_pages_out);
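The hunk above replaces the single access_counter_buffer_info with an array of notification buffers indexed 0..accessCntrBufferCount-1, and the procfs dump now loops over them. A sketch of that per-buffer iteration over a hypothetical stats array; the type and field names are illustrative, not the driver's.

```c
#include <linux/kernel.h>
#include <linux/seq_file.h>
#include <linux/atomic.h>

/* Hypothetical per-notification-buffer stats, mirroring the new array layout. */
struct demo_counter_buffer {
    unsigned int max_notifications;
    atomic64_t bottom_half_count;
};

struct demo_parent_gpu {
    unsigned int buffer_count;            /* e.g. rm_info.accessCntrBufferCount */
    struct demo_counter_buffer *buffers;  /* array of buffer_count entries */
};

/* Print one block of stats per notification buffer, indenting the per-buffer
 * lines under its index, as the reworked gpu_info_print_common() does. */
static void demo_print_counters(struct demo_parent_gpu *parent, struct seq_file *s)
{
    unsigned int i;

    for (i = 0; i < parent->buffer_count; i++) {
        struct demo_counter_buffer *buf = &parent->buffers[i];

        seq_printf(s, "access_counters_notif_buffer_index %u\n", i);
        seq_printf(s, "  buffer_entries %u\n", buf->max_notifications);
        seq_printf(s, "  bottom_halves  %llu\n",
                   (unsigned long long)atomic64_read(&buf->bottom_half_count));
    }
}
```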
@ -694,18 +699,18 @@ gpu_fault_stats_print_common(uvm_parent_gpu_t *parent_gpu, struct seq_file *s)
|
||||
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults %llu\n", parent_gpu->stats.num_replayable_faults);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "duplicates %llu\n",
|
||||
parent_gpu->fault_buffer_info.replayable.stats.num_duplicate_faults);
|
||||
parent_gpu->fault_buffer.replayable.stats.num_duplicate_faults);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "faults_by_access_type:\n");
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " prefetch %llu\n",
|
||||
parent_gpu->fault_buffer_info.replayable.stats.num_prefetch_faults);
|
||||
parent_gpu->fault_buffer.replayable.stats.num_prefetch_faults);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " read %llu\n",
|
||||
parent_gpu->fault_buffer_info.replayable.stats.num_read_faults);
|
||||
parent_gpu->fault_buffer.replayable.stats.num_read_faults);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " write %llu\n",
|
||||
parent_gpu->fault_buffer_info.replayable.stats.num_write_faults);
|
||||
parent_gpu->fault_buffer.replayable.stats.num_write_faults);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " atomic %llu\n",
|
||||
parent_gpu->fault_buffer_info.replayable.stats.num_atomic_faults);
|
||||
num_pages_out = atomic64_read(&parent_gpu->fault_buffer_info.replayable.stats.num_pages_out);
|
||||
num_pages_in = atomic64_read(&parent_gpu->fault_buffer_info.replayable.stats.num_pages_in);
|
||||
parent_gpu->fault_buffer.replayable.stats.num_atomic_faults);
|
||||
num_pages_out = atomic64_read(&parent_gpu->fault_buffer.replayable.stats.num_pages_out);
|
||||
num_pages_in = atomic64_read(&parent_gpu->fault_buffer.replayable.stats.num_pages_in);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
|
||||
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
|
||||
@ -713,25 +718,25 @@ gpu_fault_stats_print_common(uvm_parent_gpu_t *parent_gpu, struct seq_file *s)
|
||||
(num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "replays:\n");
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " start %llu\n",
|
||||
parent_gpu->fault_buffer_info.replayable.stats.num_replays);
|
||||
parent_gpu->fault_buffer.replayable.stats.num_replays);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " start_ack_all %llu\n",
|
||||
parent_gpu->fault_buffer_info.replayable.stats.num_replays_ack_all);
|
||||
parent_gpu->fault_buffer.replayable.stats.num_replays_ack_all);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults %llu\n", parent_gpu->stats.num_non_replayable_faults);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "faults_by_access_type:\n");
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " read %llu\n",
|
||||
parent_gpu->fault_buffer_info.non_replayable.stats.num_read_faults);
|
||||
parent_gpu->fault_buffer.non_replayable.stats.num_read_faults);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " write %llu\n",
|
||||
parent_gpu->fault_buffer_info.non_replayable.stats.num_write_faults);
|
||||
parent_gpu->fault_buffer.non_replayable.stats.num_write_faults);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " atomic %llu\n",
|
||||
parent_gpu->fault_buffer_info.non_replayable.stats.num_atomic_faults);
|
||||
parent_gpu->fault_buffer.non_replayable.stats.num_atomic_faults);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "faults_by_addressing:\n");
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " virtual %llu\n",
|
||||
parent_gpu->stats.num_non_replayable_faults -
|
||||
parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults);
|
||||
parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " physical %llu\n",
|
||||
parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults);
|
||||
num_pages_out = atomic64_read(&parent_gpu->fault_buffer_info.non_replayable.stats.num_pages_out);
|
||||
num_pages_in = atomic64_read(&parent_gpu->fault_buffer_info.non_replayable.stats.num_pages_in);
|
||||
parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults);
|
||||
num_pages_out = atomic64_read(&parent_gpu->fault_buffer.non_replayable.stats.num_pages_out);
|
||||
num_pages_in = atomic64_read(&parent_gpu->fault_buffer.non_replayable.stats.num_pages_in);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
|
||||
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
|
||||
@ -743,16 +748,25 @@ static void gpu_access_counters_print_common(uvm_parent_gpu_t *parent_gpu, struc
|
||||
{
|
||||
NvU64 num_pages_in;
|
||||
NvU64 num_pages_out;
|
||||
NvU32 i;
|
||||
|
||||
UVM_ASSERT(uvm_procfs_is_debug_enabled());
|
||||
|
||||
num_pages_out = atomic64_read(&parent_gpu->access_counter_buffer_info.stats.num_pages_out);
|
||||
num_pages_in = atomic64_read(&parent_gpu->access_counter_buffer_info.stats.num_pages_in);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
|
||||
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_out %llu (%llu MB)\n", num_pages_out,
|
||||
(num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
|
||||
// procfs_files are created before gpu_init_isr, we need to check if the
|
||||
// access_counter_buffer is allocated.
|
||||
if (parent_gpu->access_counter_buffer) {
|
||||
for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++) {
|
||||
uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[i];
|
||||
|
||||
num_pages_out = atomic64_read(&access_counters->stats.num_pages_out);
|
||||
num_pages_in = atomic64_read(&access_counters->stats.num_pages_in);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, "migrations - buffer index %u:\n", i);
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
|
||||
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
|
||||
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_out %llu (%llu MB)\n", num_pages_out,
|
||||
(num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// This function converts an index of 2D array of size [N x N] into an index
|
||||
@ -892,7 +906,7 @@ static int nv_procfs_read_gpu_info(struct seq_file *s, void *v)
|
||||
uvm_gpu_t *gpu = (uvm_gpu_t *)s->private;
|
||||
|
||||
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
|
||||
return -EAGAIN;
|
||||
return -EAGAIN;
|
||||
|
||||
gpu_info_print_common(gpu, s);
|
||||
|
||||
@ -911,7 +925,7 @@ static int nv_procfs_read_gpu_fault_stats(struct seq_file *s, void *v)
|
||||
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)s->private;
|
||||
|
||||
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
|
||||
return -EAGAIN;
|
||||
return -EAGAIN;
|
||||
|
||||
gpu_fault_stats_print_common(parent_gpu, s);
|
||||
|
||||
@ -930,7 +944,7 @@ static int nv_procfs_read_gpu_access_counters(struct seq_file *s, void *v)
|
||||
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)s->private;
|
||||
|
||||
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
|
||||
return -EAGAIN;
|
||||
return -EAGAIN;
|
||||
|
||||
gpu_access_counters_print_common(parent_gpu, s);
|
||||
|
||||
@ -1182,7 +1196,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
uvm_uuid_copy(&parent_gpu->uuid, gpu_uuid);
|
||||
uvm_sema_init(&parent_gpu->isr.replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
|
||||
uvm_sema_init(&parent_gpu->isr.non_replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
|
||||
uvm_sema_init(&parent_gpu->isr.access_counters.service_lock, 1, UVM_LOCK_ORDER_ISR);
|
||||
uvm_mutex_init(&parent_gpu->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
|
||||
uvm_spin_lock_irqsave_init(&parent_gpu->isr.interrupts_lock, UVM_LOCK_ORDER_LEAF);
|
||||
uvm_spin_lock_init(&parent_gpu->instance_ptr_table_lock, UVM_LOCK_ORDER_LEAF);
|
||||
uvm_rb_tree_init(&parent_gpu->instance_ptr_table);
|
||||
@ -1221,7 +1235,7 @@ static uvm_gpu_t *alloc_gpu(uvm_parent_gpu_t *parent_gpu, uvm_gpu_id_t gpu_id)
|
||||
|
||||
// Initialize enough of the gpu struct for remove_gpu to be called
|
||||
gpu->magic = UVM_GPU_MAGIC_VALUE;
|
||||
uvm_spin_lock_init(&gpu->peer_info.peer_gpus_lock, UVM_LOCK_ORDER_LEAF);
|
||||
uvm_spin_lock_init(&gpu->peer_info.peer_gpu_lock, UVM_LOCK_ORDER_LEAF);
|
||||
|
||||
sub_processor_index = uvm_id_sub_processor_index(gpu_id);
|
||||
parent_gpu->gpus[sub_processor_index] = gpu;
|
||||
@ -1545,12 +1559,6 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
|
||||
return status;
|
||||
}
|
||||
|
||||
status = uvm_pmm_sysmem_mappings_init(gpu, &gpu->pmm_reverse_sysmem_mappings);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("CPU PMM MMIO initialization failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
|
||||
return status;
|
||||
}
|
||||
|
||||
uvm_pmm_gpu_device_p2p_init(gpu);
|
||||
|
||||
status = init_semaphore_pools(gpu);
|
||||
@ -1616,7 +1624,7 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
|
||||
// trackers.
|
||||
if (sync_replay_tracker) {
|
||||
uvm_parent_gpu_replayable_faults_isr_lock(parent_gpu);
|
||||
status = uvm_tracker_wait(&parent_gpu->fault_buffer_info.replayable.replay_tracker);
|
||||
status = uvm_tracker_wait(&parent_gpu->fault_buffer.replayable.replay_tracker);
|
||||
uvm_parent_gpu_replayable_faults_isr_unlock(parent_gpu);
|
||||
|
||||
if (status != NV_OK)
|
||||
@ -1627,7 +1635,7 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
|
||||
// VA block trackers, too.
|
||||
if (sync_clear_faulted_tracker) {
|
||||
uvm_parent_gpu_non_replayable_faults_isr_lock(parent_gpu);
|
||||
status = uvm_tracker_wait(&parent_gpu->fault_buffer_info.non_replayable.clear_faulted_tracker);
|
||||
status = uvm_tracker_wait(&parent_gpu->fault_buffer.non_replayable.clear_faulted_tracker);
|
||||
uvm_parent_gpu_non_replayable_faults_isr_unlock(parent_gpu);
|
||||
|
||||
if (status != NV_OK)
|
||||
@ -1635,13 +1643,20 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
|
||||
}
|
||||
|
||||
// Sync the access counter clear tracker too.
|
||||
if (parent_gpu->access_counters_supported) {
|
||||
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
|
||||
status = uvm_tracker_wait(&parent_gpu->access_counter_buffer_info.clear_tracker);
|
||||
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
|
||||
if (parent_gpu->access_counters_supported && parent_gpu->access_counter_buffer) {
|
||||
NvU32 notif_buf_index;
|
||||
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
|
||||
uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[notif_buf_index];
|
||||
|
||||
if (status != NV_OK)
|
||||
UVM_ASSERT(status == uvm_global_get_status());
|
||||
if (access_counters->rm_info.accessCntrBufferHandle != 0) {
|
||||
uvm_access_counters_isr_lock(access_counters);
|
||||
status = uvm_tracker_wait(&access_counters->clear_tracker);
|
||||
uvm_access_counters_isr_unlock(access_counters);
|
||||
|
||||
if (status != NV_OK)
|
||||
UVM_ASSERT(status == uvm_global_get_status());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -1680,15 +1695,11 @@ static void deinit_parent_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
UVM_ASSERT(uvm_rb_tree_empty(&parent_gpu->instance_ptr_table));
|
||||
UVM_ASSERT(uvm_rb_tree_empty(&parent_gpu->tsg_table));
|
||||
|
||||
// Access counters should have been disabled when the GPU is no longer
|
||||
// registered in any VA space.
|
||||
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
|
||||
deinit_parent_procfs_files(parent_gpu);
|
||||
|
||||
// Return ownership to RM
|
||||
uvm_parent_gpu_deinit_isr(parent_gpu);
|
||||
|
||||
deinit_parent_procfs_files(parent_gpu);
|
||||
|
||||
uvm_pmm_devmem_deinit(parent_gpu);
|
||||
uvm_ats_remove_gpu(parent_gpu);
|
||||
|
||||
@ -1746,8 +1757,6 @@ static void deinit_gpu(uvm_gpu_t *gpu)
|
||||
|
||||
uvm_pmm_gpu_device_p2p_deinit(gpu);
|
||||
|
||||
uvm_pmm_sysmem_mappings_deinit(&gpu->pmm_reverse_sysmem_mappings);
|
||||
|
||||
uvm_pmm_gpu_deinit(&gpu->pmm);
|
||||
|
||||
if (gpu->rm_address_space != 0)
|
||||
@ -1794,14 +1803,14 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
|
||||
switch (fault_entry->fault_access_type)
|
||||
{
|
||||
case UVM_FAULT_ACCESS_TYPE_READ:
|
||||
++parent_gpu->fault_buffer_info.non_replayable.stats.num_read_faults;
|
||||
++parent_gpu->fault_buffer.non_replayable.stats.num_read_faults;
|
||||
break;
|
||||
case UVM_FAULT_ACCESS_TYPE_WRITE:
|
||||
++parent_gpu->fault_buffer_info.non_replayable.stats.num_write_faults;
|
||||
++parent_gpu->fault_buffer.non_replayable.stats.num_write_faults;
|
||||
break;
|
||||
case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
|
||||
case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
|
||||
++parent_gpu->fault_buffer_info.non_replayable.stats.num_atomic_faults;
|
||||
++parent_gpu->fault_buffer.non_replayable.stats.num_atomic_faults;
|
||||
break;
|
||||
default:
|
||||
UVM_ASSERT_MSG(false, "Invalid access type for non-replayable faults\n");
|
||||
@ -1809,7 +1818,7 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
|
||||
}
|
||||
|
||||
if (!fault_entry->is_virtual)
|
||||
++parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults;
|
||||
++parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults;
|
||||
|
||||
++parent_gpu->stats.num_non_replayable_faults;
|
||||
|
||||
@ -1821,23 +1830,23 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
|
||||
switch (fault_entry->fault_access_type)
|
||||
{
|
||||
case UVM_FAULT_ACCESS_TYPE_PREFETCH:
|
||||
++parent_gpu->fault_buffer_info.replayable.stats.num_prefetch_faults;
|
||||
++parent_gpu->fault_buffer.replayable.stats.num_prefetch_faults;
|
||||
break;
|
||||
case UVM_FAULT_ACCESS_TYPE_READ:
|
||||
++parent_gpu->fault_buffer_info.replayable.stats.num_read_faults;
|
||||
++parent_gpu->fault_buffer.replayable.stats.num_read_faults;
|
||||
break;
|
||||
case UVM_FAULT_ACCESS_TYPE_WRITE:
|
||||
++parent_gpu->fault_buffer_info.replayable.stats.num_write_faults;
|
||||
++parent_gpu->fault_buffer.replayable.stats.num_write_faults;
|
||||
break;
|
||||
case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
|
||||
case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
|
||||
++parent_gpu->fault_buffer_info.replayable.stats.num_atomic_faults;
|
||||
++parent_gpu->fault_buffer.replayable.stats.num_atomic_faults;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
if (is_duplicate || fault_entry->filtered)
|
||||
++parent_gpu->fault_buffer_info.replayable.stats.num_duplicate_faults;
|
||||
++parent_gpu->fault_buffer.replayable.stats.num_duplicate_faults;
|
||||
|
||||
++parent_gpu->stats.num_replayable_faults;
|
||||
}
|
||||
@ -1901,21 +1910,29 @@ static void update_stats_migration_cb(uvm_perf_event_t event_id, uvm_perf_event_
|
||||
|
||||
if (gpu_dst) {
|
||||
atomic64_add(pages, &gpu_dst->parent->stats.num_pages_in);
|
||||
if (is_replayable_fault)
|
||||
atomic64_add(pages, &gpu_dst->parent->fault_buffer_info.replayable.stats.num_pages_in);
|
||||
else if (is_non_replayable_fault)
|
||||
atomic64_add(pages, &gpu_dst->parent->fault_buffer_info.non_replayable.stats.num_pages_in);
|
||||
else if (is_access_counter)
|
||||
atomic64_add(pages, &gpu_dst->parent->access_counter_buffer_info.stats.num_pages_in);
|
||||
if (is_replayable_fault) {
|
||||
atomic64_add(pages, &gpu_dst->parent->fault_buffer.replayable.stats.num_pages_in);
|
||||
}
|
||||
else if (is_non_replayable_fault) {
|
||||
atomic64_add(pages, &gpu_dst->parent->fault_buffer.non_replayable.stats.num_pages_in);
|
||||
}
|
||||
else if (is_access_counter) {
|
||||
NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
|
||||
atomic64_add(pages, &gpu_dst->parent->access_counter_buffer[index].stats.num_pages_in);
|
||||
}
|
||||
}
|
||||
if (gpu_src) {
|
||||
atomic64_add(pages, &gpu_src->parent->stats.num_pages_out);
|
||||
if (is_replayable_fault)
|
||||
atomic64_add(pages, &gpu_src->parent->fault_buffer_info.replayable.stats.num_pages_out);
|
||||
else if (is_non_replayable_fault)
|
||||
atomic64_add(pages, &gpu_src->parent->fault_buffer_info.non_replayable.stats.num_pages_out);
|
||||
else if (is_access_counter)
|
||||
atomic64_add(pages, &gpu_src->parent->access_counter_buffer_info.stats.num_pages_out);
|
||||
if (is_replayable_fault) {
|
||||
atomic64_add(pages, &gpu_src->parent->fault_buffer.replayable.stats.num_pages_out);
|
||||
}
|
||||
else if (is_non_replayable_fault) {
|
||||
atomic64_add(pages, &gpu_src->parent->fault_buffer.non_replayable.stats.num_pages_out);
|
||||
}
|
||||
else if (is_access_counter) {
|
||||
NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
|
||||
atomic64_add(pages, &gpu_src->parent->access_counter_buffer[index].stats.num_pages_out);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
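In update_stats_migration_cb() above, access-counter-driven migrations now charge their page counts to the stats of the specific notification buffer that triggered them, using the buffer index recorded in the make_resident context. A small sketch of that indexed accumulation; the structures and field names are placeholders, not the driver's.

```c
#include <linux/kernel.h>
#include <linux/atomic.h>

struct demo_buffer_stats {
    atomic64_t num_pages_in;
    atomic64_t num_pages_out;
};

struct demo_parent_gpu {
    struct demo_buffer_stats *counter_stats;  /* one entry per notification buffer */
};

/* Charge a migration of 'pages' pages to the buffer that caused it. */
static void demo_account_migration(struct demo_parent_gpu *dst,
                                   struct demo_parent_gpu *src,
                                   unsigned int buffer_index,
                                   long pages)
{
    if (dst)
        atomic64_add(pages, &dst->counter_stats[buffer_index].num_pages_in);
    if (src)
        atomic64_add(pages, &src->counter_stats[buffer_index].num_pages_out);
}
```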
@ -1929,8 +1946,9 @@ static void uvm_param_conf(void)
|
||||
}
|
||||
else {
|
||||
if (strcmp(uvm_peer_copy, UVM_PARAM_PEER_COPY_PHYSICAL) != 0) {
|
||||
pr_info("Invalid value for uvm_peer_copy = %s, using %s instead.\n",
|
||||
uvm_peer_copy, UVM_PARAM_PEER_COPY_PHYSICAL);
|
||||
UVM_INFO_PRINT("Invalid value for uvm_peer_copy = %s, using %s instead.\n",
|
||||
uvm_peer_copy,
|
||||
UVM_PARAM_PEER_COPY_PHYSICAL);
|
||||
}
|
||||
|
||||
g_uvm_global.peer_copy_mode = UVM_GPU_PEER_COPY_MODE_PHYSICAL;
|
||||
@ -2397,6 +2415,7 @@ static NV_STATUS peers_init(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *pe
|
||||
{
|
||||
NV_STATUS status;
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
UVM_ASSERT(peer_caps->ref_count == 0);
|
||||
|
||||
status = parent_peers_retain(gpu0->parent, gpu1->parent);
|
||||
@ -2419,25 +2438,13 @@ static NV_STATUS peers_init(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *pe
|
||||
UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
|
||||
UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
|
||||
|
||||
// In the case of NVLINK peers, this initialization will happen during
|
||||
// add_gpu. As soon as the peer info table is assigned below, the access
|
||||
// counter bottom half could start operating on the GPU being newly
|
||||
// added and inspecting the peer caps, so all of the appropriate
|
||||
// initialization must happen before this point.
|
||||
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
|
||||
|
||||
uvm_spin_lock(&gpu0->peer_info.peer_gpu_lock);
|
||||
uvm_processor_mask_set(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
|
||||
UVM_ASSERT(gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] == NULL);
|
||||
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = gpu1;
|
||||
|
||||
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
|
||||
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
|
||||
uvm_spin_unlock(&gpu0->peer_info.peer_gpu_lock);
|
||||
|
||||
uvm_spin_lock(&gpu1->peer_info.peer_gpu_lock);
|
||||
uvm_processor_mask_set(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
|
||||
UVM_ASSERT(gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] == NULL);
|
||||
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = gpu0;
|
||||
|
||||
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
|
||||
uvm_spin_unlock(&gpu1->peer_info.peer_gpu_lock);
|
||||
|
||||
return NV_OK;
|
||||
|
||||
@ -2465,18 +2472,18 @@ static NV_STATUS peers_retain(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
|
||||
|
||||
static void peers_destroy(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *peer_caps)
|
||||
{
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
|
||||
uvm_mmu_destroy_peer_identity_mappings(gpu0, gpu1);
|
||||
uvm_mmu_destroy_peer_identity_mappings(gpu1, gpu0);
|
||||
|
||||
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
|
||||
uvm_spin_lock(&gpu0->peer_info.peer_gpu_lock);
|
||||
uvm_processor_mask_clear(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
|
||||
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = NULL;
|
||||
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
|
||||
uvm_spin_unlock(&gpu0->peer_info.peer_gpu_lock);
|
||||
|
||||
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
|
||||
uvm_spin_lock(&gpu1->peer_info.peer_gpu_lock);
|
||||
uvm_processor_mask_clear(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
|
||||
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = NULL;
|
||||
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
|
||||
uvm_spin_unlock(&gpu1->peer_info.peer_gpu_lock);
|
||||
|
||||
// Flush the access counter buffer to avoid getting stale notifications for
|
||||
// accesses to GPUs to which peer access is being disabled. This is also
|
||||
@ -2690,7 +2697,7 @@ static void remove_gpu(uvm_gpu_t *gpu)
|
||||
uvm_processor_mask_clear(&g_uvm_global.retained_gpus, gpu->id);
|
||||
|
||||
// If the parent is being freed, stop scheduling new bottom halves and
|
||||
// update relevant software state. Else flush any pending bottom halves
|
||||
// update relevant software state. Else flush any pending bottom halves
|
||||
// before continuing.
|
||||
if (free_parent)
|
||||
uvm_parent_gpu_disable_isr(parent_gpu);
|
||||
@ -2713,6 +2720,7 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
const UvmGpuInfo *gpu_info,
|
||||
const UvmGpuPlatformInfo *gpu_platform_info,
|
||||
uvm_parent_gpu_t *parent_gpu,
|
||||
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
|
||||
uvm_gpu_t **gpu_out)
|
||||
{
|
||||
NV_STATUS status;
|
||||
@ -2725,6 +2733,9 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
status = alloc_parent_gpu(gpu_uuid, uvm_parent_gpu_id_from_gpu_id(gpu_id), &parent_gpu);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
if (uvm_enable_builtin_tests)
|
||||
parent_gpu->test = *parent_gpu_error;
|
||||
}
|
||||
|
||||
gpu = alloc_gpu(parent_gpu, gpu_id);
|
||||
@ -2794,7 +2805,7 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
|
||||
// Clear the interrupt bit and force the re-evaluation of the interrupt
|
||||
// condition to ensure that we don't miss any pending interrupt
|
||||
parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
|
||||
parent_gpu->fault_buffer_info.replayable.cached_get);
|
||||
parent_gpu->fault_buffer.replayable.cached_get);
|
||||
}
|
||||
|
||||
// Access counters are enabled on demand
|
||||
@ -2837,6 +2848,7 @@ error:
|
||||
// the partition.
|
||||
static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
|
||||
const uvm_rm_user_object_t *user_rm_device,
|
||||
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
|
||||
uvm_gpu_t **gpu_out)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@ -2888,7 +2900,7 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
|
||||
if (status != NV_OK)
|
||||
goto error_unregister;
|
||||
|
||||
status = add_gpu(gpu_uuid, gpu_id, gpu_info, &gpu_platform_info, parent_gpu, &gpu);
|
||||
status = add_gpu(gpu_uuid, gpu_id, gpu_info, &gpu_platform_info, parent_gpu, parent_gpu_error, &gpu);
|
||||
if (status != NV_OK)
|
||||
goto error_unregister;
|
||||
}
|
||||
@ -2913,11 +2925,12 @@ error_free_gpu_info:
|
||||
|
||||
NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
|
||||
const uvm_rm_user_object_t *user_rm_device,
|
||||
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
|
||||
uvm_gpu_t **gpu_out)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
status = gpu_retain_by_uuid_locked(gpu_uuid, user_rm_device, gpu_out);
|
||||
status = gpu_retain_by_uuid_locked(gpu_uuid, user_rm_device, parent_gpu_error, gpu_out);
|
||||
uvm_mutex_unlock(&g_uvm_global.global_lock);
|
||||
return status;
|
||||
}
|
||||
@ -3072,60 +3085,63 @@ bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
|
||||
return (address.address >= gpu->parent->peer_va_base &&
|
||||
address.address < (gpu->parent->peer_va_base + gpu->parent->peer_va_size));
|
||||
}
|
||||
} else {
|
||||
}
|
||||
else {
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
phys_addr_t phys_addr;
|
||||
|
||||
if (uvm_aperture_is_peer(address.aperture)) {
|
||||
bool is_peer = true;
|
||||
uvm_parent_processor_mask_t parent_gpus;
|
||||
uvm_parent_gpu_t *parent_peer_gpu;
|
||||
|
||||
// Local EGM accesses don't go over NVLINK
|
||||
if (gpu->parent->egm.enabled && address.aperture == gpu->parent->egm.local_peer_id)
|
||||
return false;
|
||||
|
||||
// EGM uses peer IDs but they are different from VIDMEM peer IDs.
|
||||
// Check if the address aperture is an EGM aperture.
|
||||
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
|
||||
uvm_parent_gpus_from_processor_mask(&parent_gpus, &gpu->peer_info.peer_gpu_mask);
|
||||
uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);
|
||||
for_each_parent_gpu_in_mask(parent_peer_gpu, &parent_gpus) {
|
||||
uvm_aperture_t egm_peer_aperture;
|
||||
|
||||
if (!parent_peer_gpu->egm.enabled)
|
||||
continue;
|
||||
|
||||
egm_peer_aperture = uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu);
|
||||
|
||||
if (address.aperture == egm_peer_aperture) {
|
||||
is_peer = false;
|
||||
break;
|
||||
}
|
||||
// EGM uses peer IDs but they are different from VIDMEM peer
|
||||
// IDs.
|
||||
// Check if the address aperture is an EGM aperture.
|
||||
// We should not use remote EGM addresses internally until
|
||||
// NVLINK STO handling is updated to handle EGM.
|
||||
// TODO: Bug: 5068688 [UVM] Detect STO and prevent data leaks
|
||||
// when accessing EGM memory
|
||||
// TODO: Bug: 5007527 [UVM] Extend STO recovery to EGM enabled
|
||||
// systems
|
||||
UVM_ASSERT(address.aperture != uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu));
|
||||
}
|
||||
|
||||
uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);
|
||||
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
|
||||
|
||||
return true;
|
||||
} else if (address.aperture == UVM_APERTURE_SYS) {
|
||||
bool is_peer = false;
|
||||
|
||||
// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
|
||||
// either inline, or via ATS.
|
||||
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);
|
||||
|
||||
// Exposed coherent vidmem can be accessed via sys aperture
|
||||
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
|
||||
for_each_parent_gpu(parent_gpu) {
|
||||
if (parent_gpu == gpu->parent)
|
||||
continue;
|
||||
|
||||
if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
|
||||
phys_addr <= parent_gpu->system_bus.memory_window_end) {
|
||||
is_peer = true;
|
||||
}
|
||||
}
|
||||
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
|
||||
return is_peer;
|
||||
}
|
||||
|
||||
if (address.aperture != UVM_APERTURE_SYS)
|
||||
return false;
|
||||
|
||||
// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
|
||||
// either inline, or via ATS.
|
||||
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);
|
||||
|
||||
// Exposed coherent vidmem can be accessed via sys aperture
|
||||
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
|
||||
for_each_parent_gpu(parent_gpu) {
|
||||
if (parent_gpu == gpu->parent)
|
||||
continue;
|
||||
|
||||
if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
|
||||
phys_addr <= parent_gpu->system_bus.memory_window_end) {
|
||||
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
|
||||
UVM_ASSERT(address.aperture == UVM_APERTURE_VID);
|
||||
}
|
||||
|
||||
return false;
|
||||
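uvm_gpu_address_is_peer() above treats a SYS-aperture address as a peer access when its physical address, after DMA-to-physical translation, falls inside another GPU's exposed coherent memory window. A sketch of that window test; the structures are simplified placeholders and the DMA translation step is omitted.

```c
#include <linux/kernel.h>
#include <linux/types.h>

struct demo_parent_gpu {
    phys_addr_t memory_window_start;
    phys_addr_t memory_window_end;   /* inclusive, as in the driver's check */
};

/* Return true if phys_addr lands in any other GPU's exposed vidmem window. */
static bool demo_phys_addr_is_peer(phys_addr_t phys_addr,
                                   struct demo_parent_gpu *gpus,
                                   size_t gpu_count,
                                   size_t self_index)
{
    size_t i;

    for (i = 0; i < gpu_count; i++) {
        if (i == self_index)
            continue;

        if (phys_addr >= gpus[i].memory_window_start &&
            phys_addr <= gpus[i].memory_window_end)
            return true;
    }

    return false;
}
```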
@ -3141,49 +3157,6 @@ uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu)
|
||||
return UVM_APERTURE_DEFAULT;
|
||||
}
|
||||
|
||||
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr)
|
||||
{
|
||||
uvm_processor_id_t id = UVM_ID_INVALID;
|
||||
|
||||
// TODO: Bug 1899622: On P9 systems with multiple CPU sockets, SYS aperture
|
||||
// is also reported for accesses to remote GPUs connected to a different CPU
|
||||
// NUMA domain. We will need to determine the actual processor id using the
|
||||
// reported physical address.
|
||||
if (addr.aperture == UVM_APERTURE_SYS)
|
||||
return UVM_ID_CPU;
|
||||
else if (addr.aperture == UVM_APERTURE_VID)
|
||||
return gpu->id;
|
||||
|
||||
uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);
|
||||
|
||||
for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
|
||||
uvm_gpu_t *other_gpu = gpu->peer_info.peer_gpus[uvm_id_gpu_index(id)];
|
||||
|
||||
UVM_ASSERT(other_gpu);
|
||||
UVM_ASSERT(!uvm_gpus_are_smc_peers(gpu, other_gpu));
|
||||
|
||||
if (uvm_parent_gpus_are_nvswitch_connected(gpu->parent, other_gpu->parent)) {
|
||||
// NVSWITCH connected systems use an extended physical address to
|
||||
// map to peers. Find the physical memory 'slot' containing the
|
||||
// given physical address to find the peer gpu that owns the
|
||||
// physical address
|
||||
NvU64 fabric_window_end = other_gpu->parent->nvswitch_info.fabric_memory_window_start +
|
||||
other_gpu->mem_info.max_allocatable_address;
|
||||
|
||||
if (other_gpu->parent->nvswitch_info.fabric_memory_window_start <= addr.address &&
|
||||
fabric_window_end >= addr.address)
|
||||
break;
|
||||
}
|
||||
else if (uvm_gpu_peer_aperture(gpu, other_gpu) == addr.aperture) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);
|
||||
|
||||
return id;
|
||||
}
|
||||
|
||||
static NvU64 instance_ptr_to_key(uvm_gpu_phys_address_t instance_ptr)
|
||||
{
|
||||
NvU64 key;
|
||||
@ -3570,20 +3543,19 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare
|
||||
|
||||
*out_va_space = NULL;
|
||||
*out_gpu = NULL;
|
||||
UVM_ASSERT(entry->address.is_virtual);
|
||||
|
||||
uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);
|
||||
|
||||
user_channel = instance_ptr_to_user_channel(parent_gpu, entry->virtual_info.instance_ptr);
|
||||
user_channel = instance_ptr_to_user_channel(parent_gpu, entry->instance_ptr);
|
||||
if (!user_channel) {
|
||||
status = NV_ERR_INVALID_CHANNEL;
|
||||
goto exit_unlock;
|
||||
}
|
||||
|
||||
if (!user_channel->in_subctx) {
|
||||
UVM_ASSERT_MSG(entry->virtual_info.ve_id == 0,
|
||||
UVM_ASSERT_MSG(entry->ve_id == 0,
|
||||
"Access counter packet contains SubCTX %u for channel not in subctx\n",
|
||||
entry->virtual_info.ve_id);
|
||||
entry->ve_id);
|
||||
|
||||
gpu_va_space = user_channel->gpu_va_space;
|
||||
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
|
||||
@ -3591,7 +3563,7 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare
|
||||
*out_gpu = gpu_va_space->gpu;
|
||||
}
|
||||
else {
|
||||
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->virtual_info.ve_id);
|
||||
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->ve_id);
|
||||
if (gpu_va_space) {
|
||||
*out_va_space = gpu_va_space->va_space;
|
||||
*out_gpu = gpu_va_space->gpu;
|
||||
|
@ -189,6 +189,9 @@ struct uvm_service_block_context_struct
|
||||
|
||||
// Prefetch temporary state.
|
||||
uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;
|
||||
|
||||
// Access counters notification buffer index.
|
||||
NvU32 access_counters_buffer_index;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
@ -197,8 +200,8 @@ typedef struct
|
||||
{
|
||||
struct
|
||||
{
|
||||
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
// of a SAM VMA. Used for batching ATS faults in a vma.
|
||||
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned
|
||||
// region of a SAM VMA. Used for batching ATS faults in a vma.
|
||||
uvm_page_mask_t prefetch_only_fault_mask;
|
||||
|
||||
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region
|
||||
@ -350,7 +353,7 @@ typedef struct
|
||||
// entries from the GPU buffer
|
||||
NvU32 max_batch_size;
|
||||
|
||||
struct uvm_replayable_fault_buffer_info_struct
|
||||
struct uvm_replayable_fault_buffer_struct
|
||||
{
|
||||
// Maximum number of faults entries that can be stored in the buffer
|
||||
NvU32 max_faults;
|
||||
@ -414,7 +417,7 @@ typedef struct
|
||||
uvm_ats_fault_invalidate_t ats_invalidate;
|
||||
} replayable;
|
||||
|
||||
struct uvm_non_replayable_fault_buffer_info_struct
|
||||
struct uvm_non_replayable_fault_buffer_struct
|
||||
{
|
||||
// Maximum number of faults entries that can be stored in the buffer
|
||||
NvU32 max_faults;
|
||||
@ -468,7 +471,7 @@ typedef struct
|
||||
|
||||
// Timestamp when prefetch faults where disabled last time
|
||||
NvU64 disable_prefetch_faults_timestamp;
|
||||
} uvm_fault_buffer_info_t;
|
||||
} uvm_fault_buffer_t;
|
||||
|
||||
struct uvm_access_counter_service_batch_context_struct
|
||||
{
|
||||
@ -476,30 +479,14 @@ struct uvm_access_counter_service_batch_context_struct
|
||||
|
||||
NvU32 num_cached_notifications;
|
||||
|
||||
struct
|
||||
{
|
||||
uvm_access_counter_buffer_entry_t **notifications;
|
||||
uvm_access_counter_buffer_entry_t **notifications;
|
||||
|
||||
NvU32 num_notifications;
|
||||
NvU32 num_notifications;
|
||||
|
||||
// Boolean used to avoid sorting the fault batch by instance_ptr if we
|
||||
// determine at fetch time that all the access counter notifications in
|
||||
// the batch report the same instance_ptr
|
||||
bool is_single_instance_ptr;
|
||||
} virt;
|
||||
|
||||
struct
|
||||
{
|
||||
uvm_access_counter_buffer_entry_t **notifications;
|
||||
uvm_reverse_map_t *translations;
|
||||
|
||||
NvU32 num_notifications;
|
||||
|
||||
// Boolean used to avoid sorting the fault batch by aperture if we
|
||||
// determine at fetch time that all the access counter notifications in
|
||||
// the batch report the same aperture
|
||||
bool is_single_aperture;
|
||||
} phys;
|
||||
// Boolean used to avoid sorting the fault batch by instance_ptr if we
|
||||
// determine at fetch time that all the access counter notifications in
|
||||
// the batch report the same instance_ptr
|
||||
bool is_single_instance_ptr;
|
||||
|
||||
// Helper page mask to compute the accessed pages within a VA block
|
||||
uvm_page_mask_t accessed_pages;
|
||||
@ -514,31 +501,15 @@ struct uvm_access_counter_service_batch_context_struct
|
||||
NvU32 batch_id;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
struct uvm_access_counter_buffer_struct
|
||||
{
|
||||
// Values used to configure access counters in RM
|
||||
struct
|
||||
{
|
||||
UVM_ACCESS_COUNTER_GRANULARITY granularity;
|
||||
UVM_ACCESS_COUNTER_USE_LIMIT use_limit;
|
||||
} rm;
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
|
||||
// The following values are precomputed by the access counter notification
|
||||
// handling code. See comments for UVM_MAX_TRANSLATION_SIZE in
|
||||
// uvm_gpu_access_counters.c for more details.
|
||||
NvU64 translation_size;
|
||||
|
||||
NvU64 translations_per_counter;
|
||||
|
||||
NvU64 sub_granularity_region_size;
|
||||
|
||||
NvU64 sub_granularity_regions_per_translation;
|
||||
} uvm_gpu_access_counter_type_config_t;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
UvmGpuAccessCntrInfo rm_info;
|
||||
|
||||
// Access counters may have multiple notification buffers.
|
||||
NvU32 index;
|
||||
|
||||
NvU32 max_notifications;
|
||||
|
||||
NvU32 max_batch_size;
|
||||
@ -560,10 +531,22 @@ typedef struct
|
||||
// may override it to try different configuration values.
|
||||
struct
|
||||
{
|
||||
uvm_gpu_access_counter_type_config_t mimc;
|
||||
uvm_gpu_access_counter_type_config_t momc;
|
||||
// Values used to configure access counters in RM
|
||||
struct
|
||||
{
|
||||
UVM_ACCESS_COUNTER_GRANULARITY granularity;
|
||||
} rm;
|
||||
|
||||
NvU32 threshold;
|
||||
// The following values are precomputed by the access counter
|
||||
// notification handling code. See comments for UVM_MAX_TRANSLATION_SIZE
|
||||
// in uvm_gpu_access_counters.c for more details.
|
||||
NvU64 translation_size;
|
||||
|
||||
NvU64 sub_granularity_region_size;
|
||||
|
||||
NvU64 sub_granularity_regions_per_translation;
|
||||
|
||||
NvU32 threshold;
|
||||
} current_config;
|
||||
|
||||
// Access counter statistics
|
||||
@ -575,7 +558,7 @@ typedef struct
|
||||
} stats;
|
||||
|
||||
// Ignoring access counters means that notifications are left in the HW
|
||||
// buffer without being serviced. Requests to ignore access counters
|
||||
// buffer without being serviced. Requests to ignore access counters
|
||||
// are counted since the suspend path inhibits access counter interrupts,
|
||||
// and the resume path needs to know whether to reenable them.
|
||||
NvU32 notifications_ignored_count;
|
||||
@ -583,13 +566,25 @@ typedef struct
|
||||
// Context structure used to service a GPU access counter batch
|
||||
uvm_access_counter_service_batch_context_t batch_service_context;
|
||||
|
||||
// VA space that reconfigured the access counters configuration, if any.
|
||||
// Used in builtin tests only, to avoid reconfigurations from different
|
||||
// processes
|
||||
//
|
||||
// Locking: both readers and writers must hold the access counters ISR lock
|
||||
uvm_va_space_t *reconfiguration_owner;
|
||||
} uvm_access_counter_buffer_info_t;
|
||||
struct
|
||||
{
|
||||
// VA space that reconfigured the access counters configuration, if any.
|
||||
// Used in builtin tests only, to avoid reconfigurations from different
|
||||
// processes.
|
||||
//
|
||||
// Locking: both readers and writers must hold the access counters ISR
|
||||
// lock.
|
||||
uvm_va_space_t *reconfiguration_owner;
|
||||
|
||||
// The service access counters loop breaks after processing the first
|
||||
// batch. It will be retriggered if there are pending notifications, but
|
||||
// it releases the ISR service lock to check certain races that would be
|
||||
// difficult to hit otherwise.
|
||||
bool one_iteration_per_batch;
|
||||
NvU32 sleep_per_iteration_us;
|
||||
} test;
|
||||
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
@ -745,15 +740,11 @@ struct uvm_gpu_struct
|
||||
|
||||
struct
|
||||
{
|
||||
// Mask of peer_gpus set
|
||||
// Mask of peer_gpus set.
|
||||
uvm_processor_mask_t peer_gpu_mask;
|
||||
|
||||
// lazily-populated array of peer GPUs, indexed by the peer's GPU index
|
||||
uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];
|
||||
|
||||
// Leaf spinlock used to synchronize access to the peer_gpus table so
|
||||
// that it can be safely accessed from the access counters bottom half
|
||||
uvm_spinlock_t peer_gpus_lock;
|
||||
// Leaf spinlock used to synchronize access to peer_gpu_mask.
|
||||
uvm_spinlock_t peer_gpu_lock;
|
||||
} peer_info;
|
||||
|
||||
// Maximum number of subcontexts supported
|
||||
@ -828,14 +819,6 @@ struct uvm_gpu_struct
|
||||
uvm_bit_locks_t bitlocks;
|
||||
} sysmem_mappings;
|
||||
|
||||
// Reverse lookup table used to query the user mapping associated with a
|
||||
// sysmem (DMA) physical address.
|
||||
//
|
||||
// The system memory mapping information referred to by this field is
|
||||
// different from that of sysmem_mappings, because it relates to user
|
||||
// mappings (instead of kernel), and it is used in most configurations.
|
||||
uvm_pmm_sysmem_mappings_t pmm_reverse_sysmem_mappings;
|
||||
|
||||
struct
|
||||
{
|
||||
uvm_conf_computing_dma_buffer_pool_t dma_buffer_pool;
|
||||
@ -957,6 +940,16 @@ struct uvm_gpu_struct
|
||||
uvm_mutex_t device_p2p_lock;
|
||||
};
|
||||
|
||||
typedef struct
|
||||
{
|
||||
bool access_counters_alloc_buffer;
|
||||
bool access_counters_alloc_block_context;
|
||||
bool isr_access_counters_alloc;
|
||||
bool isr_access_counters_alloc_stats_cpu;
|
||||
bool access_counters_batch_context_notifications;
|
||||
bool access_counters_batch_context_notification_cache;
|
||||
} uvm_test_parent_gpu_inject_error_t;
|
||||
|
||||
// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
|
||||
// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
|
||||
// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
|
||||
@ -965,8 +958,8 @@ struct uvm_gpu_struct
|
||||
struct uvm_parent_gpu_struct
|
||||
{
|
||||
// Reference count for how many places are holding on to a parent GPU
|
||||
// (internal to the UVM driver). This includes any GPUs we know about, not
|
||||
// just GPUs that are registered with a VA space. Most GPUs end up being
|
||||
// (internal to the UVM driver). This includes any GPUs we know about, not
|
||||
// just GPUs that are registered with a VA space. Most GPUs end up being
|
||||
// registered, but there are brief periods when they are not registered,
|
||||
// such as during interrupt handling, and in add_gpu() or remove_gpu().
|
||||
nv_kref_t gpu_kref;
|
||||
@ -976,7 +969,7 @@ struct uvm_parent_gpu_struct
|
||||
|
||||
uvm_gpu_t *gpus[UVM_PARENT_ID_MAX_SUB_PROCESSORS];
|
||||
|
||||
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
|
||||
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
|
||||
// usable child GPU in bottom-halves.
|
||||
DECLARE_BITMAP(valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);
|
||||
|
||||
@ -1079,11 +1072,6 @@ struct uvm_parent_gpu_struct
|
||||
|
||||
bool access_counters_supported;

// If this is true, physical address based access counter notifications are
// potentially generated. If false, only virtual address based notifications
// are generated (assuming access_counters_supported is true too).
bool access_counters_can_use_physical_addresses;

bool fault_cancel_va_supported;

// True if the GPU has hardware support for scoped atomics
@@ -1205,17 +1193,17 @@ struct uvm_parent_gpu_struct
// Interrupt handling state and locks
uvm_isr_info_t isr;

// Fault buffer info. This is only valid if supports_replayable_faults is
// set to true.
uvm_fault_buffer_info_t fault_buffer_info;
// This is only valid if supports_replayable_faults is set to true.
uvm_fault_buffer_t fault_buffer;

// PMM lazy free processing queue.
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
nv_kthread_q_t lazy_free_q;

// Access counter buffer info. This is only valid if
// supports_access_counters is set to true.
uvm_access_counter_buffer_info_t access_counter_buffer_info;
// This is only valid if supports_access_counters is set to true. This array
// has rm_info.accessCntrBufferCount entries.
uvm_access_counter_buffer_t *access_counter_buffer;
uvm_mutex_t access_counters_enablement_lock;

// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
NvU32 utlb_per_gpc_count;
@@ -1348,6 +1336,8 @@ struct uvm_parent_gpu_struct
|
||||
// GPUs.
|
||||
NvU64 base_address;
|
||||
} egm;
|
||||
|
||||
uvm_test_parent_gpu_inject_error_t test;
|
||||
};
|
||||
|
||||
static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
|
||||
@ -1395,10 +1385,10 @@ typedef struct
|
||||
// detected to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was
|
||||
// called.
|
||||
//
|
||||
// - The peer_gpus_lock is held on one of the GPUs. In this case, the other
|
||||
// GPU must be read from the original GPU's peer_gpus table. The fields
|
||||
// will not change while the lock is held, but they may no longer be valid
|
||||
// because the other GPU might be in teardown.
|
||||
// - The peer_gpu_lock is held on one of the GPUs. In this case, the other
|
||||
// GPU must be referred from the original GPU's peer_gpu_mask reference.
|
||||
// The fields will not change while the lock is held, but they may no
|
||||
// longer be valid because the other GPU might be in teardown.
|
||||
|
||||
// This field is used to determine when this struct has been initialized
|
||||
// (ref_count != 0). NVLink peers are initialized at GPU registration time.
|
||||
@ -1510,7 +1500,7 @@ uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
|
||||
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
|
||||
|
||||
// Like uvm_parent_gpu_get_by_uuid(), but this variant does not assertion-check
|
||||
// that the caller is holding the global_lock. This is a narrower-purpose
|
||||
// that the caller is holding the global_lock. This is a narrower-purpose
|
||||
// function, and is only intended for use by the top-half ISR, or other very
|
||||
// limited cases.
|
||||
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid);
|
||||
@ -1521,6 +1511,7 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_u
|
||||
// LOCKING: Takes and releases the global lock for the caller.
|
||||
NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
|
||||
const uvm_rm_user_object_t *user_rm_device,
|
||||
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
|
||||
uvm_gpu_t **gpu_out);
|
||||
|
||||
// Retain a gpu which is known to already be retained. Does NOT require the
|
||||
@ -1578,10 +1569,6 @@ uvm_gpu_address_t uvm_gpu_peer_copy_address(uvm_gpu_t *owning_gpu, NvU64 address
|
||||
// The two GPUs must have different parents.
|
||||
NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1);
|
||||
|
||||
// Get the processor id accessible by the given GPU for the given physical
|
||||
// address.
|
||||
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);
|
||||
|
||||
// Get the EGM aperture for local_gpu to use to map memory resident on the CPU
|
||||
// NUMA node that remote_gpu is attached to.
|
||||
// Note that local_gpu can be equal to remote_gpu when memory is resident in
|
||||
@ -1655,7 +1642,8 @@ static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_
|
||||
|
||||
// Check whether the provided address points to peer memory:
|
||||
// * Physical address using one of the PEER apertures
|
||||
// * Physical address using SYS aperture that belongs to an exposed coherent memory
|
||||
// * Physical address using SYS aperture that belongs to an exposed coherent
|
||||
// memory
|
||||
// * Virtual address in the region [peer_va_base, peer_va_base + peer_va_size)
|
||||
bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address);
|
||||
|
||||
@ -1684,8 +1672,8 @@ NV_STATUS uvm_gpu_check_nvlink_error(uvm_gpu_t *gpu);
|
||||
// Check for NVLINK errors without calling into RM
|
||||
//
|
||||
// Calling into RM is problematic in many places, this check is always safe to
|
||||
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK error
|
||||
// and it's required to call uvm_gpu_check_nvlink_error() to be sure.
|
||||
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK
|
||||
// error and it's required to call uvm_gpu_check_nvlink_error() to be sure.
|
||||
NV_STATUS uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_t *gpu);
|
||||
|
||||
// Map size bytes of contiguous sysmem on the GPU for physical access
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2024 NVIDIA Corporation
|
||||
Copyright (c) 2017-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -31,7 +31,6 @@
|
||||
#include "uvm_va_block.h"
|
||||
#include "uvm_va_range.h"
|
||||
#include "uvm_va_space_mm.h"
|
||||
#include "uvm_pmm_sysmem.h"
|
||||
#include "uvm_perf_module.h"
|
||||
#include "uvm_ats.h"
|
||||
#include "uvm_ats_faults.h"
|
||||
@ -45,7 +44,6 @@
|
||||
|
||||
#define UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR 0x1
|
||||
#define UVM_ACCESS_COUNTER_ACTION_TARGETED_CLEAR 0x2
|
||||
#define UVM_ACCESS_COUNTER_PHYS_ON_MANAGED 0x4
|
||||
|
||||
// Each page in a tracked physical range may belong to a different VA Block. We
|
||||
// preallocate an array of reverse map translations. However, access counter
|
||||
@ -56,27 +54,18 @@
|
||||
#define UVM_MAX_TRANSLATION_SIZE (2 * 1024 * 1024ULL)
|
||||
#define UVM_SUB_GRANULARITY_REGIONS 32
|
||||
|
||||
static unsigned g_uvm_access_counter_threshold;
|
||||
|
||||
// Per-VA space access counters information
|
||||
typedef struct
|
||||
{
|
||||
// VA space-specific configuration settings. These override the global
|
||||
// settings
|
||||
struct
|
||||
{
|
||||
atomic_t enable_mimc_migrations;
|
||||
|
||||
atomic_t enable_momc_migrations;
|
||||
} params;
|
||||
atomic_t enable_migrations;
|
||||
|
||||
uvm_va_space_t *va_space;
|
||||
} va_space_access_counters_info_t;
|
||||
|
||||
// Enable/disable access-counter-guided migrations
|
||||
//
|
||||
static int uvm_perf_access_counter_mimc_migration_enable = -1;
|
||||
static int uvm_perf_access_counter_momc_migration_enable = -1;
|
||||
static int uvm_perf_access_counter_migration_enable = -1;
|
||||
|
||||
// Number of entries that are fetched from the GPU access counter notification
|
||||
// buffer and serviced in batch
|
||||
@ -86,13 +75,9 @@ static unsigned uvm_perf_access_counter_batch_count = UVM_PERF_ACCESS_COUNTER_BA
|
||||
static unsigned uvm_perf_access_counter_threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT;
|
||||
|
||||
// Module parameters for the tunables
|
||||
module_param(uvm_perf_access_counter_mimc_migration_enable, int, S_IRUGO);
|
||||
MODULE_PARM_DESC(uvm_perf_access_counter_mimc_migration_enable,
|
||||
"Whether MIMC access counters will trigger migrations."
|
||||
"Valid values: <= -1 (default policy), 0 (off), >= 1 (on)");
|
||||
module_param(uvm_perf_access_counter_momc_migration_enable, int, S_IRUGO);
|
||||
MODULE_PARM_DESC(uvm_perf_access_counter_momc_migration_enable,
|
||||
"Whether MOMC access counters will trigger migrations."
|
||||
module_param(uvm_perf_access_counter_migration_enable, int, S_IRUGO);
|
||||
MODULE_PARM_DESC(uvm_perf_access_counter_migration_enable,
|
||||
"Whether access counters will trigger migrations."
|
||||
"Valid values: <= -1 (default policy), 0 (off), >= 1 (on)");
|
||||
module_param(uvm_perf_access_counter_batch_count, uint, S_IRUGO);
|
||||
module_param(uvm_perf_access_counter_threshold, uint, S_IRUGO);
|
||||
@ -100,7 +85,7 @@ MODULE_PARM_DESC(uvm_perf_access_counter_threshold,
|
||||
"Number of remote accesses on a region required to trigger a notification."
|
||||
"Valid values: [1, 65535]");
|
||||
|
||||
static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
|
||||
static void access_counter_buffer_flush_locked(uvm_access_counter_buffer_t *access_counters,
|
||||
uvm_gpu_buffer_flush_mode_t flush_mode);
|
||||
|
||||
static uvm_perf_module_event_callback_desc_t g_callbacks_access_counters[] = {};
|
||||
@ -108,6 +93,15 @@ static uvm_perf_module_event_callback_desc_t g_callbacks_access_counters[] = {};
|
||||
// Performance heuristics module for access_counters
|
||||
static uvm_perf_module_t g_module_access_counters;
|
||||
|
||||
// Locking: default config values are defined in uvm_access_counters_init() at
// module init time, before any GPU is registered. After initialization, it is
// only consumed/read.
static UvmGpuAccessCntrConfig g_default_config =
{
.granularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
.threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT,
};
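uvm_access_counters_init() itself is not shown in this diff. Based on the comment above and the per-GPU threshold clamping removed further down in uvm_parent_gpu_init_access_counters(), module init presumably validates the threshold module parameter once and stores the result here. A sketch of that idea, with an illustrative function name rather than the real signature:

// Sketch only: models what the module-init path presumably does. The function
// name and signature are assumptions, not taken from this change.
static void example_access_counters_init(void)
{
    unsigned threshold = uvm_perf_access_counter_threshold;

    // Clamp the module parameter into the supported range once, before any
    // GPU can read g_default_config.
    if (threshold < UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN)
        threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN;
    else if (threshold > UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX)
        threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX;

    g_default_config.threshold = threshold;
}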
|
||||
|
||||
// Get the access counters tracking struct for the given VA space if it exists.
|
||||
// This information is allocated at VA space creation and freed during VA space
|
||||
// destruction.
|
||||
@ -126,31 +120,35 @@ static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va
|
||||
return va_space_access_counters;
|
||||
}
|
||||
|
||||
static uvm_access_counter_buffer_t *parent_gpu_access_counter_buffer_get(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 notif_buf_index)
|
||||
{
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
|
||||
UVM_ASSERT(parent_gpu->access_counter_buffer);
|
||||
|
||||
return &parent_gpu->access_counter_buffer[notif_buf_index];
|
||||
}
|
||||
|
||||
static uvm_access_counter_buffer_t *parent_gpu_access_counter_buffer_get_or_null(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 notif_buf_index)
|
||||
{
|
||||
if (parent_gpu->access_counter_buffer)
|
||||
return parent_gpu_access_counter_buffer_get(parent_gpu, notif_buf_index);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Whether access counter migrations are enabled or not. The policy is as
|
||||
// follows:
|
||||
// - MIMC migrations are disabled by default on all non-ATS systems.
|
||||
// - MOMC migrations are disabled by default on all systems
|
||||
// - Migrations are disabled by default on all non-ATS systems.
|
||||
// - Users can override this policy by specifying on/off
|
||||
static bool is_migration_enabled(uvm_access_counter_type_t type)
|
||||
static bool is_migration_enabled(void)
|
||||
{
|
||||
int val;
|
||||
if (type == UVM_ACCESS_COUNTER_TYPE_MIMC) {
|
||||
val = uvm_perf_access_counter_mimc_migration_enable;
|
||||
}
|
||||
else {
|
||||
val = uvm_perf_access_counter_momc_migration_enable;
|
||||
|
||||
UVM_ASSERT(type == UVM_ACCESS_COUNTER_TYPE_MOMC);
|
||||
}
|
||||
|
||||
if (val == 0)
|
||||
if (uvm_perf_access_counter_migration_enable == 0)
|
||||
return false;
|
||||
else if (val > 0)
|
||||
else if (uvm_perf_access_counter_migration_enable > 0)
|
||||
return true;
|
||||
|
||||
if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
|
||||
return false;
|
||||
|
||||
if (UVM_ATS_SUPPORTED())
|
||||
return g_uvm_global.ats.supported;
|
||||
|
||||
@ -173,11 +171,9 @@ static va_space_access_counters_info_t *va_space_access_counters_info_create(uvm
|
||||
va_space_access_counters,
|
||||
UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS);
|
||||
|
||||
// Snap the access_counters parameters so that they can be tuned per VA space
|
||||
atomic_set(&va_space_access_counters->params.enable_mimc_migrations,
|
||||
is_migration_enabled(UVM_ACCESS_COUNTER_TYPE_MIMC));
|
||||
atomic_set(&va_space_access_counters->params.enable_momc_migrations,
|
||||
is_migration_enabled(UVM_ACCESS_COUNTER_TYPE_MOMC));
|
||||
// Snap the access_counters parameters so that they can be tuned per VA
|
||||
// space
|
||||
atomic_set(&va_space_access_counters->enable_migrations, is_migration_enabled());
|
||||
va_space_access_counters->va_space = va_space;
|
||||
}
|
||||
|
||||
@ -220,22 +216,23 @@ static NV_STATUS config_granularity_to_bytes(UVM_ACCESS_COUNTER_GRANULARITY gran
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// Clear the access counter notifications and add it to the per-GPU clear
|
||||
// tracker.
|
||||
// Clear the access counter notifications and add it to the per-GPU
|
||||
// per-notification-buffer clear tracker.
|
||||
static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,
|
||||
uvm_access_counter_buffer_t *access_counters,
|
||||
uvm_access_counter_buffer_entry_t **notification_start,
|
||||
NvU32 num_notifications)
|
||||
{
|
||||
NvU32 i;
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
|
||||
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch");
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s\n",
|
||||
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n",
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(gpu));
|
||||
uvm_gpu_name(gpu),
|
||||
access_counters->index);
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -249,21 +246,22 @@ static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,
|
||||
return uvm_tracker_add_push_safe(&access_counters->clear_tracker, &push);
|
||||
}
|
||||
|
||||
// Clear all access counters and add the operation to the per-GPU clear tracker
|
||||
static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu)
|
||||
// Clear all access counters and add the operation to the per-GPU
|
||||
// per-notification-buffer clear tracker
|
||||
static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu, uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
|
||||
status = uvm_push_begin(gpu->channel_manager,
|
||||
UVM_CHANNEL_TYPE_MEMOPS,
|
||||
&push,
|
||||
"Clear access counter: all");
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s\n",
|
||||
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n",
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(gpu));
|
||||
uvm_gpu_name(gpu),
|
||||
access_counters->index);
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -272,34 +270,27 @@ static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu)
|
||||
uvm_push_end(&push);
|
||||
|
||||
uvm_tracker_remove_completed(&access_counters->clear_tracker);
|
||||
|
||||
return uvm_tracker_add_push_safe(&access_counters->clear_tracker, &push);
|
||||
}
|
||||
|
||||
static const uvm_gpu_access_counter_type_config_t *
get_config_for_type(const uvm_access_counter_buffer_info_t *access_counters, uvm_access_counter_type_t counter_type)
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
return counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC? &(access_counters)->current_config.mimc :
&(access_counters)->current_config.momc;
}
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(parent_gpu, index);

bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->access_counters_supported);

// Fast path 1: we left some notifications unserviced in the buffer in the last pass
if (parent_gpu->access_counter_buffer_info.cached_get != parent_gpu->access_counter_buffer_info.cached_put)
// Fast path 1: we left some notifications unserviced in the buffer in the
// last pass
if (access_counters->cached_get != access_counters->cached_put)
return true;

// Fast path 2: read the valid bit of the notification buffer entry pointed by the cached get pointer
if (!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu,
parent_gpu->access_counter_buffer_info.cached_get)) {
// Slow path: read the put pointer from the GPU register via BAR0 over PCIe
parent_gpu->access_counter_buffer_info.cached_put =
UVM_GPU_READ_ONCE(*parent_gpu->access_counter_buffer_info.rm_info.pAccessCntrBufferPut);
// Fast path 2: read the valid bit of the notification buffer entry pointed
// by the cached get pointer
if (!parent_gpu->access_counter_buffer_hal->entry_is_valid(access_counters, access_counters->cached_get)) {
// Slow path: read the put pointer from the GPU register via BAR0 over
// PCIe
access_counters->cached_put = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferPut);

// No interrupt pending
if (parent_gpu->access_counter_buffer_info.cached_get == parent_gpu->access_counter_buffer_info.cached_put)
if (access_counters->cached_get == access_counters->cached_put)
return false;
}
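The header hunk earlier notes that the service loop "breaks after processing the first batch" and "will be retriggered if there are pending notifications". An illustrative caller-side sketch of how that retrigger could use the pending check; only uvm_parent_gpu_access_counters_pending() comes from this change, the loop structure and example_service_batch() are hypothetical:

// Hypothetical helper standing in for servicing one notification batch.
static void example_service_batch(uvm_parent_gpu_t *parent_gpu, NvU32 index);

// Illustrative only: service one batch, then re-arm while notifications are
// still pending in the per-index buffer.
static void example_service_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
    do {
        example_service_batch(parent_gpu, index);
    } while (uvm_parent_gpu_access_counters_pending(parent_gpu, index));
}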
|
||||
|
||||
@ -308,73 +299,65 @@ bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
// Initialize the configuration and pre-compute some required values for the
|
||||
// given access counter type
|
||||
static void init_access_counter_types_config(const UvmGpuAccessCntrConfig *config,
|
||||
uvm_access_counter_type_t counter_type,
|
||||
uvm_gpu_access_counter_type_config_t *counter_type_config)
|
||||
static void init_access_counter_config(const UvmGpuAccessCntrConfig *config,
|
||||
uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU64 tracking_size = 0;
|
||||
UVM_ACCESS_COUNTER_GRANULARITY granularity = counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC? config->mimcGranularity:
|
||||
config->momcGranularity;
|
||||
UVM_ACCESS_COUNTER_USE_LIMIT use_limit = counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC? config->mimcUseLimit:
|
||||
config->momcUseLimit;
|
||||
|
||||
counter_type_config->rm.granularity = granularity;
|
||||
counter_type_config->rm.use_limit = use_limit;
|
||||
access_counters->current_config.rm.granularity = config->granularity;
|
||||
|
||||
// Precompute the maximum size to use in reverse map translations and the
|
||||
// number of translations that are required per access counter notification.
|
||||
status = config_granularity_to_bytes(granularity, &tracking_size);
|
||||
status = config_granularity_to_bytes(config->granularity, &tracking_size);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
// sub_granularity field is only filled for tracking granularities larger
|
||||
// than 64K
|
||||
if (granularity == UVM_ACCESS_COUNTER_GRANULARITY_64K)
|
||||
counter_type_config->sub_granularity_region_size = tracking_size;
|
||||
if (config->granularity == UVM_ACCESS_COUNTER_GRANULARITY_64K)
|
||||
access_counters->current_config.sub_granularity_region_size = tracking_size;
|
||||
else
|
||||
counter_type_config->sub_granularity_region_size = tracking_size / UVM_SUB_GRANULARITY_REGIONS;
|
||||
access_counters->current_config.sub_granularity_region_size = tracking_size / UVM_SUB_GRANULARITY_REGIONS;
|
||||
|
||||
counter_type_config->translation_size = min(UVM_MAX_TRANSLATION_SIZE, tracking_size);
|
||||
counter_type_config->translations_per_counter =
|
||||
max(counter_type_config->translation_size / UVM_MAX_TRANSLATION_SIZE, 1ULL);
|
||||
counter_type_config->sub_granularity_regions_per_translation =
|
||||
max(counter_type_config->translation_size / counter_type_config->sub_granularity_region_size, 1ULL);
|
||||
UVM_ASSERT(counter_type_config->sub_granularity_regions_per_translation <= UVM_SUB_GRANULARITY_REGIONS);
|
||||
access_counters->current_config.translation_size = min(UVM_MAX_TRANSLATION_SIZE, tracking_size);
|
||||
access_counters->current_config.sub_granularity_regions_per_translation =
|
||||
max(access_counters->current_config.translation_size / access_counters->current_config.sub_granularity_region_size,
|
||||
1ULL);
|
||||
UVM_ASSERT(access_counters->current_config.sub_granularity_regions_per_translation <= UVM_SUB_GRANULARITY_REGIONS);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
static NvU32 access_counters_max_batch_size(const uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
NvU32 max_batch_size = 0;
|
||||
|
||||
// Check provided module parameter value
|
||||
max_batch_size = max(uvm_perf_access_counter_batch_count, (NvU32)UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN);
|
||||
max_batch_size = min(max_batch_size, access_counters->max_notifications);
|
||||
|
||||
return max_batch_size;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
|
||||
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
|
||||
NvU64 granularity_bytes = 0;
|
||||
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(parent_gpu, notif_buf_index);
|
||||
uvm_access_counter_service_batch_context_t *batch_context;
|
||||
|
||||
if (uvm_perf_access_counter_threshold < UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN) {
|
||||
g_uvm_access_counter_threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN;
|
||||
pr_info("Value %u too small for uvm_perf_access_counter_threshold, using %u instead\n",
|
||||
uvm_perf_access_counter_threshold,
|
||||
g_uvm_access_counter_threshold);
|
||||
}
|
||||
else if (uvm_perf_access_counter_threshold > UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX) {
|
||||
g_uvm_access_counter_threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX;
|
||||
pr_info("Value %u too large for uvm_perf_access_counter_threshold, using %u instead\n",
|
||||
uvm_perf_access_counter_threshold,
|
||||
g_uvm_access_counter_threshold);
|
||||
}
|
||||
else {
|
||||
g_uvm_access_counter_threshold = uvm_perf_access_counter_threshold;
|
||||
}
|
||||
access_counters->parent_gpu = parent_gpu;
|
||||
access_counters->index = notif_buf_index;
|
||||
batch_context = &access_counters->batch_service_context;
|
||||
|
||||
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
|
||||
UVM_ASSERT(parent_gpu->access_counter_buffer_hal != NULL);
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceInitAccessCntrInfo(parent_gpu->rm_device,
|
||||
&access_counters->rm_info,
|
||||
0));
|
||||
notif_buf_index));
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to init notify buffer info from RM: %s, GPU %s\n",
|
||||
UVM_ERR_PRINT("Failed to init notify buffer from RM: %s, GPU %s, notif buf index %u\n",
|
||||
nvstatusToString(status),
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
notif_buf_index);
|
||||
|
||||
// nvUvmInterfaceInitAccessCntrInfo may leave fields in rm_info
|
||||
// populated when it returns an error. Set the buffer handle to zero as
|
||||
@ -387,32 +370,28 @@ NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
UVM_ASSERT(access_counters->rm_info.bufferSize %
|
||||
parent_gpu->access_counter_buffer_hal->entry_size(parent_gpu) == 0);
|
||||
|
||||
status = config_granularity_to_bytes(UVM_PERF_ACCESS_COUNTER_GRANULARITY, &granularity_bytes);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
if (granularity_bytes > UVM_MAX_TRANSLATION_SIZE)
|
||||
UVM_ASSERT(granularity_bytes % UVM_MAX_TRANSLATION_SIZE == 0);
|
||||
|
||||
parent_gpu->access_counter_buffer_info.notifications_ignored_count = 0;
|
||||
parent_gpu->access_counter_buffer_info.reconfiguration_owner = NULL;
|
||||
access_counters->notifications_ignored_count = 0;
|
||||
access_counters->test.reconfiguration_owner = NULL;
|
||||
|
||||
uvm_tracker_init(&access_counters->clear_tracker);
|
||||
|
||||
access_counters->max_notifications = parent_gpu->access_counter_buffer_info.rm_info.bufferSize /
|
||||
access_counters->max_notifications = access_counters->rm_info.bufferSize /
|
||||
parent_gpu->access_counter_buffer_hal->entry_size(parent_gpu);
|
||||
|
||||
// Check provided module parameter value
|
||||
access_counters->max_batch_size = max(uvm_perf_access_counter_batch_count,
|
||||
(NvU32)UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN);
|
||||
access_counters->max_batch_size = min(access_counters->max_batch_size,
|
||||
access_counters->max_notifications);
|
||||
|
||||
access_counters->max_batch_size = access_counters_max_batch_size(access_counters);
|
||||
if (access_counters->max_batch_size != uvm_perf_access_counter_batch_count) {
|
||||
pr_info("Invalid uvm_perf_access_counter_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
uvm_perf_access_counter_batch_count,
|
||||
UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN,
|
||||
access_counters->max_notifications,
|
||||
access_counters->max_batch_size);
|
||||
UVM_INFO_PRINT("Invalid uvm_perf_access_counter_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u "
|
||||
"instead\n",
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
uvm_perf_access_counter_batch_count,
|
||||
UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN,
|
||||
access_counters->max_notifications,
|
||||
access_counters->max_batch_size);
|
||||
}
|
||||
|
||||
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_batch_context_notification_cache) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
batch_context->notification_cache = uvm_kvmalloc_zero(access_counters->max_notifications *
|
||||
@ -422,23 +401,14 @@ NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
goto fail;
|
||||
}
|
||||
|
||||
batch_context->virt.notifications = uvm_kvmalloc_zero(access_counters->max_notifications *
|
||||
sizeof(*batch_context->virt.notifications));
|
||||
if (!batch_context->virt.notifications) {
|
||||
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_batch_context_notifications) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
batch_context->phys.notifications = uvm_kvmalloc_zero(access_counters->max_notifications *
|
||||
sizeof(*batch_context->phys.notifications));
|
||||
if (!batch_context->phys.notifications) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
batch_context->phys.translations = uvm_kvmalloc_zero((UVM_MAX_TRANSLATION_SIZE / PAGE_SIZE) *
|
||||
sizeof(*batch_context->phys.translations));
|
||||
if (!batch_context->phys.translations) {
|
||||
batch_context->notifications = uvm_kvmalloc_zero(access_counters->max_notifications *
|
||||
sizeof(*batch_context->notifications));
|
||||
if (!batch_context->notifications) {
|
||||
status = NV_ERR_NO_MEMORY;
|
||||
goto fail;
|
||||
}
|
||||
@ -446,35 +416,39 @@ NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
return NV_OK;
|
||||
|
||||
fail:
|
||||
uvm_parent_gpu_deinit_access_counters(parent_gpu);
|
||||
uvm_parent_gpu_deinit_access_counters(parent_gpu, notif_buf_index);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
|
||||
{
|
||||
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
|
||||
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
|
||||
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get_or_null(parent_gpu,
|
||||
notif_buf_index);
|
||||
|
||||
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
|
||||
// Access counters should have been disabled when the GPU is no longer
|
||||
// registered in any VA space.
|
||||
if (parent_gpu->isr.access_counters) {
|
||||
UVM_ASSERT_MSG(parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count == 0,
|
||||
"notif buf index: %u\n",
|
||||
notif_buf_index);
|
||||
}
|
||||
|
||||
if (access_counters->rm_info.accessCntrBufferHandle) {
|
||||
if (access_counters && access_counters->rm_info.accessCntrBufferHandle) {
|
||||
NV_STATUS status = uvm_rm_locked_call(nvUvmInterfaceDestroyAccessCntrInfo(parent_gpu->rm_device,
|
||||
&access_counters->rm_info));
|
||||
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
|
||||
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
access_counters->rm_info.accessCntrBufferHandle = 0;
|
||||
uvm_tracker_deinit(&access_counters->clear_tracker);
|
||||
}
|
||||
|
||||
uvm_kvfree(batch_context->notification_cache);
|
||||
uvm_kvfree(batch_context->virt.notifications);
|
||||
uvm_kvfree(batch_context->phys.notifications);
|
||||
uvm_kvfree(batch_context->phys.translations);
|
||||
batch_context->notification_cache = NULL;
|
||||
batch_context->virt.notifications = NULL;
|
||||
batch_context->phys.notifications = NULL;
|
||||
batch_context->phys.translations = NULL;
|
||||
uvm_kvfree(batch_context->notification_cache);
|
||||
uvm_kvfree(batch_context->notifications);
|
||||
batch_context->notification_cache = NULL;
|
||||
batch_context->notifications = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
|
||||
@ -485,30 +459,31 @@ bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
|
||||
if (parent_gpu->rm_info.isSimulated)
|
||||
return true;
|
||||
|
||||
return is_migration_enabled(UVM_ACCESS_COUNTER_TYPE_MIMC) || is_migration_enabled(UVM_ACCESS_COUNTER_TYPE_MOMC);
|
||||
return is_migration_enabled();
|
||||
}
|
||||
|
||||
// This function enables access counters with the given configuration and takes
|
||||
// ownership from RM. The function also stores the new configuration within the
|
||||
// uvm_gpu_t struct.
|
||||
static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, UvmGpuAccessCntrConfig *config)
|
||||
static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, NvU32 index, const UvmGpuAccessCntrConfig *config)
|
||||
{
|
||||
NV_STATUS status, disable_status;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent, index);
|
||||
|
||||
UVM_ASSERT(gpu->parent->access_counters_supported);
|
||||
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
|
||||
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters[index].service_lock));
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceEnableAccessCntr(gpu->parent->rm_device,
|
||||
&access_counters->rm_info,
|
||||
config));
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to enable access counter notification from RM: %s, GPU %s\n",
|
||||
nvstatusToString(status), uvm_gpu_name(gpu));
|
||||
UVM_ERR_PRINT("Failed to enable access counter notification from RM: %s, GPU %s notif buf index %u\n",
|
||||
nvstatusToString(status),
|
||||
uvm_gpu_name(gpu),
|
||||
index);
|
||||
return status;
|
||||
}
|
||||
|
||||
status = access_counter_clear_all(gpu);
|
||||
status = access_counter_clear_all(gpu, access_counters);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
|
||||
@ -520,12 +495,11 @@ static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, UvmGpuAccessCntr
|
||||
// taken control of the notify buffer since the GPU was initialized. Then
|
||||
// flush old notifications. This will update the cached_put pointer.
|
||||
access_counters->cached_get = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferGet);
|
||||
access_counter_buffer_flush_locked(gpu->parent, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
|
||||
access_counter_buffer_flush_locked(access_counters, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
|
||||
|
||||
access_counters->current_config.threshold = config->threshold;
|
||||
|
||||
init_access_counter_types_config(config, UVM_ACCESS_COUNTER_TYPE_MIMC, &access_counters->current_config.mimc);
|
||||
init_access_counter_types_config(config, UVM_ACCESS_COUNTER_TYPE_MOMC, &access_counters->current_config.momc);
|
||||
init_access_counter_config(config, access_counters);
|
||||
|
||||
return NV_OK;
|
||||
|
||||
@ -539,15 +513,14 @@ error:
|
||||
|
||||
// If ownership is yielded as part of reconfiguration, the access counters
|
||||
// handling refcount may not be 0
|
||||
static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu)
|
||||
static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
|
||||
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(parent_gpu, index);
|
||||
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[index].service_lock));
|
||||
|
||||
// Wait for any pending clear operation befor releasing ownership
|
||||
// Wait for any pending clear operation before releasing ownership
|
||||
status = uvm_tracker_wait(&access_counters->clear_tracker);
|
||||
if (status != NV_OK)
|
||||
UVM_ASSERT(status == uvm_global_get_status());
|
||||
@ -559,100 +532,180 @@ static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
// Increment the refcount of access counter enablement. If this is the first
// reference, enable the HW feature.
static NV_STATUS gpu_access_counters_enable(uvm_gpu_t *gpu, UvmGpuAccessCntrConfig *config)
static NV_STATUS gpu_access_counters_enable(uvm_gpu_t *gpu,
uvm_access_counter_buffer_t *access_counters,
const UvmGpuAccessCntrConfig *config)
{
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
NvU32 notif_buf_index = access_counters->index;

UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(gpu->parent->access_counter_buffer_info.rm_info.accessCntrBufferHandle);
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters[notif_buf_index].service_lock));
UVM_ASSERT(access_counters->rm_info.accessCntrBufferHandle);

// There cannot be a concurrent modification of the handling count, since
// the only two writes of that field happen in the enable/disable functions
// and those are protected by the access counters ISR lock.
if (gpu->parent->isr.access_counters.handling_ref_count == 0) {
NV_STATUS status = access_counters_take_ownership(gpu, config);
if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0) {
NV_STATUS status = access_counters_take_ownership(gpu, notif_buf_index, config);

if (status != NV_OK)
return status;
}

++gpu->parent->isr.access_counters.handling_ref_count;
++gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count;
return NV_OK;
}
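The refcount-plus-lock pattern described in the comment above is easy to model in isolation. The sketch below is a simplified illustration of that idea, not driver code; every name in it is hypothetical:

// Simplified model: ownership is taken from RM on the 0 -> 1 transition and
// yielded on the 1 -> 0 transition. Both writes to the count happen under the
// same per-buffer service lock, so no atomics are needed for the count itself.
struct example_buffer
{
    int handling_ref_count;   // protected by the buffer's service lock
};

static int example_take_ownership(struct example_buffer *buf);   // hypothetical
static void example_yield_ownership(struct example_buffer *buf); // hypothetical

static int example_enable(struct example_buffer *buf)
{
    // Caller holds the buffer's service lock
    if (buf->handling_ref_count == 0) {
        int err = example_take_ownership(buf);
        if (err != 0)
            return err;
    }

    buf->handling_ref_count++;
    return 0;
}

static void example_disable(struct example_buffer *buf)
{
    // Caller holds the buffer's service lock
    if (--buf->handling_ref_count == 0)
        example_yield_ownership(buf);
}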
|
||||
|
||||
// Decrement the refcount of access counter enablement. If this is the last
|
||||
// reference, disable the HW feature.
|
||||
static void parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu)
|
||||
static void access_counters_disable(uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count > 0);
|
||||
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
|
||||
NvU32 notif_buf_index = access_counters->index;
|
||||
|
||||
if (--parent_gpu->isr.access_counters.handling_ref_count == 0)
|
||||
access_counters_yield_ownership(parent_gpu);
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));
|
||||
UVM_ASSERT_MSG(parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0,
|
||||
"notif buf index: %u\n",
|
||||
notif_buf_index);
|
||||
|
||||
if (--parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count == 0)
|
||||
access_counters_yield_ownership(parent_gpu, notif_buf_index);
|
||||
}
|
||||
|
||||
// Invoked during registration of the GPU in the VA space
|
||||
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
|
||||
{
|
||||
NV_STATUS status;
|
||||
NvU32 notif_buf_index;
|
||||
|
||||
UVM_ASSERT(gpu->parent->access_counters_supported);
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
|
||||
|
||||
if (uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
|
||||
status = NV_OK;
|
||||
}
|
||||
else {
|
||||
UvmGpuAccessCntrConfig default_config =
|
||||
{
|
||||
.mimcGranularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
|
||||
.momcGranularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
|
||||
.mimcUseLimit = UVM_ACCESS_COUNTER_USE_LIMIT_FULL,
|
||||
.momcUseLimit = UVM_ACCESS_COUNTER_USE_LIMIT_FULL,
|
||||
.threshold = g_uvm_access_counter_threshold,
|
||||
};
|
||||
status = gpu_access_counters_enable(gpu, &default_config);
|
||||
for (notif_buf_index = 0; notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount; notif_buf_index++) {
|
||||
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
|
||||
notif_buf_index);
|
||||
|
||||
uvm_access_counters_isr_lock(access_counters);
|
||||
|
||||
status = gpu_access_counters_enable(gpu, access_counters, &g_default_config);
|
||||
|
||||
// If this is the first reference taken on access counters, dropping
|
||||
// the ISR lock will enable interrupts.
|
||||
uvm_access_counters_isr_unlock(access_counters);
|
||||
|
||||
if (status != NV_OK)
|
||||
goto cleanup;
|
||||
}
|
||||
|
||||
// No VA space lock is currently held, so the mask is atomically
|
||||
// modified to protect from concurrent enablement of access counters in
|
||||
// another GPU
|
||||
if (status == NV_OK)
|
||||
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
|
||||
// another GPU.
|
||||
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
|
||||
}
|
||||
|
||||
// If this is the first reference taken on access counters, dropping the
|
||||
// ISR lock will enable interrupts.
|
||||
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
|
||||
|
||||
return status;
|
||||
|
||||
cleanup:
// The "notif_buf_index" notification buffer is already disabled since
// enabling it failed, so we only disable the buffers prior to
// notif_buf_index.
while (notif_buf_index-- != 0) {
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
notif_buf_index);
uvm_access_counters_isr_lock(access_counters);
access_counters_disable(access_counters);
uvm_access_counters_isr_unlock(access_counters);
}

uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);

return status;
}
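The enable path above follows a common all-or-nothing pattern: enable the buffers in order and, on the first failure, unwind only the ones that already succeeded. A standalone model of the same unwind logic (all names are illustrative; index 2 is made to fail just so the cleanup path runs):

#include <stdbool.h>

#define EXAMPLE_BUFFER_COUNT 4u

// Stand-ins for per-buffer enable/disable. Purely illustrative.
static bool example_enable_buffer(bool *enabled, unsigned i)
{
    if (i == 2)
        return false;
    enabled[i] = true;
    return true;
}

static void example_disable_buffer(bool *enabled, unsigned i)
{
    enabled[i] = false;
}

// Same shape as the loop above: on failure at index i, entries [0, i) were
// enabled and are disabled in reverse order. The failed index never enabled,
// so the post-decrement while loop skips it.
int main(void)
{
    bool enabled[EXAMPLE_BUFFER_COUNT] = { false };
    unsigned i;

    for (i = 0; i < EXAMPLE_BUFFER_COUNT; i++) {
        if (!example_enable_buffer(enabled, i))
            goto cleanup;
    }

    return 0;

cleanup:
    while (i-- != 0)
        example_disable_buffer(enabled, i);

    return 1;
}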
|
||||
|
||||
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_va_space_t *va_space)
|
||||
static void access_counters_disable_notif_buffer(uvm_access_counter_buffer_t *access_counters,
|
||||
uvm_gpu_t *gpu,
|
||||
uvm_va_space_t *va_space)
|
||||
{
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
NvU32 notif_buf_index = access_counters->index;
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
|
||||
uvm_access_counters_isr_lock(access_counters);
|
||||
|
||||
if (uvm_parent_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors,
|
||||
parent_gpu->id)) {
|
||||
parent_gpu_access_counters_disable(parent_gpu);
|
||||
access_counters_disable(access_counters);
|
||||
|
||||
// If this is VA space reconfigured access counters, clear the
|
||||
// ownership to allow for other processes to invoke the reconfiguration
|
||||
if (parent_gpu->access_counter_buffer_info.reconfiguration_owner == va_space)
|
||||
parent_gpu->access_counter_buffer_info.reconfiguration_owner = NULL;
|
||||
// If this VA space reconfigured access counters, clear the ownership to
|
||||
// allow for other processes to invoke the reconfiguration.
|
||||
if (access_counters->test.reconfiguration_owner == va_space) {
|
||||
access_counters->test.reconfiguration_owner = NULL;
|
||||
|
||||
// Reset notification service test knobs.
|
||||
access_counters->max_batch_size = access_counters_max_batch_size(access_counters);
|
||||
access_counters->test.one_iteration_per_batch = false;
|
||||
access_counters->test.sleep_per_iteration_us = 0;
|
||||
|
||||
// Reset HW access counters settings to default values. A test may
// have modified them. Concurrent processes and registered VA spaces
// would maintain the modified config, undermining the correctness
// of forthcoming tests.
if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count > 0) {
NV_STATUS status;

// Disable counters, and reenable with the new configuration.
// For more details, refer to comments in
// uvm_test_reconfigure_access_counters().
|
||||
access_counters_yield_ownership(gpu->parent, notif_buf_index);
|
||||
status = access_counters_take_ownership(gpu, notif_buf_index, &g_default_config);
|
||||
if (status != NV_OK) {
|
||||
// Retaking ownership failed, so RM owns the interrupt.
|
||||
// The state of any other VA space with access counters
|
||||
// enabled is corrupt.
|
||||
UVM_ASSERT_MSG(status == NV_OK,
|
||||
"Access counters interrupt still owned by RM, other VA spaces may experience "
|
||||
"failures");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
|
||||
uvm_access_counters_isr_unlock(access_counters);
|
||||
}
|
||||
|
||||
static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
|
||||
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
|
||||
{
|
||||
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
|
||||
NvU32 notif_buf_index;
|
||||
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
|
||||
UVM_ASSERT(gpu->parent->access_counters_supported);
|
||||
|
||||
uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
|
||||
|
||||
if (uvm_parent_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors,
|
||||
gpu->parent->id)) {
|
||||
|
||||
for (notif_buf_index = 0; notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount; notif_buf_index++) {
|
||||
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
|
||||
notif_buf_index);
|
||||
|
||||
// Disable access counters per notification buffer. If testing is
|
||||
// enabled, we may reset the access counters config and testing
|
||||
// knobs.
|
||||
access_counters_disable_notif_buffer(access_counters, gpu, va_space);
|
||||
}
|
||||
}
|
||||
|
||||
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
|
||||
}
|
||||
|
||||
static void write_get(uvm_access_counter_buffer_t *access_counters, NvU32 get)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
|
||||
NvU32 notif_buf_index = access_counters->index;
|
||||
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));
|
||||
|
||||
// Write get on the GPU only if it's changed.
|
||||
if (access_counters->cached_get == get)
|
||||
@ -664,16 +717,16 @@ static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
|
||||
UVM_GPU_WRITE_ONCE(*access_counters->rm_info.pAccessCntrBufferGet, get);
|
||||
}
|
||||
|
||||
static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
static void access_counter_buffer_flush_locked(uvm_access_counter_buffer_t *access_counters,
uvm_gpu_buffer_flush_mode_t flush_mode)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
NvU32 get;
NvU32 put;
uvm_spin_loop_t spin;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;

UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));

// Read PUT pointer from the GPU if requested
UVM_ASSERT(flush_mode != UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT);
@@ -685,32 +738,39 @@ static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,

while (get != put) {
// Wait until valid bit is set
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(access_counters, get), &spin) {
if (uvm_global_get_status() != NV_OK)
goto done;
}

parent_gpu->access_counter_buffer_hal->entry_clear_valid(parent_gpu, get);
parent_gpu->access_counter_buffer_hal->entry_clear_valid(access_counters, get);
++get;
if (get == access_counters->max_notifications)
get = 0;
}

done:
write_get(parent_gpu, get);
write_get(access_counters, get);
}
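The flush loop above is a standard ring-buffer drain: walk GET toward PUT, wait for each entry's valid bit, clear it, wrap at the buffer size, and publish the new GET once at the end. A minimal host-side model of the same control flow, with no UVM types and purely for illustration:

#include <stdbool.h>

#define EXAMPLE_MAX_NOTIFICATIONS 64u

struct example_ring
{
    bool valid[EXAMPLE_MAX_NOTIFICATIONS];
    unsigned get;
    unsigned put;
};

// Consume each entry between get and put, clearing the valid flag the way the
// flush loop clears the HW valid bit, then publish the new get index once
// (mirroring the single write_get() call).
static void example_flush(struct example_ring *ring)
{
    unsigned get = ring->get;

    while (get != ring->put) {
        // The driver spin-waits here because the GPU can advance PUT before an
        // entry's valid bit becomes visible; this model assumes the producer
        // already marked the entries valid.
        if (ring->valid[get])
            ring->valid[get] = false;

        if (++get == EXAMPLE_MAX_NOTIFICATIONS)
            get = 0;
    }

    ring->get = get;
}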
|
||||
|
||||
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NvU32 notif_buf_index;
|
||||
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
|
||||
// Disables access counter interrupts and notification servicing
|
||||
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
|
||||
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
|
||||
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(parent_gpu,
|
||||
notif_buf_index);
|
||||
|
||||
if (parent_gpu->isr.access_counters.handling_ref_count > 0)
|
||||
access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
|
||||
// Disables access counter interrupts and notification servicing
|
||||
uvm_access_counters_isr_lock(access_counters);
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
|
||||
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0)
|
||||
access_counter_buffer_flush_locked(access_counters, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
|
||||
|
||||
uvm_access_counters_isr_unlock(access_counters);
|
||||
}
|
||||
}
|
||||
|
||||
static inline int cmp_access_counter_instance_ptr(const uvm_access_counter_buffer_entry_t *a,
|
||||
@ -718,24 +778,23 @@ static inline int cmp_access_counter_instance_ptr(const uvm_access_counter_buffe
|
||||
{
|
||||
int result;
|
||||
|
||||
result = uvm_gpu_phys_addr_cmp(a->virtual_info.instance_ptr, b->virtual_info.instance_ptr);
|
||||
// On Volta+ we need to sort by {instance_ptr + subctx_id} pair since it can
|
||||
// map to a different VA space
|
||||
result = uvm_gpu_phys_addr_cmp(a->instance_ptr, b->instance_ptr);
|
||||
|
||||
// On Turing+ we need to sort by {instance_ptr + subctx_id} pair since it
|
||||
// can map to a different VA space
|
||||
if (result != 0)
|
||||
return result;
|
||||
return UVM_CMP_DEFAULT(a->virtual_info.ve_id, b->virtual_info.ve_id);
|
||||
|
||||
return UVM_CMP_DEFAULT(a->ve_id, b->ve_id);
|
||||
}
|
||||
|
||||
// Sort comparator for pointers to GVA access counter notification buffer
|
||||
// entries that sorts by instance pointer
|
||||
static int cmp_sort_virt_notifications_by_instance_ptr(const void *_a, const void *_b)
|
||||
// Sort comparator for pointers to access counter notification buffer entries
|
||||
// that sorts by instance pointer and ve_id.
|
||||
static int cmp_sort_notifications_by_instance_ptr(const void *_a, const void *_b)
|
||||
{
|
||||
const uvm_access_counter_buffer_entry_t *a = *(const uvm_access_counter_buffer_entry_t **)_a;
|
||||
const uvm_access_counter_buffer_entry_t *b = *(const uvm_access_counter_buffer_entry_t **)_b;
|
||||
|
||||
UVM_ASSERT(a->address.is_virtual);
|
||||
UVM_ASSERT(b->address.is_virtual);
|
||||
|
||||
return cmp_access_counter_instance_ptr(a, b);
|
||||
}
|
||||
|
||||
@ -748,16 +807,15 @@ static inline int cmp_gpu(const uvm_gpu_t *a, const uvm_gpu_t *b)
|
||||
return UVM_CMP_DEFAULT(id_a, id_b);
|
||||
}
|
||||
|
||||
// Sort comparator for pointers to GVA access counter notification buffer
// entries that sorts by va_space, GPU ID, and fault address.
static int cmp_sort_virt_notifications_by_va_space_gpu_address(const void *_a, const void *_b)
// Sort comparator for pointers to access counter notification buffer entries
// that sorts by va_space, GPU ID, and notification address.
static int cmp_sort_notifications_by_va_space_gpu_address(const void *_a, const void *_b)
{
const uvm_access_counter_buffer_entry_t **a = (const uvm_access_counter_buffer_entry_t **)_a;
const uvm_access_counter_buffer_entry_t **b = (const uvm_access_counter_buffer_entry_t **)_b;

int result;

result = UVM_CMP_DEFAULT((*a)->virtual_info.va_space, (*b)->virtual_info.va_space);
result = UVM_CMP_DEFAULT((*a)->va_space, (*b)->va_space);
if (result != 0)
return result;

@@ -765,20 +823,7 @@ static int cmp_sort_virt_notifications_by_va_space_gpu_address(const void *_a, c
if (result != 0)
return result;

return UVM_CMP_DEFAULT((*a)->address.address, (*b)->address.address);
}

// Sort comparator for pointers to GPA access counter notification buffer
// entries that sorts by physical address' aperture
static int cmp_sort_phys_notifications_by_processor_id(const void *_a, const void *_b)
{
const uvm_access_counter_buffer_entry_t *a = *(const uvm_access_counter_buffer_entry_t **)_a;
const uvm_access_counter_buffer_entry_t *b = *(const uvm_access_counter_buffer_entry_t **)_b;

UVM_ASSERT(!a->address.is_virtual);
UVM_ASSERT(!b->address.is_virtual);

return uvm_id_cmp(a->physical_info.resident_id, b->physical_info.resident_id);
return UVM_CMP_DEFAULT((*a)->address, (*b)->address);
}
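These comparators operate on an array of entry pointers (hence the extra level of indirection in `_a`/`_b`), so related notifications end up adjacent and can be serviced per VA space in one pass while the backing cache stays untouched. A standalone model of the same idiom with libc qsort; the types and fields are illustrative, not the driver's:

#include <stdio.h>
#include <stdlib.h>

struct example_entry
{
    int va_space_id;            // stands in for the va_space pointer
    unsigned long long address;
};

// Compare two *pointers to* entries: first by VA space, then by address.
static int cmp_example_entries(const void *_a, const void *_b)
{
    const struct example_entry *a = *(const struct example_entry **)_a;
    const struct example_entry *b = *(const struct example_entry **)_b;

    if (a->va_space_id != b->va_space_id)
        return a->va_space_id < b->va_space_id ? -1 : 1;
    if (a->address != b->address)
        return a->address < b->address ? -1 : 1;
    return 0;
}

int main(void)
{
    struct example_entry entries[3] = {
        { .va_space_id = 2, .address = 0x1000 },
        { .va_space_id = 1, .address = 0x3000 },
        { .va_space_id = 1, .address = 0x2000 },
    };
    struct example_entry *sorted[3] = { &entries[0], &entries[1], &entries[2] };

    // Sorting the pointer array groups entries by VA space, then address.
    qsort(sorted, 3, sizeof(sorted[0]), cmp_example_entries);

    for (int i = 0; i < 3; i++)
        printf("va_space %d address 0x%llx\n", sorted[i]->va_space_id, sorted[i]->address);

    return 0;
}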
|
||||
|
||||
typedef enum
|
||||
@ -792,21 +837,20 @@ typedef enum
|
||||
NOTIFICATION_FETCH_MODE_ALL,
|
||||
} notification_fetch_mode_t;
|
||||
|
||||
static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
static NvU32 fetch_access_counter_buffer_entries(uvm_access_counter_buffer_t *access_counters,
|
||||
notification_fetch_mode_t fetch_mode)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
|
||||
NvU32 notif_buf_index = access_counters->index;
|
||||
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
|
||||
NvU32 get;
|
||||
NvU32 put;
|
||||
NvU32 notification_index;
|
||||
uvm_access_counter_buffer_entry_t *notification_cache;
|
||||
uvm_spin_loop_t spin;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
|
||||
NvU32 last_instance_ptr_idx = 0;
|
||||
uvm_aperture_t last_aperture = UVM_APERTURE_PEER_MAX;
|
||||
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));
|
||||
|
||||
notification_cache = batch_context->notification_cache;
|
||||
|
||||
@ -822,12 +866,8 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
|
||||
if (get == put)
|
||||
return 0;
|
||||
|
||||
batch_context->phys.num_notifications = 0;
|
||||
batch_context->virt.num_notifications = 0;
|
||||
|
||||
batch_context->virt.is_single_instance_ptr = true;
|
||||
batch_context->phys.is_single_aperture = true;
|
||||
|
||||
batch_context->num_notifications = 0;
|
||||
batch_context->is_single_instance_ptr = true;
|
||||
notification_index = 0;
|
||||
|
||||
// Parse until get != put and have enough space to cache.
|
||||
@ -838,7 +878,8 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
|
||||
// We cannot just wait for the last entry (the one pointed by put) to
|
||||
// become valid, we have to do it individually since entries can be
|
||||
// written out of order
|
||||
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
|
||||
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(access_counters, get),
|
||||
&spin) {
|
||||
// We have some entry to work on. Let's do the rest later.
|
||||
if (fetch_mode != NOTIFICATION_FETCH_MODE_ALL && notification_index > 0)
|
||||
goto done;
|
||||
@ -853,54 +894,15 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
|
||||
        smp_mb__after_atomic();

        // Got valid bit set. Let's cache.
        parent_gpu->access_counter_buffer_hal->parse_entry(parent_gpu, get, current_entry);
        parent_gpu->access_counter_buffer_hal->parse_entry(access_counters, get, current_entry);

        if (current_entry->address.is_virtual) {
            batch_context->virt.notifications[batch_context->virt.num_notifications++] = current_entry;
        batch_context->notifications[batch_context->num_notifications++] = current_entry;

            if (batch_context->virt.is_single_instance_ptr) {
                if (batch_context->virt.num_notifications == 1) {
                    last_instance_ptr_idx = notification_index;
                }
                else if (cmp_access_counter_instance_ptr(&notification_cache[last_instance_ptr_idx],
                                                         current_entry) != 0) {
                    batch_context->virt.is_single_instance_ptr = false;
                }
            }
        }
        else {
            NvU64 translation_size;
            uvm_gpu_t *gpu;

            translation_size = get_config_for_type(access_counters,
                                                   current_entry->counter_type)->translation_size;
            current_entry->address.address = UVM_ALIGN_DOWN(current_entry->address.address, translation_size);

            batch_context->phys.notifications[batch_context->phys.num_notifications++] = current_entry;

            gpu = uvm_parent_gpu_find_first_valid_gpu(parent_gpu);
            if (!gpu) {
                current_entry->physical_info.resident_id = UVM_ID_INVALID;
                current_entry->gpu = NULL;
            }
            else {
                current_entry->gpu = gpu;
                current_entry->physical_info.resident_id =
                    uvm_gpu_get_processor_id_by_address(gpu, uvm_gpu_phys_address(current_entry->address.aperture,
                                                                                  current_entry->address.address));

                if (batch_context->phys.is_single_aperture) {
                    if (batch_context->phys.num_notifications == 1)
                        last_aperture = current_entry->address.aperture;
                    else if (current_entry->address.aperture != last_aperture)
                        batch_context->phys.is_single_aperture = false;
                }

                if (current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC)
                    UVM_ASSERT(uvm_id_equal(current_entry->physical_info.resident_id, gpu->id));
                else
                    UVM_ASSERT(!uvm_id_equal(current_entry->physical_info.resident_id, gpu->id));
            }
        if (batch_context->is_single_instance_ptr) {
            if (batch_context->num_notifications == 1)
                last_instance_ptr_idx = notification_index;
            else if (cmp_access_counter_instance_ptr(&notification_cache[last_instance_ptr_idx], current_entry) != 0)
                batch_context->is_single_instance_ptr = false;
        }

        ++notification_index;
@@ -910,83 +912,69 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
    }

done:
    write_get(parent_gpu, get);
    write_get(access_counters, get);

    return notification_index;
}

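// Illustrative aside (not part of the driver source): a minimal, standalone
// sketch of the GET/PUT consumption pattern used by
// fetch_access_counter_buffer_entries() above. 'ring_t' and its fields are
// hypothetical; the point is that the consumer walks from GET towards PUT,
// caches entries, and publishes the new GET once per batch, as write_get()
// does above.

typedef struct {
    unsigned get;   // next index owned by the consumer
    unsigned put;   // next index owned by the producer
    unsigned size;  // number of slots in the ring
} ring_t;

// Consumes up to max_batch entries and returns how many were consumed.
static unsigned consume_ring(ring_t *ring, unsigned max_batch)
{
    unsigned consumed = 0;
    unsigned get = ring->get;

    while (get != ring->put && consumed < max_batch) {
        // A real consumer would copy/parse the entry at index 'get' here.
        ++consumed;
        if (++get == ring->size)
            get = 0;
    }

    // Publish the new GET only once, after the whole batch has been cached.
    ring->get = get;

    return consumed;
}
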
static void translate_virt_notifications_instance_ptrs(uvm_parent_gpu_t *parent_gpu,
                                                        uvm_access_counter_service_batch_context_t *batch_context)
static void translate_notifications_instance_ptrs(uvm_parent_gpu_t *parent_gpu,
                                                   uvm_access_counter_service_batch_context_t *batch_context)
{
    NvU32 i;
    NV_STATUS status;

    for (i = 0; i < batch_context->virt.num_notifications; ++i) {
        uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];
    for (i = 0; i < batch_context->num_notifications; ++i) {
        uvm_access_counter_buffer_entry_t *current_entry = batch_context->notifications[i];

        if (i == 0 ||
            cmp_access_counter_instance_ptr(current_entry, batch_context->virt.notifications[i - 1]) != 0) {
        if (i == 0 || cmp_access_counter_instance_ptr(current_entry, batch_context->notifications[i - 1]) != 0) {
            // If instance_ptr is different, make a new translation. If the
            // translation fails then va_space will be NULL and the entry will
            // simply be ignored in subsequent processing.
            status = uvm_parent_gpu_access_counter_entry_to_va_space(parent_gpu,
                                                                     current_entry,
                                                                     &current_entry->virtual_info.va_space,
                                                                     &current_entry->va_space,
                                                                     &current_entry->gpu);
            if (status != NV_OK) {
                UVM_ASSERT(current_entry->virtual_info.va_space == NULL);
                UVM_ASSERT(current_entry->va_space == NULL);
                UVM_ASSERT(current_entry->gpu == NULL);
            }
        }
        else {
            current_entry->virtual_info.va_space = batch_context->virt.notifications[i - 1]->virtual_info.va_space;
            current_entry->gpu = batch_context->virt.notifications[i - 1]->gpu;
            current_entry->va_space = batch_context->notifications[i - 1]->va_space;
            current_entry->gpu = batch_context->notifications[i - 1]->gpu;
        }
    }
}

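// Illustrative aside (not part of the driver source): the loop above relies on
// a common pattern -- after sorting by a key, an expensive lookup is performed
// only when the key changes, and the previous result is reused for the rest of
// the run. A standalone sketch with hypothetical types:

typedef struct {
    int key;      // stands in for the instance_ptr
    void *value;  // stands in for the translated va_space
} item_t;

// Placeholder for an expensive lookup keyed by 'key'.
static void *translate_key(int key)
{
    (void)key;
    return NULL;
}

// 'items' must already be sorted by 'key'.
static void translate_sorted_items(item_t *items, unsigned count)
{
    unsigned i;

    for (i = 0; i < count; i++) {
        if (i == 0 || items[i].key != items[i - 1].key)
            items[i].value = translate_key(items[i].key);
        else
            items[i].value = items[i - 1].value;
    }
}
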
// GVA notifications provide an instance_ptr and ve_id that can be directly
// Notifications provide an instance_ptr and ve_id that can be directly
// translated to a VA space. In order to minimize translations, we sort the
// entries by instance_ptr, va_space and notification address in that order.
static void preprocess_virt_notifications(uvm_parent_gpu_t *parent_gpu,
                                          uvm_access_counter_service_batch_context_t *batch_context)
static void preprocess_notifications(uvm_parent_gpu_t *parent_gpu,
                                     uvm_access_counter_service_batch_context_t *batch_context)
{
    if (!batch_context->virt.is_single_instance_ptr) {
        sort(batch_context->virt.notifications,
             batch_context->virt.num_notifications,
             sizeof(*batch_context->virt.notifications),
             cmp_sort_virt_notifications_by_instance_ptr,
    if (!batch_context->is_single_instance_ptr) {
        sort(batch_context->notifications,
             batch_context->num_notifications,
             sizeof(*batch_context->notifications),
             cmp_sort_notifications_by_instance_ptr,
             NULL);
    }

    translate_virt_notifications_instance_ptrs(parent_gpu, batch_context);
    translate_notifications_instance_ptrs(parent_gpu, batch_context);

    sort(batch_context->virt.notifications,
         batch_context->virt.num_notifications,
         sizeof(*batch_context->virt.notifications),
         cmp_sort_virt_notifications_by_va_space_gpu_address,
    sort(batch_context->notifications,
         batch_context->num_notifications,
         sizeof(*batch_context->notifications),
         cmp_sort_notifications_by_va_space_gpu_address,
         NULL);
}

// GPA notifications provide a physical address and an aperture. Sort
// accesses by aperture to try to coalesce operations on the same target
// processor.
static void preprocess_phys_notifications(uvm_access_counter_service_batch_context_t *batch_context)
{
    if (!batch_context->phys.is_single_aperture) {
        sort(batch_context->phys.notifications,
             batch_context->phys.num_notifications,
             sizeof(*batch_context->phys.notifications),
             cmp_sort_phys_notifications_by_processor_id,
             NULL);
    }
}

static NV_STATUS notify_tools_broadcast_and_process_flags(uvm_parent_gpu_t *parent_gpu,
|
||||
static NV_STATUS notify_tools_broadcast_and_process_flags(uvm_access_counter_buffer_t *access_counters,
|
||||
uvm_access_counter_buffer_entry_t **notification_start,
|
||||
NvU32 num_entries,
|
||||
NvU32 flags)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
|
||||
uvm_gpu_t *gpu = uvm_parent_gpu_find_first_valid_gpu(parent_gpu);
|
||||
NV_STATUS status = NV_OK;
|
||||
|
||||
@ -999,19 +987,20 @@ static NV_STATUS notify_tools_broadcast_and_process_flags(uvm_parent_gpu_t *pare
|
||||
NvU32 i;
|
||||
|
||||
for (i = 0; i < num_entries; i++)
|
||||
uvm_tools_broadcast_access_counter(gpu, notification_start[i], flags & UVM_ACCESS_COUNTER_PHYS_ON_MANAGED);
|
||||
uvm_tools_broadcast_access_counter(gpu, notification_start[i]);
|
||||
}
|
||||
|
||||
UVM_ASSERT(!(flags & UVM_ACCESS_COUNTER_ACTION_TARGETED_CLEAR));
|
||||
|
||||
if (flags & UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR)
|
||||
status = access_counter_clear_notifications(gpu, notification_start, num_entries);
|
||||
status = access_counter_clear_notifications(gpu, access_counters, notification_start, num_entries);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS notify_tools_and_process_flags(uvm_va_space_t *va_space,
|
||||
uvm_gpu_t *gpu,
|
||||
uvm_access_counter_buffer_t *access_counters,
|
||||
NvU64 base,
|
||||
uvm_access_counter_buffer_entry_t **notification_start,
|
||||
NvU32 num_entries,
|
||||
@ -1023,12 +1012,8 @@ static NV_STATUS notify_tools_and_process_flags(uvm_va_space_t *va_space,
|
||||
if (uvm_enable_builtin_tests) {
|
||||
NvU32 i;
|
||||
|
||||
for (i = 0; i < num_entries; i++) {
|
||||
uvm_tools_record_access_counter(va_space,
|
||||
gpu->id,
|
||||
notification_start[i],
|
||||
flags & UVM_ACCESS_COUNTER_PHYS_ON_MANAGED);
|
||||
}
|
||||
for (i = 0; i < num_entries; i++)
|
||||
uvm_tools_record_access_counter(va_space, gpu->id, notification_start[i]);
|
||||
}
|
||||
|
||||
if (flags & UVM_ACCESS_COUNTER_ACTION_TARGETED_CLEAR) {
|
||||
@ -1042,7 +1027,7 @@ static NV_STATUS notify_tools_and_process_flags(uvm_va_space_t *va_space,
|
||||
NvU32 end_index;
|
||||
|
||||
for (end_index = i; end_index < num_entries; end_index++) {
|
||||
NvU32 mask_index = (notification_start[end_index]->address.address - base) / PAGE_SIZE;
|
||||
NvU32 mask_index = (notification_start[end_index]->address - base) / PAGE_SIZE;
|
||||
|
||||
if (!uvm_page_mask_test(migrated_mask, mask_index))
|
||||
break;
|
||||
@ -1050,6 +1035,7 @@ static NV_STATUS notify_tools_and_process_flags(uvm_va_space_t *va_space,
|
||||
|
||||
if (end_index > start_index) {
|
||||
status = access_counter_clear_notifications(gpu,
|
||||
access_counters,
|
||||
¬ification_start[start_index],
|
||||
end_index - start_index);
|
||||
if (status != NV_OK)
|
||||
@ -1062,7 +1048,7 @@ static NV_STATUS notify_tools_and_process_flags(uvm_va_space_t *va_space,
|
||||
else if (flags & UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR) {
|
||||
UVM_ASSERT(!base);
|
||||
UVM_ASSERT(!migrated_mask);
|
||||
status = access_counter_clear_notifications(gpu, notification_start, num_entries);
|
||||
status = access_counter_clear_notifications(gpu, access_counters, notification_start, num_entries);
|
||||
}
|
||||
|
||||
return status;
|
||||
@ -1242,162 +1228,6 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
|
||||
return status;
|
||||
}
|
||||
|
||||
static void reverse_mappings_to_va_block_page_mask(uvm_va_block_t *va_block,
                                                   const uvm_reverse_map_t *reverse_mappings,
                                                   size_t num_reverse_mappings,
                                                   uvm_page_mask_t *page_mask)
{
    NvU32 index;

    UVM_ASSERT(page_mask);

    if (num_reverse_mappings > 0)
        UVM_ASSERT(reverse_mappings);

    uvm_page_mask_zero(page_mask);

    // Populate the mask of accessed pages within the VA Block
    for (index = 0; index < num_reverse_mappings; ++index) {
        const uvm_reverse_map_t *reverse_map = &reverse_mappings[index];
        uvm_va_block_region_t region = reverse_map->region;

        UVM_ASSERT(reverse_map->va_block == va_block);

        // The VA Block could have been split since we obtained the reverse
        // mappings. Clamp the region to the current VA block size, to handle
        // the case in which it was split.
        region.outer = min(region.outer, (uvm_page_index_t)uvm_va_block_num_cpu_pages(va_block));
        region.first = min(region.first, region.outer);

        uvm_page_mask_region_fill(page_mask, region);
    }
}

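// Illustrative aside (not part of the driver source): the clamping above keeps
// a [first, outer) page region inside the block's current size before marking
// pages in the mask. A standalone sketch of the same idea, using a plain bool
// array as the "page mask" and hypothetical region/limit values:

#include <stdbool.h>

typedef struct {
    unsigned first;  // first page index in the region (inclusive)
    unsigned outer;  // one past the last page index (exclusive)
} region_t;

static unsigned min_u(unsigned a, unsigned b)
{
    return a < b ? a : b;
}

// Marks the pages of 'region' in 'mask', clamped to 'num_pages' entries.
static void fill_region(bool *mask, unsigned num_pages, region_t region)
{
    unsigned page;

    // Clamp the region so it cannot index past the current block size.
    region.outer = min_u(region.outer, num_pages);
    region.first = min_u(region.first, region.outer);

    for (page = region.first; page < region.outer; page++)
        mask[page] = true;
}
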
static NV_STATUS service_phys_single_va_block(uvm_access_counter_service_batch_context_t *batch_context,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
const uvm_reverse_map_t *reverse_mappings,
|
||||
size_t num_reverse_mappings,
|
||||
NvU32 *out_flags)
|
||||
{
|
||||
uvm_gpu_t *gpu = current_entry->gpu;
|
||||
size_t index;
|
||||
uvm_va_block_t *va_block = reverse_mappings[0].va_block;
|
||||
uvm_va_space_t *va_space = NULL;
|
||||
struct mm_struct *mm = NULL;
|
||||
NV_STATUS status = NV_OK;
|
||||
const uvm_processor_id_t processor = current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC?
|
||||
gpu->id: UVM_ID_CPU;
|
||||
|
||||
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
|
||||
|
||||
UVM_ASSERT(num_reverse_mappings > 0);
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
va_space = uvm_va_block_get_va_space_maybe_dead(va_block);
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
if (va_space) {
|
||||
uvm_va_block_retry_t va_block_retry;
|
||||
va_space_access_counters_info_t *va_space_access_counters;
|
||||
uvm_service_block_context_t *service_context = &batch_context->block_service_context;
|
||||
uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
|
||||
|
||||
// If an mm is registered with the VA space, we have to retain it
|
||||
// in order to lock it before locking the VA space.
|
||||
mm = uvm_va_space_mm_retain_lock(va_space);
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
// Re-check that the VA block is valid after taking the VA block lock.
|
||||
if (uvm_va_block_is_dead(va_block))
|
||||
goto done;
|
||||
|
||||
va_space_access_counters = va_space_access_counters_info_get(va_space);
|
||||
if (UVM_ID_IS_CPU(processor) && !atomic_read(&va_space_access_counters->params.enable_momc_migrations))
|
||||
goto done;
|
||||
|
||||
if (!UVM_ID_IS_CPU(processor) && !atomic_read(&va_space_access_counters->params.enable_mimc_migrations))
|
||||
goto done;
|
||||
|
||||
service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
|
||||
service_context->num_retries = 0;
|
||||
|
||||
uvm_va_block_context_init(service_context->block_context, mm);
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block))
|
||||
uvm_hmm_migrate_begin_wait(va_block);
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
reverse_mappings_to_va_block_page_mask(va_block, reverse_mappings, num_reverse_mappings, accessed_pages);
|
||||
|
||||
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
|
||||
&va_block_retry,
|
||||
service_va_block_locked(processor,
|
||||
va_block,
|
||||
&va_block_retry,
|
||||
service_context,
|
||||
accessed_pages));
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
if (uvm_va_block_is_hmm(va_block)) {
|
||||
uvm_hmm_migrate_finish(va_block);
|
||||
|
||||
// If the pages could not be migrated, no need to try again,
|
||||
// this is best effort only.
|
||||
if (status == NV_WARN_MORE_PROCESSING_REQUIRED || status == NV_WARN_MISMATCHED_TARGET)
|
||||
status = NV_OK;
|
||||
}
|
||||
|
||||
if (status == NV_OK)
|
||||
*out_flags |= UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
|
||||
}
|
||||
|
||||
done:
|
||||
if (va_space) {
|
||||
uvm_va_space_up_read(va_space);
|
||||
uvm_va_space_mm_release_unlock(va_space, mm);
|
||||
}
|
||||
|
||||
// Drop the refcounts taken by the reverse map translation routines
|
||||
for (index = 0; index < num_reverse_mappings; ++index)
|
||||
uvm_va_block_release(va_block);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_phys_va_blocks(uvm_access_counter_service_batch_context_t *batch_context,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
const uvm_reverse_map_t *reverse_mappings,
|
||||
size_t num_reverse_mappings,
|
||||
NvU32 *out_flags)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
size_t index;
|
||||
|
||||
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
|
||||
|
||||
for (index = 0; index < num_reverse_mappings; ++index) {
|
||||
NvU32 out_flags_local = 0;
|
||||
status = service_phys_single_va_block(batch_context,
|
||||
current_entry,
|
||||
reverse_mappings + index,
|
||||
1,
|
||||
&out_flags_local);
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
|
||||
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR) == 0);
|
||||
*out_flags |= out_flags_local;
|
||||
}
|
||||
|
||||
// In the case of failure, drop the refcounts for the remaining reverse mappings
|
||||
while (++index < num_reverse_mappings)
|
||||
uvm_va_block_release(reverse_mappings[index].va_block);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// Iterate over all regions set in the given sub_granularity mask
#define for_each_sub_granularity_region(region_start, region_end, sub_granularity, num_regions) \
    for ((region_start) = find_first_bit(&(sub_granularity), (num_regions)), \
@@ -1406,189 +1236,6 @@ static NV_STATUS service_phys_va_blocks(uvm_access_counter_service_batch_context
         (region_start) = find_next_bit(&(sub_granularity), (num_regions), (region_end) + 1), \
         (region_end) = find_next_zero_bit(&(sub_granularity), (num_regions), (region_start) + 1))


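// Illustrative aside (not part of the driver source): the macro above walks
// contiguous runs of set bits using the kernel's find_first_bit()/
// find_next_bit()/find_next_zero_bit() helpers. The same idea, written as a
// standalone function with plain bit tests so it can be compiled anywhere:

#include <stdio.h>

static void visit_set_bit_regions(unsigned long mask, unsigned num_bits)
{
    unsigned start = 0;

    while (start < num_bits) {
        unsigned end;

        // Skip clear bits to find the start of the next region.
        while (start < num_bits && !(mask & (1UL << start)))
            start++;
        if (start == num_bits)
            break;

        // Extend over the run of set bits to find its end.
        end = start;
        while (end < num_bits && (mask & (1UL << end)))
            end++;

        printf("region [%u, %u)\n", start, end);
        start = end;
    }
}
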
static bool are_reverse_mappings_on_single_block(const uvm_reverse_map_t *reverse_mappings, size_t num_reverse_mappings)
|
||||
{
|
||||
size_t index;
|
||||
uvm_va_block_t *prev_va_block = NULL;
|
||||
|
||||
for (index = 0; index < num_reverse_mappings; ++index) {
|
||||
uvm_va_block_t *va_block = reverse_mappings[index].va_block;
|
||||
UVM_ASSERT(va_block);
|
||||
|
||||
if (prev_va_block && prev_va_block != va_block)
|
||||
return false;
|
||||
|
||||
prev_va_block = va_block;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Service the given translation range. It will return the count of the reverse
|
||||
// mappings found during servicing in num_reverse_mappings, even if the function
|
||||
// doesn't return NV_OK.
|
||||
static NV_STATUS service_phys_notification_translation(uvm_gpu_t *resident_gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
const uvm_gpu_access_counter_type_config_t *config,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry,
|
||||
NvU64 address,
|
||||
unsigned long sub_granularity,
|
||||
size_t *num_reverse_mappings,
|
||||
NvU32 *out_flags)
|
||||
{
|
||||
uvm_gpu_t *gpu = current_entry->gpu;
|
||||
NV_STATUS status;
|
||||
NvU32 region_start, region_end;
|
||||
|
||||
*num_reverse_mappings = 0;
|
||||
|
||||
// Get the reverse_map translations for all the regions set in the
|
||||
// sub_granularity field of the counter.
|
||||
for_each_sub_granularity_region(region_start,
|
||||
region_end,
|
||||
sub_granularity,
|
||||
config->sub_granularity_regions_per_translation) {
|
||||
NvU64 local_address = address + region_start * config->sub_granularity_region_size;
|
||||
NvU32 local_translation_size = (region_end - region_start) * config->sub_granularity_region_size;
|
||||
uvm_reverse_map_t *local_reverse_mappings = batch_context->phys.translations + *num_reverse_mappings;
|
||||
|
||||
// Obtain the virtual addresses of the pages within the reported
|
||||
// DMA range
|
||||
if (resident_gpu) {
|
||||
*num_reverse_mappings += uvm_pmm_gpu_phys_to_virt(&resident_gpu->pmm,
|
||||
local_address,
|
||||
local_translation_size,
|
||||
local_reverse_mappings);
|
||||
}
|
||||
else {
|
||||
*num_reverse_mappings += uvm_pmm_sysmem_mappings_dma_to_virt(&gpu->pmm_reverse_sysmem_mappings,
|
||||
local_address,
|
||||
local_translation_size,
|
||||
local_reverse_mappings,
|
||||
local_translation_size / PAGE_SIZE);
|
||||
}
|
||||
}
|
||||
|
||||
if (*num_reverse_mappings == 0)
|
||||
return NV_OK;
|
||||
|
||||
// Service all the translations
|
||||
if (are_reverse_mappings_on_single_block(batch_context->phys.translations, *num_reverse_mappings)) {
|
||||
status = service_phys_single_va_block(batch_context,
|
||||
current_entry,
|
||||
batch_context->phys.translations,
|
||||
*num_reverse_mappings,
|
||||
out_flags);
|
||||
}
|
||||
else {
|
||||
status = service_phys_va_blocks(batch_context,
|
||||
current_entry,
|
||||
batch_context->phys.translations,
|
||||
*num_reverse_mappings,
|
||||
out_flags);
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_phys_notification(uvm_access_counter_service_batch_context_t *batch_context,
|
||||
uvm_access_counter_buffer_entry_t *current_entry)
|
||||
{
|
||||
NvU64 address;
|
||||
NvU64 translation_index;
|
||||
uvm_gpu_t *gpu = current_entry->gpu;
|
||||
uvm_parent_gpu_t *parent_gpu = gpu->parent;
|
||||
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
|
||||
uvm_access_counter_type_t counter_type = current_entry->counter_type;
|
||||
const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters, counter_type);
|
||||
unsigned long sub_granularity;
|
||||
size_t total_reverse_mappings = 0;
|
||||
uvm_gpu_t *resident_gpu = NULL;
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU32 flags = 0;
|
||||
|
||||
address = current_entry->address.address;
|
||||
UVM_ASSERT(address % config->translation_size == 0);
|
||||
sub_granularity = current_entry->sub_granularity;
|
||||
|
||||
if (config->rm.granularity == UVM_ACCESS_COUNTER_GRANULARITY_64K)
|
||||
sub_granularity = 1;
|
||||
|
||||
if (UVM_ID_IS_GPU(current_entry->physical_info.resident_id)) {
|
||||
resident_gpu = uvm_gpu_get(current_entry->physical_info.resident_id);
|
||||
UVM_ASSERT(resident_gpu != NULL);
|
||||
|
||||
if (gpu != resident_gpu && uvm_parent_gpus_are_nvswitch_connected(gpu->parent, resident_gpu->parent)) {
|
||||
UVM_ASSERT(address >= resident_gpu->parent->nvswitch_info.fabric_memory_window_start);
|
||||
address -= resident_gpu->parent->nvswitch_info.fabric_memory_window_start;
|
||||
}
|
||||
|
||||
// On P9 systems, the CPU accesses the reserved heap on vidmem via
|
||||
// coherent NVLINK mappings. This can trigger notifications that
|
||||
// fall outside of the allocatable address range. We just drop
|
||||
// them.
|
||||
if (address >= resident_gpu->mem_info.max_allocatable_address)
|
||||
goto out;
|
||||
}
|
||||
|
||||
for (translation_index = 0; translation_index < config->translations_per_counter; ++translation_index) {
|
||||
size_t num_reverse_mappings;
|
||||
NvU32 out_flags_local = 0;
|
||||
status = service_phys_notification_translation(resident_gpu,
|
||||
batch_context,
|
||||
config,
|
||||
current_entry,
|
||||
address,
|
||||
sub_granularity,
|
||||
&num_reverse_mappings,
|
||||
&out_flags_local);
|
||||
total_reverse_mappings += num_reverse_mappings;
|
||||
|
||||
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR) == 0);
|
||||
flags |= out_flags_local;
|
||||
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
|
||||
address += config->translation_size;
|
||||
sub_granularity = sub_granularity >> config->sub_granularity_regions_per_translation;
|
||||
}
|
||||
|
||||
if (uvm_enable_builtin_tests)
|
||||
flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_PHYS_ON_MANAGED : 0);
|
||||
|
||||
out:
|
||||
notify_tools_broadcast_and_process_flags(parent_gpu, ¤t_entry, 1, flags);
|
||||
return status;
|
||||
}
|
||||
|
||||
// TODO: Bug 2018899: Add statistics for dropped access counter notifications
|
||||
static NV_STATUS service_phys_notifications(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
{
|
||||
NvU32 i;
|
||||
uvm_access_counter_buffer_entry_t **notifications = batch_context->phys.notifications;
|
||||
|
||||
UVM_ASSERT(parent_gpu->access_counters_can_use_physical_addresses);
|
||||
|
||||
preprocess_phys_notifications(batch_context);
|
||||
|
||||
for (i = 0; i < batch_context->phys.num_notifications; ++i) {
|
||||
NV_STATUS status;
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
|
||||
if (!UVM_ID_IS_VALID(current_entry->physical_info.resident_id))
|
||||
continue;
|
||||
|
||||
status = service_phys_notification(batch_context, current_entry);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_processor_id_t processor,
|
||||
@ -1617,6 +1264,7 @@ static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
|
||||
|
||||
static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_va_block_t *va_block,
|
||||
const uvm_access_counter_buffer_t *access_counters,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_page_mask_t *accessed_pages,
|
||||
const uvm_access_counter_buffer_entry_t *current_entry)
|
||||
@ -1627,18 +1275,15 @@ static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_processor_id_t resident_id;
|
||||
uvm_page_index_t page_index;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
const uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters,
|
||||
UVM_ACCESS_COUNTER_TYPE_MIMC);
|
||||
|
||||
config_granularity_to_bytes(config->rm.granularity, &granularity);
|
||||
config_granularity_to_bytes(access_counters->current_config.rm.granularity, &granularity);
|
||||
|
||||
// Granularities other than 2MB can only be enabled by UVM tests. Do nothing
|
||||
// in that case.
|
||||
if (granularity != UVM_PAGE_SIZE_2M)
|
||||
return;
|
||||
|
||||
addr = current_entry->address.address;
|
||||
addr = current_entry->address;
|
||||
|
||||
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
@ -1665,8 +1310,8 @@ static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
|
||||
NvU32 region_start;
|
||||
NvU32 region_end;
|
||||
unsigned long sub_granularity = current_entry->sub_granularity;
|
||||
NvU32 num_regions = config->sub_granularity_regions_per_translation;
|
||||
NvU32 num_sub_pages = config->sub_granularity_region_size / PAGE_SIZE;
|
||||
NvU32 num_regions = access_counters->current_config.sub_granularity_regions_per_translation;
|
||||
NvU32 num_sub_pages = access_counters->current_config.sub_granularity_region_size / PAGE_SIZE;
|
||||
uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, resident_id, NUMA_NO_NODE);
|
||||
|
||||
UVM_ASSERT(num_sub_pages >= 1);
|
||||
@ -1686,12 +1331,12 @@ static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
|
||||
}
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
static NV_STATUS service_notifications_in_block(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_access_counter_buffer_t *access_counters,
|
||||
uvm_va_block_t *va_block,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
{
|
||||
NvU32 i;
|
||||
NvU32 flags = 0;
|
||||
@ -1699,12 +1344,13 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
|
||||
NV_STATUS flags_status;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
|
||||
uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
|
||||
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
|
||||
uvm_access_counter_buffer_entry_t **notifications = batch_context->notifications;
|
||||
uvm_service_block_context_t *service_context = &batch_context->block_service_context;
|
||||
|
||||
UVM_ASSERT(va_block);
|
||||
UVM_ASSERT(index < batch_context->virt.num_notifications);
|
||||
UVM_ASSERT(index < batch_context->num_notifications);
|
||||
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
@ -1714,15 +1360,16 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
for (i = index; i < batch_context->virt.num_notifications; i++) {
|
||||
for (i = index; i < batch_context->num_notifications; i++) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
NvU64 address = current_entry->address.address;
|
||||
NvU64 address = current_entry->address;
|
||||
|
||||
if (current_entry->virtual_info.va_space != va_space || current_entry->gpu != gpu || address > va_block->end)
|
||||
if (current_entry->va_space != va_space || current_entry->gpu != gpu || address > va_block->end)
|
||||
break;
|
||||
|
||||
expand_notification_block(gpu_va_space,
|
||||
va_block,
|
||||
access_counters,
|
||||
batch_context->block_service_context.block_context,
|
||||
accessed_pages,
|
||||
current_entry);
|
||||
@ -1733,6 +1380,8 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
|
||||
// At least one notification should have been processed.
|
||||
UVM_ASSERT(index < *out_index);
|
||||
|
||||
batch_context->block_service_context.access_counters_buffer_index = access_counters->index;
|
||||
|
||||
status = service_notification_va_block_helper(mm, va_block, gpu->id, batch_context);
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
@ -1742,6 +1391,7 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
|
||||
|
||||
flags_status = notify_tools_and_process_flags(va_space,
|
||||
gpu,
|
||||
access_counters,
|
||||
0,
|
||||
¬ifications[index],
|
||||
*out_index - index,
|
||||
@ -1754,11 +1404,11 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
static NV_STATUS service_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_access_counter_buffer_t *access_counters,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
{
|
||||
|
||||
NvU32 i;
|
||||
@ -1770,15 +1420,16 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct vm_area_struct *vma = NULL;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
|
||||
uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
|
||||
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
|
||||
uvm_access_counter_buffer_entry_t **notifications = batch_context->notifications;
|
||||
|
||||
UVM_ASSERT(index < batch_context->virt.num_notifications);
|
||||
UVM_ASSERT(index < batch_context->num_notifications);
|
||||
|
||||
uvm_assert_mmap_lock_locked(mm);
|
||||
uvm_assert_rwsem_locked(&va_space->lock);
|
||||
|
||||
address = notifications[index]->address.address;
|
||||
address = notifications[index]->address;
|
||||
|
||||
vma = find_vma_intersection(mm, address, address + 1);
|
||||
if (!vma) {
|
||||
@ -1786,6 +1437,7 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
// notifications when a new VMA is allocated in this range.
|
||||
status = notify_tools_and_process_flags(va_space,
|
||||
gpu,
|
||||
access_counters,
|
||||
0,
|
||||
¬ifications[index],
|
||||
1,
|
||||
@ -1800,11 +1452,11 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
uvm_page_mask_zero(&ats_context->access_counters.accessed_mask);
|
||||
|
||||
for (i = index; i < batch_context->virt.num_notifications; i++) {
|
||||
for (i = index; i < batch_context->num_notifications; i++) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
|
||||
address = current_entry->address.address;
|
||||
address = current_entry->address;
|
||||
|
||||
if (current_entry->virtual_info.va_space != va_space || current_entry->gpu != gpu || address >= end)
|
||||
if (current_entry->va_space != va_space || current_entry->gpu != gpu || address >= end)
|
||||
break;
|
||||
|
||||
uvm_page_mask_set(&ats_context->access_counters.accessed_mask, (address - base) / PAGE_SIZE);
|
||||
@ -1822,6 +1474,7 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
flags_status = notify_tools_and_process_flags(va_space,
|
||||
gpu,
|
||||
access_counters,
|
||||
base,
|
||||
¬ifications[index],
|
||||
*out_index - index,
|
||||
@ -1834,17 +1487,19 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_access_counter_service_batch_context_t *batch_context,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
// TODO: Bug 2018899: Add statistics for dropped access counter notifications
|
||||
static NV_STATUS service_notifications_batch(uvm_gpu_va_space_t *gpu_va_space,
|
||||
struct mm_struct *mm,
|
||||
uvm_access_counter_buffer_t *access_counters,
|
||||
NvU32 index,
|
||||
NvU32 *out_index)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_va_range_t *va_range;
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[index];
|
||||
NvU64 address = current_entry->address.address;
|
||||
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->notifications[index];
|
||||
NvU64 address = current_entry->address;
|
||||
|
||||
UVM_ASSERT(va_space);
|
||||
|
||||
@ -1864,26 +1519,32 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
|
||||
uvm_va_range_managed_t *managed_range = uvm_va_range_to_managed_or_null(va_range);
|
||||
|
||||
if (managed_range) {
|
||||
size_t index = uvm_va_range_block_index(managed_range, address);
|
||||
size_t block_index = uvm_va_range_block_index(managed_range, address);
|
||||
|
||||
va_block = uvm_va_range_block(managed_range, index);
|
||||
va_block = uvm_va_range_block(managed_range, block_index);
|
||||
|
||||
// If the va_range is a managed range, the notification belongs to a
|
||||
// recently freed va_range if va_block is NULL. If va_block is not
|
||||
// NULL, service_virt_notifications_in_block will process flags.
|
||||
// NULL, service_notifications_in_block will process flags.
|
||||
// Clear the notification entry to continue receiving notifications
|
||||
// when a new va_range is allocated in that region.
|
||||
flags = UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
|
||||
}
|
||||
|
||||
if (va_block) {
|
||||
status = service_virt_notifications_in_block(gpu_va_space, mm, va_block, batch_context, index, out_index);
|
||||
status = service_notifications_in_block(gpu_va_space,
|
||||
mm,
|
||||
access_counters,
|
||||
va_block,
|
||||
index,
|
||||
out_index);
|
||||
}
|
||||
else {
|
||||
status = notify_tools_and_process_flags(va_space,
|
||||
gpu_va_space->gpu,
|
||||
access_counters,
|
||||
0,
|
||||
batch_context->virt.notifications,
|
||||
batch_context->notifications,
|
||||
1,
|
||||
flags,
|
||||
NULL);
|
||||
@ -1891,7 +1552,7 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
|
||||
}
|
||||
}
|
||||
else if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
|
||||
status = service_virt_notification_ats(gpu_va_space, mm, batch_context, index, out_index);
|
||||
status = service_notification_ats(gpu_va_space, mm, access_counters, index, out_index);
|
||||
}
|
||||
else {
|
||||
NvU32 flags;
|
||||
@ -1918,8 +1579,9 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
|
||||
// in the batch.
|
||||
status = notify_tools_and_process_flags(va_space,
|
||||
gpu_va_space->gpu,
|
||||
access_counters,
|
||||
0,
|
||||
batch_context->virt.notifications,
|
||||
batch_context->notifications,
|
||||
1,
|
||||
flags,
|
||||
NULL);
|
||||
@ -1930,8 +1592,7 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_access_counter_service_batch_context_t *batch_context)
|
||||
static NV_STATUS service_notifications(uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
NvU32 i = 0;
|
||||
NV_STATUS status = NV_OK;
|
||||
@ -1940,24 +1601,25 @@ static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_va_space_t *prev_va_space = NULL;
|
||||
uvm_gpu_t *prev_gpu = NULL;
|
||||
uvm_gpu_va_space_t *gpu_va_space = NULL;
|
||||
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
|
||||
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
|
||||
|
||||
// TODO: Bug 4299018 : Add support for virtual access counter migrations on
|
||||
// 4K page sizes.
|
||||
if (PAGE_SIZE == UVM_PAGE_SIZE_4K) {
|
||||
return notify_tools_broadcast_and_process_flags(parent_gpu,
|
||||
batch_context->virt.notifications,
|
||||
batch_context->virt.num_notifications,
|
||||
return notify_tools_broadcast_and_process_flags(access_counters,
|
||||
batch_context->notifications,
|
||||
batch_context->num_notifications,
|
||||
0);
|
||||
}
|
||||
|
||||
preprocess_virt_notifications(parent_gpu, batch_context);
|
||||
preprocess_notifications(parent_gpu, batch_context);
|
||||
|
||||
while (i < batch_context->virt.num_notifications) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];
|
||||
va_space = current_entry->virtual_info.va_space;
|
||||
while (i < batch_context->num_notifications) {
|
||||
uvm_access_counter_buffer_entry_t *current_entry = batch_context->notifications[i];
|
||||
va_space = current_entry->va_space;
|
||||
|
||||
if (va_space != prev_va_space) {
|
||||
|
||||
// New va_space detected, drop locks of the old va_space.
|
||||
if (prev_va_space) {
|
||||
uvm_va_space_up_read(prev_va_space);
|
||||
@ -1983,13 +1645,14 @@ static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
|
||||
}
|
||||
|
||||
if (gpu_va_space && uvm_va_space_has_access_counter_migrations(va_space)) {
|
||||
status = service_virt_notifications_batch(gpu_va_space, mm, batch_context, i, &i);
|
||||
status = service_notifications_batch(gpu_va_space, mm, access_counters, i, &i);
|
||||
}
|
||||
else {
|
||||
status = notify_tools_and_process_flags(va_space,
|
||||
current_entry->gpu,
|
||||
access_counters,
|
||||
0,
|
||||
&batch_context->virt.notifications[i],
|
||||
&batch_context->notifications[i],
|
||||
1,
|
||||
0,
|
||||
NULL);
|
||||
@ -1997,7 +1660,10 @@ static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
|
||||
}
|
||||
}
|
||||
else {
|
||||
status = notify_tools_broadcast_and_process_flags(parent_gpu, &batch_context->virt.notifications[i], 1, 0);
|
||||
status = notify_tools_broadcast_and_process_flags(access_counters,
|
||||
&batch_context->notifications[i],
|
||||
1,
|
||||
0);
|
||||
i++;
|
||||
}
|
||||
|
||||
@ -2013,42 +1679,46 @@ static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
|
||||
return status;
|
||||
}
|
||||
|
||||
void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu)
void uvm_service_access_counters(uvm_access_counter_buffer_t *access_counters)
{
    NV_STATUS status = NV_OK;
    uvm_access_counter_service_batch_context_t *batch_context = &parent_gpu->access_counter_buffer_info.batch_service_context;
    uvm_access_counter_service_batch_context_t *batch_context;

    UVM_ASSERT(parent_gpu->access_counters_supported);
    batch_context = &access_counters->batch_service_context;

    if (parent_gpu->access_counter_buffer_info.notifications_ignored_count > 0)
    if (access_counters->notifications_ignored_count > 0)
        return;

    while (1) {
        batch_context->num_cached_notifications = fetch_access_counter_buffer_entries(parent_gpu,
                                                                                      batch_context,
        batch_context->num_cached_notifications = fetch_access_counter_buffer_entries(access_counters,
                                                                                      NOTIFICATION_FETCH_MODE_BATCH_READY);
        if (batch_context->num_cached_notifications == 0)
            break;

        ++batch_context->batch_id;

        if (batch_context->virt.num_notifications) {
            status = service_virt_notifications(parent_gpu, batch_context);
        if (batch_context->num_notifications) {
            status = service_notifications(access_counters);
            if (status != NV_OK)
                break;
        }

        if (batch_context->phys.num_notifications) {
            status = service_phys_notifications(parent_gpu, batch_context);
            if (status != NV_OK)
        if (uvm_enable_builtin_tests) {
            if (access_counters->test.sleep_per_iteration_us) {
                usleep_range(access_counters->test.sleep_per_iteration_us,
                             access_counters->test.sleep_per_iteration_us * 2);
            }

            if (access_counters->test.one_iteration_per_batch)
                break;
        }
    }

    if (status != NV_OK) {
        UVM_DBG_PRINT("Error %s servicing access counter notifications on GPU: %s\n",
        UVM_DBG_PRINT("Error %s servicing access counter notifications on GPU: %s notif buf index: %u\n",
                      nvstatusToString(status),
                      uvm_parent_gpu_name(parent_gpu));
                      uvm_parent_gpu_name(access_counters->parent_gpu),
                      access_counters->index);
    }
}

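// Illustrative aside (not part of the driver source): the service loop above
// drains the buffer one batch at a time and, under test, may sleep between
// iterations (usleep_range sleeps for a duration in the given range). A
// userspace analogue of that drain-with-optional-throttle shape, with
// hypothetical fetch_batch()/service_batch() callbacks:

#include <time.h>

typedef unsigned (*fetch_batch_fn)(void *ctx);
typedef int (*service_batch_fn)(void *ctx, unsigned count);

static int drain_with_throttle(void *ctx,
                               fetch_batch_fn fetch_batch,
                               service_batch_fn service_batch,
                               unsigned sleep_us)
{
    int status = 0;

    while (1) {
        unsigned count = fetch_batch(ctx);

        if (count == 0)
            break;

        status = service_batch(ctx, count);
        if (status != 0)
            break;

        // Optional per-iteration throttle, analogous to the test sleep above.
        if (sleep_us) {
            struct timespec ts = { sleep_us / 1000000, (sleep_us % 1000000) * 1000L };

            nanosleep(&ts, NULL);
        }
    }

    return status;
}
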
@ -2069,7 +1739,6 @@ NV_STATUS uvm_api_clear_all_access_counters(UVM_CLEAR_ALL_ACCESS_COUNTERS_PARAMS
|
||||
uvm_va_space_down_read(va_space);
|
||||
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
|
||||
if (gpu->parent == parent_gpu)
|
||||
continue;
|
||||
|
||||
@ -2081,25 +1750,28 @@ NV_STATUS uvm_api_clear_all_access_counters(UVM_CLEAR_ALL_ACCESS_COUNTERS_PARAMS
|
||||
uvm_va_space_up_read(va_space);
|
||||
|
||||
for_each_gpu_in_mask(gpu, retained_gpus) {
|
||||
NvU32 notif_buf_index;
|
||||
|
||||
if (!gpu->parent->access_counters_supported)
|
||||
continue;
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
for (notif_buf_index = 0; notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount; notif_buf_index++) {
|
||||
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
|
||||
notif_buf_index);
|
||||
uvm_access_counters_isr_lock(access_counters);
|
||||
|
||||
// Access counters not enabled. Nothing to clear
|
||||
if (gpu->parent->isr.access_counters.handling_ref_count) {
|
||||
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
// Access counters are not enabled. Nothing to clear.
|
||||
if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count) {
|
||||
status = access_counter_clear_all(gpu, access_counters);
|
||||
if (status == NV_OK)
|
||||
status = uvm_tracker_wait(&access_counters->clear_tracker);
|
||||
}
|
||||
|
||||
status = access_counter_clear_all(gpu);
|
||||
if (status == NV_OK)
|
||||
status = uvm_tracker_wait(&access_counters->clear_tracker);
|
||||
uvm_access_counters_isr_unlock(access_counters);
|
||||
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
|
||||
if (status != NV_OK)
|
||||
break;
|
||||
}
|
||||
|
||||
for_each_gpu_in_mask(gpu, retained_gpus)
|
||||
@ -2121,21 +1793,12 @@ static NV_STATUS access_counters_config_from_test_params(const UVM_TEST_RECONFIG
|
||||
if (params->threshold == 0 || params->threshold > g_uvm_access_counters_threshold_max)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
if (config_granularity_to_bytes(params->mimc_granularity, &tracking_size) != NV_OK)
|
||||
if (config_granularity_to_bytes(params->granularity, &tracking_size) != NV_OK)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
if (config_granularity_to_bytes(params->momc_granularity, &tracking_size) != NV_OK)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
// Since values for granularity/use limit are shared between tests and
|
||||
// nv_uvm_types.h, the value will be checked in the call to
|
||||
// nvUvmInterfaceEnableAccessCntr
|
||||
config->mimcGranularity = params->mimc_granularity;
|
||||
config->momcGranularity = params->momc_granularity;
|
||||
|
||||
config->mimcUseLimit = params->mimc_use_limit;
|
||||
config->momcUseLimit = params->momc_use_limit;
|
||||
|
||||
// Since values for granularity are shared between tests and nv_uvm_types.h,
|
||||
// the value will be checked in the call to nvUvmInterfaceEnableAccessCntr
|
||||
config->granularity = params->granularity;
|
||||
config->threshold = params->threshold;
|
||||
|
||||
return NV_OK;
|
||||
@ -2145,7 +1808,40 @@ bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space)
|
||||
{
|
||||
va_space_access_counters_info_t *va_space_access_counters = va_space_access_counters_info_get(va_space);
|
||||
|
||||
return atomic_read(&va_space_access_counters->params.enable_mimc_migrations);
|
||||
return atomic_read(&va_space_access_counters->enable_migrations);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_access_counters_init(void)
{
    NV_STATUS status = NV_OK;
    NvU64 granularity_bytes = 0;

    if (uvm_perf_access_counter_threshold < UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN) {
        g_default_config.threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN;
        UVM_INFO_PRINT("Value %u too small for uvm_perf_access_counter_threshold, using %u instead\n",
                       uvm_perf_access_counter_threshold,
                       g_default_config.threshold);
    }
    else if (uvm_perf_access_counter_threshold > UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX) {
        g_default_config.threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX;
        UVM_INFO_PRINT("Value %u too large for uvm_perf_access_counter_threshold, using %u instead\n",
                       uvm_perf_access_counter_threshold,
                       g_default_config.threshold);
    }
    else {
        g_default_config.threshold = uvm_perf_access_counter_threshold;
    }

    status = config_granularity_to_bytes(g_default_config.granularity, &granularity_bytes);
    UVM_ASSERT(status == NV_OK);
    if (granularity_bytes > UVM_MAX_TRANSLATION_SIZE)
        UVM_ASSERT(granularity_bytes % UVM_MAX_TRANSLATION_SIZE == 0);

    return NV_OK;
}

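// Illustrative aside (not part of the driver source): the threshold handling
// above is the usual clamp-with-warning pattern for a module parameter. A
// standalone sketch with hypothetical MIN/MAX bounds:

#include <stdio.h>

#define THRESHOLD_MIN 1u
#define THRESHOLD_MAX 65535u

// Returns 'requested' clamped to [THRESHOLD_MIN, THRESHOLD_MAX], warning when
// the requested value had to be adjusted.
static unsigned clamp_threshold(unsigned requested)
{
    if (requested < THRESHOLD_MIN) {
        fprintf(stderr, "Value %u too small, using %u instead\n", requested, THRESHOLD_MIN);
        return THRESHOLD_MIN;
    }

    if (requested > THRESHOLD_MAX) {
        fprintf(stderr, "Value %u too large, using %u instead\n", requested, THRESHOLD_MAX);
        return THRESHOLD_MAX;
    }

    return requested;
}
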
void uvm_access_counters_exit(void)
|
||||
{
|
||||
}
|
||||
|
||||
NV_STATUS uvm_perf_access_counters_init(void)
|
||||
@ -2203,14 +1899,110 @@ NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_E
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_reconfigure_access_counters_notif_buffer(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params,
|
||||
uvm_gpu_t *gpu,
|
||||
uvm_va_space_t *va_space,
|
||||
UvmGpuAccessCntrConfig *config,
|
||||
uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_va_space_t *va_space_reconfiguration_owner;
|
||||
NvU32 notif_buf_index = access_counters->index;
|
||||
|
||||
if (params->max_batch_size > access_counters->max_notifications)
|
||||
return NV_ERR_INVALID_ARGUMENT;
|
||||
|
||||
// ISR lock ensures that we own GET/PUT registers. It disables
|
||||
// interrupts and ensures that no other thread (nor the top half) will
|
||||
// be able to re-enable interrupts during reconfiguration.
|
||||
uvm_access_counters_isr_lock(access_counters);
|
||||
|
||||
uvm_va_space_down_read_rm(va_space);
|
||||
|
||||
if (!uvm_processor_mask_test(&va_space->registered_gpus, gpu->id)) {
|
||||
status = NV_ERR_INVALID_STATE;
|
||||
goto exit_unlock;
|
||||
}
|
||||
|
||||
// Unregistration already started. Fail to avoid an interleaving in
|
||||
// which access counters end up being enabled on an unregistered GPU:
|
||||
// (thread 0) uvm_va_space_unregister_gpu disables access counters.
|
||||
// (thread 1) assuming no VA space lock is held yet by the
|
||||
// unregistration, this function enables access counters and
|
||||
// runs to completion, returning NV_OK.
|
||||
// (thread 0) uvm_va_space_unregister_gpu takes the VA space lock and
|
||||
// completes the unregistration.
|
||||
if (uvm_processor_mask_test(&va_space->gpu_unregister_in_progress, gpu->id)) {
|
||||
status = NV_ERR_INVALID_STATE;
|
||||
goto exit_unlock;
|
||||
}
|
||||
|
||||
va_space_reconfiguration_owner = access_counters->test.reconfiguration_owner;
|
||||
|
||||
// If any other VA space has reconfigured access counters on this GPU,
|
||||
// return error to avoid overwriting its configuration.
|
||||
if (va_space_reconfiguration_owner && (va_space_reconfiguration_owner != va_space)) {
|
||||
status = NV_ERR_INVALID_STATE;
|
||||
goto exit_unlock;
|
||||
}
|
||||
|
||||
if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
|
||||
status = gpu_access_counters_enable(gpu, access_counters, config);
|
||||
|
||||
if (status != NV_OK)
|
||||
goto exit_unlock;
|
||||
}
|
||||
|
||||
UVM_ASSERT_MSG(gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count > 0,
|
||||
"notif buf index: %u\n",
|
||||
notif_buf_index);
|
||||
|
||||
// Disable counters, and re-enable with the new configuration.
|
||||
// Note that we are yielding ownership even when the access counters are
|
||||
// enabled in at least gpu. This inconsistent state is not visible to
|
||||
// other threads or VA spaces because of the ISR lock, and it is
|
||||
// immediately rectified by retaking ownership.
|
||||
access_counters_yield_ownership(gpu->parent, notif_buf_index);
|
||||
status = access_counters_take_ownership(gpu, notif_buf_index, config);
|
||||
|
||||
// Retaking ownership failed, so RM owns the interrupt.
|
||||
if (status != NV_OK) {
|
||||
// The state of any other VA space with access counters enabled is
|
||||
// corrupt
|
||||
// TODO: Bug 2419290: Fail reconfiguration if access
|
||||
// counters are enabled on a different VA space.
|
||||
if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count > 1) {
|
||||
UVM_ASSERT_MSG(status == NV_OK,
|
||||
"Access counters interrupt still owned by RM, other VA spaces may experience failures");
|
||||
}
|
||||
|
||||
access_counters_disable(access_counters);
|
||||
goto exit_unlock;
|
||||
}
|
||||
|
||||
access_counters->test.reconfiguration_owner = va_space;
|
||||
|
||||
if (params->max_batch_size)
|
||||
access_counters->max_batch_size = params->max_batch_size;
|
||||
access_counters->test.one_iteration_per_batch = params->one_iteration_per_batch;
|
||||
access_counters->test.sleep_per_iteration_us = params->sleep_per_iteration_us;
|
||||
|
||||
exit_unlock:
|
||||
uvm_va_space_up_read_rm(va_space);
|
||||
|
||||
uvm_access_counters_isr_unlock(access_counters);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_gpu_t *gpu = NULL;
|
||||
UvmGpuAccessCntrConfig config = {0};
|
||||
va_space_access_counters_info_t *va_space_access_counters;
|
||||
uvm_va_space_t *va_space_reconfiguration_owner;
|
||||
uvm_va_space_t *va_space = uvm_va_space_get(filp);
|
||||
NvU32 notif_buf_index;
|
||||
|
||||
status = access_counters_config_from_test_params(params, &config);
|
||||
if (status != NV_OK)
|
||||
@ -2225,90 +2017,31 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
|
||||
goto exit_release_gpu;
|
||||
}
|
||||
|
||||
// ISR lock ensures that we own GET/PUT registers. It disables interrupts
|
||||
// and ensures that no other thread (nor the top half) will be able to
|
||||
// re-enable interrupts during reconfiguration.
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
|
||||
uvm_va_space_down_read_rm(va_space);
|
||||
|
||||
if (!uvm_processor_mask_test(&va_space->registered_gpus, gpu->id)) {
|
||||
status = NV_ERR_INVALID_STATE;
|
||||
goto exit_isr_unlock;
|
||||
// a zero max_batch_size does not change the driver's behavior.
|
||||
if (params->max_batch_size < (NvU32)UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN && params->max_batch_size != 0) {
|
||||
status = NV_ERR_INVALID_ARGUMENT;
|
||||
goto exit_release_gpu;
|
||||
}
|
||||
|
||||
// Unregistration already started. Fail to avoid an interleaving in which
|
||||
// access counters end up being enabled on an unregistered GPU:
|
||||
// (thread 0) uvm_va_space_unregister_gpu disables access counters
|
||||
// (thread 1) assuming no VA space lock is held yet by the unregistration,
|
||||
// this function enables access counters and runs to completion,
|
||||
// returning NV_OK
|
||||
// (thread 0) uvm_va_space_unregister_gpu takes the VA space lock and
|
||||
// completes the unregistration
|
||||
if (uvm_processor_mask_test(&va_space->gpu_unregister_in_progress, gpu->id)) {
|
||||
status = NV_ERR_INVALID_STATE;
|
||||
goto exit_isr_unlock;
|
||||
uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
|
||||
|
||||
for (notif_buf_index = 0; notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount; notif_buf_index++) {
|
||||
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
|
||||
notif_buf_index);
|
||||
status = test_reconfigure_access_counters_notif_buffer(params, gpu, va_space, &config, access_counters);
|
||||
if (status != NV_OK)
|
||||
goto exit_ac_lock;
|
||||
}
|
||||
|
||||
va_space_access_counters = va_space_access_counters_info_get(va_space);
|
||||
|
||||
va_space_reconfiguration_owner = gpu->parent->access_counter_buffer_info.reconfiguration_owner;
|
||||
|
||||
// If any other VA space has reconfigured access counters on this GPU,
|
||||
// return error to avoid overwriting its configuration.
|
||||
if (va_space_reconfiguration_owner && (va_space_reconfiguration_owner != va_space)) {
|
||||
status = NV_ERR_INVALID_STATE;
|
||||
goto exit_isr_unlock;
|
||||
}
|
||||
|
||||
if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
|
||||
status = gpu_access_counters_enable(gpu, &config);
|
||||
|
||||
if (status == NV_OK)
|
||||
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
|
||||
else
|
||||
goto exit_isr_unlock;
|
||||
}
|
||||
|
||||
UVM_ASSERT(gpu->parent->isr.access_counters.handling_ref_count > 0);
|
||||
|
||||
// Disable counters, and re-enable with the new configuration.
|
||||
// Note that we are yielding ownership even when the access counters are
|
||||
// enabled in at least gpu. This inconsistent state is not visible to other
|
||||
// threads or VA spaces because of the ISR lock, and it is immediately
|
||||
// rectified by retaking ownership.
|
||||
access_counters_yield_ownership(gpu->parent);
|
||||
status = access_counters_take_ownership(gpu, &config);
|
||||
|
||||
// Retaking ownership failed, so RM owns the interrupt.
|
||||
if (status != NV_OK) {
|
||||
// The state of any other VA space with access counters enabled is
|
||||
// corrupt
|
||||
// TODO: Bug 2419290: Fail reconfiguration if access
|
||||
// counters are enabled on a different VA space.
|
||||
if (gpu->parent->isr.access_counters.handling_ref_count > 1) {
|
||||
UVM_ASSERT_MSG(status == NV_OK,
|
||||
"Access counters interrupt still owned by RM, other VA spaces may experience failures");
|
||||
}
|
||||
|
||||
uvm_parent_processor_mask_clear_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
|
||||
parent_gpu_access_counters_disable(gpu->parent);
|
||||
goto exit_isr_unlock;
|
||||
}
|
||||
|
||||
gpu->parent->access_counter_buffer_info.reconfiguration_owner = va_space;
|
||||
|
||||
uvm_va_space_up_read_rm(va_space);
|
||||
uvm_va_space_down_write(va_space);
|
||||
atomic_set(&va_space_access_counters->params.enable_mimc_migrations, !!params->enable_mimc_migrations);
|
||||
atomic_set(&va_space_access_counters->params.enable_momc_migrations, !!params->enable_momc_migrations);
|
||||
va_space_access_counters = va_space_access_counters_info_get(va_space);
|
||||
atomic_set(&va_space_access_counters->enable_migrations, !!params->enable_migrations);
|
||||
uvm_va_space_up_write(va_space);
|
||||
|
||||
exit_isr_unlock:
    if (status != NV_OK)
        uvm_va_space_up_read_rm(va_space);
    uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);

    uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);

exit_ac_lock:
    uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);

exit_release_gpu:
    uvm_gpu_release(gpu);
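The exit paths above close out a reconfiguration attempt in which exactly one VA space may own the new configuration for a GPU. Below is a minimal sketch of that ownership rule in plain C; the types, status values and the reconfigure() helper are illustrative stand-ins rather than the driver's API, and the yield/retake step is elided to a comment.

```c
#include <stdio.h>

typedef struct { const char *name; } va_space_t;

typedef struct {
    // First VA space to reconfigure this GPU's access counters; other VA
    // spaces are rejected so they cannot overwrite its configuration.
    va_space_t *reconfiguration_owner;
} gpu_state_t;

typedef enum { STATUS_OK = 0, STATUS_INVALID_STATE } status_t;

static status_t reconfigure(gpu_state_t *gpu, va_space_t *vas)
{
    if (gpu->reconfiguration_owner && gpu->reconfiguration_owner != vas)
        return STATUS_INVALID_STATE;

    // ... yield ownership to RM and retake it with the new configuration ...

    gpu->reconfiguration_owner = vas;
    return STATUS_OK;
}

int main(void)
{
    gpu_state_t gpu = { NULL };
    va_space_t a = { "va_space A" }, b = { "va_space B" };

    printf("%s: %d\n", a.name, reconfigure(&gpu, &a)); // 0: becomes the owner
    printf("%s: %d\n", b.name, reconfigure(&gpu, &b)); // 1: rejected
    printf("%s: %d\n", a.name, reconfigure(&gpu, &a)); // 0: the owner may reconfigure again
    return 0;
}
```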
|
||||
@ -2320,17 +2053,12 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
|
||||
{
    NV_STATUS status = NV_OK;
    uvm_gpu_t *gpu = NULL;
    uvm_access_counter_buffer_info_t *access_counters;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    NvU32 notif_buf_index;

    if (params->mode >= UVM_TEST_ACCESS_COUNTER_RESET_MODE_MAX)
        return NV_ERR_INVALID_ARGUMENT;

    if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_TARGETED &&
        params->counter_type >= UVM_TEST_ACCESS_COUNTER_TYPE_MAX) {
        return NV_ERR_INVALID_ARGUMENT;
    }

    gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpu_uuid);
    if (!gpu)
        return NV_ERR_INVALID_DEVICE;

@ -2340,37 +2068,51 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
|
||||
goto exit_release_gpu;
|
||||
}
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
for (notif_buf_index = 0;
|
||||
notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount && status == NV_OK;
|
||||
notif_buf_index++) {
|
||||
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
|
||||
notif_buf_index);
|
||||
|
||||
// Access counters not enabled. Nothing to reset
|
||||
if (gpu->parent->isr.access_counters.handling_ref_count == 0)
|
||||
goto exit_isr_unlock;
|
||||
uvm_access_counters_isr_lock(access_counters);
|
||||
|
||||
access_counters = &gpu->parent->access_counter_buffer_info;
|
||||
// Access counters not enabled. Nothing to reset
|
||||
if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0)
|
||||
goto exit_isr_unlock;
|
||||
|
||||
if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) {
|
||||
status = access_counter_clear_all(gpu);
|
||||
}
|
||||
else {
|
||||
uvm_access_counter_buffer_entry_t entry = { 0 };
|
||||
uvm_access_counter_buffer_entry_t *notification = &entry;
|
||||
if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) {
|
||||
status = access_counter_clear_all(gpu, access_counters);
|
||||
}
|
||||
else {
|
||||
uvm_access_counter_buffer_entry_t entry = { 0 };
|
||||
uvm_access_counter_buffer_entry_t *notification = &entry;
|
||||
|
||||
if (params->counter_type == UVM_TEST_ACCESS_COUNTER_TYPE_MIMC)
|
||||
entry.counter_type = UVM_ACCESS_COUNTER_TYPE_MIMC;
|
||||
else
|
||||
entry.counter_type = UVM_ACCESS_COUNTER_TYPE_MOMC;
|
||||
entry.bank = params->bank;
|
||||
entry.tag = params->tag;
|
||||
|
||||
entry.bank = params->bank;
|
||||
entry.tag = params->tag;
|
||||
status = access_counter_clear_notifications(gpu, access_counters, ¬ification, 1);
|
||||
}
|
||||
|
||||
status = access_counter_clear_notifications(gpu, ¬ification, 1);
|
||||
}
|
||||
|
||||
if (status == NV_OK)
|
||||
status = uvm_tracker_wait(&access_counters->clear_tracker);
|
||||
if (status == NV_OK)
|
||||
status = uvm_tracker_wait(&access_counters->clear_tracker);
|
||||
|
||||
exit_isr_unlock:
    uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
        uvm_access_counters_isr_unlock(access_counters);

        // We only need to clear_all() once.
        if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) {
            NvU32 i;

            // Early exit from the main loop, since we only need to
            // clear_all() once. Check that all the remaining notification
            // buffers have access counters in the same state.
            NvBool index0_state = (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0);
            for (i = notif_buf_index + 1; i < gpu->parent->rm_info.accessCntrBufferCount; i++)
                UVM_ASSERT((gpu->parent->isr.access_counters[i].handling_ref_count == 0) == index0_state);

            break;
        }
    }
|
||||
|
||||
exit_release_gpu:
|
||||
uvm_gpu_release(gpu);
|
||||
@ -2381,39 +2123,44 @@ exit_release_gpu:
|
||||
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore)
|
||||
{
|
||||
bool change_intr_state = false;
|
||||
NvU32 notif_buf_index;
|
||||
|
||||
if (!parent_gpu->access_counters_supported)
|
||||
return;
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
|
||||
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
|
||||
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(parent_gpu,
|
||||
notif_buf_index);
|
||||
uvm_access_counters_isr_lock(access_counters);
|
||||
|
||||
if (do_ignore) {
|
||||
if (parent_gpu->access_counter_buffer_info.notifications_ignored_count++ == 0)
|
||||
change_intr_state = true;
|
||||
if (do_ignore) {
|
||||
if (access_counters->notifications_ignored_count++ == 0)
|
||||
change_intr_state = true;
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT(access_counters->notifications_ignored_count >= 1);
|
||||
if (--access_counters->notifications_ignored_count == 0)
|
||||
change_intr_state = true;
|
||||
}
|
||||
|
||||
if (change_intr_state) {
|
||||
// We need to avoid an interrupt storm while ignoring notifications.
|
||||
// We just disable the interrupt.
|
||||
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
if (do_ignore)
|
||||
uvm_access_counters_intr_disable(access_counters);
|
||||
else
|
||||
uvm_access_counters_intr_enable(access_counters);
|
||||
|
||||
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
if (!do_ignore)
|
||||
access_counter_buffer_flush_locked(access_counters, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
|
||||
}
|
||||
|
||||
uvm_access_counters_isr_unlock(access_counters);
|
||||
}
|
||||
else {
|
||||
UVM_ASSERT(parent_gpu->access_counter_buffer_info.notifications_ignored_count >= 1);
|
||||
if (--parent_gpu->access_counter_buffer_info.notifications_ignored_count == 0)
|
||||
change_intr_state = true;
|
||||
}
|
||||
|
||||
if (change_intr_state) {
|
||||
// We need to avoid an interrupt storm while ignoring notifications. We
|
||||
// just disable the interrupt.
|
||||
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
if (do_ignore)
|
||||
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
|
||||
else
|
||||
uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
|
||||
|
||||
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
if (!do_ignore)
|
||||
access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
|
||||
}
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
|
||||
@ -2434,3 +2181,34 @@ NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTER
|
||||
uvm_gpu_release(gpu);
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_query_access_counters(UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
{
    NV_STATUS status = NV_OK;
    uvm_gpu_t *gpu = NULL;
    uvm_va_space_t *va_space = uvm_va_space_get(filp);
    NvU32 buffer_size;
    NvU32 index;

    gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpu_uuid);
    if (!gpu)
        return NV_ERR_INVALID_DEVICE;

    if (!gpu->parent->access_counters_supported) {
        status = NV_ERR_NOT_SUPPORTED;
        goto exit_release_gpu;
    }

    buffer_size = gpu->parent->access_counter_buffer[0].rm_info.bufferSize;

    for (index = 1; index < gpu->parent->rm_info.accessCntrBufferCount; index++)
        UVM_ASSERT(gpu->parent->access_counter_buffer[index].rm_info.bufferSize == buffer_size);

    params->num_notification_buffers = gpu->parent->rm_info.accessCntrBufferCount;
    params->num_notification_entries = buffer_size / gpu->parent->access_counter_buffer_hal->entry_size(gpu->parent);

exit_release_gpu:
    uvm_gpu_release(gpu);

    return status;
}
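uvm_test_query_access_counters() above derives the entry count by dividing the buffer size reported by RM by the HAL's per-entry size, after asserting that every notification buffer has the same size. A small arithmetic sketch of that computation follows; the 64 KiB buffer and 32-byte entry sizes are made up purely for illustration.

```c
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

// Hypothetical sizes, only for illustration; the real values come from
// rm_info.bufferSize and the GPU's access counter buffer HAL.
#define EXAMPLE_BUFFER_SIZE  (64 * 1024)  // bytes per notification buffer
#define EXAMPLE_ENTRY_SIZE   32           // bytes per notification entry

int main(void)
{
    uint32_t buffer_sizes[] = { EXAMPLE_BUFFER_SIZE, EXAMPLE_BUFFER_SIZE };
    size_t num_buffers = sizeof(buffer_sizes) / sizeof(buffer_sizes[0]);

    // Mirror the ioctl's assumption: every notification buffer has the same
    // size, so the entry count can be derived from buffer 0 alone.
    for (size_t i = 1; i < num_buffers; i++)
        assert(buffer_sizes[i] == buffer_sizes[0]);

    uint32_t num_entries = buffer_sizes[0] / EXAMPLE_ENTRY_SIZE;

    printf("notification buffers: %zu, entries per buffer: %u\n",
           num_buffers, (unsigned)num_entries);  // 64 KiB / 32 B = 2048 entries
    return 0;
}
```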
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2024 NVIDIA Corporation
|
||||
Copyright (c) 2017-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -27,11 +27,11 @@
|
||||
#include "uvm_forward_decl.h"
|
||||
#include "uvm_test_ioctl.h"
|
||||
|
||||
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
|
||||
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
|
||||
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
|
||||
void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_service_access_counters(uvm_access_counter_buffer_t *access_counters);
|
||||
|
||||
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
@ -46,17 +46,23 @@ void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
|
||||
//
// When unignoring, the interrupt conditions will be re-evaluated to trigger
// processing of buffered notifications, if any exist.
//
// All of parent_gpu's notification buffers are affected.
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore);

// Return whether the VA space has access counter migrations enabled. The
// caller must ensure that the VA space cannot go away.
bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space);

// Global perf initialization/cleanup functions
// Global access counters initialization/cleanup functions.
NV_STATUS uvm_access_counters_init(void);
void uvm_access_counters_exit(void);

// Global perf initialization/cleanup functions.
NV_STATUS uvm_perf_access_counters_init(void);
void uvm_perf_access_counters_exit(void);

// VA space Initialization/cleanup functions. See comments in
// VA space initialization/cleanup functions. See comments in
// uvm_perf_heuristics.h
NV_STATUS uvm_perf_access_counters_load(uvm_va_space_t *va_space);
void uvm_perf_access_counters_unload(uvm_va_space_t *va_space);

@ -72,17 +78,18 @@ bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
// counters are currently enabled. The hardware notifications and interrupts on
// the GPU are enabled the first time any VA space invokes
// uvm_gpu_access_counters_enable, and disabled when the last VA space invokes
// uvm_parent_gpu_access_counters_disable().
// uvm_gpu_access_counters_disable().
//
// Locking: the VA space lock must not be held by the caller since these
// functions may take the access counters ISR lock.
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu, uvm_va_space_t *va_space);
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);

NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
                                                      struct file *filp);
NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_query_access_counters(UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS *params, struct file *filp);

#endif // __UVM_GPU_ACCESS_COUNTERS_H__
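The enable/disable pair declared above is invoked per VA space; the reconfigure path earlier in this commit tests and sets a per-GPU bit in the VA space's access_counters_enabled_processors mask before calling it. A toy sketch of that test/set/clear bookkeeping with a plain 32-bit mask follows; this is a simplification of the driver's processor mask type, for illustration only.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

typedef struct {
    uint32_t enabled_gpus; // bit i set: access counters enabled on GPU i
} va_space_mask_t;

static bool mask_test(const va_space_mask_t *m, unsigned gpu_id)
{
    return (m->enabled_gpus >> gpu_id) & 1u;
}

static void mask_set(va_space_mask_t *m, unsigned gpu_id)
{
    m->enabled_gpus |= 1u << gpu_id;
}

static void mask_clear(va_space_mask_t *m, unsigned gpu_id)
{
    m->enabled_gpus &= ~(1u << gpu_id);
}

int main(void)
{
    va_space_mask_t vas = { 0 };

    // Enable only if not already enabled for this VA space, as the
    // reconfigure path does before calling the enable function.
    if (!mask_test(&vas, 3))
        mask_set(&vas, 3);

    printf("gpu3 enabled: %d\n", mask_test(&vas, 3)); // 1
    mask_clear(&vas, 3);                              // e.g. rollback on enable failure
    printf("gpu3 enabled: %d\n", mask_test(&vas, 3)); // 0
    return 0;
}
```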
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
Copyright (c) 2016-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -154,62 +154,73 @@ static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_
|
||||
return 1;
|
||||
}
|
||||
|
||||
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
{
    uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
    UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);

    // On Volta, accessCntrBufferCount is > 0, but we don't support access
    // counters in UVM (access_counters_supported is cleared during HAL
    // initialization.) This check prevents the top-half from accessing
    // unallocated memory.
    if (!parent_gpu->access_counters_supported)
        return 0;

    if (parent_gpu->isr.is_suspended)
        return 0;

    if (!parent_gpu->isr.access_counters.handling_ref_count)
    if (!parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count)
        return 0;

    if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem) != 0)
    if (down_trylock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem) != 0)
        return 0;

    if (!uvm_parent_gpu_access_counters_pending(parent_gpu)) {
        up(&parent_gpu->isr.access_counters.service_lock.sem);
    if (!uvm_parent_gpu_access_counters_pending(parent_gpu, notif_buf_index)) {
        up(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem);
        return 0;
    }

    nv_kref_get(&parent_gpu->gpu_kref);

    // Interrupts need to be disabled to avoid an interrupt storm
    uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
    uvm_access_counters_intr_disable(&parent_gpu->access_counter_buffer[notif_buf_index]);

    nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
                                 &parent_gpu->isr.access_counters.bottom_half_q_item);
                                 &parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item);

    return 1;
}
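The handler above shows the top-half pattern: bail out if the buffer is not being handled, try-lock the service lock without blocking, check for pending work, then disable the interrupt and queue a bottom half that later unlocks and re-enables. A self-contained sketch of the same control flow in plain C follows; pthread_mutex_trylock() stands in for the driver's semaphore down_trylock(), and the other helpers are invented placeholders, not driver calls.

```c
#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

// Minimal stand-ins; the driver uses a semaphore trylock, an interrupt
// disable call and a kernel work queue instead of these.
static bool notifications_pending(void) { return true; }
static void intr_disable(void)          { puts("interrupts disabled"); }
static void queue_bottom_half(void)     { puts("bottom half queued"); }

typedef struct {
    unsigned handling_ref_count;
    pthread_mutex_t service_lock;
} handler_t;

// Returns 1 if a bottom half was scheduled, 0 otherwise, mirroring the
// top-half helper above: bail out early, never block, and leave the
// service lock held for the bottom half when work is queued.
static unsigned schedule_handler(handler_t *h)
{
    if (h->handling_ref_count == 0)
        return 0;

    if (pthread_mutex_trylock(&h->service_lock) != 0)
        return 0;  // someone else is servicing; they will see the work

    if (!notifications_pending()) {
        pthread_mutex_unlock(&h->service_lock);
        return 0;
    }

    intr_disable();      // avoid an interrupt storm until serviced
    queue_bottom_half(); // bottom half unlocks service_lock when done
    return 1;
}

int main(void)
{
    static handler_t h = { .handling_ref_count = 1,
                           .service_lock = PTHREAD_MUTEX_INITIALIZER };
    printf("scheduled: %u\n", schedule_handler(&h));
    return 0;
}
```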
|
||||
|
||||
// This is called from RM's top-half ISR (see: the nvidia_isr() function), and UVM is given a
|
||||
// chance to handle the interrupt, before most of the RM processing. UVM communicates what it
|
||||
// did, back to RM, via the return code:
|
||||
// This is called from RM's top-half ISR (see: the nvidia_isr() function), and
|
||||
// UVM is given a chance to handle the interrupt, before most of the RM
|
||||
// processing. UVM communicates what it did, back to RM, via the return code:
|
||||
//
|
||||
// NV_OK:
|
||||
// UVM handled an interrupt.
|
||||
//
|
||||
// NV_WARN_MORE_PROCESSING_REQUIRED:
|
||||
// UVM did not schedule a bottom half, because it was unable to get the locks it
|
||||
// needed, but there is still UVM work to be done. RM will return "not handled" to the
|
||||
// Linux kernel, *unless* RM handled other faults in its top half. In that case, the
|
||||
// fact that UVM did not handle its interrupt is lost. However, life and interrupt
|
||||
// processing continues anyway: the GPU will soon raise another interrupt, because
|
||||
// that's what it does when there are replayable page faults remaining (GET != PUT in
|
||||
// the fault buffer).
|
||||
// UVM did not schedule a bottom half, because it was unable to get the
|
||||
// locks it needed, but there is still UVM work to be done. RM will
|
||||
// return "not handled" to the Linux kernel, *unless* RM handled other
|
||||
// faults in its top half. In that case, the fact that UVM did not
|
||||
// handle its interrupt is lost. However, life and interrupt processing
|
||||
// continues anyway: the GPU will soon raise another interrupt, because
|
||||
// that's what it does when there are replayable page faults remaining
|
||||
// (GET != PUT in the fault buffer).
|
||||
//
|
||||
// NV_ERR_NO_INTR_PENDING:
|
||||
// UVM did not find any work to do. Currently this is handled in RM in exactly the same
|
||||
// way as NV_WARN_MORE_PROCESSING_REQUIRED is handled. However, the extra precision is
|
||||
// available for the future. RM's interrupt handling tends to evolve as new chips and
|
||||
// new interrupts get created.
|
||||
// UVM did not find any work to do. Currently this is handled in RM in
|
||||
// exactly the same way as NV_WARN_MORE_PROCESSING_REQUIRED is handled.
|
||||
// However, the extra precision is available for the future. RM's
|
||||
// interrupt handling tends to evolve as new chips and new interrupts
|
||||
// get created.
|
||||
|
||||
static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu;
|
||||
unsigned num_handlers_scheduled = 0;
|
||||
NV_STATUS status = NV_OK;
|
||||
NvU32 i;
|
||||
|
||||
if (!in_interrupt() && in_atomic()) {
|
||||
// Early-out if we're not in interrupt context, but memory allocations
|
||||
@ -243,14 +254,16 @@ static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
|
||||
nv_kref_get(&parent_gpu->gpu_kref);
|
||||
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
|
||||
|
||||
// Now that we got a GPU object, lock it so that it can't be removed without us noticing.
|
||||
// Now that we got a GPU object, lock it so that it can't be removed without
|
||||
// us noticing.
|
||||
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
++parent_gpu->isr.interrupt_count;
|
||||
|
||||
num_handlers_scheduled += schedule_replayable_faults_handler(parent_gpu);
|
||||
num_handlers_scheduled += schedule_non_replayable_faults_handler(parent_gpu);
|
||||
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu);
|
||||
for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++)
|
||||
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu, i);
|
||||
|
||||
if (num_handlers_scheduled == 0) {
|
||||
if (parent_gpu->isr.is_suspended)
|
||||
@ -288,6 +301,55 @@ static NV_STATUS init_queue_on_node(nv_kthread_q_t *queue, const char *name, int
|
||||
return errno_to_nv_status(nv_kthread_q_init(queue, name));
|
||||
}
|
||||
|
||||
static NV_STATUS uvm_isr_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_va_block_context_t *block_context;
|
||||
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
|
||||
|
||||
uvm_sema_init(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, 1, UVM_LOCK_ORDER_ISR);
|
||||
|
||||
status = uvm_parent_gpu_init_access_counters(parent_gpu, notif_buf_index);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s, notif buf index: %u\n",
|
||||
nvstatusToString(status),
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
notif_buf_index);
|
||||
return status;
|
||||
}
|
||||
|
||||
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_block_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
block_context = uvm_va_block_context_alloc(NULL);
|
||||
if (!block_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
parent_gpu->access_counter_buffer[notif_buf_index].batch_service_context.block_service_context.block_context =
|
||||
block_context;
|
||||
|
||||
nv_kthread_q_item_init(&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item,
|
||||
access_counters_isr_bottom_half_entry,
|
||||
&parent_gpu->access_counter_buffer[notif_buf_index]);
|
||||
|
||||
// Access counters interrupts are initially disabled. They are
|
||||
// dynamically enabled when the GPU is registered on a VA space.
|
||||
parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count = 0;
|
||||
|
||||
if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc_stats_cpu)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count =
|
||||
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count) *
|
||||
num_possible_cpus());
|
||||
if (!parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
@ -316,7 +378,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
if (!block_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
parent_gpu->fault_buffer_info.replayable.block_service_context.block_context = block_context;
|
||||
parent_gpu->fault_buffer.replayable.block_service_context.block_context = block_context;
|
||||
|
||||
parent_gpu->isr.replayable_faults.handling = true;
|
||||
|
||||
@ -344,7 +406,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
if (!block_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context = block_context;
|
||||
parent_gpu->fault_buffer.non_replayable.block_service_context.block_context = block_context;
|
||||
|
||||
parent_gpu->isr.non_replayable_faults.handling = true;
|
||||
|
||||
@ -361,32 +423,31 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
}
|
||||
|
||||
if (parent_gpu->access_counters_supported) {
|
||||
status = uvm_parent_gpu_init_access_counters(parent_gpu);
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
|
||||
nvstatusToString(status),
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
return status;
|
||||
NvU32 index_count = parent_gpu->rm_info.accessCntrBufferCount;
|
||||
NvU32 notif_buf_index;
|
||||
|
||||
UVM_ASSERT(index_count > 0);
|
||||
|
||||
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_buffer)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
parent_gpu->access_counter_buffer = uvm_kvmalloc_zero(sizeof(*parent_gpu->access_counter_buffer) *
|
||||
index_count);
|
||||
if (!parent_gpu->access_counter_buffer)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
parent_gpu->isr.access_counters = uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters) * index_count);
|
||||
if (!parent_gpu->isr.access_counters)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
for (notif_buf_index = 0; notif_buf_index < index_count; notif_buf_index++) {
|
||||
status = uvm_isr_init_access_counters(parent_gpu, notif_buf_index);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
block_context = uvm_va_block_context_alloc(NULL);
|
||||
if (!block_context)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context =
|
||||
block_context;
|
||||
|
||||
nv_kthread_q_item_init(&parent_gpu->isr.access_counters.bottom_half_q_item,
|
||||
access_counters_isr_bottom_half_entry,
|
||||
parent_gpu);
|
||||
|
||||
// Access counters interrupts are initially disabled. They are
|
||||
// dynamically enabled when the GPU is registered on a VA space.
|
||||
parent_gpu->isr.access_counters.handling_ref_count = 0;
|
||||
parent_gpu->isr.access_counters.stats.cpu_exec_count =
|
||||
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters.stats.cpu_exec_count) * num_possible_cpus());
|
||||
if (!parent_gpu->isr.access_counters.stats.cpu_exec_count)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
}
|
||||
}
|
||||
|
||||
@ -401,7 +462,15 @@ void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
|
||||
NvU32 notif_buf_index;
|
||||
|
||||
if (parent_gpu->isr.access_counters) {
|
||||
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
|
||||
UVM_ASSERT_MSG(parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count == 0,
|
||||
"notif buf index: %u\n",
|
||||
notif_buf_index);
|
||||
}
|
||||
}
|
||||
|
||||
// Now that the GPU is safely out of the global table, lock the GPU and mark
|
||||
// it as no longer handling interrupts so the top half knows not to schedule
|
||||
@ -459,24 +528,38 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
|
||||
}
|
||||
|
||||
if (parent_gpu->access_counters_supported) {
|
||||
// It is safe to deinitialize access counters even if they have not been
|
||||
// successfully initialized.
|
||||
uvm_parent_gpu_deinit_access_counters(parent_gpu);
|
||||
block_context =
|
||||
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context;
|
||||
uvm_va_block_context_free(block_context);
|
||||
NvU32 notif_buf_index;
|
||||
|
||||
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
|
||||
// It is safe to deinitialize access counters even if they have not
|
||||
// been successfully initialized.
|
||||
uvm_parent_gpu_deinit_access_counters(parent_gpu, notif_buf_index);
|
||||
|
||||
if (parent_gpu->access_counter_buffer) {
|
||||
uvm_access_counter_buffer_t *access_counter = &parent_gpu->access_counter_buffer[notif_buf_index];
|
||||
block_context = access_counter->batch_service_context.block_service_context.block_context;
|
||||
uvm_va_block_context_free(block_context);
|
||||
}
|
||||
|
||||
if (parent_gpu->isr.access_counters)
|
||||
uvm_kvfree(parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count);
|
||||
}
|
||||
|
||||
uvm_kvfree(parent_gpu->isr.access_counters);
|
||||
uvm_kvfree(parent_gpu->access_counter_buffer);
|
||||
}
|
||||
|
||||
if (parent_gpu->non_replayable_faults_supported) {
|
||||
block_context = parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context;
|
||||
block_context = parent_gpu->fault_buffer.non_replayable.block_service_context.block_context;
|
||||
uvm_va_block_context_free(block_context);
|
||||
|
||||
uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
|
||||
}
|
||||
|
||||
block_context = parent_gpu->fault_buffer_info.replayable.block_service_context.block_context;
|
||||
block_context = parent_gpu->fault_buffer.replayable.block_service_context.block_context;
|
||||
uvm_va_block_context_free(block_context);
|
||||
|
||||
uvm_kvfree(parent_gpu->isr.replayable_faults.stats.cpu_exec_count);
|
||||
uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
|
||||
uvm_kvfree(parent_gpu->isr.access_counters.stats.cpu_exec_count);
|
||||
}
|
||||
|
||||
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
|
||||
@ -584,25 +667,29 @@ static void non_replayable_faults_isr_bottom_half_entry(void *args)
|
||||
|
||||
static void access_counters_isr_bottom_half(void *args)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
|
||||
uvm_access_counter_buffer_t *access_counters = (uvm_access_counter_buffer_t *)args;
|
||||
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
|
||||
NvU32 notif_buf_index = access_counters->index;
|
||||
unsigned int cpu;
|
||||
|
||||
UVM_ASSERT(parent_gpu->access_counters_supported);
|
||||
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
|
||||
|
||||
uvm_record_lock(&parent_gpu->isr.access_counters.service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
|
||||
uvm_record_lock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
|
||||
|
||||
// Multiple bottom halves for counter notifications can be running
|
||||
// concurrently, but only one can be running this function for a given GPU
|
||||
// since we enter with the access_counters_isr_lock held.
|
||||
// concurrently, but only one per-notification-buffer (i.e.,
|
||||
// notif_buf_index) can be running this function for a given GPU since we
|
||||
// enter with the per-notification-buffer access_counters_isr_lock held.
|
||||
cpu = get_cpu();
|
||||
++parent_gpu->isr.access_counters.stats.bottom_half_count;
|
||||
cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters.stats.cpus_used_mask);
|
||||
++parent_gpu->isr.access_counters.stats.cpu_exec_count[cpu];
|
||||
++parent_gpu->isr.access_counters[notif_buf_index].stats.bottom_half_count;
|
||||
cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters[notif_buf_index].stats.cpus_used_mask);
|
||||
++parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count[cpu];
|
||||
put_cpu();
|
||||
|
||||
uvm_parent_gpu_service_access_counters(parent_gpu);
|
||||
uvm_service_access_counters(access_counters);
|
||||
|
||||
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
|
||||
uvm_access_counters_isr_unlock(access_counters);
|
||||
|
||||
uvm_parent_gpu_kref_put(parent_gpu);
|
||||
}
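The bottom half above also keeps simple execution statistics: a total invocation count, a per-CPU count, and a mask of CPUs it has run on. Here is a small sketch of that accounting in plain C; the fixed MAX_CPUS and the explicit cpu parameter replace num_possible_cpus() and the get_cpu()/put_cpu() bracket used in the driver, so this is illustrative only.

```c
#include <stdint.h>
#include <stdio.h>

#define MAX_CPUS 8  // stand-in for num_possible_cpus()

typedef struct {
    uint64_t bottom_half_count;         // total invocations
    uint64_t cpu_exec_count[MAX_CPUS];  // per-CPU invocation counts
    uint32_t cpus_used_mask;            // bitmask of CPUs that ever ran it
} bh_stats_t;

// Models the accounting done at the top of the bottom half; in the driver
// the current CPU id comes from get_cpu(), here it is a parameter.
static void record_bottom_half(bh_stats_t *stats, unsigned cpu)
{
    ++stats->bottom_half_count;
    stats->cpus_used_mask |= 1u << cpu;
    ++stats->cpu_exec_count[cpu];
}

int main(void)
{
    bh_stats_t stats = { 0 };

    record_bottom_half(&stats, 2);
    record_bottom_half(&stats, 2);
    record_bottom_half(&stats, 5);

    printf("total: %llu, cpu2: %llu, mask: 0x%x\n",
           (unsigned long long)stats.bottom_half_count,
           (unsigned long long)stats.cpu_exec_count[2],
           stats.cpus_used_mask);  // total: 3, cpu2: 2, mask: 0x24
    return 0;
}
```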
|
||||
@ -725,7 +812,7 @@ void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
|
||||
// clear_replayable_faults is a no-op for architectures that don't
|
||||
// support pulse-based interrupts.
|
||||
parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
|
||||
parent_gpu->fault_buffer_info.replayable.cached_get);
|
||||
parent_gpu->fault_buffer.replayable.cached_get);
|
||||
}
|
||||
|
||||
// This unlock call has to be out-of-order unlock due to interrupts_lock
|
||||
@ -751,37 +838,41 @@ void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gp
|
||||
uvm_up(&parent_gpu->isr.non_replayable_faults.service_lock);
|
||||
}
|
||||
|
||||
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
// See comments in uvm_parent_gpu_replayable_faults_isr_lock
|
||||
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
|
||||
NvU32 notif_buf_index = access_counters->index;
|
||||
|
||||
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
|
||||
uvm_access_counters_intr_disable(access_counters);
|
||||
|
||||
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
uvm_down(&parent_gpu->isr.access_counters.service_lock);
|
||||
uvm_down(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);
|
||||
}
|
||||
|
||||
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
|
||||
NvU32 notif_buf_index = access_counters->index;
|
||||
uvm_access_counter_buffer_hal_t *ac_hal = parent_gpu->access_counter_buffer_hal;
|
||||
|
||||
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
|
||||
|
||||
// See comments in uvm_parent_gpu_replayable_faults_isr_unlock
|
||||
|
||||
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
|
||||
uvm_access_counters_intr_enable(access_counters);
|
||||
|
||||
if (parent_gpu->isr.access_counters.handling_ref_count > 0) {
|
||||
parent_gpu->access_counter_buffer_hal->clear_access_counter_notifications(parent_gpu,
|
||||
parent_gpu->access_counter_buffer_info.cached_get);
|
||||
}
|
||||
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0)
|
||||
ac_hal->clear_access_counter_notifications(access_counters, access_counters->cached_get);
|
||||
|
||||
// This unlock call has to be out-of-order unlock due to interrupts_lock
|
||||
// still being held. Otherwise, it would result in a lock order violation.
|
||||
uvm_up_out_of_order(&parent_gpu->isr.access_counters.service_lock);
|
||||
uvm_up_out_of_order(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);
|
||||
|
||||
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
|
||||
}
|
||||
@ -806,8 +897,11 @@ static void uvm_parent_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *paren
|
||||
parent_gpu->fault_buffer_hal->enable_replayable_faults(parent_gpu);
|
||||
}
|
||||
|
||||
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
|
||||
NvU32 notif_buf_index = access_counters->index;
|
||||
|
||||
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
|
||||
|
||||
// The read of handling_ref_count could race with a write from
|
||||
@ -815,24 +909,27 @@ void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
|
||||
// ISR lock. But those functions are invoked with the interrupt disabled
|
||||
// (disable_intr_ref_count > 0), so the check always returns false when the
|
||||
// race occurs
|
||||
if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
|
||||
parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
|
||||
parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(parent_gpu);
|
||||
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
|
||||
parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
|
||||
parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(access_counters);
|
||||
}
|
||||
|
||||
++parent_gpu->isr.access_counters.disable_intr_ref_count;
|
||||
++parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;
|
||||
}
|
||||
|
||||
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
|
||||
NvU32 notif_buf_index = access_counters->index;
|
||||
|
||||
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
|
||||
UVM_ASSERT(parent_gpu->isr.access_counters.disable_intr_ref_count > 0);
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));
|
||||
UVM_ASSERT(parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count > 0);
|
||||
|
||||
--parent_gpu->isr.access_counters.disable_intr_ref_count;
|
||||
--parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;
|
||||
|
||||
if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
|
||||
parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
|
||||
parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(parent_gpu);
|
||||
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
|
||||
parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
|
||||
parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(access_counters);
|
||||
}
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
Copyright (c) 2016-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -70,8 +70,8 @@ typedef struct
|
||||
|
||||
struct
|
||||
{
|
||||
// Number of the bottom-half invocations for this interrupt on a GPU over
|
||||
// its lifetime
|
||||
// Number of the bottom-half invocations for this interrupt on a GPU
|
||||
// over its lifetime.
|
||||
NvU64 bottom_half_count;
|
||||
|
||||
// A bitmask of the CPUs on which the bottom half has executed. The
|
||||
@ -110,20 +110,20 @@ typedef struct
|
||||
// bottom-half per interrupt type.
|
||||
nv_kthread_q_t bottom_half_q;
|
||||
|
||||
// Protects the state of interrupts (enabled/disabled) and whether the GPU is
|
||||
// currently handling them. Taken in both interrupt and process context.
|
||||
// Protects the state of interrupts (enabled/disabled) and whether the GPU
|
||||
// is currently handling them. Taken in both interrupt and process context.
|
||||
uvm_spinlock_irqsave_t interrupts_lock;
|
||||
|
||||
uvm_intr_handler_t replayable_faults;
|
||||
uvm_intr_handler_t non_replayable_faults;
|
||||
uvm_intr_handler_t access_counters;
|
||||
uvm_intr_handler_t *access_counters;
|
||||
|
||||
// Kernel thread used to kill channels on fatal non-replayable faults.
|
||||
// This is needed because we cannot call into RM from the bottom-half to
|
||||
// avoid deadlocks.
|
||||
nv_kthread_q_t kill_channel_q;
|
||||
|
||||
// Number of top-half ISRs called for this GPU over its lifetime
|
||||
// Number of top-half ISRs called for this GPU over its lifetime.
|
||||
NvU64 interrupt_count;
|
||||
} uvm_isr_info_t;
|
||||
|
||||
@ -133,7 +133,7 @@ NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid);
|
||||
// Initialize ISR handling state
|
||||
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Flush any currently scheduled bottom halves. This is called during GPU
|
||||
// Flush any currently scheduled bottom halves. This is called during GPU
|
||||
// removal.
|
||||
void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
@ -146,7 +146,7 @@ void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Take parent_gpu->isr.replayable_faults.service_lock from a non-top/bottom
|
||||
// half thread. This will also disable replayable page fault interrupts (if
|
||||
// half thread. This will also disable replayable page fault interrupts (if
|
||||
// supported by the GPU) because the top half attempts to take this lock, and we
|
||||
// would cause an interrupt storm if we didn't disable them first.
|
||||
//
|
||||
@ -154,49 +154,48 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Unlock parent_gpu->isr.replayable_faults.service_lock. This call may
|
||||
// re-enable replayable page fault interrupts. Unlike
|
||||
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only called from
|
||||
// re-enable replayable page fault interrupts. Unlike
|
||||
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only be called from
|
||||
// non-top/bottom half threads, this can be called by any thread.
|
||||
void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// Lock/unlock routines for non-replayable faults. These do not need to prevent
|
||||
// interrupt storms since the GPU fault buffers for non-replayable faults are
|
||||
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
|
||||
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
|
||||
// under the parent need to have been previously retained.
|
||||
void uvm_parent_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
// See uvm_parent_gpu_replayable_faults_isr_lock/unlock
|
||||
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters);
|
||||
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters);
|
||||
|
||||
// Increments the reference count tracking whether access counter interrupts
// should be disabled. The caller is guaranteed that access counter interrupts
// are disabled upon return. Interrupts might already be disabled prior to
// making this call. Each call is ref-counted, so this must be paired with a
// call to uvm_parent_gpu_access_counters_intr_enable().
// call to uvm_access_counters_intr_enable().
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters);

// Decrements the reference count tracking whether access counter interrupts
// should be disabled. Only once the count reaches 0 are the HW interrupts
// actually enabled, so this call does not guarantee that the interrupts have
// been re-enabled upon return.
//
// uvm_parent_gpu_access_counters_intr_disable() must have been called prior to
// calling this function.
// uvm_access_counters_intr_disable() must have been called prior to calling
// this function.
//
// NOTE: For pulse-based interrupts, the caller is responsible for re-arming
// the interrupt.
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters);

// Return the first valid GPU given the parent GPU or NULL if no MIG instances
// are registered. This should only be called from bottom halves or if the
// g_uvm_global.global_lock is held so that the returned pointer remains valid.
//
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu);

#endif // __UVM_GPU_ISR_H__
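The intr_disable()/intr_enable() pair documented above is ref-counted: hardware is only touched on the 0 -> 1 and 1 -> 0 transitions of disable_intr_ref_count, and only while the buffer is actually being handled, which is why disable guarantees the interrupt is off on return while enable does not guarantee it is back on. A minimal stand-alone sketch of that asymmetry follows; the hw_* helpers and the struct are stand-ins, not the driver's HAL or types.

```c
#include <assert.h>
#include <stdio.h>

// Stand-ins for the HAL calls; in the driver these program the GPU.
static void hw_disable_notifications(void) { puts("HW: notifications disabled"); }
static void hw_enable_notifications(void)  { puts("HW: notifications enabled"); }

typedef struct {
    unsigned handling_ref_count;     // buffer is being handled at all
    unsigned disable_intr_ref_count; // nested disable requests
} intr_handler_t;

// Caller is guaranteed interrupts are disabled on return.
static void intr_disable(intr_handler_t *h)
{
    if (h->handling_ref_count > 0 && h->disable_intr_ref_count == 0)
        hw_disable_notifications();
    ++h->disable_intr_ref_count;
}

// HW is only re-enabled when the last disable request is dropped.
static void intr_enable(intr_handler_t *h)
{
    assert(h->disable_intr_ref_count > 0);
    --h->disable_intr_ref_count;
    if (h->handling_ref_count > 0 && h->disable_intr_ref_count == 0)
        hw_enable_notifications();
}

int main(void)
{
    intr_handler_t h = { .handling_ref_count = 1, .disable_intr_ref_count = 0 };

    intr_disable(&h); // HW disabled here (0 -> 1)
    intr_disable(&h); // nested: no HW access (1 -> 2)
    intr_enable(&h);  // still disabled (2 -> 1)
    intr_enable(&h);  // HW re-enabled here (1 -> 0)
    return 0;
}
```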
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2024 NVIDIA Corporation
|
||||
Copyright (c) 2017-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -119,18 +119,18 @@
|
||||
// calling uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults on failure.
|
||||
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
|
||||
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
|
||||
|
||||
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
|
||||
|
||||
non_replayable_faults->shadow_buffer_copy = NULL;
|
||||
non_replayable_faults->fault_cache = NULL;
|
||||
|
||||
non_replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize /
|
||||
non_replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize /
|
||||
parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
|
||||
|
||||
non_replayable_faults->shadow_buffer_copy =
|
||||
uvm_kvmalloc_zero(parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize);
|
||||
uvm_kvmalloc_zero(parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize);
|
||||
if (!non_replayable_faults->shadow_buffer_copy)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
@ -147,7 +147,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_
|
||||
|
||||
void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
|
||||
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
|
||||
|
||||
if (non_replayable_faults->fault_cache) {
|
||||
UVM_ASSERT(uvm_tracker_is_empty(&non_replayable_faults->clear_faulted_tracker));
|
||||
@ -170,7 +170,7 @@ bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
UVM_ASSERT(parent_gpu->isr.non_replayable_faults.handling);
|
||||
|
||||
status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
|
||||
status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
|
||||
&has_pending_faults);
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
@ -182,14 +182,14 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
|
||||
NV_STATUS status;
|
||||
NvU32 i;
|
||||
NvU32 entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
|
||||
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
|
||||
char *current_hw_entry = (char *)non_replayable_faults->shadow_buffer_copy;
|
||||
uvm_fault_buffer_entry_t *fault_entry = non_replayable_faults->fault_cache;
|
||||
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.non_replayable_faults.service_lock));
|
||||
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
|
||||
|
||||
status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
|
||||
status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
|
||||
current_hw_entry,
|
||||
cached_faults);
|
||||
|
||||
@ -267,7 +267,7 @@ static NV_STATUS clear_faulted_method_on_gpu(uvm_user_channel_t *user_channel,
|
||||
uvm_gpu_t *gpu = user_channel->gpu;
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||||
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
|
||||
|
||||
UVM_ASSERT(!fault_entry->is_fatal);
|
||||
|
||||
@ -355,7 +355,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_va_block_t *va_block,
|
||||
uvm_processor_id_t new_residency;
|
||||
bool read_duplicate;
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||||
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
|
||||
const uvm_va_policy_t *policy;
|
||||
|
||||
UVM_ASSERT(!fault_entry->is_fatal);
|
||||
@ -450,7 +450,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
|
||||
NV_STATUS status, tracker_status;
|
||||
uvm_va_block_retry_t va_block_retry;
|
||||
uvm_gpu_t *gpu = fault_entry->gpu;
|
||||
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.non_replayable.block_service_context;
|
||||
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.non_replayable.block_service_context;
|
||||
|
||||
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
|
||||
service_context->num_retries = 0;
|
||||
@ -467,7 +467,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
|
||||
service_context,
|
||||
hmm_migratable));
|
||||
|
||||
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer_info.non_replayable.fault_service_tracker,
|
||||
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer.non_replayable.fault_service_tracker,
|
||||
&va_block->tracker);
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
@ -507,7 +507,7 @@ static void schedule_kill_channel(uvm_fault_buffer_entry_t *fault_entry, uvm_use
|
||||
{
|
||||
uvm_va_space_t *va_space = fault_entry->va_space;
|
||||
uvm_parent_gpu_t *parent_gpu = fault_entry->gpu->parent;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
|
||||
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
|
||||
void *packet = (char *)non_replayable_faults->shadow_buffer_copy +
|
||||
(fault_entry->non_replayable.buffer_index * parent_gpu->fault_buffer_hal->entry_size(parent_gpu));
|
||||
|
||||
@ -551,7 +551,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
|
||||
{
|
||||
uvm_va_space_t *va_space = gpu_va_space->va_space;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
|
||||
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate = &non_replayable_faults->ats_invalidate;
|
||||
NV_STATUS status = lookup_status;
|
||||
NV_STATUS fatal_fault_status = NV_ERR_INVALID_ADDRESS;
|
||||
@ -649,7 +649,7 @@ static NV_STATUS service_fault_once(uvm_parent_gpu_t *parent_gpu,
|
||||
struct mm_struct *mm;
|
||||
uvm_gpu_va_space_t *gpu_va_space;
|
||||
uvm_gpu_t *gpu;
|
||||
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
|
||||
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
|
||||
uvm_va_block_context_t *va_block_context = non_replayable_faults->block_service_context.block_context;
|
||||
|
||||
status = uvm_parent_gpu_fault_entry_to_va_space(parent_gpu,
|
||||
@ -757,7 +757,7 @@ exit_no_channel:
|
||||
static NV_STATUS service_fault(uvm_parent_gpu_t *parent_gpu, uvm_fault_buffer_entry_t *fault_entry)
|
||||
{
|
||||
uvm_service_block_context_t *service_context =
|
||||
&parent_gpu->fault_buffer_info.non_replayable.block_service_context;
|
||||
&parent_gpu->fault_buffer.non_replayable.block_service_context;
|
||||
NV_STATUS status;
|
||||
bool hmm_migratable = true;
|
||||
|
||||
@ -794,7 +794,7 @@ void uvm_parent_gpu_service_non_replayable_fault_buffer(uvm_parent_gpu_t *parent
|
||||
// non-replayable faults since getting multiple faults on the same
|
||||
// memory region is not very likely
|
||||
for (i = 0; i < cached_faults; ++i) {
|
||||
status = service_fault(parent_gpu, &parent_gpu->fault_buffer_info.non_replayable.fault_cache[i]);
|
||||
status = service_fault(parent_gpu, &parent_gpu->fault_buffer.non_replayable.fault_cache[i]);
|
||||
if (status != NV_OK)
|
||||
return;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -119,7 +119,7 @@ module_param(uvm_perf_fault_coalesce, uint, S_IRUGO);
|
||||
// the power management resume path.
|
||||
static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
|
||||
|
||||
// Read the current get/put pointers, as this might not be the first time
|
||||
// we take control of the fault buffer since the GPU was initialized,
|
||||
@ -129,7 +129,7 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);
|
||||
|
||||
// (Re-)enable fault prefetching
|
||||
if (parent_gpu->fault_buffer_info.prefetch_faults_enabled)
|
||||
if (parent_gpu->fault_buffer.prefetch_faults_enabled)
|
||||
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
|
||||
else
|
||||
parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
|
||||
@ -140,28 +140,28 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
|
||||
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
|
||||
|
||||
UVM_ASSERT(parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize %
|
||||
UVM_ASSERT(parent_gpu->fault_buffer.rm_info.replayable.bufferSize %
|
||||
parent_gpu->fault_buffer_hal->entry_size(parent_gpu) == 0);
|
||||
|
||||
replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize /
|
||||
replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.replayable.bufferSize /
|
||||
parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
|
||||
|
||||
// Check provided module parameter value
|
||||
parent_gpu->fault_buffer_info.max_batch_size = max(uvm_perf_fault_batch_count,
|
||||
(NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
|
||||
parent_gpu->fault_buffer_info.max_batch_size = min(parent_gpu->fault_buffer_info.max_batch_size,
|
||||
replayable_faults->max_faults);
|
||||
parent_gpu->fault_buffer.max_batch_size = max(uvm_perf_fault_batch_count,
|
||||
(NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
|
||||
parent_gpu->fault_buffer.max_batch_size = min(parent_gpu->fault_buffer.max_batch_size,
|
||||
replayable_faults->max_faults);
|
||||
|
||||
if (parent_gpu->fault_buffer_info.max_batch_size != uvm_perf_fault_batch_count) {
|
||||
pr_info("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
uvm_perf_fault_batch_count,
|
||||
UVM_PERF_FAULT_BATCH_COUNT_MIN,
|
||||
replayable_faults->max_faults,
|
||||
parent_gpu->fault_buffer_info.max_batch_size);
|
||||
if (parent_gpu->fault_buffer.max_batch_size != uvm_perf_fault_batch_count) {
|
||||
UVM_INFO_PRINT("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
|
||||
uvm_parent_gpu_name(parent_gpu),
|
||||
uvm_perf_fault_batch_count,
|
||||
UVM_PERF_FAULT_BATCH_COUNT_MIN,
|
||||
replayable_faults->max_faults,
|
||||
parent_gpu->fault_buffer.max_batch_size);
|
||||
}
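The block above clamps the uvm_perf_fault_batch_count module parameter into [UVM_PERF_FAULT_BATCH_COUNT_MIN, max_faults] and logs when the requested value had to be adjusted. A compact sketch of that clamp-and-report pattern follows, with illustrative constants and values rather than the driver's.

```c
#include <stdio.h>

#define BATCH_COUNT_MIN 1u  // illustrative stand-in for UVM_PERF_FAULT_BATCH_COUNT_MIN

// Clamp a user-provided batch count into [BATCH_COUNT_MIN, max_faults],
// mirroring the max()/min() pair in the hunk above.
static unsigned clamp_batch_size(unsigned requested, unsigned max_faults)
{
    unsigned batch = requested > BATCH_COUNT_MIN ? requested : BATCH_COUNT_MIN;
    if (batch > max_faults)
        batch = max_faults;
    return batch;
}

int main(void)
{
    unsigned max_faults = 2048;    // e.g. bufferSize / entry_size
    unsigned requested = 1000000;  // deliberately out of range

    unsigned batch = clamp_batch_size(requested, max_faults);
    if (batch != requested)
        printf("Invalid batch count %u. Valid range [%u:%u]. Using %u instead\n",
               requested, BATCH_COUNT_MIN, max_faults, batch);
    return 0;
}
```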
|
||||
|
||||
batch_context->fault_cache = uvm_kvmalloc_zero(replayable_faults->max_faults * sizeof(*batch_context->fault_cache));
|
||||
@@ -198,22 +198,22 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gpu)
UVM_PERF_FAULT_REPLAY_POLICY_DEFAULT;

if (replayable_faults->replay_policy != uvm_perf_fault_replay_policy) {
- pr_info("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
- uvm_parent_gpu_name(parent_gpu),
- uvm_perf_fault_replay_policy,
- replayable_faults->replay_policy);
+ UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
+ uvm_parent_gpu_name(parent_gpu),
+ uvm_perf_fault_replay_policy,
+ replayable_faults->replay_policy);
}

replayable_faults->replay_update_put_ratio = min(uvm_perf_fault_replay_update_put_ratio, 100u);
if (replayable_faults->replay_update_put_ratio != uvm_perf_fault_replay_update_put_ratio) {
- pr_info("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
- uvm_parent_gpu_name(parent_gpu),
- uvm_perf_fault_replay_update_put_ratio,
- replayable_faults->replay_update_put_ratio);
+ UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
+ uvm_parent_gpu_name(parent_gpu),
+ uvm_perf_fault_replay_update_put_ratio,
+ replayable_faults->replay_update_put_ratio);
}

// Re-enable fault prefetching just in case it was disabled in a previous run
- parent_gpu->fault_buffer_info.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;
+ parent_gpu->fault_buffer.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;

fault_buffer_reinit_replayable_faults(parent_gpu);
@ -222,7 +222,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
|
||||
|
||||
static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
|
||||
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
|
||||
|
||||
if (batch_context->fault_cache) {
|
||||
@ -230,9 +230,9 @@ static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
uvm_tracker_deinit(&replayable_faults->replay_tracker);
|
||||
}
|
||||
|
||||
if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
|
||||
if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
|
||||
// Re-enable prefetch faults in case we disabled them
|
||||
if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer_info.prefetch_faults_enabled)
|
||||
if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer.prefetch_faults_enabled)
|
||||
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
|
||||
}
|
||||
|
||||
@ -252,7 +252,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
|
||||
UVM_ASSERT(parent_gpu->replayable_faults_supported);
|
||||
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceInitFaultInfo(parent_gpu->rm_device,
|
||||
&parent_gpu->fault_buffer_info.rm_info));
|
||||
&parent_gpu->fault_buffer.rm_info));
|
||||
if (status != NV_OK) {
|
||||
UVM_ERR_PRINT("Failed to init fault buffer info from RM: %s, GPU %s\n",
|
||||
nvstatusToString(status),
|
||||
@ -262,7 +262,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
|
||||
// when it returns an error. Set the buffer handle to zero as it is
|
||||
// used by the deinitialization logic to determine if it was correctly
|
||||
// initialized.
|
||||
parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
|
||||
parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
|
||||
goto fail;
|
||||
}
|
||||
|
||||
@ -304,24 +304,25 @@ void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
fault_buffer_deinit_replayable_faults(parent_gpu);
|
||||
|
||||
if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
|
||||
if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
|
||||
status = uvm_rm_locked_call(nvUvmInterfaceOwnPageFaultIntr(parent_gpu->rm_device, NV_FALSE));
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
uvm_rm_locked_call_void(nvUvmInterfaceDestroyFaultInfo(parent_gpu->rm_device,
|
||||
&parent_gpu->fault_buffer_info.rm_info));
|
||||
&parent_gpu->fault_buffer.rm_info));
|
||||
|
||||
parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
|
||||
parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
|
||||
}
|
||||
}
|
||||
|
||||
bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
|
||||
|
||||
UVM_ASSERT(parent_gpu->replayable_faults_supported);
|
||||
|
||||
// Fast path 1: we left some faults unserviced in the buffer in the last pass
|
||||
// Fast path 1: we left some faults unserviced in the buffer in the last
|
||||
// pass
|
||||
if (replayable_faults->cached_get != replayable_faults->cached_put)
|
||||
return true;
|
||||
|
||||
@ -357,7 +358,7 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer_info.replayable.replay_tracker;
|
||||
uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer.replayable.replay_tracker;
|
||||
|
||||
UVM_ASSERT(tracker != NULL);
|
||||
|
||||
@ -443,7 +444,7 @@ static NV_STATUS cancel_fault_precise_va(uvm_fault_buffer_entry_t *fault_entry,
|
||||
uvm_gpu_t *gpu = fault_entry->gpu;
|
||||
uvm_gpu_phys_address_t pdb;
|
||||
uvm_push_t push;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
|
||||
NvU64 offset;
|
||||
|
||||
UVM_ASSERT(gpu->parent->replayable_faults_supported);
|
||||
@ -505,7 +506,7 @@ static NV_STATUS push_replay_on_gpu(uvm_gpu_t *gpu,
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_push_t push;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
|
||||
uvm_tracker_t *tracker = NULL;
|
||||
|
||||
if (batch_context)
|
||||
@ -556,7 +557,7 @@ static NV_STATUS push_replay_on_parent_gpu(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
|
||||
{
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
|
||||
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
|
||||
|
||||
@ -589,7 +590,7 @@ static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu, hw_f
|
||||
return NV_OK;
|
||||
|
||||
is_flush_mode_move = (NvBool) (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
|
||||
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer_info.rm_info, is_flush_mode_move);
|
||||
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer.rm_info, is_flush_mode_move);
|
||||
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
|
||||
@ -618,7 +619,7 @@ static NV_STATUS fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 get;
|
||||
NvU32 put;
|
||||
uvm_spin_loop_t spin;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
|
||||
NV_STATUS status;
|
||||
|
||||
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
|
||||
@ -852,7 +853,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
|
||||
uvm_fault_buffer_entry_t *fault_cache;
|
||||
uvm_spin_loop_t spin;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
|
||||
const bool in_pascal_cancel_path = (!parent_gpu->fault_cancel_va_supported && fetch_mode == FAULT_FETCH_MODE_ALL);
|
||||
const bool may_filter = uvm_perf_fault_coalesce && !in_pascal_cancel_path;
|
||||
|
||||
@ -887,7 +888,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
|
||||
|
||||
// Parse until get != put and have enough space to cache.
|
||||
while ((get != put) &&
|
||||
(fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer_info.max_batch_size)) {
|
||||
(fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer.max_batch_size)) {
|
||||
bool is_same_instance_ptr = true;
|
||||
uvm_fault_buffer_entry_t *current_entry = &fault_cache[fault_index];
|
||||
uvm_fault_utlb_info_t *current_tlb;
|
||||
@ -1385,7 +1386,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
|
||||
uvm_page_index_t last_page_index;
|
||||
NvU32 page_fault_count = 0;
|
||||
uvm_range_group_range_iter_t iter;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
|
||||
uvm_fault_buffer_entry_t **ordered_fault_cache = batch_context->ordered_fault_cache;
|
||||
uvm_fault_buffer_entry_t *first_fault_entry = ordered_fault_cache[first_fault_index];
|
||||
uvm_service_block_context_t *block_context = &replayable_faults->block_service_context;
|
||||
@ -1612,7 +1613,7 @@ static NV_STATUS service_fault_batch_block(uvm_gpu_t *gpu,
|
||||
NV_STATUS status;
|
||||
uvm_va_block_retry_t va_block_retry;
|
||||
NV_STATUS tracker_status;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
|
||||
uvm_service_block_context_t *fault_block_context = &replayable_faults->block_service_context;
|
||||
|
||||
fault_block_context->operation = UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS;
|
||||
@ -1803,7 +1804,7 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
|
||||
uvm_page_mask_t *prefetch_only_fault_mask = &ats_context->faults.prefetch_only_fault_mask;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
bool replay_per_va_block =
|
||||
(gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
|
||||
(gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
|
||||
|
||||
UVM_ASSERT(vma);
|
||||
|
||||
@ -1851,8 +1852,8 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
|
||||
|
||||
page_index = (fault_address - sub_batch_base) / PAGE_SIZE;
|
||||
|
||||
// Do not check for coalesced access type. If there are multiple different
|
||||
// accesses to an address, we can disregard the prefetch one.
|
||||
// Do not check for coalesced access type. If there are multiple
|
||||
// different accesses to an address, we can disregard the prefetch one.
|
||||
if ((access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH) &&
|
||||
(uvm_fault_access_type_mask_highest(current_entry->access_type_mask) == UVM_FAULT_ACCESS_TYPE_PREFETCH))
|
||||
uvm_page_mask_set(prefetch_only_fault_mask, page_index);
|
||||
@ -1956,7 +1957,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
|
||||
uvm_va_block_t *va_block;
|
||||
uvm_gpu_t *gpu = gpu_va_space->gpu;
|
||||
uvm_va_block_context_t *va_block_context =
|
||||
gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
|
||||
gpu->parent->fault_buffer.replayable.block_service_context.block_context;
|
||||
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[fault_index];
|
||||
struct mm_struct *mm = va_block_context->mm;
|
||||
NvU64 fault_address = current_entry->fault_address;
|
||||
@ -1985,7 +1986,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
|
||||
NvU64 outer = ~0ULL;
|
||||
|
||||
UVM_ASSERT(replay_per_va_block ==
|
||||
(gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));
|
||||
(gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));
|
||||
|
||||
// Limit outer to the minimum of next va_range.start and first
|
||||
// fault_address' next UVM_GMMU_ATS_GRANULARITY alignment so that it's
|
||||
@ -2046,8 +2047,8 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
|
||||
uvm_gpu_t *gpu = batch_context->fatal_gpu;
|
||||
uvm_gpu_va_space_t *gpu_va_space = NULL;
|
||||
struct mm_struct *mm;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
|
||||
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.replayable.block_service_context;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
|
||||
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.replayable.block_service_context;
|
||||
uvm_va_block_context_t *va_block_context = service_context->block_context;
|
||||
|
||||
UVM_ASSERT(va_space);
|
||||
@ -2155,7 +2156,7 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
|
||||
++i;
|
||||
}
|
||||
else {
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer.replayable.ats_invalidate;
|
||||
NvU32 block_faults;
|
||||
const bool hmm_migratable = true;
|
||||
|
||||
@ -2236,12 +2237,12 @@ static NV_STATUS service_fault_batch(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 i;
|
||||
uvm_va_space_t *va_space = NULL;
|
||||
uvm_gpu_va_space_t *prev_gpu_va_space = NULL;
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer_info.replayable.ats_invalidate;
|
||||
uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer.replayable.ats_invalidate;
|
||||
struct mm_struct *mm = NULL;
|
||||
const bool replay_per_va_block = service_mode != FAULT_SERVICE_MODE_CANCEL &&
|
||||
parent_gpu->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
|
||||
parent_gpu->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
|
||||
uvm_service_block_context_t *service_context =
|
||||
&parent_gpu->fault_buffer_info.replayable.block_service_context;
|
||||
&parent_gpu->fault_buffer.replayable.block_service_context;
|
||||
uvm_va_block_context_t *va_block_context = service_context->block_context;
|
||||
bool hmm_migratable = true;
|
||||
|
||||
@@ -2711,8 +2712,9 @@ static void cancel_fault_batch(uvm_parent_gpu_t *parent_gpu,
// 5- Fetch all faults from buffer
// 6- Check what uTLBs are in lockdown mode and can be cancelled
// 7- Preprocess faults (order per va_space, fault address, access type)
- // 8- Service all non-fatal faults and mark all non-serviceable faults as fatal
- // 6.1- If fatal faults are not found, we are done
+ // 8- Service all non-fatal faults and mark all non-serviceable faults as
+ // fatal.
+ // 8.1- If fatal faults are not found, we are done
// 9- Search for a uTLB which can be targeted for cancel, as described in
// try_to_cancel_utlbs. If found, cancel it.
// END LOOP
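Read as a whole, the numbered steps above form a retry loop: service whatever can be serviced, stop as soon as no fatal faults remain, and otherwise cancel one locked-down uTLB and go around again. The following is only a control-flow sketch of that shape; the stub functions and the pass counter are invented and stand in for the driver's real batch servicing and cancel calls.

    #include <stdbool.h>
    #include <stdio.h>

    /* Invented stub state: pretend fatal faults persist for two passes. */
    static int g_passes_with_fatal_faults = 2;

    static bool service_batch_leaves_fatal_faults(void)  /* stands in for steps 5-8 */
    {
        return g_passes_with_fatal_faults-- > 0;
    }

    static bool cancel_one_locked_down_utlb(void)        /* stands in for step 9 */
    {
        printf("cancelled one uTLB\n");
        return true;
    }

    int main(void)
    {
        while (1) {
            /* Steps 5-8: fetch, preprocess and service the fault batch. */
            if (!service_batch_leaves_fatal_faults())
                break;  /* step 8.1: no fatal faults left, we are done */

            /* Step 9: target a uTLB in lockdown; give up if none can be cancelled. */
            if (!cancel_one_locked_down_utlb())
                break;
        }
        return 0;
    }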
@ -2726,14 +2728,14 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
|
||||
{
|
||||
NV_STATUS status;
|
||||
NV_STATUS tracker_status;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
|
||||
bool first = true;
|
||||
|
||||
UVM_ASSERT(gpu->parent->replayable_faults_supported);
|
||||
|
||||
// 1) Disable prefetching to avoid new requests keep coming and flooding
|
||||
// the buffer
|
||||
if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
|
||||
if (gpu->parent->fault_buffer.prefetch_faults_enabled)
|
||||
gpu->parent->arch_hal->disable_prefetch_faults(gpu->parent);
|
||||
|
||||
while (1) {
|
||||
@ -2847,7 +2849,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
|
||||
}
|
||||
|
||||
// 10) Re-enable prefetching
|
||||
if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
|
||||
if (gpu->parent->fault_buffer.prefetch_faults_enabled)
|
||||
gpu->parent->arch_hal->enable_prefetch_faults(gpu->parent);
|
||||
|
||||
if (status == NV_OK)
|
||||
@@ -2884,16 +2886,16 @@ static void enable_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu,
// comment in mark_fault_invalid_prefetch(..).
// Some tests rely on this logic (and ratio) to correctly disable prefetch
// fault reporting. If the logic changes, the tests will have to be changed.
- if (parent_gpu->fault_buffer_info.prefetch_faults_enabled &&
+ if (parent_gpu->fault_buffer.prefetch_faults_enabled &&
uvm_perf_reenable_prefetch_faults_lapse_msec > 0 &&
- ((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer_info.max_batch_size * 2) ||
+ ((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer.max_batch_size * 2) ||
(uvm_enable_builtin_tests &&
parent_gpu->rm_info.isSimulated &&
batch_context->num_invalid_prefetch_faults > 5))) {
uvm_parent_gpu_disable_prefetch_faults(parent_gpu);
}
- else if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
- NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp;
+ else if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
+ NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer.disable_prefetch_faults_timestamp;

// Reenable prefetch faults after some time
if (lapse > ((NvU64)uvm_perf_reenable_prefetch_faults_lapse_msec * (1000 * 1000)))
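Two pieces of arithmetic in this hunk are easy to misread: the disable condition num_invalid_prefetch_faults * 3 > max_batch_size * 2 is an integer way of asking whether more than two thirds of the batch were invalid prefetch faults, and the re-enable check compares a millisecond module parameter against a timestamp delta, hence the * (1000 * 1000) conversion. A small stand-alone check with made-up numbers:

    #include <stdbool.h>
    #include <stdio.h>

    int main(void)
    {
        /* Made-up batch: 256 cached faults, 180 of them invalid prefetch faults. */
        unsigned max_batch_size = 256;
        unsigned num_invalid_prefetch = 180;

        /* Same integer comparison as above: 180 * 3 = 540 > 512 = 256 * 2,
         * i.e. more than two thirds of the batch, so prefetching would be disabled. */
        bool disable = num_invalid_prefetch * 3 > max_batch_size * 2;

        /* Millisecond parameter scaled by 1000 * 1000 before comparing with the
         * nanosecond-scale lapse. The value here is an example, not the default. */
        unsigned long long lapse_msec = 1000;
        unsigned long long lapse_ns = lapse_msec * (1000ull * 1000);

        printf("disable=%d, re-enable after %llu ns\n", disable, lapse_ns);
        return 0;
    }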
@ -2907,7 +2909,7 @@ void uvm_parent_gpu_service_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
NvU32 num_batches = 0;
|
||||
NvU32 num_throttled = 0;
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
|
||||
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
|
||||
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
|
||||
|
||||
UVM_ASSERT(parent_gpu->replayable_faults_supported);
|
||||
@ -3030,9 +3032,9 @@ void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
|
||||
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
|
||||
|
||||
if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
|
||||
if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
|
||||
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
|
||||
parent_gpu->fault_buffer_info.prefetch_faults_enabled = true;
|
||||
parent_gpu->fault_buffer.prefetch_faults_enabled = true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -3041,10 +3043,10 @@ void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
|
||||
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
|
||||
|
||||
if (parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
|
||||
if (parent_gpu->fault_buffer.prefetch_faults_enabled) {
|
||||
parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
|
||||
parent_gpu->fault_buffer_info.prefetch_faults_enabled = false;
|
||||
parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp = NV_GETTIME();
|
||||
parent_gpu->fault_buffer.prefetch_faults_enabled = false;
|
||||
parent_gpu->fault_buffer.disable_prefetch_faults_timestamp = NV_GETTIME();
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -217,7 +217,6 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.clear_faulted_channel_method = uvm_hal_maxwell_host_clear_faulted_channel_method_unsupported,
|
||||
.clear_faulted_channel_register = uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported,
|
||||
.access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported,
|
||||
.access_counter_clear_type = uvm_hal_maxwell_access_counter_clear_type_unsupported,
|
||||
.access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported,
|
||||
.get_time = uvm_hal_maxwell_get_time,
|
||||
}
|
||||
@ -254,9 +253,6 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.replay_faults = uvm_hal_volta_replay_faults,
|
||||
.cancel_faults_va = uvm_hal_volta_cancel_faults_va,
|
||||
.clear_faulted_channel_method = uvm_hal_volta_host_clear_faulted_channel_method,
|
||||
.access_counter_clear_all = uvm_hal_volta_access_counter_clear_all,
|
||||
.access_counter_clear_type = uvm_hal_volta_access_counter_clear_type,
|
||||
.access_counter_clear_targeted = uvm_hal_volta_access_counter_clear_targeted,
|
||||
.semaphore_timestamp = uvm_hal_volta_host_semaphore_timestamp,
|
||||
}
|
||||
},
|
||||
@ -271,6 +267,8 @@ static uvm_hal_class_ops_t host_table[] =
|
||||
.tlb_invalidate_all = uvm_hal_turing_host_tlb_invalidate_all,
|
||||
.tlb_invalidate_va = uvm_hal_turing_host_tlb_invalidate_va,
|
||||
.tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test,
|
||||
.access_counter_clear_all = uvm_hal_turing_access_counter_clear_all,
|
||||
.access_counter_clear_targeted = uvm_hal_turing_access_counter_clear_targeted,
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -537,22 +535,19 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
|
||||
.u.access_counter_buffer_ops = {
|
||||
.enable_access_counter_notifications = uvm_hal_volta_enable_access_counter_notifications,
|
||||
.disable_access_counter_notifications = uvm_hal_volta_disable_access_counter_notifications,
|
||||
.clear_access_counter_notifications = uvm_hal_volta_clear_access_counter_notifications,
|
||||
.parse_entry = uvm_hal_volta_access_counter_buffer_parse_entry,
|
||||
.entry_is_valid = uvm_hal_volta_access_counter_buffer_entry_is_valid,
|
||||
.entry_clear_valid = uvm_hal_volta_access_counter_buffer_entry_clear_valid,
|
||||
.entry_size = uvm_hal_volta_access_counter_buffer_entry_size,
|
||||
}
|
||||
.u.access_counter_buffer_ops = {}
|
||||
},
|
||||
{
|
||||
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
|
||||
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
|
||||
.u.access_counter_buffer_ops = {
|
||||
.enable_access_counter_notifications = uvm_hal_turing_enable_access_counter_notifications,
|
||||
.disable_access_counter_notifications = uvm_hal_turing_disable_access_counter_notifications,
|
||||
.clear_access_counter_notifications = uvm_hal_turing_clear_access_counter_notifications,
|
||||
.parse_entry = uvm_hal_turing_access_counter_buffer_parse_entry,
|
||||
.entry_is_valid = uvm_hal_turing_access_counter_buffer_entry_is_valid,
|
||||
.entry_clear_valid = uvm_hal_turing_access_counter_buffer_entry_clear_valid,
|
||||
.entry_size = uvm_hal_turing_access_counter_buffer_entry_size,
|
||||
}
|
||||
},
|
||||
{
|
||||
@ -843,10 +838,8 @@ static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
// Computing.
|
||||
//
|
||||
// TODO: Bug 200692962: Add support for access counters in vGPU
|
||||
if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled) {
|
||||
if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled)
|
||||
parent_gpu->access_counters_supported = false;
|
||||
parent_gpu->access_counters_can_use_physical_addresses = false;
|
||||
}
|
||||
}
|
||||
|
||||
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
@ -1042,36 +1035,15 @@ void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry)
|
||||
UVM_DBG_PRINT(" timestamp: %llu\n", entry->timestamp);
|
||||
}
|
||||
|
||||
const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type)
|
||||
{
|
||||
BUILD_BUG_ON(UVM_ACCESS_COUNTER_TYPE_MAX != 2);
|
||||
|
||||
switch (access_counter_type) {
|
||||
UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MIMC);
|
||||
UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MOMC);
|
||||
UVM_ENUM_STRING_DEFAULT();
|
||||
}
|
||||
}
|
||||
|
||||
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry)
|
||||
{
|
||||
if (!entry->address.is_virtual) {
|
||||
UVM_DBG_PRINT("physical address: {0x%llx:%s}\n",
|
||||
entry->address.address,
|
||||
uvm_aperture_string(entry->address.aperture));
|
||||
}
|
||||
else {
|
||||
UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address.address);
|
||||
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n",
|
||||
entry->virtual_info.instance_ptr.address,
|
||||
uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
|
||||
UVM_DBG_PRINT(" mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->virtual_info.mmu_engine_type));
|
||||
UVM_DBG_PRINT(" mmu_engine_id %u\n", entry->virtual_info.mmu_engine_id);
|
||||
UVM_DBG_PRINT(" ve_id %u\n", entry->virtual_info.ve_id);
|
||||
}
|
||||
|
||||
UVM_DBG_PRINT(" is_virtual %u\n", entry->address.is_virtual);
|
||||
UVM_DBG_PRINT(" counter_type %s\n", uvm_access_counter_type_string(entry->counter_type));
|
||||
UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address);
|
||||
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n",
|
||||
entry->instance_ptr.address,
|
||||
uvm_aperture_string(entry->instance_ptr.aperture));
|
||||
UVM_DBG_PRINT(" mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->mmu_engine_type));
|
||||
UVM_DBG_PRINT(" mmu_engine_id %u\n", entry->mmu_engine_id);
|
||||
UVM_DBG_PRINT(" ve_id %u\n", entry->ve_id);
|
||||
UVM_DBG_PRINT(" counter_value %u\n", entry->counter_value);
|
||||
UVM_DBG_PRINT(" subgranularity 0x%08x\n", entry->sub_granularity);
|
||||
UVM_DBG_PRINT(" bank %u\n", entry->bank);
|
||||
|
@ -686,54 +686,52 @@ void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry);
|
||||
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry);
|
||||
|
||||
// Access counters
|
||||
typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
|
||||
typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters);
|
||||
typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters);
|
||||
typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters, NvU32 get);
|
||||
|
||||
// Parse the entry on the given buffer index. This also clears the valid bit of
|
||||
// the entry in the buffer.
|
||||
typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_parent_gpu_t *parent_gpu,
|
||||
typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_access_counter_buffer_t *access_counters,
|
||||
NvU32 index,
|
||||
uvm_access_counter_buffer_entry_t *buffer_entry);
|
||||
typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_access_counter_buffer_t *access_counters,
|
||||
NvU32 index);
|
||||
typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_access_counter_buffer_t *access_counters,
|
||||
NvU32 index);
|
||||
typedef NvU32 (*uvm_hal_access_counter_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
|
||||
typedef void (*uvm_hal_access_counter_clear_all_t)(uvm_push_t *push);
|
||||
typedef void (*uvm_hal_access_counter_clear_type_t)(uvm_push_t *push, uvm_access_counter_type_t type);
|
||||
typedef void (*uvm_hal_access_counter_clear_targeted_t)(uvm_push_t *push,
|
||||
const uvm_access_counter_buffer_entry_t *buffer_entry);
|
||||
|
||||
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get);
|
||||
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
|
||||
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
|
||||
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
|
||||
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters,
|
||||
NvU32 get);
|
||||
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_access_counter_buffer_t *access_counters,
|
||||
NvU32 index,
|
||||
uvm_access_counter_buffer_entry_t *buffer_entry);
|
||||
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
|
||||
NvU32 index);
|
||||
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
|
||||
NvU32 index);
|
||||
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push);
|
||||
void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type);
|
||||
void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
|
||||
const uvm_access_counter_buffer_entry_t *buffer_entry);
|
||||
|
||||
void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
|
||||
void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_access_counter_buffer_entry_t *buffer_entry);
|
||||
bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
|
||||
NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
|
||||
|
||||
void uvm_hal_volta_access_counter_clear_all(uvm_push_t *push);
|
||||
void uvm_hal_volta_access_counter_clear_type(uvm_push_t *push, uvm_access_counter_type_t type);
|
||||
void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
|
||||
const uvm_access_counter_buffer_entry_t *buffer_entry);
|
||||
|
||||
void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
|
||||
void uvm_hal_turing_enable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
|
||||
void uvm_hal_turing_disable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
|
||||
void uvm_hal_turing_clear_access_counter_notifications(uvm_access_counter_buffer_t *access_counters, NvU32 get);
|
||||
void uvm_hal_turing_access_counter_buffer_parse_entry(uvm_access_counter_buffer_t *access_counters,
|
||||
NvU32 index,
|
||||
uvm_access_counter_buffer_entry_t *buffer_entry);
|
||||
bool uvm_hal_turing_access_counter_buffer_entry_is_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index);
|
||||
void uvm_hal_turing_access_counter_buffer_entry_clear_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index);
|
||||
NvU32 uvm_hal_turing_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
|
||||
void uvm_hal_turing_access_counter_clear_all(uvm_push_t *push);
|
||||
void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push,
|
||||
const uvm_access_counter_buffer_entry_t *buffer_entry);
|
||||
|
||||
// The source and destination addresses must be 16-byte aligned. Note that the
|
||||
// best performance is achieved with 256-byte alignment. The decrypt size must
|
||||
@ -786,7 +784,6 @@ struct uvm_host_hal_struct
|
||||
uvm_hal_host_clear_faulted_channel_method_t clear_faulted_channel_method;
|
||||
uvm_hal_host_clear_faulted_channel_register_t clear_faulted_channel_register;
|
||||
uvm_hal_access_counter_clear_all_t access_counter_clear_all;
|
||||
uvm_hal_access_counter_clear_type_t access_counter_clear_type;
|
||||
uvm_hal_access_counter_clear_targeted_t access_counter_clear_targeted;
|
||||
uvm_hal_get_time_t get_time;
|
||||
};
|
||||
|
@ -471,69 +471,34 @@ static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2)
|
||||
return max(membar_1, membar_2);
|
||||
}
|
||||
|
||||
typedef enum
|
||||
{
|
||||
UVM_ACCESS_COUNTER_TYPE_MIMC = 0,
|
||||
UVM_ACCESS_COUNTER_TYPE_MOMC,
|
||||
|
||||
UVM_ACCESS_COUNTER_TYPE_MAX,
|
||||
} uvm_access_counter_type_t;
|
||||
|
||||
const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type);
|
||||
|
||||
struct uvm_access_counter_buffer_entry_struct
|
||||
{
|
||||
// Whether this counter refers to outbound accesses to remote GPUs or
|
||||
// sysmem (MIMC), or it refers to inbound accesses from CPU or a non-peer
|
||||
// GPU (whose accesses are routed through the CPU, too) to vidmem (MOMC)
|
||||
uvm_access_counter_type_t counter_type;
|
||||
|
||||
// Address of the region for which a notification was sent
|
||||
uvm_gpu_address_t address;
|
||||
NvU64 address;
|
||||
|
||||
union
|
||||
{
|
||||
// These fields are only valid if address.is_virtual is true
|
||||
struct
|
||||
{
|
||||
// Instance pointer of one of the channels in the TSG that triggered
|
||||
// the notification.
|
||||
uvm_gpu_phys_address_t instance_ptr;
|
||||
// Instance pointer of one of the channels in the TSG that triggered
|
||||
// the notification.
|
||||
uvm_gpu_phys_address_t instance_ptr;
|
||||
|
||||
uvm_mmu_engine_type_t mmu_engine_type;
|
||||
uvm_mmu_engine_type_t mmu_engine_type;
|
||||
|
||||
NvU32 mmu_engine_id;
|
||||
NvU32 mmu_engine_id;
|
||||
|
||||
// Identifier of the subcontext that performed the memory accesses
|
||||
// that triggered the notification. This value, combined with the
|
||||
// instance_ptr, is needed to obtain the GPU VA space of the process
|
||||
// that triggered the notification.
|
||||
NvU32 ve_id;
|
||||
// Identifier of the subcontext that performed the memory accesses
|
||||
// that triggered the notification. This value, combined with the
|
||||
// instance_ptr, is needed to obtain the GPU VA space of the process
|
||||
// that triggered the notification.
|
||||
NvU32 ve_id;
|
||||
|
||||
// VA space for the address that triggered the notification
|
||||
uvm_va_space_t *va_space;
|
||||
} virtual_info;
|
||||
// VA space for the address that triggered the notification
|
||||
uvm_va_space_t *va_space;
|
||||
|
||||
// These fields are only valid if address.is_virtual is false
|
||||
struct
|
||||
{
|
||||
// Processor id where data is resident
|
||||
//
|
||||
// Although this information is not tied to a VA space, we can use
|
||||
// a regular processor id because P2P is not allowed between
|
||||
// partitioned GPUs.
|
||||
uvm_processor_id_t resident_id;
|
||||
|
||||
} physical_info;
|
||||
};
|
||||
|
||||
// This is the GPU that triggered the notification. Note that physical
|
||||
// address based notifications are only supported on non-MIG-capable GPUs.
|
||||
// This is the GPU that triggered the notification.
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
// Number of times the tracked region was accessed since the last time it
|
||||
// was cleared. Counter values saturate at the maximum value supported by
|
||||
// the GPU (2^16 - 1 in Volta)
|
||||
// the GPU (2^16 - 1 on Turing)
|
||||
NvU32 counter_value;
|
||||
|
||||
// When the granularity of the tracked regions is greater than 64KB, the
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
Copyright (c) 2016-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -1602,7 +1602,7 @@ static NV_STATUS hmm_va_block_cpu_page_populate(uvm_va_block_t *va_block,
|
||||
return status;
|
||||
}
|
||||
|
||||
status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, chunk, page_index);
|
||||
status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, chunk);
|
||||
if (status != NV_OK) {
|
||||
uvm_cpu_chunk_remove_from_block(va_block, page_to_nid(page), page_index);
|
||||
uvm_cpu_chunk_free(chunk);
|
||||
|
@ -50,12 +50,10 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->utlb_per_gpc_count = uvm_hopper_get_utlbs_per_gpc(parent_gpu);
|
||||
|
||||
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
|
||||
parent_gpu->utlb_per_gpc_count;
|
||||
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
|
||||
{
|
||||
uvm_fault_buffer_entry_t *dummy;
|
||||
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
|
||||
(sizeof(dummy->fault_source.utlb_id) * 8)));
|
||||
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
|
||||
}
|
||||
|
||||
// A single top level PDE on Hopper covers 64 PB and that's the minimum
|
||||
@ -99,8 +97,6 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->access_counters_supported = true;
|
||||
|
||||
parent_gpu->access_counters_can_use_physical_addresses = false;
|
||||
|
||||
parent_gpu->fault_cancel_va_supported = true;
|
||||
|
||||
parent_gpu->scoped_atomics_supported = true;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2020 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -111,13 +111,13 @@ void uvm_kvmalloc_exit(void)
|
||||
return;
|
||||
|
||||
if (atomic_long_read(&g_uvm_leak_checker.bytes_allocated) > 0) {
|
||||
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
|
||||
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "Memory leak of %lu bytes detected.%s\n",
|
||||
atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
|
||||
uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
|
||||
UVM_INFO_PRINT("!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
|
||||
UVM_INFO_PRINT("Memory leak of %lu bytes detected.%s\n",
|
||||
atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
|
||||
uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
|
||||
" insmod with uvm_leak_checker=2 for detailed information." :
|
||||
"");
|
||||
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
|
||||
UVM_INFO_PRINT("!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
|
||||
|
||||
if (g_uvm_global.unload_state.ptr)
|
||||
*g_uvm_global.unload_state.ptr |= UVM_TEST_UNLOAD_STATE_MEMORY_LEAK;
|
||||
@ -129,12 +129,12 @@ void uvm_kvmalloc_exit(void)
|
||||
uvm_rb_tree_for_each_safe(node, next, &g_uvm_leak_checker.allocation_info) {
|
||||
uvm_kvmalloc_info_t *info = container_of(node, uvm_kvmalloc_info_t, node);
|
||||
|
||||
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX " Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
|
||||
uvm_kvsize((void *)((uintptr_t)info->node.key)),
|
||||
kbasename(info->file),
|
||||
info->line,
|
||||
info->function,
|
||||
info->node.key);
|
||||
UVM_INFO_PRINT(" Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
|
||||
uvm_kvsize((void *)((uintptr_t)info->node.key)),
|
||||
kbasename(info->file),
|
||||
info->line,
|
||||
info->function,
|
||||
info->node.key);
|
||||
|
||||
// Free so we don't keep eating up memory while debugging. Note that
|
||||
// this also removes the entry from the table, frees info, and drops
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -27,12 +27,13 @@
|
||||
|
||||
const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
|
||||
{
|
||||
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 36);
|
||||
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 37);
|
||||
|
||||
switch (lock_order) {
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL_PM);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_ACCESS_COUNTERS);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_ISR);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_MMAP_LOCK);
|
||||
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACES_LIST);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2022 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -69,6 +69,17 @@
//
// This should be taken whenever global GPU state might need to be modified.
//
+ // - Access counters VA space enablement state lock
+ // Order: UVM_LOCK_ORDER_ACCESS_COUNTERS
+ // Exclusive lock (mutex)
+ //
+ // This protects VA space state associated with access counters enablement.
+ // Blackwell+ GPUs may have multiple access counters notification buffers
+ // and their "atomic" enablement is protected by this lock.
+ //
+ // This should be taken whenever VA space access counters state might need
+ // to be modified.
+ //
// - GPU ISR lock
// Order: UVM_LOCK_ORDER_ISR
// Exclusive lock (mutex) per gpu
@@ -487,6 +498,7 @@ typedef enum
UVM_LOCK_ORDER_INVALID = 0,
UVM_LOCK_ORDER_GLOBAL_PM,
UVM_LOCK_ORDER_GLOBAL,
+ UVM_LOCK_ORDER_ACCESS_COUNTERS,
UVM_LOCK_ORDER_ISR,
UVM_LOCK_ORDER_MMAP_LOCK,
UVM_LOCK_ORDER_VA_SPACES_LIST,
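The new UVM_LOCK_ORDER_ACCESS_COUNTERS entry and the BUILD_BUG_ON bump from 36 to 37 in uvm_lock_order_to_string() belong together: the count assertion is what forces the string table to be revisited whenever a lock order is inserted. A generic, self-contained illustration of that pattern follows; all names are invented for the example and are not UVM symbols.

    #include <stdio.h>

    /* Compile-time check in the spirit of the kernel's BUILD_BUG_ON. */
    #define ASSERT_ENUM_COUNT(count, expected) \
        _Static_assert((count) == (expected), "new entry added: update the string table")

    typedef enum {
        EXAMPLE_ORDER_INVALID = 0,
        EXAMPLE_ORDER_GLOBAL,
        EXAMPLE_ORDER_ACCESS_COUNTERS,  /* newly inserted entry bumps the count */
        EXAMPLE_ORDER_ISR,
        EXAMPLE_ORDER_COUNT,
    } example_lock_order_t;

    static const char *example_order_to_string(example_lock_order_t order)
    {
        /* Fails to compile if an entry is added without revisiting the switch below. */
        ASSERT_ENUM_COUNT(EXAMPLE_ORDER_COUNT, 4);

        switch (order) {
            case EXAMPLE_ORDER_INVALID:         return "INVALID";
            case EXAMPLE_ORDER_GLOBAL:          return "GLOBAL";
            case EXAMPLE_ORDER_ACCESS_COUNTERS: return "ACCESS_COUNTERS";
            case EXAMPLE_ORDER_ISR:             return "ISR";
            default:                            return "UNKNOWN";
        }
    }

    int main(void)
    {
        printf("%s\n", example_order_to_string(EXAMPLE_ORDER_ACCESS_COUNTERS));
        return 0;
    }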
@ -742,7 +754,8 @@ bool __uvm_locking_initialized(void);
|
||||
ret; \
|
||||
})
|
||||
|
||||
// Helper for calling a UVM-RM interface function that returns void with lock recording
|
||||
// Helper for calling a UVM-RM interface function that returns void with lock
|
||||
// recording
|
||||
#define uvm_rm_locked_call_void(call) ({ \
|
||||
uvm_record_lock_rm_all(); \
|
||||
call; \
|
||||
|
@ -63,8 +63,6 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->access_counters_supported = false;
|
||||
|
||||
parent_gpu->access_counters_can_use_physical_addresses = false;
|
||||
|
||||
parent_gpu->fault_cancel_va_supported = false;
|
||||
|
||||
parent_gpu->scoped_atomics_supported = false;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021 NVIDIA Corporation
|
||||
Copyright (c) 2021-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -24,25 +24,29 @@
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_hal.h"
|
||||
|
||||
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
UVM_ASSERT_MSG(false,
|
||||
"enable_access_counter_notifications is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
"enable_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
|
||||
uvm_parent_gpu_name(access_counters->parent_gpu),
|
||||
access_counters->index);
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
|
||||
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
UVM_ASSERT_MSG(false,
|
||||
"disable_access_counter_notifications is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
"disable_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
|
||||
uvm_parent_gpu_name(access_counters->parent_gpu),
|
||||
access_counters->index);
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get)
|
||||
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters,
|
||||
NvU32 get)
|
||||
{
|
||||
UVM_ASSERT_MSG(false,
|
||||
"clear_access_counter_notifications is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
"clear_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
|
||||
uvm_parent_gpu_name(access_counters->parent_gpu),
|
||||
access_counters->index);
|
||||
}
|
||||
|
||||
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu)
|
||||
@ -53,26 +57,31 @@ NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gp
|
||||
return 0;
|
||||
}
|
||||
|
||||
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
|
||||
NvU32 index)
|
||||
{
|
||||
UVM_ASSERT_MSG(false,
|
||||
"access_counter_buffer_entry_is_valid is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
"access_counter_buffer_entry_is_valid is not supported on GPU: %s notif buf index: %u.\n",
|
||||
uvm_parent_gpu_name(access_counters->parent_gpu),
|
||||
access_counters->index);
|
||||
return false;
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
|
||||
NvU32 index)
|
||||
{
|
||||
UVM_ASSERT_MSG(false,
|
||||
"access_counter_buffer_entry_clear_valid is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
"access_counter_buffer_entry_clear_valid is not supported on GPU: %s notif buf index: %u.\n",
|
||||
uvm_parent_gpu_name(access_counters->parent_gpu),
|
||||
access_counters->index);
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
|
||||
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_access_counter_buffer_t *access_counters,
|
||||
NvU32 index,
|
||||
uvm_access_counter_buffer_entry_t *buffer_entry)
|
||||
{
|
||||
UVM_ASSERT_MSG(false,
|
||||
"access_counter_buffer_parse_entry is not supported on GPU: %s.\n",
|
||||
uvm_parent_gpu_name(parent_gpu));
|
||||
"access_counter_buffer_parse_entry is not supported on GPU: %s notif buf index: %u.\n",
|
||||
uvm_parent_gpu_name(access_counters->parent_gpu),
|
||||
access_counters->index);
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021-2022 NVIDIA Corporation
|
||||
Copyright (c) 2021-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -330,11 +330,6 @@ void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push)
|
||||
UVM_ASSERT_MSG(false, "host access_counter_clear_all called on Maxwell GPU\n");
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type)
|
||||
{
|
||||
UVM_ASSERT_MSG(false, "host access_counter_clear_type called on Maxwell GPU\n");
|
||||
}
|
||||
|
||||
void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
|
||||
const uvm_access_counter_buffer_entry_t *buffer_entry)
|
||||
{
|
||||
|
@ -582,7 +582,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
|
||||
managed_range_last = managed_range;
|
||||
|
||||
// For UVM-Lite GPUs, the CUDA driver may suballocate a single
|
||||
// managed_range into many range groups. For this reason, we iterate
|
||||
// managed_range into many range groups. For this reason, we iterate
|
||||
// over each managed_range first then through the range groups within.
|
||||
uvm_range_group_for_each_migratability_in(&iter,
|
||||
va_space,
|
||||
@ -865,9 +865,9 @@ NV_STATUS uvm_migrate_init(void)
|
||||
else {
|
||||
g_uvm_perf_migrate_cpu_preunmap_size = UVM_VA_BLOCK_SIZE << UVM_PERF_MIGRATE_CPU_PREUNMAP_BLOCK_ORDER_DEFAULT;
|
||||
|
||||
pr_info("Invalid value %u for uvm_perf_migrate_cpu_preunmap_block_order. Using %u instead\n",
|
||||
uvm_perf_migrate_cpu_preunmap_block_order,
|
||||
UVM_PERF_MIGRATE_CPU_PREUNMAP_BLOCK_ORDER_DEFAULT);
|
||||
UVM_INFO_PRINT("Invalid value %u for uvm_perf_migrate_cpu_preunmap_block_order. Using %u instead\n",
|
||||
uvm_perf_migrate_cpu_preunmap_block_order,
|
||||
UVM_PERF_MIGRATE_CPU_PREUNMAP_BLOCK_ORDER_DEFAULT);
|
||||
}
|
||||
}
|
||||
|
||||
@ -909,14 +909,13 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
|
||||
|
||||
if ((params->flags & UVM_MIGRATE_FLAGS_TEST_ALL) && !uvm_enable_builtin_tests) {
- UVM_INFO_PRINT("Test flag set for UVM_MIGRATE. Did you mean to insmod with uvm_enable_builtin_tests=1?\n");
+ UVM_INFO_PRINT("TEMP\n");
return NV_ERR_INVALID_ARGUMENT;
}
|
||||
|
||||
gpus_to_check_for_nvlink_errors = uvm_processor_mask_cache_alloc();
|
||||
if (!gpus_to_check_for_nvlink_errors)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
|
||||
uvm_processor_mask_zero(gpus_to_check_for_nvlink_errors);
|
||||
|
||||
// mmap_lock will be needed if we have to create CPU mappings
|
||||
|
@ -90,9 +90,9 @@ NV_STATUS uvm_mmu_init(void)
|
||||
page_table_aperture = UVM_APERTURE_SYS;
|
||||
}
|
||||
else {
|
||||
pr_info("Invalid uvm_page_table_location %s. Using %s instead.\n",
|
||||
uvm_page_table_location,
|
||||
uvm_aperture_string(page_table_aperture));
|
||||
UVM_INFO_PRINT("Invalid uvm_page_table_location %s. Using %s instead.\n",
|
||||
uvm_page_table_location,
|
||||
uvm_aperture_string(page_table_aperture));
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
Copyright (c) 2016-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -40,10 +40,10 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->utlb_per_gpc_count = uvm_pascal_get_utlbs_per_gpc(parent_gpu);
|
||||
|
||||
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
|
||||
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
|
||||
{
|
||||
uvm_fault_buffer_entry_t *dummy;
|
||||
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
|
||||
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
|
||||
}
|
||||
|
||||
// A single top level PDE on Pascal covers 128 TB and that's the minimum
|
||||
@ -92,8 +92,6 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->access_counters_supported = false;
|
||||
|
||||
parent_gpu->access_counters_can_use_physical_addresses = false;
|
||||
|
||||
parent_gpu->fault_cancel_va_supported = false;
|
||||
|
||||
parent_gpu->scoped_atomics_supported = false;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -44,8 +44,8 @@ void uvm_hal_pascal_enable_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
volatile NvU32 *reg;
|
||||
NvU32 mask;
|
||||
|
||||
reg = parent_gpu->fault_buffer_info.rm_info.replayable.pPmcIntrEnSet;
|
||||
mask = parent_gpu->fault_buffer_info.rm_info.replayable.replayableFaultMask;
|
||||
reg = parent_gpu->fault_buffer.rm_info.replayable.pPmcIntrEnSet;
|
||||
mask = parent_gpu->fault_buffer.rm_info.replayable.replayableFaultMask;
|
||||
|
||||
UVM_GPU_WRITE_ONCE(*reg, mask);
|
||||
}
|
||||
@ -55,33 +55,33 @@ void uvm_hal_pascal_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
volatile NvU32 *reg;
|
||||
NvU32 mask;
|
||||
|
||||
reg = parent_gpu->fault_buffer_info.rm_info.replayable.pPmcIntrEnClear;
|
||||
mask = parent_gpu->fault_buffer_info.rm_info.replayable.replayableFaultMask;
|
||||
reg = parent_gpu->fault_buffer.rm_info.replayable.pPmcIntrEnClear;
|
||||
mask = parent_gpu->fault_buffer.rm_info.replayable.replayableFaultMask;
|
||||
|
||||
UVM_GPU_WRITE_ONCE(*reg, mask);
|
||||
}
|
||||
|
||||
NvU32 uvm_hal_pascal_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferPut);
|
||||
UVM_ASSERT(put < parent_gpu->fault_buffer_info.replayable.max_faults);
|
||||
NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferPut);
|
||||
UVM_ASSERT(put < parent_gpu->fault_buffer.replayable.max_faults);
|
||||
|
||||
return put;
|
||||
}
|
||||
|
||||
NvU32 uvm_hal_pascal_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet);
|
||||
UVM_ASSERT(get < parent_gpu->fault_buffer_info.replayable.max_faults);
|
||||
NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet);
|
||||
UVM_ASSERT(get < parent_gpu->fault_buffer.replayable.max_faults);
|
||||
|
||||
return get;
|
||||
}
|
||||
|
||||
void uvm_hal_pascal_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
{
|
||||
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
|
||||
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
|
||||
|
||||
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet, index);
|
||||
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet, index);
|
||||
}
|
||||
|
||||
static uvm_fault_access_type_t get_fault_access_type(const NvU32 *fault_entry)
|
||||
@ -189,9 +189,9 @@ static NvU32 *get_fault_buffer_entry(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
fault_buffer_entry_b069_t *buffer_start;
|
||||
NvU32 *fault_entry;
|
||||
|
||||
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
|
||||
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
|
||||
|
||||
buffer_start = (fault_buffer_entry_b069_t *)parent_gpu->fault_buffer_info.rm_info.replayable.bufferAddress;
|
||||
buffer_start = (fault_buffer_entry_b069_t *)parent_gpu->fault_buffer.rm_info.replayable.bufferAddress;
|
||||
fault_entry = (NvU32 *)&buffer_start[index];
|
||||
|
||||
return fault_entry;
|
||||
@ -205,10 +205,10 @@ static UvmFaultMetadataPacket *get_fault_buffer_entry_metadata(uvm_parent_gpu_t
|
||||
{
|
||||
UvmFaultMetadataPacket *fault_entry_metadata;
|
||||
|
||||
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
|
||||
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
|
||||
fault_entry_metadata = parent_gpu->fault_buffer_info.rm_info.replayable.bufferMetadata;
|
||||
fault_entry_metadata = parent_gpu->fault_buffer.rm_info.replayable.bufferMetadata;
|
||||
UVM_ASSERT(fault_entry_metadata != NULL);
|
||||
|
||||
return fault_entry_metadata + index;
|
||||
@@ -267,7 +267,7 @@ NV_STATUS uvm_hal_pascal_fault_buffer_parse_replayable_entry(uvm_parent_gpu_t *p
|
||||
|
||||
// Compute global uTLB id
|
||||
utlb_id = buffer_entry->fault_source.gpc_id * parent_gpu->utlb_per_gpc_count + gpc_utlb_id;
|
||||
UVM_ASSERT(utlb_id < parent_gpu->fault_buffer_info.replayable.utlb_count);
|
||||
UVM_ASSERT(utlb_id < parent_gpu->fault_buffer.replayable.utlb_count);
|
||||
|
||||
buffer_entry->fault_source.utlb_id = utlb_id;
|
||||
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2023 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -21,7 +21,6 @@
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
|
||||
// For Pascal, UVM page tree 'depth' maps to hardware as follows:
|
||||
//
|
||||
// UVM depth HW level VA bits
|
||||
@@ -377,7 +376,7 @@ uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU64 big_page_size)
|
||||
|
||||
static void mmu_set_prefetch_faults(uvm_parent_gpu_t *parent_gpu, bool enable)
|
||||
{
|
||||
volatile NvU32 *prefetch_ctrl = parent_gpu->fault_buffer_info.rm_info.replayable.pPrefetchCtrl;
|
||||
volatile NvU32 *prefetch_ctrl = parent_gpu->fault_buffer.rm_info.replayable.pPrefetchCtrl;
|
||||
|
||||
// A null prefetch control mapping indicates that UVM should toggle the
|
||||
// register's value using the RM API, instead of performing a direct access.
|
||||
@@ -388,7 +387,7 @@ static void mmu_set_prefetch_faults(uvm_parent_gpu_t *parent_gpu, bool enable)
|
||||
// Computing.
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
|
||||
status = nvUvmInterfaceTogglePrefetchFaults(&parent_gpu->fault_buffer_info.rm_info, (NvBool)enable);
|
||||
status = nvUvmInterfaceTogglePrefetchFaults(&parent_gpu->fault_buffer.rm_info, (NvBool)enable);
|
||||
|
||||
UVM_ASSERT(status == NV_OK);
|
||||
}
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2023 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -512,8 +512,9 @@ NV_STATUS uvm_perf_prefetch_init(void)
|
||||
g_uvm_perf_prefetch_threshold = uvm_perf_prefetch_threshold;
|
||||
}
|
||||
else {
|
||||
pr_info("Invalid value %u for uvm_perf_prefetch_threshold. Using %u instead\n",
|
||||
uvm_perf_prefetch_threshold, UVM_PREFETCH_THRESHOLD_DEFAULT);
|
||||
UVM_INFO_PRINT("Invalid value %u for uvm_perf_prefetch_threshold. Using %u instead\n",
|
||||
uvm_perf_prefetch_threshold,
|
||||
UVM_PREFETCH_THRESHOLD_DEFAULT);
|
||||
|
||||
g_uvm_perf_prefetch_threshold = UVM_PREFETCH_THRESHOLD_DEFAULT;
|
||||
}
|
||||
@@ -523,8 +524,9 @@ NV_STATUS uvm_perf_prefetch_init(void)
|
||||
g_uvm_perf_prefetch_min_faults = uvm_perf_prefetch_min_faults;
|
||||
}
|
||||
else {
|
||||
pr_info("Invalid value %u for uvm_perf_prefetch_min_faults. Using %u instead\n",
|
||||
uvm_perf_prefetch_min_faults, UVM_PREFETCH_MIN_FAULTS_DEFAULT);
|
||||
UVM_INFO_PRINT("Invalid value %u for uvm_perf_prefetch_min_faults. Using %u instead\n",
|
||||
uvm_perf_prefetch_min_faults,
|
||||
UVM_PREFETCH_MIN_FAULTS_DEFAULT);
|
||||
|
||||
g_uvm_perf_prefetch_min_faults = UVM_PREFETCH_MIN_FAULTS_DEFAULT;
|
||||
}
|
||||
|
@@ -338,28 +338,28 @@ static unsigned g_uvm_perf_thrashing_max_resets;
|
||||
// parameter _d. The user value is read from _v, and the final value is stored
|
||||
// in a variable named g_##_v, so it must be declared, too. Only unsigned
|
||||
// parameters are supported.
|
||||
#define INIT_THRASHING_PARAMETER_MIN_MAX(_v, _d, _mi, _ma) \
|
||||
do { \
|
||||
unsigned v = (_v); \
|
||||
unsigned d = (_d); \
|
||||
unsigned mi = (_mi); \
|
||||
unsigned ma = (_ma); \
|
||||
\
|
||||
BUILD_BUG_ON(sizeof(_v) > sizeof(unsigned)); \
|
||||
BUILD_BUG_ON(THRASHING_PARAMETER_IS_SIGNED(_v)); \
|
||||
\
|
||||
UVM_ASSERT(mi <= ma); \
|
||||
UVM_ASSERT(d >= mi); \
|
||||
UVM_ASSERT(d <= ma); \
|
||||
\
|
||||
if (v >= mi && v <= ma) { \
|
||||
g_##_v = v; \
|
||||
} \
|
||||
else { \
|
||||
pr_info("Invalid value %u for " #_v ". Using %u instead\n", v, d); \
|
||||
\
|
||||
g_##_v = d; \
|
||||
} \
|
||||
#define INIT_THRASHING_PARAMETER_MIN_MAX(_v, _d, _mi, _ma) \
|
||||
do { \
|
||||
unsigned v = (_v); \
|
||||
unsigned d = (_d); \
|
||||
unsigned mi = (_mi); \
|
||||
unsigned ma = (_ma); \
|
||||
\
|
||||
BUILD_BUG_ON(sizeof(_v) > sizeof(unsigned)); \
|
||||
BUILD_BUG_ON(THRASHING_PARAMETER_IS_SIGNED(_v)); \
|
||||
\
|
||||
UVM_ASSERT(mi <= ma); \
|
||||
UVM_ASSERT(d >= mi); \
|
||||
UVM_ASSERT(d <= ma); \
|
||||
\
|
||||
if (v >= mi && v <= ma) { \
|
||||
g_##_v = v; \
|
||||
} \
|
||||
else { \
|
||||
UVM_INFO_PRINT("Invalid value %u for " #_v ". Using %u instead\n", v, d); \
|
||||
\
|
||||
g_##_v = d; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define INIT_THRASHING_PARAMETER(v, d) INIT_THRASHING_PARAMETER_MIN_MAX(v, d, 0u, UINT_MAX)
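A minimal sketch of how the macro is meant to be wired up; the parameter below is hypothetical, not one of the driver's real module parameters. An in-range value is accepted, anything else is replaced by the default and reported via UVM_INFO_PRINT.

static unsigned uvm_perf_thrashing_example = 16;  // hypothetical value set via module_param()
static unsigned g_uvm_perf_thrashing_example;     // validated copy consumed by the driver

static void example_init_thrashing_params(void)
{
    // Accept values in [1, 256]; otherwise fall back to the default of 16.
    INIT_THRASHING_PARAMETER_MIN_MAX(uvm_perf_thrashing_example, 16u, 1u, 256u);
}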
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2024 NVIDIA Corporation
|
||||
Copyright (c) 2017-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -31,21 +31,14 @@ static int uvm_cpu_chunk_allocation_sizes = UVM_CPU_CHUNK_SIZES;
|
||||
module_param(uvm_cpu_chunk_allocation_sizes, uint, S_IRUGO | S_IWUSR);
|
||||
MODULE_PARM_DESC(uvm_cpu_chunk_allocation_sizes, "OR'ed value of all CPU chunk allocation sizes.");
|
||||
|
||||
static struct kmem_cache *g_reverse_page_map_cache __read_mostly;
|
||||
|
||||
NV_STATUS uvm_pmm_sysmem_init(void)
|
||||
{
|
||||
g_reverse_page_map_cache = NV_KMEM_CACHE_CREATE("uvm_pmm_sysmem_page_reverse_map_t",
|
||||
uvm_reverse_map_t);
|
||||
if (!g_reverse_page_map_cache)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
// Ensure that only supported CPU chunk sizes are enabled.
|
||||
uvm_cpu_chunk_allocation_sizes &= UVM_CPU_CHUNK_SIZES;
|
||||
if (!uvm_cpu_chunk_allocation_sizes || !(uvm_cpu_chunk_allocation_sizes & PAGE_SIZE)) {
|
||||
pr_info("Invalid value for uvm_cpu_chunk_allocation_sizes = 0x%x, using 0x%llx instead\n",
|
||||
uvm_cpu_chunk_allocation_sizes,
|
||||
UVM_CPU_CHUNK_SIZES);
|
||||
UVM_INFO_PRINT("Invalid value for uvm_cpu_chunk_allocation_sizes = 0x%x, using 0x%llx instead\n",
|
||||
uvm_cpu_chunk_allocation_sizes,
|
||||
UVM_CPU_CHUNK_SIZES);
|
||||
uvm_cpu_chunk_allocation_sizes = UVM_CPU_CHUNK_SIZES;
|
||||
}
|
||||
|
||||
@@ -54,387 +47,11 @@ NV_STATUS uvm_pmm_sysmem_init(void)
|
||||
|
||||
void uvm_pmm_sysmem_exit(void)
|
||||
{
|
||||
kmem_cache_destroy_safe(&g_reverse_page_map_cache);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_pmm_sysmem_mappings_init(uvm_gpu_t *gpu, uvm_pmm_sysmem_mappings_t *sysmem_mappings)
|
||||
{
|
||||
memset(sysmem_mappings, 0, sizeof(*sysmem_mappings));
|
||||
|
||||
sysmem_mappings->gpu = gpu;
|
||||
|
||||
uvm_mutex_init(&sysmem_mappings->reverse_map_lock, UVM_LOCK_ORDER_LEAF);
|
||||
uvm_init_radix_tree_preloadable(&sysmem_mappings->reverse_map_tree);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_pmm_sysmem_mappings_deinit(uvm_pmm_sysmem_mappings_t *sysmem_mappings)
|
||||
{
|
||||
if (sysmem_mappings->gpu) {
|
||||
UVM_ASSERT_MSG(radix_tree_empty(&sysmem_mappings->reverse_map_tree),
|
||||
"radix_tree not empty for GPU %s\n",
|
||||
uvm_gpu_name(sysmem_mappings->gpu));
|
||||
}
|
||||
|
||||
sysmem_mappings->gpu = NULL;
|
||||
}
|
||||
|
||||
// TODO: Bug 1995015: use a more efficient data structure for
|
||||
// physically-contiguous allocations.
|
||||
NV_STATUS uvm_pmm_sysmem_mappings_add_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
|
||||
NvU64 dma_addr,
|
||||
NvU64 virt_addr,
|
||||
NvU64 region_size,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_processor_id_t owner)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_reverse_map_t *new_reverse_map;
|
||||
NvU64 key;
|
||||
const NvU64 base_key = dma_addr / PAGE_SIZE;
|
||||
const NvU32 num_pages = region_size / PAGE_SIZE;
|
||||
uvm_page_index_t page_index;
|
||||
|
||||
UVM_ASSERT(va_block);
|
||||
UVM_ASSERT(!uvm_va_block_is_dead(va_block));
|
||||
UVM_ASSERT(IS_ALIGNED(dma_addr, region_size));
|
||||
UVM_ASSERT(IS_ALIGNED(virt_addr, region_size));
|
||||
UVM_ASSERT(region_size <= UVM_VA_BLOCK_SIZE);
|
||||
UVM_ASSERT(is_power_of_2(region_size));
|
||||
UVM_ASSERT(uvm_va_block_contains_address(va_block, virt_addr));
|
||||
UVM_ASSERT(uvm_va_block_contains_address(va_block, virt_addr + region_size - 1));
|
||||
uvm_assert_mutex_locked(&va_block->lock);
|
||||
|
||||
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
|
||||
return NV_OK;
|
||||
|
||||
new_reverse_map = nv_kmem_cache_zalloc(g_reverse_page_map_cache, NV_UVM_GFP_FLAGS);
|
||||
if (!new_reverse_map)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
page_index = uvm_va_block_cpu_page_index(va_block, virt_addr);
|
||||
|
||||
new_reverse_map->va_block = va_block;
|
||||
new_reverse_map->region = uvm_va_block_region(page_index, page_index + num_pages);
|
||||
new_reverse_map->owner = owner;
|
||||
|
||||
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
|
||||
for (key = base_key; key < base_key + num_pages; ++key) {
|
||||
int ret = radix_tree_insert(&sysmem_mappings->reverse_map_tree, key, new_reverse_map);
|
||||
if (ret != 0) {
|
||||
NvU64 remove_key;
|
||||
|
||||
for (remove_key = base_key; remove_key < key; ++remove_key)
|
||||
(void)radix_tree_delete(&sysmem_mappings->reverse_map_tree, remove_key);
|
||||
|
||||
kmem_cache_free(g_reverse_page_map_cache, new_reverse_map);
|
||||
status = errno_to_nv_status(ret);
|
||||
break;
|
||||
}
|
||||
}
|
||||
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
|
||||
|
||||
// The assert is added for Coverity's sake. It is equivalent to adding
|
||||
// assert(num_pages > 0) before the loop. However, Coverity is not able to
|
||||
// deduce that the loop has to execute at least once from num_pages > 0.
|
||||
UVM_ASSERT(key != base_key || status != NV_OK);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static void pmm_sysmem_mappings_remove_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
|
||||
NvU64 dma_addr,
|
||||
bool check_mapping)
|
||||
{
|
||||
uvm_reverse_map_t *reverse_map;
|
||||
NvU64 key;
|
||||
const NvU64 base_key = dma_addr / PAGE_SIZE;
|
||||
|
||||
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
|
||||
return;
|
||||
|
||||
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
|
||||
|
||||
reverse_map = radix_tree_delete(&sysmem_mappings->reverse_map_tree, base_key);
|
||||
if (check_mapping)
|
||||
UVM_ASSERT(reverse_map);
|
||||
|
||||
if (!reverse_map) {
|
||||
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
uvm_assert_mutex_locked(&reverse_map->va_block->lock);
|
||||
|
||||
for (key = base_key + 1; key < base_key + uvm_va_block_region_num_pages(reverse_map->region); ++key) {
|
||||
uvm_reverse_map_t *curr_reverse_map = radix_tree_delete(&sysmem_mappings->reverse_map_tree, key);
|
||||
UVM_ASSERT(curr_reverse_map == reverse_map);
|
||||
}
|
||||
|
||||
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
|
||||
|
||||
kmem_cache_free(g_reverse_page_map_cache, reverse_map);
|
||||
}
|
||||
|
||||
void uvm_pmm_sysmem_mappings_remove_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr)
|
||||
{
|
||||
pmm_sysmem_mappings_remove_gpu_mapping(sysmem_mappings, dma_addr, true);
|
||||
}
|
||||
|
||||
void uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr)
|
||||
{
|
||||
pmm_sysmem_mappings_remove_gpu_mapping(sysmem_mappings, dma_addr, false);
|
||||
}
|
||||
|
||||
void uvm_pmm_sysmem_mappings_reparent_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
|
||||
NvU64 dma_addr,
|
||||
uvm_va_block_t *va_block)
|
||||
{
|
||||
NvU64 virt_addr;
|
||||
uvm_reverse_map_t *reverse_map;
|
||||
const NvU64 base_key = dma_addr / PAGE_SIZE;
|
||||
uvm_page_index_t new_start_page;
|
||||
|
||||
UVM_ASSERT(PAGE_ALIGNED(dma_addr));
|
||||
UVM_ASSERT(va_block);
|
||||
UVM_ASSERT(!uvm_va_block_is_dead(va_block));
|
||||
|
||||
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
|
||||
return;
|
||||
|
||||
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
|
||||
|
||||
reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, base_key);
|
||||
UVM_ASSERT(reverse_map);
|
||||
|
||||
// Compute virt address by hand since the old VA block may be messed up
|
||||
// during split
|
||||
virt_addr = reverse_map->va_block->start + reverse_map->region.first * PAGE_SIZE;
|
||||
new_start_page = uvm_va_block_cpu_page_index(va_block, virt_addr);
|
||||
|
||||
reverse_map->region = uvm_va_block_region(new_start_page,
|
||||
new_start_page + uvm_va_block_region_num_pages(reverse_map->region));
|
||||
reverse_map->va_block = va_block;
|
||||
|
||||
UVM_ASSERT(uvm_va_block_contains_address(va_block, uvm_reverse_map_start(reverse_map)));
|
||||
UVM_ASSERT(uvm_va_block_contains_address(va_block, uvm_reverse_map_end(reverse_map)));
|
||||
|
||||
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
|
||||
}
|
||||
|
||||
NV_STATUS uvm_pmm_sysmem_mappings_split_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
|
||||
NvU64 dma_addr,
|
||||
NvU64 new_region_size)
|
||||
{
|
||||
uvm_reverse_map_t *orig_reverse_map;
|
||||
const NvU64 base_key = dma_addr / PAGE_SIZE;
|
||||
const size_t num_pages = new_region_size / PAGE_SIZE;
|
||||
size_t old_num_pages;
|
||||
size_t subregion, num_subregions;
|
||||
uvm_reverse_map_t **new_reverse_maps;
|
||||
|
||||
UVM_ASSERT(IS_ALIGNED(dma_addr, new_region_size));
|
||||
UVM_ASSERT(new_region_size <= UVM_VA_BLOCK_SIZE);
|
||||
UVM_ASSERT(is_power_of_2(new_region_size));
|
||||
|
||||
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
|
||||
return NV_OK;
|
||||
|
||||
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
|
||||
orig_reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, base_key);
|
||||
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
|
||||
|
||||
// We can access orig_reverse_map outside the tree lock because we hold the
|
||||
// VA block lock so we cannot have concurrent modifications in the tree for
|
||||
// the mappings of the chunks that belong to that VA block.
|
||||
UVM_ASSERT(orig_reverse_map);
|
||||
UVM_ASSERT(orig_reverse_map->va_block);
|
||||
uvm_assert_mutex_locked(&orig_reverse_map->va_block->lock);
|
||||
old_num_pages = uvm_va_block_region_num_pages(orig_reverse_map->region);
|
||||
UVM_ASSERT(num_pages < old_num_pages);
|
||||
|
||||
num_subregions = old_num_pages / num_pages;
|
||||
|
||||
new_reverse_maps = uvm_kvmalloc_zero(sizeof(*new_reverse_maps) * (num_subregions - 1));
|
||||
if (!new_reverse_maps)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
// Allocate the descriptors for the new subregions
|
||||
for (subregion = 1; subregion < num_subregions; ++subregion) {
|
||||
uvm_reverse_map_t *new_reverse_map = nv_kmem_cache_zalloc(g_reverse_page_map_cache, NV_UVM_GFP_FLAGS);
|
||||
uvm_page_index_t page_index = orig_reverse_map->region.first + num_pages * subregion;
|
||||
|
||||
if (new_reverse_map == NULL) {
|
||||
// On error, free the previously-created descriptors
|
||||
while (--subregion != 0)
|
||||
kmem_cache_free(g_reverse_page_map_cache, new_reverse_maps[subregion - 1]);
|
||||
|
||||
uvm_kvfree(new_reverse_maps);
|
||||
return NV_ERR_NO_MEMORY;
|
||||
}
|
||||
|
||||
new_reverse_map->va_block = orig_reverse_map->va_block;
|
||||
new_reverse_map->region = uvm_va_block_region(page_index, page_index + num_pages);
|
||||
new_reverse_map->owner = orig_reverse_map->owner;
|
||||
|
||||
new_reverse_maps[subregion - 1] = new_reverse_map;
|
||||
}
|
||||
|
||||
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
|
||||
|
||||
for (subregion = 1; subregion < num_subregions; ++subregion) {
|
||||
NvU64 key;
|
||||
|
||||
for (key = base_key + num_pages * subregion; key < base_key + num_pages * (subregion + 1); ++key) {
|
||||
void **slot = radix_tree_lookup_slot(&sysmem_mappings->reverse_map_tree, key);
|
||||
UVM_ASSERT(slot);
|
||||
UVM_ASSERT(radix_tree_deref_slot(slot) == orig_reverse_map);
|
||||
|
||||
NV_RADIX_TREE_REPLACE_SLOT(&sysmem_mappings->reverse_map_tree, slot, new_reverse_maps[subregion - 1]);
|
||||
}
|
||||
}
|
||||
|
||||
orig_reverse_map->region = uvm_va_block_region(orig_reverse_map->region.first,
|
||||
orig_reverse_map->region.first + num_pages);
|
||||
|
||||
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
|
||||
|
||||
uvm_kvfree(new_reverse_maps);
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
void uvm_pmm_sysmem_mappings_merge_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
|
||||
NvU64 dma_addr,
|
||||
NvU64 new_region_size)
|
||||
{
|
||||
uvm_reverse_map_t *first_reverse_map;
|
||||
uvm_page_index_t running_page_index;
|
||||
NvU64 key;
|
||||
const NvU64 base_key = dma_addr / PAGE_SIZE;
|
||||
const size_t num_pages = new_region_size / PAGE_SIZE;
|
||||
size_t num_mapping_pages;
|
||||
|
||||
UVM_ASSERT(IS_ALIGNED(dma_addr, new_region_size));
|
||||
UVM_ASSERT(new_region_size <= UVM_VA_BLOCK_SIZE);
|
||||
UVM_ASSERT(is_power_of_2(new_region_size));
|
||||
|
||||
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
|
||||
return;
|
||||
|
||||
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
|
||||
|
||||
// Find the first mapping in the region
|
||||
first_reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, base_key);
|
||||
UVM_ASSERT(first_reverse_map);
|
||||
num_mapping_pages = uvm_va_block_region_num_pages(first_reverse_map->region);
|
||||
UVM_ASSERT(num_pages >= num_mapping_pages);
|
||||
UVM_ASSERT(IS_ALIGNED(base_key, num_mapping_pages));
|
||||
|
||||
// The region in the tree already matches the size of the merged region; just return
|
||||
if (num_pages == num_mapping_pages)
|
||||
goto unlock_no_update;
|
||||
|
||||
// Otherwise update the rest of slots to point at the same reverse map
|
||||
// descriptor
|
||||
key = base_key + uvm_va_block_region_num_pages(first_reverse_map->region);
|
||||
running_page_index = first_reverse_map->region.outer;
|
||||
while (key < base_key + num_pages) {
|
||||
uvm_reverse_map_t *reverse_map = NULL;
|
||||
void **slot = radix_tree_lookup_slot(&sysmem_mappings->reverse_map_tree, key);
|
||||
size_t slot_index;
|
||||
UVM_ASSERT(slot);
|
||||
|
||||
reverse_map = radix_tree_deref_slot(slot);
|
||||
UVM_ASSERT(reverse_map);
|
||||
UVM_ASSERT(reverse_map != first_reverse_map);
|
||||
UVM_ASSERT(reverse_map->va_block == first_reverse_map->va_block);
|
||||
UVM_ASSERT(uvm_id_equal(reverse_map->owner, first_reverse_map->owner));
|
||||
UVM_ASSERT(reverse_map->region.first == running_page_index);
|
||||
|
||||
NV_RADIX_TREE_REPLACE_SLOT(&sysmem_mappings->reverse_map_tree, slot, first_reverse_map);
|
||||
|
||||
num_mapping_pages = uvm_va_block_region_num_pages(reverse_map->region);
|
||||
UVM_ASSERT(IS_ALIGNED(key, num_mapping_pages));
|
||||
UVM_ASSERT(key + num_mapping_pages <= base_key + num_pages);
|
||||
|
||||
for (slot_index = 1; slot_index < num_mapping_pages; ++slot_index) {
|
||||
slot = radix_tree_lookup_slot(&sysmem_mappings->reverse_map_tree, key + slot_index);
|
||||
UVM_ASSERT(slot);
|
||||
UVM_ASSERT(reverse_map == radix_tree_deref_slot(slot));
|
||||
|
||||
NV_RADIX_TREE_REPLACE_SLOT(&sysmem_mappings->reverse_map_tree, slot, first_reverse_map);
|
||||
}
|
||||
|
||||
key += num_mapping_pages;
|
||||
running_page_index = reverse_map->region.outer;
|
||||
|
||||
kmem_cache_free(g_reverse_page_map_cache, reverse_map);
|
||||
}
|
||||
|
||||
// Grow the first mapping to cover the whole region
|
||||
first_reverse_map->region.outer = first_reverse_map->region.first + num_pages;
|
||||
|
||||
unlock_no_update:
|
||||
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
|
||||
}
|
||||
|
||||
size_t uvm_pmm_sysmem_mappings_dma_to_virt(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
|
||||
NvU64 dma_addr,
|
||||
NvU64 region_size,
|
||||
uvm_reverse_map_t *out_mappings,
|
||||
size_t max_out_mappings)
|
||||
{
|
||||
NvU64 key;
|
||||
size_t num_mappings = 0;
|
||||
const NvU64 base_key = dma_addr / PAGE_SIZE;
|
||||
NvU32 num_pages = region_size / PAGE_SIZE;
|
||||
|
||||
UVM_ASSERT(region_size >= PAGE_SIZE);
|
||||
UVM_ASSERT(PAGE_ALIGNED(region_size));
|
||||
UVM_ASSERT(sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses);
|
||||
UVM_ASSERT(max_out_mappings > 0);
|
||||
|
||||
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
|
||||
|
||||
key = base_key;
|
||||
do {
|
||||
uvm_reverse_map_t *reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, key);
|
||||
|
||||
if (reverse_map) {
|
||||
size_t num_chunk_pages = uvm_va_block_region_num_pages(reverse_map->region);
|
||||
NvU32 page_offset = key & (num_chunk_pages - 1);
|
||||
NvU32 num_mapping_pages = min(num_pages, (NvU32)num_chunk_pages - page_offset);
|
||||
|
||||
// Sysmem mappings are removed during VA block destruction.
|
||||
// Therefore, we can safely retain the VA blocks as long as they
|
||||
// are in the reverse map and we hold the reverse map lock.
|
||||
uvm_va_block_retain(reverse_map->va_block);
|
||||
out_mappings[num_mappings] = *reverse_map;
|
||||
out_mappings[num_mappings].region.first += page_offset;
|
||||
out_mappings[num_mappings].region.outer = out_mappings[num_mappings].region.first + num_mapping_pages;
|
||||
|
||||
if (++num_mappings == max_out_mappings)
|
||||
break;
|
||||
|
||||
num_pages -= num_mapping_pages;
|
||||
key += num_mapping_pages;
|
||||
}
|
||||
else {
|
||||
--num_pages;
|
||||
++key;
|
||||
}
|
||||
}
|
||||
while (num_pages > 0);
|
||||
|
||||
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
|
||||
|
||||
return num_mappings;
|
||||
}
|
||||
|
||||
uvm_chunk_sizes_mask_t uvm_cpu_chunk_get_allocation_sizes(void)
|
||||
{
|
||||
return uvm_cpu_chunk_allocation_sizes & UVM_CPU_CHUNK_SIZES;
|
||||
return uvm_cpu_chunk_allocation_sizes & UVM_CPU_CHUNK_SIZES;
|
||||
}
|
||||
|
||||
static void uvm_cpu_chunk_set_size(uvm_cpu_chunk_t *chunk, uvm_chunk_size_t size)
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2024 NVIDIA Corporation
|
||||
Copyright (c) 2017-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -30,96 +30,12 @@
|
||||
#include "uvm_lock.h"
|
||||
#include "uvm_pmm_gpu.h"
|
||||
|
||||
// Module to handle per-GPU user mappings to sysmem physical memory. Notably,
|
||||
// this implements a reverse map of the DMA address to {va_block, virt_addr}.
|
||||
// This is required by the GPU access counters feature since they may provide a
|
||||
// physical address in the notification packet (GPA notifications). We use the
|
||||
// table to obtain the VAs of the memory regions being accessed remotely. The
|
||||
// reverse map is implemented by a radix tree, which is indexed using the
|
||||
// DMA address. For now, only PAGE_SIZE translations are supported (i.e. no
|
||||
// big/huge pages).
|
||||
//
|
||||
// TODO: Bug 1995015: add support for physically-contiguous mappings.
|
||||
struct uvm_pmm_sysmem_mappings_struct
|
||||
{
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
struct radix_tree_root reverse_map_tree;
|
||||
|
||||
uvm_mutex_t reverse_map_lock;
|
||||
};
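A note on the keying convention, mirroring the implementation earlier in this change: slots are indexed by DMA page frame (dma_addr / PAGE_SIZE), with one slot per PAGE_SIZE of a mapped region. The lookup below is an illustrative sketch only; the function name is made up and locking is elided.

static uvm_reverse_map_t *example_lookup(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr)
{
    // One radix tree slot per system page; a multi-page mapping occupies
    // consecutive keys that all point at the same uvm_reverse_map_t.
    const NvU64 key = dma_addr / PAGE_SIZE;

    // Callers are expected to hold reverse_map_lock (not shown here).
    return radix_tree_lookup(&sysmem_mappings->reverse_map_tree, key);
}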
|
||||
|
||||
// Global initialization/exit functions, that need to be called during driver
|
||||
// initialization/tear-down. These are needed to allocate/free global internal
|
||||
// data structures.
|
||||
NV_STATUS uvm_pmm_sysmem_init(void);
|
||||
void uvm_pmm_sysmem_exit(void);
|
||||
|
||||
// Initialize per-GPU sysmem mapping tracking
|
||||
NV_STATUS uvm_pmm_sysmem_mappings_init(uvm_gpu_t *gpu, uvm_pmm_sysmem_mappings_t *sysmem_mappings);
|
||||
|
||||
// Destroy per-GPU sysmem mapping tracking. The caller must ensure that all the
|
||||
// mappings have been removed before calling this function.
|
||||
void uvm_pmm_sysmem_mappings_deinit(uvm_pmm_sysmem_mappings_t *sysmem_mappings);
|
||||
|
||||
// If the GPU used to initialize sysmem_mappings supports access counters, the
|
||||
// dma_addr -> {va_block, virt_addr} mapping is inserted in the reverse map.
|
||||
NV_STATUS uvm_pmm_sysmem_mappings_add_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
|
||||
NvU64 dma_addr,
|
||||
NvU64 virt_addr,
|
||||
NvU64 region_size,
|
||||
uvm_va_block_t *va_block,
|
||||
uvm_processor_id_t owner);
|
||||
|
||||
// If the GPU used to initialize sysmem_mappings supports access counters, the
|
||||
// entries for the physical region starting at dma_addr are removed from the
|
||||
// reverse map.
|
||||
void uvm_pmm_sysmem_mappings_remove_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr);
|
||||
|
||||
// Like uvm_pmm_sysmem_mappings_remove_gpu_mapping but it doesn't assert if the
|
||||
// mapping doesn't exist. See uvm_va_block_evict_chunks for more information.
|
||||
void uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr);
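A sketch of the expected add/remove pairing, mirroring how the self-test later in this change drives the API. The function name and dma_addr are placeholders; dma_addr must be aligned to the region size, as the implementation asserts.

static NV_STATUS example_track_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
                                       uvm_va_block_t *va_block,
                                       NvU64 dma_addr)
{
    NV_STATUS status;

    // Both add and remove are called with the owning VA block's lock held.
    uvm_mutex_lock(&va_block->lock);
    status = uvm_pmm_sysmem_mappings_add_gpu_mapping(sysmem_mappings,
                                                     dma_addr,
                                                     va_block->start,
                                                     uvm_va_block_size(va_block),
                                                     va_block,
                                                     UVM_ID_CPU);
    uvm_mutex_unlock(&va_block->lock);
    if (status != NV_OK)
        return status;

    // ... the mapping is live and translatable here ...

    uvm_mutex_lock(&va_block->lock);
    uvm_pmm_sysmem_mappings_remove_gpu_mapping(sysmem_mappings, dma_addr);
    uvm_mutex_unlock(&va_block->lock);

    return NV_OK;
}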
|
||||
|
||||
// If the GPU used to initialize sysmem_mappings supports access counters, the
|
||||
// mapping for the region starting at dma_addr is updated with va_block.
|
||||
// This is required on VA block split.
|
||||
void uvm_pmm_sysmem_mappings_reparent_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
|
||||
NvU64 dma_addr,
|
||||
uvm_va_block_t *va_block);
|
||||
|
||||
// If the GPU used to initialize sysmem_mappings supports access counters, the
|
||||
// mapping for the region starting at dma_addr is split into regions of
|
||||
// new_region_size. new_region_size must be a power of two and smaller than the
|
||||
// previously-registered size.
|
||||
NV_STATUS uvm_pmm_sysmem_mappings_split_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
|
||||
NvU64 dma_addr,
|
||||
NvU64 new_region_size);
|
||||
|
||||
// If the GPU used to initialize sysmem_mappings supports access counters, all
|
||||
// the mappings within the region [dma_addr, dma_addr + new_region_size) are
|
||||
// merged into a single mapping. new_region_size must be a power of two. The
|
||||
// whole region must be previously populated with mappings and all of them must
|
||||
// have the same VA block and processor owner.
|
||||
void uvm_pmm_sysmem_mappings_merge_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
|
||||
NvU64 dma_addr,
|
||||
NvU64 new_region_size);
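A sketch of the split/merge round trip these two calls allow, following the pattern exercised by the self-test below. The function name is made up, 64K is an arbitrary split size, and the owning VA block's lock is assumed to be held by the caller (not shown).

static NV_STATUS example_split_then_merge(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
                                          NvU64 dma_addr,
                                          NvU64 region_size)
{
    // Break one region_size mapping into 64K pieces...
    NV_STATUS status = uvm_pmm_sysmem_mappings_split_gpu_mappings(sysmem_mappings,
                                                                  dma_addr,
                                                                  UVM_PAGE_SIZE_64K);
    if (status != NV_OK)
        return status;

    // ...and coalesce them back into a single region_size mapping.
    uvm_pmm_sysmem_mappings_merge_gpu_mappings(sysmem_mappings, dma_addr, region_size);

    return NV_OK;
}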
|
||||
|
||||
// Obtain the {va_block, virt_addr} information for the mappings in the given
|
||||
// [dma_addr:dma_addr + region_size) range. dma_addr and region_size must be
|
||||
// page-aligned.
|
||||
//
|
||||
// Valid translations are written to out_mappings sequentially (there are no
|
||||
// gaps). max_out_mappings are written, at most. The caller is required to
|
||||
// provide enough entries in out_mappings.
|
||||
//
|
||||
// The VA block in each returned translation entry is retained, and it's up to
|
||||
// the caller to release each of them.
|
||||
size_t uvm_pmm_sysmem_mappings_dma_to_virt(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
|
||||
NvU64 dma_addr,
|
||||
NvU64 region_size,
|
||||
uvm_reverse_map_t *out_mappings,
|
||||
size_t max_out_mappings);
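A usage sketch for the translation call; the mapping setup and the dma_addr are assumed, and the function name is made up. The important detail is that each returned entry holds a reference on its VA block, which the caller must drop.

static void example_translate(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr)
{
    uvm_reverse_map_t translations[8];
    size_t num_mappings;
    size_t i;

    num_mappings = uvm_pmm_sysmem_mappings_dma_to_virt(sysmem_mappings,
                                                       dma_addr,
                                                       PAGE_SIZE,
                                                       translations,
                                                       ARRAY_SIZE(translations));

    for (i = 0; i < num_mappings; ++i) {
        // ... consume uvm_reverse_map_start(&translations[i]) and the region ...

        // Each translation retained its VA block; release it when done.
        uvm_va_block_release(translations[i].va_block);
    }
}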
|
||||
|
||||
#define UVM_CPU_CHUNK_SIZES (UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | PAGE_SIZE)
|
||||
|
||||
typedef enum
|
||||
@@ -425,9 +341,9 @@ void uvm_cpu_chunk_mark_clean(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_inde
|
||||
bool uvm_cpu_chunk_is_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
|
||||
|
||||
static NV_STATUS uvm_test_get_cpu_chunk_allocation_sizes(UVM_TEST_GET_CPU_CHUNK_ALLOC_SIZES_PARAMS *params,
|
||||
struct file *filp)
|
||||
struct file *filp)
|
||||
{
|
||||
params->alloc_size_mask = (NvU32)uvm_cpu_chunk_get_allocation_sizes();
|
||||
return NV_OK;
|
||||
params->alloc_size_mask = (NvU32)uvm_cpu_chunk_get_allocation_sizes();
|
||||
return NV_OK;
|
||||
}
|
||||
#endif
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2024 NVIDIA Corporation
|
||||
Copyright (c) 2017-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -35,544 +35,6 @@
|
||||
#include "uvm_push.h"
|
||||
#include "uvm_processors.h"
|
||||
|
||||
// Pre-allocated array used for dma-to-virt translations
|
||||
static uvm_reverse_map_t g_sysmem_translations[PAGES_PER_UVM_VA_BLOCK];
|
||||
|
||||
// We use our own separate reverse map to easily specify contiguous DMA
|
||||
// address ranges
|
||||
static uvm_pmm_sysmem_mappings_t g_reverse_map;
|
||||
|
||||
// Check that the DMA addresses in the range defined by
|
||||
// [base_dma_addr:base_dma_addr + uvm_va_block_size(va_block)] and page_mask
|
||||
// are registered in the reverse map, using one call per entry. The returned
|
||||
// virtual addresses must belong to va_block. The function assumes a 1:1
|
||||
// dma-to-virt mapping for the whole VA block
|
||||
static NV_STATUS check_reverse_map_block_page(uvm_va_block_t *va_block,
|
||||
NvU64 base_dma_addr,
|
||||
const uvm_page_mask_t *page_mask)
|
||||
{
|
||||
uvm_page_index_t page_index;
|
||||
|
||||
for_each_va_block_page(page_index, va_block) {
|
||||
size_t num_pages;
|
||||
|
||||
memset(g_sysmem_translations, 0, sizeof(g_sysmem_translations));
|
||||
num_pages = uvm_pmm_sysmem_mappings_dma_to_virt(&g_reverse_map,
|
||||
base_dma_addr + page_index * PAGE_SIZE,
|
||||
PAGE_SIZE,
|
||||
g_sysmem_translations,
|
||||
PAGES_PER_UVM_VA_BLOCK);
|
||||
if (!page_mask || uvm_page_mask_test(page_mask, page_index)) {
|
||||
TEST_CHECK_RET(num_pages == 1);
|
||||
TEST_CHECK_RET(g_sysmem_translations[0].va_block == va_block);
|
||||
TEST_CHECK_RET(nv_kref_read(&va_block->kref) >= 2);
|
||||
TEST_CHECK_RET(uvm_reverse_map_start(&g_sysmem_translations[0]) == uvm_va_block_cpu_page_address(va_block, page_index));
|
||||
TEST_CHECK_RET(uvm_va_block_region_num_pages(g_sysmem_translations[0].region) == 1);
|
||||
TEST_CHECK_RET(UVM_ID_IS_CPU(g_sysmem_translations[0].owner));
|
||||
uvm_va_block_release(g_sysmem_translations[0].va_block);
|
||||
}
|
||||
else {
|
||||
TEST_CHECK_RET(num_pages == 0);
|
||||
}
|
||||
}
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// Check that the DMA addresses in the range defined by
|
||||
// [base_dma_addr:base_dma_addr + uvm_va_block_size(va_block)] and page_mask
|
||||
// are registered in the reverse map, using a single translation call. The
|
||||
// returned virtual addresses must belong to va_block. The function assumes a
|
||||
// 1:1 dma-to-virt mapping for the whole VA block
|
||||
static NV_STATUS check_reverse_map_block_batch(uvm_va_block_t *va_block,
|
||||
NvU64 base_dma_addr,
|
||||
const uvm_page_mask_t *page_mask)
|
||||
{
|
||||
size_t num_translations;
|
||||
size_t num_pages;
|
||||
size_t reverse_map_index;
|
||||
|
||||
memset(g_sysmem_translations, 0, sizeof(g_sysmem_translations));
|
||||
num_translations = uvm_pmm_sysmem_mappings_dma_to_virt(&g_reverse_map,
|
||||
base_dma_addr,
|
||||
uvm_va_block_size(va_block),
|
||||
g_sysmem_translations,
|
||||
PAGES_PER_UVM_VA_BLOCK);
|
||||
if (num_translations == 0 && page_mask)
|
||||
TEST_CHECK_RET(uvm_page_mask_empty(page_mask));
|
||||
|
||||
num_pages = 0;
|
||||
for (reverse_map_index = 0; reverse_map_index < num_translations; ++reverse_map_index) {
|
||||
uvm_reverse_map_t *reverse_map = &g_sysmem_translations[reverse_map_index];
|
||||
size_t num_reverse_map_pages = uvm_va_block_region_num_pages(reverse_map->region);
|
||||
|
||||
num_pages += num_reverse_map_pages;
|
||||
|
||||
TEST_CHECK_RET(reverse_map->va_block == va_block);
|
||||
TEST_CHECK_RET(nv_kref_read(&va_block->kref) >= 2);
|
||||
uvm_va_block_release(reverse_map->va_block);
|
||||
TEST_CHECK_RET(UVM_ID_IS_CPU(reverse_map->owner));
|
||||
}
|
||||
|
||||
if (page_mask)
|
||||
TEST_CHECK_RET(num_pages == uvm_page_mask_weight(page_mask));
|
||||
else
|
||||
TEST_CHECK_RET(num_pages == uvm_va_block_num_cpu_pages(va_block));
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
// Check that the DMA addresses for all the CPU pages of the two given VA blocks
|
||||
// are registered in the reverse map, using a single translation call. The
|
||||
// returned virtual addresses must belong to one of the blocks. The function
|
||||
// assumes a 1:1 dma-to-virt mapping for each VA block and that va_block1 is
|
||||
// mapped behind va_block0.
|
||||
static NV_STATUS check_reverse_map_two_blocks_batch(NvU64 base_dma_addr,
|
||||
uvm_va_block_t *va_block0,
|
||||
uvm_va_block_t *va_block1)
|
||||
{
|
||||
size_t num_pages;
|
||||
size_t num_translations;
|
||||
size_t reverse_map_index;
|
||||
|
||||
memset(g_sysmem_translations, 0, sizeof(g_sysmem_translations));
|
||||
num_translations = uvm_pmm_sysmem_mappings_dma_to_virt(&g_reverse_map,
|
||||
base_dma_addr,
|
||||
UVM_VA_BLOCK_SIZE,
|
||||
g_sysmem_translations,
|
||||
PAGES_PER_UVM_VA_BLOCK);
|
||||
TEST_CHECK_RET(num_translations == 2);
|
||||
|
||||
num_pages = 0;
|
||||
for (reverse_map_index = 0; reverse_map_index < num_translations; ++reverse_map_index) {
|
||||
uvm_va_block_t *block;
|
||||
uvm_reverse_map_t *reverse_map = &g_sysmem_translations[reverse_map_index];
|
||||
NvU64 virt_addr = uvm_reverse_map_start(reverse_map);
|
||||
size_t num_reverse_map_pages = uvm_va_block_region_num_pages(reverse_map->region);
|
||||
|
||||
if (reverse_map_index == 0)
|
||||
block = va_block0;
|
||||
else
|
||||
block = va_block1;
|
||||
|
||||
TEST_CHECK_RET(reverse_map->va_block == block);
|
||||
TEST_CHECK_RET(nv_kref_read(&block->kref) >= 2);
|
||||
uvm_va_block_release(reverse_map->va_block);
|
||||
TEST_CHECK_RET(num_reverse_map_pages == uvm_va_block_num_cpu_pages(block));
|
||||
TEST_CHECK_RET(virt_addr == block->start);
|
||||
TEST_CHECK_RET(UVM_ID_IS_CPU(reverse_map->owner));
|
||||
|
||||
num_pages += num_reverse_map_pages;
|
||||
}
|
||||
|
||||
TEST_CHECK_RET(num_pages == uvm_va_block_num_cpu_pages(va_block0) + uvm_va_block_num_cpu_pages(va_block1));
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static const NvU64 g_base_dma_addr = UVM_VA_BLOCK_SIZE;
|
||||
|
||||
// This function adds the mappings for all the subregions in va_block defined
|
||||
// by page_mask. g_base_dma_addr is used as the base DMA address for the whole
|
||||
// VA block.
|
||||
static NV_STATUS test_pmm_sysmem_reverse_map_single(uvm_va_block_t *va_block,
|
||||
uvm_page_mask_t *page_mask,
|
||||
uvm_chunk_size_t split_size,
|
||||
bool merge)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_va_block_region_t subregion;
|
||||
|
||||
TEST_CHECK_RET(is_power_of_2(split_size));
|
||||
TEST_CHECK_RET(split_size >= PAGE_SIZE);
|
||||
|
||||
for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
|
||||
TEST_CHECK_RET(is_power_of_2(uvm_va_block_region_size(subregion)));
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
|
||||
g_base_dma_addr + subregion.first * PAGE_SIZE,
|
||||
va_block->start + subregion.first * PAGE_SIZE,
|
||||
uvm_va_block_region_size(subregion),
|
||||
va_block,
|
||||
UVM_ID_CPU);
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
}
|
||||
|
||||
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
|
||||
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
|
||||
|
||||
if (split_size != UVM_CHUNK_SIZE_MAX) {
|
||||
for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
|
||||
TEST_CHECK_RET(uvm_va_block_region_size(subregion) > split_size);
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
status = uvm_pmm_sysmem_mappings_split_gpu_mappings(&g_reverse_map,
|
||||
g_base_dma_addr + subregion.first * PAGE_SIZE,
|
||||
split_size);
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
TEST_CHECK_RET(status == NV_OK);
|
||||
}
|
||||
|
||||
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
|
||||
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
|
||||
}
|
||||
|
||||
if (split_size != UVM_CHUNK_SIZE_MAX && merge) {
|
||||
for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
|
||||
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&g_reverse_map,
|
||||
g_base_dma_addr + subregion.first * PAGE_SIZE,
|
||||
uvm_va_block_region_size(subregion));
|
||||
}
|
||||
|
||||
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
|
||||
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
|
||||
}
|
||||
|
||||
for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
|
||||
NvU64 subregion_dma_addr = g_base_dma_addr + subregion.first * PAGE_SIZE;
|
||||
|
||||
if (split_size == UVM_CHUNK_SIZE_MAX || merge) {
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, subregion_dma_addr);
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
}
|
||||
else {
|
||||
size_t chunk;
|
||||
size_t num_chunks = uvm_va_block_region_size(subregion) / split_size;
|
||||
TEST_CHECK_RET(num_chunks > 1);
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
|
||||
for (chunk = 0; chunk < num_chunks; ++chunk)
|
||||
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, subregion_dma_addr + chunk * split_size);
|
||||
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
}
|
||||
}
|
||||
|
||||
uvm_page_mask_zero(page_mask);
|
||||
|
||||
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
|
||||
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static uvm_page_mask_t g_page_mask;
|
||||
|
||||
static NV_STATUS test_pmm_sysmem_reverse_map_single_whole(uvm_va_space_t *va_space, NvU64 addr)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_va_block_t *va_block;
|
||||
const bool merge_array[] = {false, true};
|
||||
const uvm_chunk_size_t chunk_split_array[] = { UVM_CHUNK_SIZE_4K, UVM_CHUNK_SIZE_64K, UVM_CHUNK_SIZE_MAX };
|
||||
unsigned merge_index;
|
||||
unsigned chunk_split_index;
|
||||
|
||||
status = uvm_va_block_find(va_space, addr, &va_block);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block)));
|
||||
|
||||
for (merge_index = 0; merge_index < ARRAY_SIZE(merge_array); ++merge_index) {
|
||||
for (chunk_split_index = 0; chunk_split_index < ARRAY_SIZE(chunk_split_array); ++chunk_split_index) {
|
||||
// The reverse map has PAGE_SIZE granularity
|
||||
if (chunk_split_array[chunk_split_index] < PAGE_SIZE)
|
||||
continue;
|
||||
|
||||
uvm_page_mask_region_fill(&g_page_mask, uvm_va_block_region_from_block(va_block));
|
||||
|
||||
TEST_CHECK_RET(test_pmm_sysmem_reverse_map_single(va_block,
|
||||
&g_page_mask,
|
||||
chunk_split_array[chunk_split_index],
|
||||
merge_array[merge_index]) == NV_OK);
|
||||
}
|
||||
}
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_pmm_sysmem_reverse_map_single_pattern(uvm_va_space_t *va_space, NvU64 addr)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_va_block_t *va_block;
|
||||
uvm_page_index_t page_index;
|
||||
|
||||
status = uvm_va_block_find(va_space, addr, &va_block);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
uvm_page_mask_zero(&g_page_mask);
|
||||
|
||||
for_each_va_block_page(page_index, va_block) {
|
||||
if (page_index % 2 == 0)
|
||||
uvm_page_mask_set(&g_page_mask, page_index);
|
||||
}
|
||||
|
||||
return test_pmm_sysmem_reverse_map_single(va_block, &g_page_mask, UVM_CHUNK_SIZE_MAX, false);
|
||||
}
|
||||
|
||||
// This function assumes that addr points at a VA range containing 4 VA blocks,
|
||||
// each of size UVM_VA_BLOCK_SIZE / 4.
|
||||
static NV_STATUS test_pmm_sysmem_reverse_map_many_blocks(uvm_va_space_t *va_space, NvU64 addr)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_va_block_t *va_block0;
|
||||
uvm_va_block_t *va_block1;
|
||||
NvU64 base_dma_addr0;
|
||||
NvU64 base_dma_addr1;
|
||||
|
||||
status = uvm_va_block_find(va_space, addr + UVM_VA_BLOCK_SIZE / 4, &va_block0);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
status = uvm_va_block_find(va_space, addr + 3 * UVM_VA_BLOCK_SIZE / 4, &va_block1);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
TEST_CHECK_RET(va_block0 != va_block1);
|
||||
|
||||
base_dma_addr0 = g_base_dma_addr + uvm_va_block_size(va_block0);
|
||||
base_dma_addr1 = base_dma_addr0 + uvm_va_block_size(va_block0);
|
||||
|
||||
TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block0)));
|
||||
TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block1)));
|
||||
|
||||
uvm_mutex_lock(&va_block0->lock);
|
||||
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
|
||||
base_dma_addr0,
|
||||
va_block0->start,
|
||||
uvm_va_block_size(va_block0),
|
||||
va_block0,
|
||||
UVM_ID_CPU);
|
||||
uvm_mutex_unlock(&va_block0->lock);
|
||||
TEST_CHECK_RET(status == NV_OK);
|
||||
|
||||
uvm_mutex_lock(&va_block1->lock);
|
||||
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
|
||||
base_dma_addr1,
|
||||
va_block1->start,
|
||||
uvm_va_block_size(va_block1),
|
||||
va_block1,
|
||||
UVM_ID_CPU);
|
||||
uvm_mutex_unlock(&va_block1->lock);
|
||||
|
||||
// Check each VA block individually
|
||||
if (status == NV_OK) {
|
||||
TEST_CHECK_GOTO(check_reverse_map_block_page(va_block0, base_dma_addr0, NULL) == NV_OK, error);
|
||||
TEST_CHECK_GOTO(check_reverse_map_block_batch(va_block0, base_dma_addr0, NULL) == NV_OK, error);
|
||||
TEST_CHECK_GOTO(check_reverse_map_block_page(va_block1, base_dma_addr1, NULL) == NV_OK, error);
|
||||
TEST_CHECK_GOTO(check_reverse_map_block_batch(va_block1, base_dma_addr1, NULL) == NV_OK, error);
|
||||
|
||||
// Check both VA blocks at the same time
|
||||
TEST_CHECK_GOTO(check_reverse_map_two_blocks_batch(g_base_dma_addr, va_block0, va_block1) == NV_OK, error);
|
||||
|
||||
error:
|
||||
uvm_mutex_lock(&va_block1->lock);
|
||||
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, base_dma_addr1);
|
||||
uvm_mutex_unlock(&va_block1->lock);
|
||||
}
|
||||
|
||||
uvm_mutex_lock(&va_block0->lock);
|
||||
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, base_dma_addr0);
|
||||
uvm_mutex_unlock(&va_block0->lock);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
// This function registers a non-uniform distribution of chunks (mixing 4K and 64K chunks)
|
||||
// and merges them back to verify that the logic is working.
|
||||
static NV_STATUS test_pmm_sysmem_reverse_map_merge(uvm_va_space_t *va_space, NvU64 addr)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_va_block_t *va_block;
|
||||
const unsigned chunks_64k_pos[] =
|
||||
{
|
||||
16,
|
||||
64,
|
||||
96,
|
||||
192,
|
||||
208,
|
||||
224,
|
||||
288,
|
||||
320,
|
||||
384,
|
||||
480
|
||||
};
|
||||
uvm_page_index_t page_index;
|
||||
unsigned i;
|
||||
|
||||
if (PAGE_SIZE != UVM_PAGE_SIZE_4K)
|
||||
return NV_OK;
|
||||
|
||||
status = uvm_va_block_find(va_space, addr, &va_block);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
TEST_CHECK_RET(uvm_va_block_size(va_block) == UVM_VA_BLOCK_SIZE);
|
||||
|
||||
page_index = 0;
|
||||
for (i = 0; i < ARRAY_SIZE(chunks_64k_pos); ++i) {
|
||||
// Fill with 4K mappings until the next 64K mapping
|
||||
while (page_index < chunks_64k_pos[i]) {
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
|
||||
g_base_dma_addr + page_index * PAGE_SIZE,
|
||||
uvm_va_block_cpu_page_address(va_block, page_index),
|
||||
PAGE_SIZE,
|
||||
va_block,
|
||||
UVM_ID_CPU);
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
TEST_CHECK_RET(status == NV_OK);
|
||||
|
||||
++page_index;
|
||||
}
|
||||
|
||||
// Register the 64K mapping
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
|
||||
g_base_dma_addr + page_index * PAGE_SIZE,
|
||||
uvm_va_block_cpu_page_address(va_block, page_index),
|
||||
UVM_CHUNK_SIZE_64K,
|
||||
va_block,
|
||||
UVM_ID_CPU);
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
TEST_CHECK_RET(status == NV_OK);
|
||||
|
||||
page_index += UVM_PAGE_SIZE_64K / PAGE_SIZE;
|
||||
}
|
||||
|
||||
// Fill the tail with 4K mappings, too
|
||||
while (page_index < PAGES_PER_UVM_VA_BLOCK) {
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
|
||||
g_base_dma_addr + page_index * PAGE_SIZE,
|
||||
uvm_va_block_cpu_page_address(va_block, page_index),
|
||||
PAGE_SIZE,
|
||||
va_block,
|
||||
UVM_ID_CPU);
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
TEST_CHECK_RET(status == NV_OK);
|
||||
|
||||
++page_index;
|
||||
}
|
||||
|
||||
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, NULL) == NV_OK);
|
||||
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, NULL) == NV_OK);
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&g_reverse_map,
|
||||
g_base_dma_addr,
|
||||
uvm_va_block_size(va_block));
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, NULL) == NV_OK);
|
||||
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, NULL) == NV_OK);
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, g_base_dma_addr);
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS test_pmm_sysmem_reverse_map_remove_on_eviction(uvm_va_space_t *va_space, NvU64 addr)
|
||||
{
|
||||
uvm_va_block_t *va_block;
|
||||
NV_STATUS status = uvm_va_block_find(va_space, addr, &va_block);
|
||||
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block)));
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
|
||||
g_base_dma_addr,
|
||||
addr,
|
||||
uvm_va_block_size(va_block),
|
||||
va_block,
|
||||
UVM_ID_CPU);
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
uvm_mutex_lock(&va_block->lock);
|
||||
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, g_base_dma_addr);
|
||||
uvm_mutex_unlock(&va_block->lock);
|
||||
|
||||
TEST_CHECK_RET(status == NV_OK);
|
||||
|
||||
uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(&g_reverse_map, g_base_dma_addr);
|
||||
uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(&g_reverse_map, g_base_dma_addr);
|
||||
|
||||
return NV_OK;
|
||||
}
|
||||
|
||||
static NV_STATUS test_pmm_sysmem_reverse_map(uvm_va_space_t *va_space, NvU64 addr1, NvU64 addr2)
|
||||
{
|
||||
NV_STATUS status = NV_OK;
|
||||
uvm_gpu_t *volta_gpu = NULL;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
// Find a GPU with support for access counters with physical address
|
||||
// notifications, since such support is required to add or remove entries in the
|
||||
// reverse map.
|
||||
for_each_va_space_gpu(gpu, va_space) {
|
||||
if (gpu->parent->access_counters_can_use_physical_addresses) {
|
||||
// Initialize the reverse map.
|
||||
status = uvm_pmm_sysmem_mappings_init(gpu, &g_reverse_map);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
volta_gpu = gpu;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!volta_gpu)
|
||||
return NV_ERR_INVALID_DEVICE;
|
||||
|
||||
status = test_pmm_sysmem_reverse_map_single_whole(va_space, addr1);
|
||||
|
||||
if (status == NV_OK)
|
||||
status = test_pmm_sysmem_reverse_map_single_pattern(va_space, addr1);
|
||||
|
||||
if (status == NV_OK)
|
||||
status = test_pmm_sysmem_reverse_map_many_blocks(va_space, addr2);
|
||||
|
||||
if (status == NV_OK)
|
||||
status = test_pmm_sysmem_reverse_map_merge(va_space, addr1);
|
||||
|
||||
if (status == NV_OK)
|
||||
status = test_pmm_sysmem_reverse_map_remove_on_eviction(va_space, addr1);
|
||||
|
||||
uvm_pmm_sysmem_mappings_deinit(&g_reverse_map);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
NV_STATUS uvm_test_pmm_sysmem(UVM_TEST_PMM_SYSMEM_PARAMS *params, struct file *filp)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_va_space_t *va_space;
|
||||
|
||||
va_space = uvm_va_space_get(filp);
|
||||
|
||||
// Take the global lock to avoid interference from different instances of
|
||||
// the test, since we use a bunch of global variables
|
||||
uvm_mutex_lock(&g_uvm_global.global_lock);
|
||||
uvm_va_space_down_write(va_space);
|
||||
|
||||
status = test_pmm_sysmem_reverse_map(va_space, params->range_address1, params->range_address2);
|
||||
|
||||
uvm_va_space_up_write(va_space);
|
||||
uvm_mutex_unlock(&g_uvm_global.global_lock);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
static NV_STATUS cpu_chunk_map_on_cpu(uvm_cpu_chunk_t *chunk, void **cpu_addr)
|
||||
{
|
||||
struct page **pages;
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -144,6 +144,9 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
|
||||
}
|
||||
|
||||
if (gpu->parent->replayable_faults_supported) {
|
||||
UVM_ASSERT(gpu->parent->isr.access_counters);
|
||||
UVM_ASSERT(gpu->parent->access_counter_buffer);
|
||||
|
||||
uvm_parent_gpu_replayable_faults_isr_lock(gpu->parent);
|
||||
status = uvm_test_verify_bh_affinity(&gpu->parent->isr.replayable_faults,
|
||||
gpu->parent->closest_cpu_numa_node);
|
||||
@@ -161,10 +164,11 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
|
||||
}
|
||||
|
||||
if (gpu->parent->access_counters_supported) {
|
||||
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
|
||||
status = uvm_test_verify_bh_affinity(&gpu->parent->isr.access_counters,
|
||||
// We only need to test one notification buffer, so we pick index 0.
|
||||
uvm_access_counters_isr_lock(&gpu->parent->access_counter_buffer[0]);
|
||||
status = uvm_test_verify_bh_affinity(&gpu->parent->isr.access_counters[0],
|
||||
gpu->parent->closest_cpu_numa_node);
|
||||
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
|
||||
uvm_access_counters_isr_unlock(&gpu->parent->access_counter_buffer[0]);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -311,7 +315,6 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_DISABLE_NVLINK_PEER_ACCESS, uvm_test_disable_nvlink_peer_access);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_GET_PAGE_THRASHING_POLICY, uvm_test_get_page_thrashing_policy);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SET_PAGE_THRASHING_POLICY, uvm_test_set_page_thrashing_policy);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_PMM_SYSMEM, uvm_test_pmm_sysmem);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_PMM_REVERSE_MAP, uvm_test_pmm_reverse_map);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_VA_SPACE_MM_RETAIN, uvm_test_va_space_mm_retain);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_PMM_CHUNK_WITH_ELEVATED_PAGE, uvm_test_pmm_chunk_with_elevated_page);
|
||||
@@ -350,6 +353,7 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_INJECT_TOOLS_EVENT_V2, uvm_test_inject_tools_event_v2);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SET_P2P_SUSPENDED, uvm_test_set_p2p_suspended);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_INJECT_NVLINK_ERROR, uvm_test_inject_nvlink_error);
|
||||
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_QUERY_ACCESS_COUNTERS, uvm_test_query_access_counters);
|
||||
}
|
||||
|
||||
return -EINVAL;
|
||||
|
@@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@@ -28,8 +28,7 @@
|
||||
#include "uvm_common.h"
|
||||
#include "uvm_test_ioctl.h"
|
||||
|
||||
// Unlike UVM_INFO_PRINT, this prints on release builds
|
||||
#define UVM_TEST_PRINT(fmt, ...) UVM_PRINT_FUNC(pr_info, " " fmt, ##__VA_ARGS__)
|
||||
#define UVM_TEST_PRINT UVM_ERR_PRINT_ALWAYS
|
||||
|
||||
// WARNING: This macro will return out of the current scope
|
||||
#define TEST_CHECK_RET(cond) \
|
||||
@@ -160,30 +159,35 @@ NV_STATUS uvm_test_range_group_tree(UVM_TEST_RANGE_GROUP_TREE_PARAMS *params, st
|
||||
NV_STATUS uvm_test_range_group_range_info(UVM_TEST_RANGE_GROUP_RANGE_INFO_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_test_range_group_range_count(UVM_TEST_RANGE_GROUP_RANGE_COUNT_PARAMS *params, struct file *filp);
|
||||
|
||||
NV_STATUS uvm_test_get_prefetch_faults_reenable_lapse(UVM_TEST_GET_PREFETCH_FAULTS_REENABLE_LAPSE_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_test_set_prefetch_faults_reenable_lapse(UVM_TEST_SET_PREFETCH_FAULTS_REENABLE_LAPSE_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_test_get_prefetch_faults_reenable_lapse(UVM_TEST_GET_PREFETCH_FAULTS_REENABLE_LAPSE_PARAMS *params,
|
||||
struct file *filp);
|
||||
NV_STATUS uvm_test_set_prefetch_faults_reenable_lapse(UVM_TEST_SET_PREFETCH_FAULTS_REENABLE_LAPSE_PARAMS *params,
|
||||
struct file *filp);
|
||||
|
||||
NV_STATUS uvm_test_check_channel_va_space(UVM_TEST_CHECK_CHANNEL_VA_SPACE_PARAMS *params, struct file *filp);
|
||||
|
||||
NV_STATUS uvm_test_pmm_sysmem(UVM_TEST_PMM_SYSMEM_PARAMS *params, struct file *filp);
|
||||
|
||||
NV_STATUS uvm_test_pmm_reverse_map(UVM_TEST_PMM_REVERSE_MAP_PARAMS *params, struct file *filp);
|
||||
|
||||
NV_STATUS uvm_test_pmm_chunk_with_elevated_page(UVM_TEST_PMM_CHUNK_WITH_ELEVATED_PAGE_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_test_pmm_chunk_with_elevated_page(UVM_TEST_PMM_CHUNK_WITH_ELEVATED_PAGE_PARAMS *params,
|
||||
struct file *filp);
|
||||
NV_STATUS uvm_test_va_space_inject_error(UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS *params, struct file *filp);
|
||||
|
||||
NV_STATUS uvm_test_get_gpu_time(UVM_TEST_GET_GPU_TIME_PARAMS *params, struct file *filp);
|
||||
|
||||
NV_STATUS uvm_test_pmm_release_free_root_chunks(UVM_TEST_PMM_RELEASE_FREE_ROOT_CHUNKS_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_test_pmm_release_free_root_chunks(UVM_TEST_PMM_RELEASE_FREE_ROOT_CHUNKS_PARAMS *params,
|
||||
struct file *filp);
|
||||
|
||||
NV_STATUS uvm_test_drain_replayable_faults(UVM_TEST_DRAIN_REPLAYABLE_FAULTS_PARAMS *params, struct file *filp);
|
||||
|
||||
NV_STATUS uvm_test_va_space_add_dummy_thread_contexts(UVM_TEST_VA_SPACE_ADD_DUMMY_THREAD_CONTEXTS_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_test_va_space_remove_dummy_thread_contexts(UVM_TEST_VA_SPACE_REMOVE_DUMMY_THREAD_CONTEXTS_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_test_va_space_add_dummy_thread_contexts(UVM_TEST_VA_SPACE_ADD_DUMMY_THREAD_CONTEXTS_PARAMS *params,
|
||||
struct file *filp);
|
||||
NV_STATUS uvm_test_va_space_remove_dummy_thread_contexts(UVM_TEST_VA_SPACE_REMOVE_DUMMY_THREAD_CONTEXTS_PARAMS *params,
|
||||
struct file *filp);
|
||||
NV_STATUS uvm_test_thread_context_sanity(UVM_TEST_THREAD_CONTEXT_SANITY_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_test_thread_context_perf(UVM_TEST_THREAD_CONTEXT_PERF_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_test_tools_flush_replay_events(UVM_TEST_TOOLS_FLUSH_REPLAY_EVENTS_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params,
|
||||
struct file *filp);
|
||||
NV_STATUS uvm_test_rb_tree_directed(UVM_TEST_RB_TREE_DIRECTED_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_test_rb_tree_random(UVM_TEST_RB_TREE_RANDOM_PARAMS *params, struct file *filp);
|
||||
NV_STATUS uvm_test_sec2_sanity(UVM_TEST_SEC2_SANITY_PARAMS *params, struct file *filp);
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2024 NVidia Corporation
|
||||
Copyright (c) 2015-2025 NVidia Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -926,31 +926,38 @@ typedef struct
|
||||
|
||||
// Change configuration of access counters. This call will disable access
|
||||
// counters and reenable them using the new configuration. All previous
|
||||
// notifications will be lost
|
||||
// notifications will be lost.
|
||||
//
|
||||
// The reconfiguration affects all VA spaces that rely on the access
|
||||
// counters information for the same GPU. To avoid conflicting configurations,
|
||||
// only one VA space is allowed to reconfigure the GPU at a time.
|
||||
//
|
||||
// When the reconfiguration VA space is destroyed, the bottom-half control
|
||||
// settings are reset.
|
||||
//
|
||||
// Error returns:
|
||||
// NV_ERR_INVALID_STATE
|
||||
// - The GPU has already been reconfigured in a different VA space
|
||||
// - The GPU has already been reconfigured in a different VA space.
|
||||
#define UVM_TEST_RECONFIGURE_ACCESS_COUNTERS UVM_TEST_IOCTL_BASE(56)
|
||||
typedef struct
|
||||
{
|
||||
NvProcessorUuid gpu_uuid; // In
|
||||
|
||||
// Type UVM_ACCESS_COUNTER_GRANULARITY from nv_uvm_types.h
|
||||
NvU32 mimc_granularity; // In
|
||||
NvU32 momc_granularity; // In
|
||||
|
||||
// Type UVM_ACCESS_COUNTER_USE_LIMIT from nv_uvm_types.h
|
||||
NvU32 mimc_use_limit; // In
|
||||
NvU32 momc_use_limit; // In
|
||||
NvU32 granularity; // In
|
||||
|
||||
NvU32 threshold; // In
|
||||
NvBool enable_mimc_migrations; // In
|
||||
NvBool enable_momc_migrations; // In
|
||||
NvBool enable_migrations; // In
|
||||
|
||||
// Settings to control how notifications are serviced by the access counters
|
||||
// bottom-half. These settings help tests to exercise races in the driver,
|
||||
// e.g., unregister a GPU while (valid) pending notifications remain in the
|
||||
// notification buffer.
|
||||
//
|
||||
// 0 max_batch_size doesn't change driver's behavior.
|
||||
NvU32 max_batch_size; // In
|
||||
NvBool one_iteration_per_batch; // In
|
||||
NvU32 sleep_per_iteration_us; // In
|
||||
|
||||
NV_STATUS rmStatus; // Out
|
||||
} UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS;
|
||||
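To make the reshaped reconfigure interface concrete, here is a minimal user-space sketch that fills the new single-granularity layout plus the bottom-half controls and issues the test ioctl. It is an illustration, not part of the change: it assumes the UVM test headers are on the include path, that `uvm_fd` is an already-initialized `/dev/nvidia-uvm` descriptor, and that `UVM_ACCESS_COUNTER_GRANULARITY_2M` is one of the `UVM_ACCESS_COUNTER_GRANULARITY` enumerators from nv_uvm_types.h.

```c
#include <string.h>
#include <sys/ioctl.h>
#include "uvm_test_ioctl.h"

/* Sketch: reconfigure access counters with the post-MIMC/MOMC layout.
 * uvm_fd is an already-initialized /dev/nvidia-uvm descriptor. */
static int reconfigure_access_counters(int uvm_fd, const NvProcessorUuid *gpu_uuid)
{
    UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS params;

    memset(&params, 0, sizeof(params));
    memcpy(&params.gpu_uuid, gpu_uuid, sizeof(params.gpu_uuid));

    // Single granularity/threshold pair now that the per-type split is gone.
    params.granularity       = UVM_ACCESS_COUNTER_GRANULARITY_2M;
    params.threshold         = 256;
    params.enable_migrations = NV_TRUE;

    // New bottom-half controls: tiny batches with a sleep between iterations
    // widen the window for racing a GPU unregister against pending
    // notifications, which is what these knobs exist to exercise.
    params.max_batch_size          = 1;
    params.one_iteration_per_batch = NV_TRUE;
    params.sleep_per_iteration_us  = 1000;

    if (ioctl(uvm_fd, UVM_TEST_RECONFIGURE_ACCESS_COUNTERS, &params) != 0)
        return -1;

    return params.rmStatus == NV_OK ? 0 : -1;
}
```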
@ -962,13 +969,6 @@ typedef enum
|
||||
UVM_TEST_ACCESS_COUNTER_RESET_MODE_MAX
|
||||
} UVM_TEST_ACCESS_COUNTER_RESET_MODE;
|
||||
|
||||
typedef enum
|
||||
{
|
||||
UVM_TEST_ACCESS_COUNTER_TYPE_MIMC = 0,
|
||||
UVM_TEST_ACCESS_COUNTER_TYPE_MOMC,
|
||||
UVM_TEST_ACCESS_COUNTER_TYPE_MAX
|
||||
} UVM_TEST_ACCESS_COUNTER_TYPE;
|
||||
|
||||
// Clear the contents of the access counters. This call supports different
|
||||
// modes for targeted/global resets.
|
||||
#define UVM_TEST_RESET_ACCESS_COUNTERS UVM_TEST_IOCTL_BASE(57)
|
||||
@ -979,9 +979,6 @@ typedef struct
|
||||
// Type UVM_TEST_ACCESS_COUNTER_RESET_MODE
|
||||
NvU32 mode; // In
|
||||
|
||||
// Type UVM_TEST_ACCESS_COUNTER_TYPE
|
||||
NvU32 counter_type; // In
|
||||
|
||||
NvU32 bank; // In
|
||||
NvU32 tag; // In
|
||||
NV_STATUS rmStatus; // Out
|
||||
@ -1061,14 +1058,6 @@ typedef struct
|
||||
NV_STATUS rmStatus; // Out
|
||||
} UVM_TEST_SET_PAGE_THRASHING_POLICY_PARAMS;
|
||||
|
||||
#define UVM_TEST_PMM_SYSMEM UVM_TEST_IOCTL_BASE(64)
|
||||
typedef struct
|
||||
{
|
||||
NvU64 range_address1 NV_ALIGN_BYTES(8); // In
|
||||
NvU64 range_address2 NV_ALIGN_BYTES(8); // In
|
||||
NV_STATUS rmStatus; // Out
|
||||
} UVM_TEST_PMM_SYSMEM_PARAMS;
|
||||
|
||||
#define UVM_TEST_PMM_REVERSE_MAP UVM_TEST_IOCTL_BASE(65)
|
||||
typedef struct
|
||||
{
|
||||
@ -1142,18 +1131,46 @@ typedef struct
|
||||
NV_STATUS rmStatus; // Out
|
||||
} UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS;
|
||||
|
||||
// Inject an error into the VA space
|
||||
// Inject an error into the VA space or into a to-be registered GPU.
|
||||
//
|
||||
// If migrate_vma_allocation_fail_nth is greater than 0, the nth page
|
||||
// allocation within migrate_vma will fail.
|
||||
//
|
||||
// If va_block_allocation_fail_nth is greater than 0, the nth call to
|
||||
// uvm_va_block_find_create() will fail with NV_ERR_NO_MEMORY.
|
||||
//
|
||||
// If gpu_access_counters_alloc_buffer is set, the parent_gpu's access counters
|
||||
// buffer allocation will fail with NV_ERR_NO_MEMORY.
|
||||
//
|
||||
// If gpu_access_counters_alloc_block_context is set, the access counters
|
||||
// buffer's block_context allocation will fail with NV_ERR_NO_MEMORY.
|
||||
//
|
||||
// If gpu_isr_access_counters_alloc is set, the ISR access counters allocation
|
||||
// will fail with NV_ERR_NO_MEMORY.
|
||||
//
|
||||
// If gpu_isr_access_counters_alloc_stats_cpu is set, the ISR access counters
|
||||
// buffer's stats_cpu allocation will fail with NV_ERR_NO_MEMORY.
|
||||
//
|
||||
// If access_counters_batch_context_notifications is set, the access counters
|
||||
// batch_context's notifications allocation will fail with NV_ERR_NO_MEMORY.
|
||||
//
|
||||
// If access_counters_batch_context_notification_cache is set, the access
|
||||
// counters batch_context's notification cache allocation will fail with
|
||||
// NV_ERR_NO_MEMORY.
|
||||
//
|
||||
// Note that only one of the gpu_* or access_counters_* setting can be selected
|
||||
// at a time.
|
||||
#define UVM_TEST_VA_SPACE_INJECT_ERROR UVM_TEST_IOCTL_BASE(72)
|
||||
typedef struct
|
||||
{
|
||||
NvU32 migrate_vma_allocation_fail_nth; // In
|
||||
NvU32 va_block_allocation_fail_nth; // In
|
||||
NvBool gpu_access_counters_alloc_buffer; // In
|
||||
NvBool gpu_access_counters_alloc_block_context; // In
|
||||
NvBool gpu_isr_access_counters_alloc; // In
|
||||
NvBool gpu_isr_access_counters_alloc_stats_cpu; // In
|
||||
NvBool access_counters_batch_context_notifications; // In
|
||||
NvBool access_counters_batch_context_notification_cache; // In
|
||||
|
||||
NV_STATUS rmStatus; // Out
|
||||
} UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS;
|
||||
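A short sketch of how a test might use one of the new access-counter injection points. Per the comment above, exactly one selector is set; the device path, the already-initialized descriptor, and the follow-up GPU registration that is expected to hit the injected NV_ERR_NO_MEMORY are assumptions of the example, not something this hunk adds.

```c
#include <string.h>
#include <sys/ioctl.h>
#include "uvm_test_ioctl.h"

/* Sketch: arm a single allocation-failure injection before registering a
 * GPU, so that its access-counter buffer allocation fails with
 * NV_ERR_NO_MEMORY. uvm_fd is an initialized /dev/nvidia-uvm descriptor. */
static int inject_access_counter_alloc_failure(int uvm_fd)
{
    UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS params;

    memset(&params, 0, sizeof(params));

    // Only one of the gpu_*/access_counters_* selectors may be set at a time.
    params.gpu_access_counters_alloc_buffer = NV_TRUE;

    if (ioctl(uvm_fd, UVM_TEST_VA_SPACE_INJECT_ERROR, &params) != 0)
        return -1;

    // The next GPU registration in this VA space should now fail its
    // access-counter buffer allocation.
    return params.rmStatus == NV_OK ? 0 : -1;
}
```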
@ -1505,6 +1522,16 @@ typedef struct
|
||||
NV_STATUS rmStatus; // Out
|
||||
} UVM_TEST_INJECT_NVLINK_ERROR_PARAMS;
|
||||
|
||||
#define UVM_TEST_QUERY_ACCESS_COUNTERS UVM_TEST_IOCTL_BASE(109)
|
||||
typedef struct
|
||||
{
|
||||
NvProcessorUuid gpu_uuid; // In
|
||||
NvU8 num_notification_buffers; // Out
|
||||
NvU32 num_notification_entries; // Out
|
||||
|
||||
NV_STATUS rmStatus; // Out
|
||||
} UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS;
|
||||
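The new query ioctl lets a test discover how many notification buffers the driver created and how many entries each holds, for example to size its own bookkeeping. A hedged sketch, under the same user-space assumptions as the examples above:

```c
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include "uvm_test_ioctl.h"

/* Sketch: query the access-counter notification geometry for one GPU.
 * uvm_fd is an already-initialized /dev/nvidia-uvm descriptor. */
static int print_access_counter_geometry(int uvm_fd, const NvProcessorUuid *gpu_uuid)
{
    UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS params;

    memset(&params, 0, sizeof(params));
    memcpy(&params.gpu_uuid, gpu_uuid, sizeof(params.gpu_uuid));

    if (ioctl(uvm_fd, UVM_TEST_QUERY_ACCESS_COUNTERS, &params) != 0 ||
        params.rmStatus != NV_OK)
        return -1;

    printf("%u notification buffers, %u entries each\n",
           (unsigned)params.num_notification_buffers,
           (unsigned)params.num_notification_entries);
    return 0;
}
```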
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -1305,8 +1305,7 @@ void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_c
|
||||
|
||||
void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
|
||||
uvm_gpu_id_t gpu_id,
|
||||
const uvm_access_counter_buffer_entry_t *buffer_entry,
|
||||
bool on_managed_phys)
|
||||
const uvm_access_counter_buffer_entry_t *buffer_entry)
|
||||
{
|
||||
uvm_down_read(&va_space->tools.lock);
|
||||
|
||||
@ -1318,18 +1317,10 @@ void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
|
||||
|
||||
info->eventType = UvmEventTypeTestAccessCounter;
|
||||
info->srcIndex = uvm_parent_id_value_from_processor_id(gpu_id);
|
||||
info->address = buffer_entry->address.address;
|
||||
info->isVirtual = buffer_entry->address.is_virtual? 1: 0;
|
||||
if (buffer_entry->address.is_virtual) {
|
||||
info->instancePtr = buffer_entry->virtual_info.instance_ptr.address;
|
||||
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
|
||||
info->veId = buffer_entry->virtual_info.ve_id;
|
||||
}
|
||||
else {
|
||||
info->aperture = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
|
||||
}
|
||||
info->isFromCpu = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0;
|
||||
info->physOnManaged = on_managed_phys? 1 : 0;
|
||||
info->address = buffer_entry->address;
|
||||
info->instancePtr = buffer_entry->instance_ptr.address;
|
||||
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->instance_ptr.aperture];
|
||||
info->veId = buffer_entry->ve_id;
|
||||
info->value = buffer_entry->counter_value;
|
||||
info->subGranularity = buffer_entry->sub_granularity;
|
||||
info->bank = buffer_entry->bank;
|
||||
@ -1345,18 +1336,10 @@ void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
|
||||
|
||||
info->eventType = UvmEventTypeTestAccessCounter;
|
||||
info->srcIndex = uvm_id_value(gpu_id);
|
||||
info->address = buffer_entry->address.address;
|
||||
info->isVirtual = buffer_entry->address.is_virtual? 1: 0;
|
||||
if (buffer_entry->address.is_virtual) {
|
||||
info->instancePtr = buffer_entry->virtual_info.instance_ptr.address;
|
||||
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
|
||||
info->veId = buffer_entry->virtual_info.ve_id;
|
||||
}
|
||||
else {
|
||||
info->aperture = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
|
||||
}
|
||||
info->isFromCpu = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0;
|
||||
info->physOnManaged = on_managed_phys? 1 : 0;
|
||||
info->address = buffer_entry->address;
|
||||
info->instancePtr = buffer_entry->instance_ptr.address;
|
||||
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->instance_ptr.aperture];
|
||||
info->veId = buffer_entry->ve_id;
|
||||
info->value = buffer_entry->counter_value;
|
||||
info->subGranularity = buffer_entry->sub_granularity;
|
||||
info->bank = buffer_entry->bank;
|
||||
@ -1368,18 +1351,13 @@ void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
|
||||
uvm_up_read(&va_space->tools.lock);
|
||||
}
|
||||
|
||||
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
|
||||
const uvm_access_counter_buffer_entry_t *buffer_entry,
|
||||
bool on_managed_phys)
|
||||
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu, const uvm_access_counter_buffer_entry_t *buffer_entry)
|
||||
{
|
||||
uvm_va_space_t *va_space;
|
||||
|
||||
uvm_down_read(&g_tools_va_space_list_lock);
|
||||
list_for_each_entry(va_space, &g_tools_va_space_list, tools.node) {
|
||||
uvm_tools_record_access_counter(va_space,
|
||||
gpu->id,
|
||||
buffer_entry,
|
||||
on_managed_phys);
|
||||
uvm_tools_record_access_counter(va_space, gpu->id, buffer_entry);
|
||||
}
|
||||
uvm_up_read(&g_tools_va_space_list_lock);
|
||||
}
|
||||
|
@ -111,14 +111,11 @@ void uvm_tools_broadcast_replay(uvm_gpu_t *gpu, uvm_push_t *push, NvU32 batch_id
|
||||
|
||||
void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_client_type_t client_type);
|
||||
|
||||
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
|
||||
const uvm_access_counter_buffer_entry_t *buffer_entry,
|
||||
bool on_managed_phys);
|
||||
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu, const uvm_access_counter_buffer_entry_t *buffer_entry);
|
||||
|
||||
void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
|
||||
uvm_gpu_id_t gpu_id,
|
||||
const uvm_access_counter_buffer_entry_t *buffer_entry,
|
||||
bool on_managed_phys);
|
||||
const uvm_access_counter_buffer_entry_t *buffer_entry);
|
||||
|
||||
void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space);
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2017-2024 NVIDIA Corporation
|
||||
Copyright (c) 2017-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -37,10 +37,10 @@ void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->utlb_per_gpc_count = uvm_turing_get_utlbs_per_gpc(parent_gpu);
|
||||
|
||||
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
|
||||
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
|
||||
{
|
||||
uvm_fault_buffer_entry_t *dummy;
|
||||
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
|
||||
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
|
||||
}
|
||||
|
||||
// A single top level PDE on Turing covers 128 TB and that's the minimum
|
||||
@ -79,8 +79,6 @@ void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
parent_gpu->access_counters_supported = true;
|
||||
|
||||
parent_gpu->access_counters_can_use_physical_addresses = false;
|
||||
|
||||
parent_gpu->fault_cancel_va_supported = true;
|
||||
|
||||
parent_gpu->scoped_atomics_supported = true;
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2021 NVIDIA Corporation
|
||||
Copyright (c) 2016-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -25,42 +25,174 @@
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "clc365.h"
|
||||
|
||||
static void clear_access_counter_notifications_interrupt(uvm_parent_gpu_t *parent_gpu)
|
||||
typedef struct {
|
||||
NvU8 bufferEntry[NVC365_NOTIFY_BUF_SIZE];
|
||||
} access_counter_buffer_entry_c365_t;
|
||||
|
||||
void uvm_hal_turing_enable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
volatile NvU32 *reg;
|
||||
NvU32 mask;
|
||||
|
||||
reg = parent_gpu->access_counter_buffer_info.rm_info.pHubIntr;
|
||||
mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;
|
||||
reg = access_counters->rm_info.pHubIntrEnSet;
|
||||
mask = access_counters->rm_info.accessCounterMask;
|
||||
|
||||
UVM_GPU_WRITE_ONCE(*reg, mask);
|
||||
}
|
||||
|
||||
void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu)
|
||||
static void clear_access_counter_notifications_interrupt(uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
volatile NvU32 *reg;
|
||||
NvU32 mask;
|
||||
|
||||
reg = parent_gpu->access_counter_buffer_info.rm_info.pHubIntrEnClear;
|
||||
mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;
|
||||
reg = access_counters->rm_info.pHubIntr;
|
||||
mask = access_counters->rm_info.accessCounterMask;
|
||||
|
||||
UVM_GPU_WRITE_ONCE(*reg, mask);
|
||||
}
|
||||
|
||||
void uvm_hal_turing_disable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters)
|
||||
{
|
||||
volatile NvU32 *reg;
|
||||
NvU32 mask;
|
||||
|
||||
reg = access_counters->rm_info.pHubIntrEnClear;
|
||||
mask = access_counters->rm_info.accessCounterMask;
|
||||
|
||||
UVM_GPU_WRITE_ONCE(*reg, mask);
|
||||
|
||||
wmb();
|
||||
|
||||
// See the comment in uvm_hal_turing_disable_replayable_faults
|
||||
clear_access_counter_notifications_interrupt(parent_gpu);
|
||||
clear_access_counter_notifications_interrupt(access_counters);
|
||||
}
|
||||
|
||||
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get)
|
||||
void uvm_hal_turing_clear_access_counter_notifications(uvm_access_counter_buffer_t *access_counters, NvU32 get)
|
||||
{
|
||||
clear_access_counter_notifications_interrupt(parent_gpu);
|
||||
clear_access_counter_notifications_interrupt(access_counters);
|
||||
|
||||
wmb();
|
||||
|
||||
// Write GET to force the re-evaluation of the interrupt condition after the
|
||||
// interrupt bit has been cleared.
|
||||
UVM_GPU_WRITE_ONCE(*parent_gpu->access_counter_buffer_info.rm_info.pAccessCntrBufferGet, get);
|
||||
UVM_GPU_WRITE_ONCE(*access_counters->rm_info.pAccessCntrBufferGet, get);
|
||||
}
|
||||
|
||||
NvU32 uvm_hal_turing_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
return NVC365_NOTIFY_BUF_SIZE;
|
||||
}
|
||||
|
||||
static uvm_aperture_t get_access_counter_inst_aperture(NvU32 *access_counter_entry)
|
||||
{
|
||||
NvU32 hw_aperture_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_APERTURE);
|
||||
|
||||
switch (hw_aperture_value) {
|
||||
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_VID_MEM:
|
||||
return UVM_APERTURE_VID;
|
||||
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_COHERENT:
|
||||
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_NONCOHERENT:
|
||||
return UVM_APERTURE_SYS;
|
||||
}
|
||||
|
||||
UVM_ASSERT_MSG(false, "Invalid inst aperture value: %d\n", hw_aperture_value);
|
||||
|
||||
return UVM_APERTURE_MAX;
|
||||
}
|
||||
|
||||
static NvU64 get_address(uvm_parent_gpu_t *parent_gpu, NvU32 *access_counter_entry)
|
||||
{
|
||||
NvU64 address;
|
||||
NvU64 addr_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_HI);
|
||||
NvU64 addr_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_LO);
|
||||
NvU32 addr_type_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_TYPE);
|
||||
|
||||
UVM_ASSERT(addr_type_value == NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GVA);
|
||||
|
||||
address = addr_lo + (addr_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, ADDR_LO));
|
||||
address = uvm_parent_gpu_canonical_address(parent_gpu, address);
|
||||
|
||||
return address;
|
||||
}
|
||||
|
||||
static NvU32 *get_access_counter_buffer_entry(uvm_access_counter_buffer_t *access_counters, NvU32 index)
|
||||
{
|
||||
access_counter_buffer_entry_c365_t *buffer_start;
|
||||
NvU32 *access_counter_entry;
|
||||
|
||||
UVM_ASSERT(index < access_counters->max_notifications);
|
||||
|
||||
buffer_start = (access_counter_buffer_entry_c365_t *)access_counters->rm_info.bufferAddress;
|
||||
access_counter_entry = (NvU32 *)&buffer_start[index];
|
||||
|
||||
return access_counter_entry;
|
||||
}
|
||||
|
||||
bool uvm_hal_turing_access_counter_buffer_entry_is_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index)
|
||||
{
|
||||
NvU32 *access_counter_entry;
|
||||
bool is_valid;
|
||||
|
||||
access_counter_entry = get_access_counter_buffer_entry(access_counters, index);
|
||||
|
||||
is_valid = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID);
|
||||
|
||||
return is_valid;
|
||||
}
|
||||
|
||||
void uvm_hal_turing_access_counter_buffer_entry_clear_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index)
|
||||
{
|
||||
NvU32 *access_counter_entry;
|
||||
|
||||
access_counter_entry = get_access_counter_buffer_entry(access_counters, index);
|
||||
|
||||
WRITE_HWCONST_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID, FALSE);
|
||||
}
|
||||
|
||||
void uvm_hal_turing_access_counter_buffer_parse_entry(uvm_access_counter_buffer_t *access_counters,
|
||||
NvU32 index,
|
||||
uvm_access_counter_buffer_entry_t *buffer_entry)
|
||||
{
|
||||
NvU32 *access_counter_entry;
|
||||
NvU64 inst_hi, inst_lo;
|
||||
|
||||
// Valid bit must be set before this function is called
|
||||
UVM_ASSERT(uvm_hal_turing_access_counter_buffer_entry_is_valid(access_counters, index));
|
||||
|
||||
access_counter_entry = get_access_counter_buffer_entry(access_counters, index);
|
||||
|
||||
UVM_ASSERT(READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, TYPE) != NVC365_NOTIFY_BUF_ENTRY_TYPE_CPU);
|
||||
|
||||
buffer_entry->address = get_address(access_counters->parent_gpu, access_counter_entry);
|
||||
|
||||
inst_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_HI);
|
||||
inst_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_LO);
|
||||
buffer_entry->instance_ptr.address = inst_lo + (inst_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, INST_LO));
|
||||
|
||||
// HW value contains the 4K page number. Shift to build the full address
|
||||
buffer_entry->instance_ptr.address <<= 12;
|
||||
|
||||
buffer_entry->instance_ptr.aperture = get_access_counter_inst_aperture(access_counter_entry);
|
||||
|
||||
buffer_entry->mmu_engine_id = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, MMU_ENGINE_ID);
|
||||
|
||||
buffer_entry->mmu_engine_type = UVM_MMU_ENGINE_TYPE_GRAPHICS;
|
||||
|
||||
// MMU engine id aligns with the fault buffer packets. Therefore, we reuse
|
||||
// the helper to compute the VE ID from the fault buffer class.
|
||||
buffer_entry->ve_id = access_counters->parent_gpu->fault_buffer_hal->get_ve_id(buffer_entry->mmu_engine_id,
|
||||
buffer_entry->mmu_engine_type);
|
||||
|
||||
buffer_entry->counter_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, COUNTER_VAL);
|
||||
|
||||
buffer_entry->sub_granularity = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, SUB_GRANULARITY);
|
||||
|
||||
buffer_entry->bank = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, BANK);
|
||||
|
||||
buffer_entry->tag = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, NOTIFY_TAG);
|
||||
|
||||
// Automatically clear valid bit for the entry in the access counter buffer
|
||||
uvm_hal_turing_access_counter_buffer_entry_clear_valid(access_counters, index);
|
||||
}
|
||||
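With the HAL now keyed on a `uvm_access_counter_buffer_t` rather than the parent GPU, a consumer holds one of these objects per notification buffer and drives the entry helpers directly. The following is a simplified sketch of such a loop; the real bottom half's cached GET/PUT handling, batching, RM interaction and locking are omitted, and `service_notification()` is a hypothetical callback, not a driver function.

```c
/* Sketch: drain valid entries starting at the caller-maintained GET index,
 * then publish GET, which also re-evaluates the interrupt condition. */
static void drain_access_counter_buffer(uvm_access_counter_buffer_t *access_counters,
                                        NvU32 get,
                                        void (*service_notification)(const uvm_access_counter_buffer_entry_t *entry))
{
    while (uvm_hal_turing_access_counter_buffer_entry_is_valid(access_counters, get)) {
        uvm_access_counter_buffer_entry_t entry;

        // parse_entry() decodes the NOTIFY_BUF_ENTRY fields and clears the
        // entry's VALID bit in the hardware buffer.
        uvm_hal_turing_access_counter_buffer_parse_entry(access_counters, get, &entry);

        service_notification(&entry);

        // Wrap GET around the ring of max_notifications entries.
        if (++get == access_counters->max_notifications)
            get = 0;
    }

    // Write GET back; per clear_access_counter_notifications() above, this
    // clears the pending interrupt and forces it to be re-evaluated.
    uvm_hal_turing_clear_access_counter_notifications(access_counters, get);
}
```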
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2021-2024 NVIDIA Corporation
|
||||
Copyright (c) 2021-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -32,8 +32,8 @@ static void clear_replayable_faults_interrupt(uvm_parent_gpu_t *parent_gpu)
|
||||
volatile NvU32 *reg;
|
||||
NvU32 mask;
|
||||
|
||||
reg = parent_gpu->fault_buffer_info.rm_info.replayable.pPmcIntr;
|
||||
mask = parent_gpu->fault_buffer_info.rm_info.replayable.replayableFaultMask;
|
||||
reg = parent_gpu->fault_buffer.rm_info.replayable.pPmcIntr;
|
||||
mask = parent_gpu->fault_buffer.rm_info.replayable.replayableFaultMask;
|
||||
|
||||
UVM_GPU_WRITE_ONCE(*reg, mask);
|
||||
}
|
||||
@ -54,8 +54,8 @@ void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu)
|
||||
volatile NvU32 *reg;
|
||||
NvU32 mask;
|
||||
|
||||
reg = parent_gpu->fault_buffer_info.rm_info.replayable.pPmcIntrEnClear;
|
||||
mask = parent_gpu->fault_buffer_info.rm_info.replayable.replayableFaultMask;
|
||||
reg = parent_gpu->fault_buffer.rm_info.replayable.pPmcIntrEnClear;
|
||||
mask = parent_gpu->fault_buffer.rm_info.replayable.replayableFaultMask;
|
||||
|
||||
UVM_GPU_WRITE_ONCE(*reg, mask);
|
||||
|
||||
|
@ -361,3 +361,24 @@ void uvm_hal_turing_host_tlb_invalidate_test(uvm_push_t *push,
|
||||
if (params->membar == UvmInvalidateTlbMemBarLocal)
|
||||
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
|
||||
}
|
||||
|
||||
void uvm_hal_turing_access_counter_clear_all(uvm_push_t *push)
|
||||
{
|
||||
NV_PUSH_4U(C46F, MEM_OP_A, 0,
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, 0,
|
||||
MEM_OP_D, HWCONST(C46F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
|
||||
HWCONST(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, ALL));
|
||||
}
|
||||
|
||||
void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push,
|
||||
const uvm_access_counter_buffer_entry_t *buffer_entry)
|
||||
{
|
||||
NV_PUSH_4U(C46F, MEM_OP_A, 0,
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, HWVALUE(C46F, MEM_OP_C, ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG, buffer_entry->tag),
|
||||
MEM_OP_D, HWCONST(C46F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
|
||||
HWCONST(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, TARGETED) |
|
||||
HWCONST(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_TYPE, MIMC) |
|
||||
HWVALUE(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_BANK, buffer_entry->bank));
|
||||
}
|
||||
|
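The two new host methods are issued from an existing push; how the push is begun and ended (channel selection, tracking) is the caller's concern and is left out of this sketch. The helper and its `clear_everything` flag are purely illustrative.

```c
/* Sketch: clear counters after servicing, using the new Turing MEM_OP
 * helpers. The push is assumed to have been begun by the caller. */
static void clear_serviced_notification(uvm_push_t *push,
                                        const uvm_access_counter_buffer_entry_t *entry,
                                        bool clear_everything)
{
    if (clear_everything) {
        // A single MEM_OP clears every counter on the GPU, e.g. after a
        // reconfiguration when all previously accumulated counts are stale.
        uvm_hal_turing_access_counter_clear_all(push);
    }
    else {
        // A targeted clear only needs the notification's tag and bank,
        // which parse_entry() filled in from the NOTIFY_BUF_ENTRY fields.
        uvm_hal_turing_access_counter_clear_targeted(push, entry);
    }
}
```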
@ -1323,14 +1323,11 @@ typedef struct
|
||||
NvU8 aperture;
|
||||
NvU8 instancePtrAperture;
|
||||
|
||||
NvU8 isVirtual;
|
||||
NvU8 isFromCpu;
|
||||
NvU8 padding8bits;
|
||||
|
||||
NvU8 veId;
|
||||
|
||||
// The physical access counter notification was triggered on a managed
|
||||
// memory region. This is not set for virtual access counter notifications.
|
||||
NvU8 physOnManaged;
|
||||
NvU16 padding16bits;
|
||||
|
||||
NvU32 value;
|
||||
NvU32 subGranularity;
|
||||
@ -1348,26 +1345,21 @@ typedef struct
|
||||
// data in a queue.
|
||||
//
|
||||
NvU8 eventType;
|
||||
|
||||
// See uvm_access_counter_buffer_entry_t for details
|
||||
NvU8 aperture;
|
||||
NvU8 instancePtrAperture;
|
||||
NvU8 isVirtual;
|
||||
NvU8 isFromCpu;
|
||||
NvU8 veId;
|
||||
|
||||
// The physical access counter notification was triggered on a managed
|
||||
// memory region. This is not set for virtual access counter notifications.
|
||||
NvU8 physOnManaged;
|
||||
|
||||
//
|
||||
// This structure is shared between UVM kernel and tools.
|
||||
// Manually padding the structure so that compiler options like pragma pack
|
||||
// or malign-double will have no effect on the field offsets
|
||||
//
|
||||
NvU8 padding8bits;
|
||||
NvU16 padding16bits;
|
||||
|
||||
NvU16 srcIndex; // index of the gpu that received the access counter
|
||||
// notification
|
||||
NvU16 padding16bits;
|
||||
NvU32 value;
|
||||
NvU32 subGranularity;
|
||||
NvU32 tag;
|
||||
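The manual padding called out in the comment above is what keeps the event layout identical for the kernel producer and the user-space tools consumer. A generic illustration of the idea follows; the struct below is hypothetical and deliberately not the driver's actual event layout.

```c
#include <stddef.h>

/* Hypothetical event record with every gap spelled out by hand. */
typedef struct {
    unsigned char  eventType;
    unsigned char  aperture;
    unsigned char  veId;
    unsigned char  padding8bits;    // explicit, instead of compiler-inserted
    unsigned short srcIndex;
    unsigned short padding16bits;
    unsigned int   value;
} example_event_t;

// Because no implicit padding exists, these offsets hold whether or not the
// translation unit is built with pragma pack or -malign-double, so both
// sides of the shared queue agree on where each field lives.
_Static_assert(offsetof(example_event_t, srcIndex) == 4, "srcIndex offset");
_Static_assert(offsetof(example_event_t, value) == 8, "value offset");
_Static_assert(sizeof(example_event_t) == 12, "record size");
```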
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -664,10 +664,11 @@ static void uvm_va_block_cpu_clear_resident_region(uvm_va_block_t *va_block, int
|
||||
block_update_cpu_resident_mask(va_block);
|
||||
}
|
||||
|
||||
// Clear residency bits from any/all processors that might have had pages resident.
|
||||
// Note that both the destination processor and any CPU NUMA nodes where pages are
|
||||
// migrating to need to be skipped as the block logic sets the new page residency
|
||||
// before clearing the old ones (see uvm_va_block_make_resident_finish()).
|
||||
// Clear residency bits from any/all processors that might have had pages
|
||||
// resident. Note that both the destination processor and any CPU NUMA nodes
|
||||
// where pages are migrating to need to be skipped as the block logic sets the
|
||||
// new page residency before clearing the old ones
|
||||
// (see uvm_va_block_make_resident_finish()).
|
||||
static void uvm_va_block_cpu_clear_resident_all_chunks(uvm_va_block_t *va_block,
|
||||
uvm_va_block_context_t *va_block_context,
|
||||
uvm_page_mask_t *page_mask)
|
||||
@ -1328,40 +1329,18 @@ static void cpu_chunk_remove_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk, uvm_gpu_
|
||||
if (gpu_mapping_addr == 0)
|
||||
return;
|
||||
|
||||
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings, gpu_mapping_addr);
|
||||
uvm_cpu_chunk_unmap_gpu(chunk, gpu);
|
||||
}
|
||||
|
||||
static NV_STATUS cpu_chunk_add_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk,
|
||||
uvm_va_block_t *block,
|
||||
uvm_page_index_t page_index,
|
||||
uvm_gpu_t *gpu)
|
||||
static NV_STATUS cpu_chunk_add_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_chunk_size_t chunk_size;
|
||||
|
||||
// When the Confidential Computing feature is enabled the transfers don't
|
||||
// use the DMA mapping of CPU chunks (since it's protected memory), but
|
||||
// the DMA address of the unprotected dma buffer.
|
||||
if (g_uvm_global.conf_computing_enabled)
|
||||
return NV_OK;
|
||||
|
||||
status = uvm_cpu_chunk_map_gpu(chunk, gpu);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
chunk_size = uvm_cpu_chunk_get_size(chunk);
|
||||
|
||||
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings,
|
||||
uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu),
|
||||
uvm_va_block_cpu_page_address(block, page_index),
|
||||
chunk_size,
|
||||
block,
|
||||
UVM_ID_CPU);
|
||||
if (status != NV_OK)
|
||||
uvm_cpu_chunk_unmap_gpu(chunk, gpu);
|
||||
|
||||
return status;
|
||||
return uvm_cpu_chunk_map_gpu(chunk, gpu);
|
||||
}
|
||||
|
||||
static void block_gpu_unmap_phys_all_cpu_pages(uvm_va_block_t *block, uvm_gpu_t *gpu)
|
||||
@ -1393,7 +1372,7 @@ static NV_STATUS block_gpu_map_phys_all_cpu_pages(uvm_va_block_t *block, uvm_gpu
|
||||
uvm_id_value(gpu->id),
|
||||
uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu));
|
||||
|
||||
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, block, page_index, gpu);
|
||||
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, gpu);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
}
|
||||
@ -1468,14 +1447,10 @@ void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *block,
|
||||
}
|
||||
}
|
||||
|
||||
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *block,
|
||||
uvm_cpu_chunk_t *chunk,
|
||||
uvm_page_index_t page_index)
|
||||
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *block, uvm_cpu_chunk_t *chunk)
|
||||
{
|
||||
NV_STATUS status;
|
||||
uvm_gpu_id_t id;
|
||||
uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(chunk);
|
||||
uvm_va_block_region_t chunk_region = uvm_va_block_chunk_region(block, chunk_size, page_index);
|
||||
|
||||
// We can't iterate over va_space->registered_gpus because we might be
|
||||
// on the eviction path, which does not have the VA space lock held. We have
|
||||
@ -1489,7 +1464,7 @@ NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *block,
|
||||
continue;
|
||||
|
||||
gpu = uvm_gpu_get(id);
|
||||
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, block, chunk_region.first, gpu);
|
||||
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, gpu);
|
||||
if (status != NV_OK)
|
||||
goto error;
|
||||
}
|
||||
@ -1756,7 +1731,7 @@ static NV_STATUS block_populate_overlapping_cpu_chunks(uvm_va_block_t *block,
|
||||
// before mapping.
|
||||
chunk_ptr = split_chunks[i];
|
||||
split_chunks[i] = NULL;
|
||||
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk_ptr, running_page_index);
|
||||
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk_ptr);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
}
|
||||
@ -1793,7 +1768,7 @@ static NV_STATUS block_populate_overlapping_cpu_chunks(uvm_va_block_t *block,
|
||||
// before mapping.
|
||||
chunk_ptr = small_chunks[j];
|
||||
small_chunks[j] = NULL;
|
||||
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk_ptr, running_page_index);
|
||||
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk_ptr);
|
||||
if (status != NV_OK)
|
||||
goto done;
|
||||
}
|
||||
@ -1860,7 +1835,7 @@ static NV_STATUS block_add_cpu_chunk(uvm_va_block_t *block,
|
||||
if (status != NV_OK)
|
||||
goto out;
|
||||
|
||||
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk, page_index);
|
||||
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk);
|
||||
if (status != NV_OK) {
|
||||
uvm_cpu_chunk_remove_from_block(block, uvm_cpu_chunk_get_numa_node(chunk), page_index);
|
||||
goto out;
|
||||
@ -3155,8 +3130,8 @@ static NV_STATUS block_populate_pages(uvm_va_block_t *block,
|
||||
uvm_page_mask_or(pages_staged, pages_staged, scratch_page_mask);
|
||||
}
|
||||
|
||||
// 2. Remove any pages in pages_staged that are on any resident processor
|
||||
// dest_id can copy from.
|
||||
// 2. Remove any pages in pages_staged that are on any resident
|
||||
// processor dest_id can copy from.
|
||||
if (uvm_processor_mask_and(tmp_processor_mask, can_copy_from_processors, &block->resident)) {
|
||||
for_each_id_in_mask(id, tmp_processor_mask) {
|
||||
id_resident_mask = uvm_va_block_resident_mask_get(block, id, NUMA_NO_NODE);
|
||||
@ -3210,14 +3185,21 @@ static uvm_gpu_chunk_t *block_phys_page_chunk(uvm_va_block_t *block, block_phys_
|
||||
return chunk;
|
||||
}
|
||||
|
||||
typedef enum {
|
||||
REMOTE_EGM_ALLOWED = 0,
|
||||
REMOTE_EGM_NOT_ALLOWED = 1,
|
||||
} remote_egm_mode_t;
|
||||
|
||||
// Get the physical GPU address of a block's page from the POV of the specified
|
||||
// GPU. This is the address that should be used for making PTEs for the
|
||||
// specified GPU.
|
||||
static uvm_gpu_phys_address_t block_phys_page_address(uvm_va_block_t *block,
|
||||
block_phys_page_t block_page,
|
||||
uvm_gpu_t *gpu)
|
||||
uvm_gpu_t *gpu,
|
||||
remote_egm_mode_t egm_mode)
|
||||
{
|
||||
uvm_va_block_gpu_state_t *accessing_gpu_state = uvm_va_block_gpu_state_get(block, gpu->id);
|
||||
bool allow_remote_egm = egm_mode == REMOTE_EGM_ALLOWED;
|
||||
size_t chunk_offset;
|
||||
uvm_gpu_chunk_t *chunk;
|
||||
|
||||
@ -3231,7 +3213,7 @@ static uvm_gpu_phys_address_t block_phys_page_address(uvm_va_block_t *block,
|
||||
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
|
||||
uvm_parent_gpu_t *routing_gpu = uvm_va_space_get_egm_routing_gpu(va_space, gpu, block_page.nid);
|
||||
|
||||
if (routing_gpu) {
|
||||
if (routing_gpu && (allow_remote_egm || routing_gpu == gpu->parent)) {
|
||||
struct page *page = uvm_cpu_chunk_get_cpu_page(block, chunk, block_page.page_index);
|
||||
|
||||
phys_addr = page_to_phys(page);
|
||||
@ -3296,9 +3278,14 @@ static uvm_gpu_address_t block_phys_page_copy_address(uvm_va_block_t *block,
|
||||
// CPU and local GPU accesses can rely on block_phys_page_address, but the
|
||||
// resulting physical address may need to be converted into virtual.
|
||||
if (UVM_ID_IS_CPU(block_page.processor) || uvm_id_equal(block_page.processor, gpu->id)) {
|
||||
uvm_gpu_phys_address_t phys_addr = block_phys_page_address(block, block_page, gpu);
|
||||
// Do not use remote EGM addresses internally until
|
||||
// NVLINK STO handling is updated to handle EGM.
|
||||
// TODO: Bug: 5068688 [UVM] Detect STO and prevent data leaks
|
||||
// when accessing EGM memory
|
||||
// TODO: Bug: 5007527 [UVM] Extend STO recovery to EGM enabled
|
||||
// systems
|
||||
uvm_gpu_phys_address_t phys_addr = block_phys_page_address(block, block_page, gpu, REMOTE_EGM_NOT_ALLOWED);
|
||||
|
||||
// EGM mappings use physical addresses with a PEER aperture.
|
||||
if (uvm_aperture_is_peer(phys_addr.aperture)) {
|
||||
UVM_ASSERT(block_check_egm_peer(uvm_va_block_get_va_space(block), gpu, block_page.nid, phys_addr));
|
||||
return uvm_gpu_address_from_phys(phys_addr);
|
||||
@ -3334,7 +3321,7 @@ uvm_gpu_phys_address_t uvm_va_block_res_phys_page_address(uvm_va_block_t *va_blo
|
||||
UVM_ASSERT(nid != NUMA_NO_NODE);
|
||||
}
|
||||
|
||||
return block_phys_page_address(va_block, block_phys_page(residency, nid, page_index), gpu);
|
||||
return block_phys_page_address(va_block, block_phys_page(residency, nid, page_index), gpu, REMOTE_EGM_ALLOWED);
|
||||
}
|
||||
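The new `remote_egm_mode_t` argument splits the call sites into two classes: PTE-making and residency-query paths may use a remote EGM routing GPU, while copy paths must not until the STO handling referenced in the bugs above understands EGM. The wrappers below only illustrate that split; `block_phys_page_address()` is static to uvm_va_block.c and the real call sites pass the enum directly.

```c
/* Illustrative wrappers, written as if inside uvm_va_block.c. */

// Mapping/query paths: PTEs may point at remote EGM through the routing GPU.
static uvm_gpu_phys_address_t phys_addr_for_pte(uvm_va_block_t *block,
                                                block_phys_page_t block_page,
                                                uvm_gpu_t *gpu)
{
    return block_phys_page_address(block, block_page, gpu, REMOTE_EGM_ALLOWED);
}

// Copy paths: stay off remote EGM until STO recovery covers it
// (bugs 5068688 and 5007527 referenced above).
static uvm_gpu_phys_address_t phys_addr_for_copy(uvm_va_block_t *block,
                                                 block_phys_page_t block_page,
                                                 uvm_gpu_t *gpu)
{
    return block_phys_page_address(block, block_page, gpu, REMOTE_EGM_NOT_ALLOWED);
}
```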
|
||||
uvm_gpu_phys_address_t uvm_va_block_gpu_phys_page_address(uvm_va_block_t *va_block,
|
||||
@ -3949,9 +3936,9 @@ static NV_STATUS block_copy_pages(uvm_va_block_t *va_block,
|
||||
UVM_ASSERT(uvm_cpu_chunk_get_size(src_chunk) >= uvm_va_block_region_size(region));
|
||||
UVM_ASSERT(uvm_va_block_region_size(region) <= uvm_cpu_chunk_get_size(dst_chunk));
|
||||
|
||||
// CPU-to-CPU copies using memcpy() don't have any inherent ordering with
|
||||
// copies using GPU CEs. So, we have to make sure that all previously
|
||||
// submitted work is complete.
|
||||
// CPU-to-CPU copies using memcpy() don't have any inherent ordering
|
||||
// with copies using GPU CEs. So, we have to make sure that all
|
||||
// previously submitted work is complete.
|
||||
status = uvm_tracker_wait(&va_block->tracker);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
@ -4204,9 +4191,9 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
|
||||
uvm_processor_mask_set(&block_context->make_resident.all_involved_processors, copying_gpu->id);
|
||||
|
||||
// This function is called just once per VA block and needs to
|
||||
// receive the "main" cause for the migration (it mainly checks if
|
||||
// we are in the eviction path). Therefore, we pass cause instead
|
||||
// of contig_cause
|
||||
// receive the "main" cause for the migration (it mainly checks
|
||||
// if we are in the eviction path). Therefore, we pass cause
|
||||
// instead of contig_cause.
|
||||
uvm_tools_record_block_migration_begin(block,
|
||||
&push,
|
||||
dst_id,
|
||||
@ -4233,8 +4220,8 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
|
||||
contig_cause = page_cause;
|
||||
|
||||
if (block_copy_should_use_push(block, &copy_state)) {
|
||||
// When CC is enabled, transfers between GPU and CPU don't rely on
|
||||
// any GPU mapping of CPU chunks, physical or virtual.
|
||||
// When CC is enabled, transfers between GPU and CPU don't rely
|
||||
// on any GPU mapping of CPU chunks, physical or virtual.
|
||||
if (UVM_ID_IS_CPU(src_id) && g_uvm_global.conf_computing_enabled)
|
||||
can_cache_src_phys_addr = false;
|
||||
|
||||
@ -4244,8 +4231,8 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
|
||||
// Computing the physical address is a non-trivial operation and
|
||||
// seems to be a performance limiter on systems with 2 or more
|
||||
// NVLINK links. Therefore, for physically-contiguous block
|
||||
// storage, we cache the start address and compute the page address
|
||||
// using the page index.
|
||||
// storage, we cache the start address and compute the page
|
||||
// address using the page index.
|
||||
if (can_cache_src_phys_addr) {
|
||||
copy_state.src.gpu_address = block_phys_page_copy_address(block,
|
||||
block_phys_page(src_id,
|
||||
@ -5187,12 +5174,13 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
|
||||
if (!scratch_residency_mask)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
// We cannot read-duplicate on different CPU NUMA nodes since there is only one
|
||||
// CPU page table. So, the page has to migrate from the source NUMA node to the
|
||||
// destination one.
|
||||
// We cannot read-duplicate on different CPU NUMA nodes since there is only
|
||||
// one CPU page table. So, the page has to migrate from the source NUMA node
|
||||
// to the destination one.
|
||||
// In order to correctly map pages on the destination NUMA node, all pages
|
||||
// resident on other NUMA nodes have to be unmapped. Otherwise, their WRITE
|
||||
// permission will be revoked but they'll remain mapped on the source NUMA node.
|
||||
// permission will be revoked but they'll remain mapped on the source NUMA
|
||||
// node.
|
||||
if (uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) &&
|
||||
UVM_ID_IS_CPU(va_block_context->make_resident.dest_id)) {
|
||||
uvm_page_mask_t *dest_nid_resident = uvm_va_block_resident_mask_get(va_block,
|
||||
@ -5623,7 +5611,8 @@ static bool block_check_mappings_page(uvm_va_block_t *block,
|
||||
}
|
||||
|
||||
// atomic mappings from GPUs with disabled system-wide atomics are treated
|
||||
// as write mappings. Therefore, we remove them from the atomic mappings mask
|
||||
// as write mappings. Therefore, we remove them from the atomic mappings
|
||||
// mask
|
||||
uvm_processor_mask_and(atomic_mappings, atomic_mappings, &va_space->system_wide_atomics_enabled_processors);
|
||||
|
||||
if (!uvm_processor_mask_empty(read_mappings)) {
|
||||
@ -5696,7 +5685,8 @@ static bool block_check_mappings_page(uvm_va_block_t *block,
|
||||
*residency_has_native_atomics->bitmap,
|
||||
*va_space->system_wide_atomics_enabled_processors.bitmap);
|
||||
|
||||
// Only one processor outside of the native group can have atomics enabled
|
||||
// Only one processor outside of the native group can have atomics
|
||||
// enabled
|
||||
UVM_ASSERT_MSG(uvm_processor_mask_get_count(atomic_mappings) == 1,
|
||||
"Too many atomics mappings to %s from processors with non-native atomics\n"
|
||||
"Resident: 0x%lx - Mappings R: 0x%lx W: 0x%lx A: 0x%lx -"
|
||||
@ -5714,9 +5704,9 @@ static bool block_check_mappings_page(uvm_va_block_t *block,
|
||||
|
||||
non_native_atomics = &mapping_masks->non_native_atomics;
|
||||
|
||||
// One or more processors within the native group have atomics enabled.
|
||||
// All processors outside of that group may have write but not atomic
|
||||
// permissions.
|
||||
// One or more processors within the native group have atomics
|
||||
// enabled. All processors outside of that group may have write but
|
||||
// not atomic permissions.
|
||||
uvm_processor_mask_andnot(non_native_atomics, atomic_mappings, residency_has_native_atomics);
|
||||
|
||||
UVM_ASSERT_MSG(uvm_processor_mask_empty(non_native_atomics),
|
||||
@ -6143,7 +6133,10 @@ static void block_gpu_pte_write_4k(uvm_va_block_t *block,
|
||||
|
||||
if (page_index >= contig_region.outer || nid != contig_nid) {
|
||||
contig_region = block_phys_contig_region(block, page_index, resident_id, nid);
|
||||
contig_addr = block_phys_page_address(block, block_phys_page(resident_id, nid, contig_region.first), gpu);
|
||||
contig_addr = block_phys_page_address(block,
|
||||
block_phys_page(resident_id, nid, contig_region.first),
|
||||
gpu,
|
||||
REMOTE_EGM_ALLOWED);
|
||||
page_addr = contig_addr;
|
||||
contig_nid = nid;
|
||||
}
|
||||
@ -6368,7 +6361,10 @@ static void block_gpu_pte_write_big(uvm_va_block_t *block,
|
||||
|
||||
if (big_region.first >= contig_region.outer || nid != contig_nid) {
|
||||
contig_region = block_phys_contig_region(block, big_region.first, resident_id, nid);
|
||||
contig_addr = block_phys_page_address(block, block_phys_page(resident_id, nid, contig_region.first), gpu);
|
||||
contig_addr = block_phys_page_address(block,
|
||||
block_phys_page(resident_id, nid, contig_region.first),
|
||||
gpu,
|
||||
REMOTE_EGM_ALLOWED);
|
||||
page_addr = contig_addr;
|
||||
contig_nid = nid;
|
||||
}
|
||||
@ -6520,7 +6516,7 @@ static void block_gpu_pte_write_2m(uvm_va_block_t *block,
|
||||
block_mark_cpu_page_dirty(block, 0, nid);
|
||||
}
|
||||
|
||||
page_addr = block_phys_page_address(block, block_phys_page(resident_id, nid, 0), gpu);
|
||||
page_addr = block_phys_page_address(block, block_phys_page(resident_id, nid, 0), gpu, REMOTE_EGM_ALLOWED);
|
||||
pte_val = tree->hal->make_pte(page_addr.aperture, page_addr.address, new_prot, pte_flags);
|
||||
uvm_pte_batch_write_pte(pte_batch, pte_addr, pte_val, pte_size);
|
||||
|
||||
@ -10037,16 +10033,8 @@ static NV_STATUS block_split_cpu_chunk_one(uvm_va_block_t *block, uvm_page_index
|
||||
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(block, nid, page_index);
|
||||
uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(chunk);
|
||||
uvm_chunk_size_t new_size;
|
||||
uvm_gpu_t *gpu;
|
||||
NvU64 gpu_mapping_addr;
|
||||
uvm_processor_mask_t *gpu_split_mask;
|
||||
uvm_gpu_id_t id;
|
||||
NV_STATUS status;
|
||||
|
||||
gpu_split_mask = uvm_processor_mask_cache_alloc();
|
||||
if (!gpu_split_mask)
|
||||
return NV_ERR_NO_MEMORY;
|
||||
|
||||
if (chunk_size == UVM_CHUNK_SIZE_2M)
|
||||
new_size = UVM_CHUNK_SIZE_64K;
|
||||
else
|
||||
@ -10054,45 +10042,11 @@ static NV_STATUS block_split_cpu_chunk_one(uvm_va_block_t *block, uvm_page_index
|
||||
|
||||
UVM_ASSERT(IS_ALIGNED(chunk_size, new_size));
|
||||
|
||||
uvm_processor_mask_zero(gpu_split_mask);
|
||||
for_each_gpu_id(id) {
|
||||
if (!uvm_va_block_gpu_state_get(block, id))
|
||||
continue;
|
||||
|
||||
gpu = uvm_gpu_get(id);
|
||||
|
||||
// If the parent chunk has not been mapped, there is nothing to split.
|
||||
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
|
||||
if (gpu_mapping_addr == 0)
|
||||
continue;
|
||||
|
||||
status = uvm_pmm_sysmem_mappings_split_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
|
||||
gpu_mapping_addr,
|
||||
new_size);
|
||||
if (status != NV_OK)
|
||||
goto merge;
|
||||
|
||||
uvm_processor_mask_set(gpu_split_mask, id);
|
||||
}
|
||||
|
||||
if (new_size == UVM_CHUNK_SIZE_64K)
|
||||
status = block_split_cpu_chunk_to_64k(block, nid);
|
||||
else
|
||||
status = block_split_cpu_chunk_to_4k(block, page_index, nid);
|
||||
|
||||
if (status != NV_OK) {
|
||||
merge:
|
||||
for_each_gpu_id_in_mask(id, gpu_split_mask) {
|
||||
gpu = uvm_gpu_get(id);
|
||||
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
|
||||
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
|
||||
gpu_mapping_addr,
|
||||
chunk_size);
|
||||
}
|
||||
}
|
||||
|
||||
uvm_processor_mask_cache_free(gpu_split_mask);
|
||||
|
||||
return status;
|
||||
}
|
||||
|
||||
@ -10109,8 +10063,8 @@ static NV_STATUS block_prealloc_cpu_chunk_storage(uvm_va_block_t *existing, uvm_
|
||||
UVM_ASSERT(uvm_cpu_storage_get_type(node_state) == UVM_CPU_CHUNK_STORAGE_MIXED);
|
||||
existing_mixed = uvm_cpu_storage_get_ptr(node_state);
|
||||
|
||||
// Pre-allocate chunk storage for the new block. By definition, the new block
|
||||
// will contain either 64K and/or 4K chunks.
|
||||
// Pre-allocate chunk storage for the new block. By definition, the new
|
||||
// block will contain either 64K and/or 4K chunks.
|
||||
//
|
||||
// We do this here so there are no failures in block_split_cpu().
|
||||
new_mixed = uvm_kvmalloc_zero(sizeof(*new_mixed));
|
||||
@ -10182,8 +10136,8 @@ static NV_STATUS block_presplit_cpu_chunks(uvm_va_block_t *existing, uvm_va_bloc
|
||||
for_each_possible_uvm_node(nid) {
|
||||
splitting_chunk = uvm_cpu_chunk_get_chunk_for_page(existing, nid, page_index);
|
||||
|
||||
// If the page covering the split point has not been populated, there is no
|
||||
// need to split.
|
||||
// If the page covering the split point has not been populated, there is
|
||||
// no need to split.
|
||||
if (!splitting_chunk)
|
||||
continue;
|
||||
|
||||
@ -10247,7 +10201,6 @@ static void block_merge_cpu_chunks_to_2m(uvm_va_block_t *block, uvm_page_index_t
|
||||
static void block_merge_cpu_chunks_one(uvm_va_block_t *block, uvm_page_index_t page_index, int nid)
|
||||
{
|
||||
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(block, nid, page_index);
|
||||
uvm_gpu_id_t id;
|
||||
|
||||
if (!chunk)
|
||||
return;
|
||||
@ -10259,25 +10212,6 @@ static void block_merge_cpu_chunks_one(uvm_va_block_t *block, uvm_page_index_t p
|
||||
UVM_ASSERT(uvm_cpu_chunk_get_size(chunk) == UVM_CHUNK_SIZE_64K);
|
||||
block_merge_cpu_chunks_to_2m(block, page_index, nid);
|
||||
}
|
||||
|
||||
chunk = uvm_cpu_chunk_get_chunk_for_page(block, nid, page_index);
|
||||
|
||||
for_each_gpu_id(id) {
|
||||
NvU64 gpu_mapping_addr;
|
||||
uvm_gpu_t *gpu;
|
||||
|
||||
if (!uvm_va_block_gpu_state_get(block, id))
|
||||
continue;
|
||||
|
||||
gpu = uvm_gpu_get(id);
|
||||
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
|
||||
if (gpu_mapping_addr == 0)
|
||||
continue;
|
||||
|
||||
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
|
||||
gpu_mapping_addr,
|
||||
uvm_cpu_chunk_get_size(chunk));
|
||||
}
|
||||
}
|
||||
|
||||
static void block_merge_cpu_chunks(uvm_va_block_t *existing, uvm_va_block_t *new)
|
||||
@ -10695,9 +10629,6 @@ static void block_split_gpu(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_g
|
||||
size_t new_pages = uvm_va_block_num_cpu_pages(new);
|
||||
size_t existing_pages, existing_pages_4k, existing_pages_big, new_pages_big;
|
||||
uvm_pte_bits_gpu_t pte_bit;
|
||||
uvm_cpu_chunk_t *cpu_chunk;
|
||||
uvm_page_index_t page_index;
|
||||
int nid;
|
||||
|
||||
if (!existing_gpu_state)
|
||||
return;
|
||||
@ -10711,14 +10642,6 @@ static void block_split_gpu(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_g
|
||||
UVM_ASSERT(PAGE_ALIGNED(existing->start));
|
||||
existing_pages = (new->start - existing->start) / PAGE_SIZE;
|
||||
|
||||
for_each_possible_uvm_node(nid) {
|
||||
for_each_cpu_chunk_in_block(cpu_chunk, page_index, new, nid) {
|
||||
uvm_pmm_sysmem_mappings_reparent_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings,
|
||||
uvm_cpu_chunk_get_gpu_phys_addr(cpu_chunk, gpu),
|
||||
new);
|
||||
}
|
||||
}
|
||||
|
||||
block_copy_split_gpu_chunks(existing, new, gpu);
|
||||
|
||||
block_split_page_mask(&existing_gpu_state->resident,
|
||||
@ -10727,8 +10650,10 @@ static void block_split_gpu(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_g
|
||||
new_pages);
|
||||
|
||||
for (pte_bit = 0; pte_bit < UVM_PTE_BITS_GPU_MAX; pte_bit++) {
|
||||
block_split_page_mask(&existing_gpu_state->pte_bits[pte_bit], existing_pages,
|
||||
&new_gpu_state->pte_bits[pte_bit], new_pages);
|
||||
block_split_page_mask(&existing_gpu_state->pte_bits[pte_bit],
|
||||
existing_pages,
|
||||
&new_gpu_state->pte_bits[pte_bit],
|
||||
new_pages);
|
||||
}
|
||||
|
||||
// Adjust page table ranges.
|
||||
@ -11113,7 +11038,8 @@ static NV_STATUS do_block_add_mappings_after_migration(uvm_va_block_t *va_block,
|
||||
bool map_processor_has_enabled_system_wide_atomics =
|
||||
uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, map_processor_id);
|
||||
|
||||
// Write mappings from processors with disabled system-wide atomics are treated like atomics
|
||||
// Write mappings from processors with disabled system-wide atomics are
|
||||
// treated like atomics
|
||||
if (new_map_prot == UVM_PROT_READ_WRITE && !map_processor_has_enabled_system_wide_atomics)
|
||||
final_map_prot = UVM_PROT_READ_WRITE_ATOMIC;
|
||||
else
|
||||
@ -11346,14 +11272,17 @@ uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block
|
||||
|
||||
block_page_authorized_processors(va_block, page_index, UVM_PROT_READ_WRITE_ATOMIC, atomic_mappings);
|
||||
|
||||
// Exclude processors with system-wide atomics disabled from atomic_mappings
|
||||
// Exclude processors with system-wide atomics disabled from
|
||||
// atomic_mappings
|
||||
uvm_processor_mask_and(atomic_mappings, atomic_mappings, &va_space->system_wide_atomics_enabled_processors);
|
||||
|
||||
// Exclude the processor for which the mapping protections are being computed
|
||||
// Exclude the processor for which the mapping protections are being
|
||||
// computed
|
||||
uvm_processor_mask_clear(atomic_mappings, processor_id);
|
||||
|
||||
// If there is any processor with atomic mapping, check if it has native atomics to the processor
|
||||
// with the resident copy. If it does not, we can only map READ ONLY
|
||||
// If there is any processor with atomic mapping, check if it has native
|
||||
// atomics to the processor with the resident copy. If it does not, we
|
||||
// can only map READ ONLY
|
||||
atomic_id = uvm_processor_mask_find_first_id(atomic_mappings);
|
||||
if (UVM_ID_IS_VALID(atomic_id) &&
|
||||
!uvm_processor_mask_test(&va_space->has_native_atomics[uvm_id_value(residency)], atomic_id)) {
|
||||
@ -11364,7 +11293,8 @@ uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block
|
||||
|
||||
block_page_authorized_processors(va_block, page_index, UVM_PROT_READ_WRITE, write_mappings);
|
||||
|
||||
// Exclude the processor for which the mapping protections are being computed
|
||||
// Exclude the processor for which the mapping protections are being
|
||||
// computed
|
||||
uvm_processor_mask_clear(write_mappings, processor_id);
|
||||
|
||||
// At this point, any processor with atomic mappings either has native
|
||||
@ -11639,31 +11569,32 @@ static uvm_processor_id_t block_select_processor_residency(uvm_va_block_t *va_bl
|
||||
uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(preferred_location)], processor_id))
|
||||
return preferred_location;
|
||||
|
||||
// Check if we should map the closest resident processor remotely on remote CPU fault
|
||||
// Check if we should map the closest resident processor remotely on remote
|
||||
// CPU fault
|
||||
//
|
||||
// When faulting on CPU, there's a linux process on behalf of it, which is associated
|
||||
// with a unique VM pointed by current->mm. A block of memory residing on GPU is also
|
||||
// associated with VM, pointed by va_block_context->mm. If they match, it's a regular
|
||||
// (local) fault, and we may want to migrate a page from GPU to CPU.
|
||||
// If it's a 'remote' fault, i.e. linux process differs from one associated with block
|
||||
// VM, we might preserve residence.
|
||||
// When faulting on CPU, there's a linux process on behalf of it, which is
|
||||
// associated with a unique VM pointed by current->mm. A block of memory
|
||||
// residing on GPU is also associated with VM, pointed by
|
||||
// va_block_context->mm. If they match, it's a regular (local) fault, and we
|
||||
// may want to migrate a page from GPU to CPU. If it's a 'remote' fault,
|
||||
// i.e., linux process differs from one associated with block VM, we might
|
||||
// preserve residence.
|
||||
//
|
||||
// Establishing a remote fault without access counters means the memory could stay in
|
||||
// the wrong spot for a long time, which is why we prefer to avoid creating remote
|
||||
// mappings. However when NIC accesses a memory residing on GPU, it's worth to keep it
|
||||
// in place for NIC accesses.
|
||||
// Establishing a remote fault without access counters means the memory
|
||||
// could stay in the wrong spot for a long time, which is why we prefer to
|
||||
// avoid creating remote mappings. However when NIC accesses a memory
|
||||
// residing on GPU, it's worth to keep it in place for NIC accesses.
|
||||
//
|
||||
// The logic that's used to detect remote faulting also keeps memory in place for
|
||||
// ptrace accesses. We would prefer to control those policies separately, but the
|
||||
// NIC case takes priority.
|
||||
// If the accessing processor is CPU, we're either handling a fault
|
||||
// from other than owning process, or we're handling an MOMC
|
||||
// notification. Only prevent migration for the former.
|
||||
// The logic that's used to detect remote faulting also keeps memory in
|
||||
// place for ptrace accesses. We would prefer to control those policies
|
||||
// separately, but the NIC case takes priority. If the accessing processor
|
||||
// is the CPU, we're handling a fault from other than the owning process,
|
||||
// we want to prevent a migration.
|
||||
if (UVM_ID_IS_CPU(processor_id) &&
|
||||
operation != UVM_SERVICE_OPERATION_ACCESS_COUNTERS &&
|
||||
uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(closest_resident_processor)], processor_id) &&
|
||||
va_block_context->mm != current->mm) {
|
||||
UVM_ASSERT(va_block_context->mm != NULL);
|
||||
UVM_ASSERT(operation != UVM_SERVICE_OPERATION_ACCESS_COUNTERS);
|
||||
return closest_resident_processor;
|
||||
}
|
||||
|
||||
@ -11693,7 +11624,8 @@ static int block_select_node_residency(uvm_va_block_t *va_block,
|
||||
// For HMM allocations UVM doesn't always control allocation of the
|
||||
// destination page as the kernel may have already allocated one. Therefore
|
||||
// we can't respect the preferred node ID for HMM pages.
|
||||
// TODO: Bug 4453874: [UVM-HMM] Respect the preferred CPU NUMA Node ID when making a HMM page resident
|
||||
// TODO: Bug 4453874: [UVM-HMM] Respect the preferred CPU NUMA Node ID when
|
||||
// making a HMM page resident
|
||||
if (uvm_va_block_is_hmm(va_block))
|
||||
return NUMA_NO_NODE;
|
||||
|
||||
@ -11867,9 +11799,12 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
|
||||
break;
|
||||
case UVM_SERVICE_OPERATION_ACCESS_COUNTERS:
|
||||
cause = UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER;
|
||||
service_context->block_context->make_resident.access_counters_buffer_index =
|
||||
service_context->access_counters_buffer_index;
|
||||
break;
|
||||
default:
|
||||
UVM_ASSERT_MSG(false, "Invalid operation value %d\n", service_context->operation);
|
||||
|
||||
// Set cause to silence compiler warning that it may be unused.
|
||||
cause = UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER;
|
||||
break;
|
||||
@ -11955,16 +11890,21 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
    if (status != NV_OK)
        return status;

    // TODO: Bug 5069427: [uvm] Fix the migration STO error checks.
    // Same as above for nvlink errors. Check the source GPU as well
    // as all its peers.
    uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
    for_each_gpu_in_mask(peer_gpu, &gpu->peer_info.peer_gpu_mask) {
        status = uvm_gpu_check_nvlink_error_no_rm(peer_gpu);
        if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
            uvm_processor_mask_set(&service_context->gpus_to_check_for_nvlink_errors, peer_gpu->id);

        if (status != NV_OK)
        if (status != NV_OK) {
            uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
            return status;
        }
    }
    uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);

    status = uvm_gpu_check_nvlink_error_no_rm(gpu);
    if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
@ -13542,7 +13482,7 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
    }
    else {
        params->resident_physical_address[count] =
            block_phys_page_address(block, block_page, uvm_gpu_get(id)).address;
            block_phys_page_address(block, block_page, uvm_gpu_get(id), REMOTE_EGM_ALLOWED).address;
    }

    ++count;
@ -13572,7 +13512,7 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
    block_page = block_phys_page(processor_to_map, nid, page_index);
    if (!UVM_ID_IS_CPU(id)) {
        uvm_gpu_t *gpu = uvm_gpu_get(id);
        uvm_gpu_phys_address_t gpu_phys_addr = block_phys_page_address(block, block_page, gpu);
        uvm_gpu_phys_address_t gpu_phys_addr = block_phys_page_address(block, block_page, gpu, REMOTE_EGM_ALLOWED);
        NvU64 phys_addr = gpu_phys_addr.address;

        if (UVM_ID_IS_CPU(block_page.processor)) {

@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2015-2024 NVIDIA Corporation
    Copyright (c) 2015-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@ -205,12 +205,12 @@ typedef struct
    //
    // The indices represent the corresponding big PTEs in the block's interior.
    // For example, a block with alignment and size of one 4k page on either
    // side of a big page will only use bit 0. Use uvm_va_block_big_page_index to look
    // the big_ptes index of a page.
    // side of a big page will only use bit 0. Use uvm_va_block_big_page_index
    // to look up the big_ptes index of a page.
    //
    // The block might not be able to fit any big PTEs, in which case this
    // bitmap is always zero. Use uvm_va_block_gpu_num_big_pages to find the number of
    // valid bits in this mask.
    // bitmap is always zero. Use uvm_va_block_gpu_num_big_pages to find the
    // number of valid bits in this mask.
    DECLARE_BITMAP(big_ptes, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);

    // See the comments for uvm_va_block_mmap_t::cpu.pte_bits.
@ -565,8 +565,8 @@ struct uvm_va_block_wrapper_struct
    // testing only.
    bool inject_eviction_error;

    // Force the next successful chunk allocation to then fail. Used for testing
    // only to simulate driver metadata allocation failure.
    // Force the next successful chunk allocation to then fail. Used for
    // testing only to simulate driver metadata allocation failure.
    bool inject_populate_error;

    // Force the next split on this block to fail.
@ -1250,8 +1250,8 @@ NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,
// context.
//
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context->prefetch_hint is set by this function.
//
@ -1282,8 +1282,8 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
// pages to new_residency.
//
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context->prefetch_hint should be set before calling this function.
//
@ -1311,8 +1311,8 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
// to the new residency (which may be remote).
//
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context must be initialized by calling uvm_va_block_service_copy()
// before calling this function.
@ -1499,8 +1499,8 @@ uvm_gpu_chunk_t *uvm_va_block_lookup_gpu_chunk(uvm_va_block_t *va_block, uvm_gpu
//
// service_context and service_context->block_context must not be NULL and
// policy for the region must match. See the comments for
// uvm_va_block_check_policy_is_valid(). If va_block is a HMM block,
// service->block_context->hmm.vma must be valid. See the comments for
// uvm_va_block_check_policy_is_valid(). If va_block is a HMM block,
// service->block_context->hmm.vma must be valid. See the comments for
// uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
//
// LOCKING: The caller must hold the va_block lock. If
@ -1550,7 +1550,8 @@ void uvm_va_block_retry_init(uvm_va_block_retry_t *uvm_va_block_retry);
// Frees all the remaining free chunks and unpins all the used chunks.
void uvm_va_block_retry_deinit(uvm_va_block_retry_t *uvm_va_block_retry, uvm_va_block_t *va_block);

// Evict all chunks from the block that are subchunks of the passed in root_chunk.
// Evict all chunks from the block that are subchunks of the passed in
// root_chunk.
//
// Add all the work tracking the eviction to the tracker.
//
@ -2139,16 +2140,12 @@ struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block, uvm_cpu_chunk_
struct page *uvm_va_block_get_cpu_page(uvm_va_block_t *va_block, uvm_page_index_t page_index);

// Physically map a CPU chunk so it is DMA'able from all registered GPUs.
// nid cannot be NUMA_NO_NODE.
// Locking: The va_block lock must be held.
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
                                             uvm_cpu_chunk_t *chunk,
                                             uvm_page_index_t page_index);
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk);

// Physically unmap a CPU chunk from all registered GPUs.
// Locking: The va_block lock must be held.
void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
                                          uvm_cpu_chunk_t *chunk);
void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk);

// Remove any CPU chunks in the given region.
// Locking: The va_block lock must be held.
@ -2163,8 +2160,7 @@ NvU64 uvm_va_block_get_physical_size(uvm_va_block_t *block,
                                     uvm_page_index_t page_index);

// Get CPU page size or 0 if it is not mapped
NvU64 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block,
                                 uvm_page_index_t page_index);
NvU64 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block, uvm_page_index_t page_index);

// Get GPU page size or 0 if it is not mapped on the given GPU
NvU64 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_page_index_t page_index);
@ -2262,8 +2258,8 @@ NV_STATUS uvm_va_block_populate_page_cpu(uvm_va_block_t *va_block,
// otherwise it will be initialized and deinitialized by the macro.
//
// The macro also locks and unlocks the block's lock internally as it's expected
// that the block's lock has been unlocked and relocked whenever the function call
// returns NV_ERR_MORE_PROCESSING_REQUIRED and this makes it clear that the
// that the block's lock has been unlocked and relocked whenever the function
// call returns NV_ERR_MORE_PROCESSING_REQUIRED and this makes it clear that the
// block's state is not locked across these calls.
#define UVM_VA_BLOCK_LOCK_RETRY(va_block, block_retry, call) ({ \
    NV_STATUS __status; \

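Since the hunk above shows only the macro's comment and opening lines, a minimal hedged sketch of the calling pattern that comment describes may help. The callee example_alloc_step() and the retry-state handling shown are assumptions for illustration, not taken from this diff.

    // Illustrative only, not a driver call site. Per the comment above, the
    // macro takes (va_block, block_retry, call), locks the block's lock,
    // invokes the call, and repeats it while it returns
    // NV_ERR_MORE_PROCESSING_REQUIRED, dropping and re-taking the lock
    // between attempts.
    static NV_STATUS example_populate(uvm_va_block_t *va_block)
    {
        // Assumption: passing NULL lets the macro initialize and deinitialize
        // the retry state itself, as the comment above says it will do.
        return UVM_VA_BLOCK_LOCK_RETRY(va_block, NULL,
                                       example_alloc_step(va_block));
    }
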
@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2016-2023 NVIDIA Corporation
    Copyright (c) 2016-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@ -235,6 +235,10 @@ typedef struct

        // Event that triggered the call
        uvm_make_resident_cause_t cause;

        // Access counters notification buffer index. Only valid when cause is
        // UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER.
        NvU32 access_counters_buffer_index;
    } make_resident;

    // State used by the mapping APIs (unmap, map, revoke). This could be used

@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -558,7 +558,7 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
|
||||
nv_kthread_q_flush(&gpu->parent->isr.kill_channel_q);
|
||||
|
||||
if (gpu->parent->access_counters_supported)
|
||||
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
|
||||
uvm_gpu_access_counters_disable(gpu, va_space);
|
||||
|
||||
}
|
||||
|
||||
@ -576,7 +576,7 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
|
||||
|
||||
uvm_deferred_free_object_list(&deferred_free_list);
|
||||
|
||||
// Normally we'd expect this to happen as part of uvm_mm_release()
|
||||
// Normally we'd expect this to happen as part of uvm_release_mm()
|
||||
// but if userspace never initialized uvm_mm_fd that won't happen.
|
||||
// We don't have to take the va_space_mm spinlock and update state
|
||||
// here because we know no other thread can be in or subsequently
|
||||
@ -760,7 +760,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
|
||||
bool gpu_can_access_sysmem = true;
|
||||
uvm_processor_mask_t *peers_to_release = NULL;
|
||||
|
||||
status = uvm_gpu_retain_by_uuid(gpu_uuid, user_rm_device, &gpu);
|
||||
status = uvm_gpu_retain_by_uuid(gpu_uuid, user_rm_device, &va_space->test.parent_gpu_error, &gpu);
|
||||
if (status != NV_OK)
|
||||
return status;
|
||||
|
||||
@ -936,7 +936,7 @@ done:
|
||||
// registered GPU: the enablement step would have failed before even
|
||||
// discovering that the GPU is already registered.
|
||||
if (uvm_parent_gpu_access_counters_required(gpu->parent))
|
||||
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
|
||||
uvm_gpu_access_counters_disable(gpu, va_space);
|
||||
|
||||
uvm_gpu_release(gpu);
|
||||
}
|
||||
@ -1011,7 +1011,7 @@ NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcesso
|
||||
// acquires the VA space lock after the unregistration does. Both outcomes
|
||||
// result on valid states.
|
||||
if (disable_access_counters)
|
||||
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
|
||||
uvm_gpu_access_counters_disable(gpu, va_space);
|
||||
|
||||
// mmap_lock is needed to establish CPU mappings to any pages evicted from
|
||||
// the GPU if accessed by CPU is set for them.
|
||||
@ -2207,6 +2207,17 @@ NV_STATUS uvm_test_va_space_inject_error(UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS *
    atomic_set(&va_space->test.migrate_vma_allocation_fail_nth, params->migrate_vma_allocation_fail_nth);
    atomic_set(&va_space->test.va_block_allocation_fail_nth, params->va_block_allocation_fail_nth);

    va_space->test.parent_gpu_error.access_counters_alloc_buffer = params->gpu_access_counters_alloc_buffer;
    va_space->test.parent_gpu_error.access_counters_alloc_block_context =
        params->gpu_access_counters_alloc_block_context;
    va_space->test.parent_gpu_error.access_counters_batch_context_notifications =
        params->access_counters_batch_context_notifications;
    va_space->test.parent_gpu_error.access_counters_batch_context_notification_cache =
        params->access_counters_batch_context_notification_cache;
    va_space->test.parent_gpu_error.isr_access_counters_alloc = params->gpu_isr_access_counters_alloc;
    va_space->test.parent_gpu_error.isr_access_counters_alloc_stats_cpu =
        params->gpu_isr_access_counters_alloc_stats_cpu;

    return NV_OK;
}

@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2015-2024 NVIDIA Corporation
|
||||
Copyright (c) 2015-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -424,6 +424,8 @@ struct uvm_va_space_struct
|
||||
bool force_cpu_to_cpu_copy_with_ce;
|
||||
|
||||
bool allow_allocation_from_movable;
|
||||
|
||||
uvm_test_parent_gpu_inject_error_t parent_gpu_error;
|
||||
} test;
|
||||
|
||||
// Queue item for deferred f_ops->release() handling
|
||||
|
@ -1,5 +1,5 @@
/*******************************************************************************
    Copyright (c) 2016-2024 NVIDIA Corporation
    Copyright (c) 2016-2025 NVIDIA Corporation

    Permission is hereby granted, free of charge, to any person obtaining a copy
    of this software and associated documentation files (the "Software"), to
@ -37,10 +37,10 @@ void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->utlb_per_gpc_count = uvm_volta_get_utlbs_per_gpc(parent_gpu);

    parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
    parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
    {
        uvm_fault_buffer_entry_t *dummy;
        UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
        UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
    }

    // A single top level PDE on Volta covers 128 TB and that's the minimum
@ -82,9 +82,9 @@ void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu)

    parent_gpu->non_replayable_faults_supported = true;

    parent_gpu->access_counters_supported = true;

    parent_gpu->access_counters_can_use_physical_addresses = true;
    // Although access counters are supported in HW, it only notifies memory
    // accesses using physical addresses, which is not supported in SW.
    parent_gpu->access_counters_supported = false;

    parent_gpu->fault_cancel_va_supported = true;

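For context on the Volta change above, here is a small hedged sketch of how a parent-GPU capability flag like access_counters_supported is typically consumed; the guard function below is illustrative only and not part of the diff.

    // Illustrative guard, not a driver function: access-counter servicing is
    // expected to be skipped when the parent GPU does not advertise support.
    // After this change that is the case on Volta, since the HW reports only
    // physical addresses, which the SW stack does not consume.
    static bool example_can_service_access_counters(const uvm_parent_gpu_t *parent_gpu)
    {
        return parent_gpu->access_counters_supported;
    }
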
@ -1,228 +0,0 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2021 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
deal in the Software without restriction, including without limitation the
|
||||
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
|
||||
sell copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be
|
||||
included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
||||
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
||||
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
|
||||
DEALINGS IN THE SOFTWARE.
|
||||
|
||||
*******************************************************************************/
|
||||
|
||||
#include "uvm_linux.h"
|
||||
#include "uvm_global.h"
|
||||
#include "uvm_gpu.h"
|
||||
#include "uvm_hal.h"
|
||||
#include "clc365.h"
|
||||
#include "uvm_volta_fault_buffer.h"
|
||||
|
||||
typedef struct {
|
||||
NvU8 bufferEntry[NVC365_NOTIFY_BUF_SIZE];
|
||||
} access_counter_buffer_entry_c365_t;
|
||||
|
||||
void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
volatile NvU32 *reg;
|
||||
NvU32 mask;
|
||||
|
||||
reg = parent_gpu->access_counter_buffer_info.rm_info.pHubIntrEnSet;
|
||||
mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;
|
||||
|
||||
UVM_GPU_WRITE_ONCE(*reg, mask);
|
||||
}
|
||||
|
||||
void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
volatile NvU32 *reg;
|
||||
NvU32 mask;
|
||||
|
||||
reg = parent_gpu->access_counter_buffer_info.rm_info.pHubIntrEnClear;
|
||||
mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;
|
||||
|
||||
UVM_GPU_WRITE_ONCE(*reg, mask);
|
||||
}
|
||||
|
||||
void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get)
|
||||
{
|
||||
// No-op, this function is only used by pulse-based interrupt GPUs.
|
||||
}
|
||||
|
||||
NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
return NVC365_NOTIFY_BUF_SIZE;
|
||||
}
|
||||
|
||||
static uvm_aperture_t get_access_counter_inst_aperture(NvU32 *access_counter_entry)
|
||||
{
|
||||
NvU32 hw_aperture_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_APERTURE);
|
||||
|
||||
switch (hw_aperture_value) {
|
||||
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_VID_MEM:
|
||||
return UVM_APERTURE_VID;
|
||||
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_COHERENT:
|
||||
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_NONCOHERENT:
|
||||
return UVM_APERTURE_SYS;
|
||||
}
|
||||
|
||||
UVM_ASSERT_MSG(false, "Invalid inst aperture value: %d\n", hw_aperture_value);
|
||||
return UVM_APERTURE_MAX;
|
||||
}
|
||||
|
||||
static uvm_aperture_t get_access_counter_aperture(NvU32 *access_counter_entry)
|
||||
{
|
||||
NvU32 hw_aperture_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, APERTURE);
|
||||
NvU32 peer_id = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, PEER_ID);
|
||||
|
||||
switch (hw_aperture_value) {
|
||||
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_VID_MEM:
|
||||
return UVM_APERTURE_VID;
|
||||
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_PEER_MEM:
|
||||
return UVM_APERTURE_PEER(peer_id);
|
||||
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_COHERENT:
|
||||
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_NONCOHERENT:
|
||||
return UVM_APERTURE_SYS;
|
||||
}
|
||||
|
||||
UVM_ASSERT_MSG(false, "Invalid aperture value: %d\n", hw_aperture_value);
|
||||
return UVM_APERTURE_MAX;
|
||||
}
|
||||
|
||||
static uvm_gpu_address_t get_address(uvm_parent_gpu_t *parent_gpu, NvU32 *access_counter_entry)
|
||||
{
|
||||
NvU64 address;
|
||||
bool is_virtual;
|
||||
NvU64 addr_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_HI);
|
||||
NvU64 addr_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_LO);
|
||||
NvU32 addr_type_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_TYPE);
|
||||
|
||||
address = addr_lo + (addr_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, ADDR_LO));
|
||||
is_virtual = (addr_type_value == NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GVA);
|
||||
|
||||
if (is_virtual) {
|
||||
address = uvm_parent_gpu_canonical_address(parent_gpu, address);
|
||||
return uvm_gpu_address_virtual(address);
|
||||
}
|
||||
else {
|
||||
uvm_aperture_t aperture = get_access_counter_aperture(access_counter_entry);
|
||||
|
||||
UVM_ASSERT(parent_gpu->access_counters_can_use_physical_addresses);
|
||||
UVM_ASSERT_MSG(addr_type_value == NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GPA,
|
||||
"Invalid address type%u\n", addr_type_value);
|
||||
|
||||
return uvm_gpu_address_physical(aperture, address);
|
||||
}
|
||||
}
|
||||
|
||||
static uvm_access_counter_type_t get_access_counter_type(NvU32 *access_counter_entry)
|
||||
{
|
||||
NvU32 type_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, TYPE);
|
||||
if (type_value == NVC365_NOTIFY_BUF_ENTRY_TYPE_CPU)
|
||||
return UVM_ACCESS_COUNTER_TYPE_MOMC;
|
||||
else
|
||||
return UVM_ACCESS_COUNTER_TYPE_MIMC;
|
||||
}
|
||||
|
||||
static NvU32 *get_access_counter_buffer_entry(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
{
|
||||
access_counter_buffer_entry_c365_t *buffer_start;
|
||||
NvU32 *access_counter_entry;
|
||||
|
||||
UVM_ASSERT(index < parent_gpu->access_counter_buffer_info.max_notifications);
|
||||
|
||||
buffer_start = (access_counter_buffer_entry_c365_t *)parent_gpu->access_counter_buffer_info.rm_info.bufferAddress;
|
||||
access_counter_entry = (NvU32 *)&buffer_start[index];
|
||||
|
||||
return access_counter_entry;
|
||||
}
|
||||
|
||||
bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
{
|
||||
NvU32 *access_counter_entry;
|
||||
bool is_valid;
|
||||
|
||||
access_counter_entry = get_access_counter_buffer_entry(parent_gpu, index);
|
||||
|
||||
is_valid = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID);
|
||||
|
||||
return is_valid;
|
||||
}
|
||||
|
||||
void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
{
|
||||
NvU32 *access_counter_entry;
|
||||
|
||||
access_counter_entry = get_access_counter_buffer_entry(parent_gpu, index);
|
||||
|
||||
WRITE_HWCONST_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID, FALSE);
|
||||
}
|
||||
|
||||
void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
|
||||
NvU32 index,
|
||||
uvm_access_counter_buffer_entry_t *buffer_entry)
|
||||
{
|
||||
NvU32 *access_counter_entry;
|
||||
|
||||
// Valid bit must be set before this function is called
|
||||
UVM_ASSERT(uvm_hal_volta_access_counter_buffer_entry_is_valid(parent_gpu, index));
|
||||
|
||||
access_counter_entry = get_access_counter_buffer_entry(parent_gpu, index);
|
||||
|
||||
buffer_entry->counter_type = get_access_counter_type(access_counter_entry);
|
||||
|
||||
buffer_entry->address = get_address(parent_gpu, access_counter_entry);
|
||||
|
||||
if (buffer_entry->address.is_virtual) {
|
||||
NvU64 inst_hi, inst_lo;
|
||||
|
||||
inst_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_HI);
|
||||
inst_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_LO);
|
||||
buffer_entry->virtual_info.instance_ptr.address =
|
||||
inst_lo + (inst_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, INST_LO));
|
||||
|
||||
// HW value contains the 4K page number. Shift to build the full address
|
||||
buffer_entry->virtual_info.instance_ptr.address <<= 12;
|
||||
|
||||
buffer_entry->virtual_info.instance_ptr.aperture = get_access_counter_inst_aperture(access_counter_entry);
|
||||
|
||||
buffer_entry->virtual_info.mmu_engine_id =
|
||||
READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, MMU_ENGINE_ID);
|
||||
|
||||
buffer_entry->virtual_info.mmu_engine_type = UVM_MMU_ENGINE_TYPE_GRAPHICS;
|
||||
|
||||
// MMU engine id aligns with the fault buffer packets. Therefore, we
|
||||
// reuse the helper to compute the VE ID from the fault buffer class.
|
||||
buffer_entry->virtual_info.ve_id =
|
||||
parent_gpu->fault_buffer_hal->get_ve_id(buffer_entry->virtual_info.mmu_engine_id,
|
||||
buffer_entry->virtual_info.mmu_engine_type);
|
||||
}
|
||||
else if (buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC) {
|
||||
// Ignore any set bit beyond 47 since it is the maximum physical address
|
||||
// supported by the GPU. See the definition of
|
||||
// uvm_gpu_t::dma_addressable_start for why higher bits might be set.
|
||||
const NvU64 mask_46_0 = (0x1UL << 47) - 1;
|
||||
buffer_entry->address.address &= mask_46_0;
|
||||
}
|
||||
|
||||
buffer_entry->counter_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, COUNTER_VAL);
|
||||
|
||||
buffer_entry->sub_granularity = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, SUB_GRANULARITY);
|
||||
|
||||
buffer_entry->bank = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, BANK);
|
||||
|
||||
buffer_entry->tag = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, NOTIFY_TAG);
|
||||
|
||||
// Automatically clear valid bit for the entry in the access counter buffer
|
||||
uvm_hal_volta_access_counter_buffer_entry_clear_valid(parent_gpu, index);
|
||||
}
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
Copyright (c) 2016-2025 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -38,7 +38,7 @@ typedef struct {
|
||||
|
||||
NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferPut);
|
||||
NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferPut);
|
||||
NvU32 index = READ_HWVALUE(put, _PFB_PRI_MMU, FAULT_BUFFER_PUT, PTR);
|
||||
UVM_ASSERT(READ_HWVALUE(put, _PFB_PRI_MMU, FAULT_BUFFER_PUT, GETPTR_CORRUPTED) ==
|
||||
NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_GETPTR_CORRUPTED_NO);
|
||||
@ -48,8 +48,8 @@ NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu)
|
||||
|
||||
NvU32 uvm_hal_volta_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu)
|
||||
{
|
||||
NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet);
|
||||
UVM_ASSERT(get < parent_gpu->fault_buffer_info.replayable.max_faults);
|
||||
NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet);
|
||||
UVM_ASSERT(get < parent_gpu->fault_buffer.replayable.max_faults);
|
||||
|
||||
return READ_HWVALUE(get, _PFB_PRI_MMU, FAULT_BUFFER_GET, PTR);
|
||||
}
|
||||
@ -58,7 +58,7 @@ void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 in
|
||||
{
|
||||
NvU32 get = HWVALUE(_PFB_PRI_MMU, FAULT_BUFFER_GET, PTR, index);
|
||||
|
||||
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
|
||||
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
|
||||
|
||||
// If HW has detected an overflow condition (PUT == GET - 1 and a fault has
|
||||
// arrived, which is dropped due to no more space in the fault buffer), it
|
||||
@ -70,7 +70,7 @@ void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 in
|
||||
// resulting in the overflow condition being instantly reasserted. However,
|
||||
// if the index is updated first and then the OVERFLOW bit is cleared such
|
||||
// a collision will not cause a reassertion of the overflow condition.
|
||||
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet, get);
|
||||
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet, get);
|
||||
|
||||
// Clearing GETPTR_CORRUPTED and OVERFLOW is not needed when GSP-RM owns
|
||||
// the HW replayable fault buffer, because UVM does not write to the actual
|
||||
@ -82,7 +82,7 @@ void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 in
|
||||
// Clear the GETPTR_CORRUPTED and OVERFLOW bits.
|
||||
get |= HWCONST(_PFB_PRI_MMU, FAULT_BUFFER_GET, GETPTR_CORRUPTED, CLEAR) |
|
||||
HWCONST(_PFB_PRI_MMU, FAULT_BUFFER_GET, OVERFLOW, CLEAR);
|
||||
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet, get);
|
||||
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet, get);
|
||||
}
|
||||
|
||||
// TODO: Bug 1835884: [uvm] Query the maximum number of subcontexts from RM
|
||||
@ -234,9 +234,9 @@ static NvU32 *get_fault_buffer_entry(uvm_parent_gpu_t *parent_gpu, NvU32 index)
|
||||
fault_buffer_entry_c369_t *buffer_start;
|
||||
NvU32 *fault_entry;
|
||||
|
||||
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
|
||||
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
|
||||
|
||||
buffer_start = (fault_buffer_entry_c369_t *)parent_gpu->fault_buffer_info.rm_info.replayable.bufferAddress;
|
||||
buffer_start = (fault_buffer_entry_c369_t *)parent_gpu->fault_buffer.rm_info.replayable.bufferAddress;
|
||||
fault_entry = (NvU32 *)&buffer_start[index];
|
||||
|
||||
return fault_entry;
|
||||
@ -247,10 +247,10 @@ static UvmFaultMetadataPacket *get_fault_buffer_entry_metadata(uvm_parent_gpu_t
|
||||
{
|
||||
UvmFaultMetadataPacket *fault_entry_metadata;
|
||||
|
||||
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
|
||||
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
|
||||
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
|
||||
|
||||
fault_entry_metadata = parent_gpu->fault_buffer_info.rm_info.replayable.bufferMetadata;
|
||||
fault_entry_metadata = parent_gpu->fault_buffer.rm_info.replayable.bufferMetadata;
|
||||
UVM_ASSERT(fault_entry_metadata != NULL);
|
||||
|
||||
return fault_entry_metadata + index;
|
||||
@ -359,7 +359,7 @@ static void parse_fault_entry_common(uvm_parent_gpu_t *parent_gpu,
|
||||
UVM_ASSERT(gpc_utlb_id < parent_gpu->utlb_per_gpc_count);
|
||||
|
||||
utlb_id = buffer_entry->fault_source.gpc_id * parent_gpu->utlb_per_gpc_count + gpc_utlb_id;
|
||||
UVM_ASSERT(utlb_id < parent_gpu->fault_buffer_info.replayable.utlb_count);
|
||||
UVM_ASSERT(utlb_id < parent_gpu->fault_buffer.replayable.utlb_count);
|
||||
|
||||
buffer_entry->fault_source.utlb_id = utlb_id;
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*******************************************************************************
|
||||
Copyright (c) 2016-2021 NVIDIA Corporation
|
||||
Copyright (c) 2016-2024 NVIDIA Corporation
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to
|
||||
@ -136,64 +136,6 @@ void uvm_hal_volta_host_clear_faulted_channel_method(uvm_push_t *push,
|
||||
clear_type_value);
|
||||
}
|
||||
|
||||
void uvm_hal_volta_access_counter_clear_all(uvm_push_t *push)
|
||||
{
|
||||
NV_PUSH_4U(C36F, MEM_OP_A, 0,
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, 0,
|
||||
MEM_OP_D, HWCONST(C36F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
|
||||
HWCONST(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, ALL));
|
||||
}
|
||||
|
||||
static NvU32 get_access_counter_type_value(uvm_access_counter_type_t type)
|
||||
{
|
||||
if (type == UVM_ACCESS_COUNTER_TYPE_MIMC)
|
||||
return NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC;
|
||||
else if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
|
||||
return NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC;
|
||||
else
|
||||
UVM_ASSERT_MSG(false, "Invalid access counter type %u\n", type);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static NvU32 get_access_counter_targeted_type_value(uvm_access_counter_type_t type)
|
||||
{
|
||||
if (type == UVM_ACCESS_COUNTER_TYPE_MIMC)
|
||||
return NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC;
|
||||
else if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
|
||||
return NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC;
|
||||
else
|
||||
UVM_ASSERT_MSG(false, "Invalid access counter type %u\n", type);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
void uvm_hal_volta_access_counter_clear_type(uvm_push_t *push, uvm_access_counter_type_t type)
|
||||
{
|
||||
NvU32 type_value = get_access_counter_type_value(type);
|
||||
|
||||
NV_PUSH_4U(C36F, MEM_OP_A, 0,
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, 0,
|
||||
MEM_OP_D, HWCONST(C36F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
|
||||
HWVALUE(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, type_value));
|
||||
}
|
||||
|
||||
void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
|
||||
const uvm_access_counter_buffer_entry_t *buffer_entry)
|
||||
{
|
||||
NvU32 targeted_type_value = get_access_counter_targeted_type_value(buffer_entry->counter_type);
|
||||
|
||||
NV_PUSH_4U(C36F, MEM_OP_A, 0,
|
||||
MEM_OP_B, 0,
|
||||
MEM_OP_C, HWVALUE(C36F, MEM_OP_C, ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG, buffer_entry->tag),
|
||||
MEM_OP_D, HWCONST(C36F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
|
||||
HWCONST(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, TARGETED) |
|
||||
HWVALUE(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_TYPE, targeted_type_value) |
|
||||
HWVALUE(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_BANK, buffer_entry->bank));
|
||||
}
|
||||
|
||||
void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
|
||||
uvm_gpu_phys_address_t pdb,
|
||||
NvU32 depth,
|
||||
|
@ -143,6 +143,11 @@ nvidia_vma_access(
        return -EINVAL;
    }

    if (write && !(mmap_context->prot & NV_PROTECT_WRITEABLE))
    {
        return -EACCES;
    }

    if (nv->flags & NV_FLAG_CONTROL)
    {
        at = NV_VMA_PRIVATE(vma);

@ -217,7 +217,7 @@ NV_STATUS nvGpuOpsOwnAccessCntrIntr(struct gpuSession *session,

NV_STATUS nvGpuOpsEnableAccessCntr(struct gpuDevice *device,
                                   gpuAccessCntrInfo *pAccessCntrInfo,
                                   gpuAccessCntrConfig *pAccessCntrConfig);
                                   const gpuAccessCntrConfig *pAccessCntrConfig);

NV_STATUS nvGpuOpsDisableAccessCntr(struct gpuDevice *device, gpuAccessCntrInfo *pAccessCntrInfo);

@ -931,7 +931,7 @@ EXPORT_SYMBOL(nvUvmInterfaceInitAccessCntrInfo);

NV_STATUS nvUvmInterfaceEnableAccessCntr(uvmGpuDeviceHandle device,
                                         UvmGpuAccessCntrInfo *pAccessCntrInfo,
                                         UvmGpuAccessCntrConfig *pAccessCntrConfig)
                                         const UvmGpuAccessCntrConfig *pAccessCntrConfig)
{
    nvidia_stack_t *sp = NULL;
    NV_STATUS status;

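A minimal hedged call sketch for the prototype change above: the access-counter config is now passed as a const pointer, so the call is read-only with respect to the caller's configuration. The wrapper name is an assumption for illustration.

    // Illustrative wrapper, not part of the diff: existing callers that only
    // read their UvmGpuAccessCntrConfig continue to compile unchanged.
    static NV_STATUS example_enable_access_counters(uvmGpuDeviceHandle device,
                                                    UvmGpuAccessCntrInfo *pInfo,
                                                    const UvmGpuAccessCntrConfig *pConfig)
    {
        return nvUvmInterfaceEnableAccessCntr(device, pInfo, pConfig);
    }
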
@ -159,6 +159,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += cc_attr_guest_sev_snp
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += hv_get_isolation_type
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += seq_read_iter
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += follow_pfn
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ptep_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_put_unlocked
|
||||
NV_CONFTEST_FUNCTION_COMPILE_TESTS += add_memory_driver_managed
|
||||
@ -229,6 +230,8 @@ NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_memory_block_size_b
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += crypto
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += crypto_akcipher_verify
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_follow_pte
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += follow_pte_arg_vma
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_follow_pfnmap_start
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_pci_ats_supported
|
||||
NV_CONFTEST_SYMBOL_COMPILE_TESTS += ecc_digits_from_bytes
|
||||
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-FileCopyrightText: Copyright (c) 1999-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
|
||||
* SPDX-License-Identifier: MIT
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining a
|
||||
@ -32,14 +32,27 @@
|
||||
#define NV_NUM_PIN_PAGES_PER_ITERATION 0x80000
|
||||
#endif
|
||||
|
||||
static inline int nv_follow_pfn(struct vm_area_struct *vma,
|
||||
unsigned long address,
|
||||
unsigned long *pfn)
|
||||
static inline int nv_follow_flavors(struct vm_area_struct *vma,
|
||||
unsigned long address,
|
||||
unsigned long *pfn)
|
||||
{
|
||||
#if defined(NV_FOLLOW_PFN_PRESENT)
|
||||
return follow_pfn(vma, address, pfn);
|
||||
#else
|
||||
#if NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte
|
||||
#if NV_IS_EXPORT_SYMBOL_PRESENT_follow_pfnmap_start
|
||||
struct follow_pfnmap_args args = {};
|
||||
int rc;
|
||||
|
||||
args.address = address;
|
||||
args.vma = vma;
|
||||
|
||||
rc = follow_pfnmap_start(&args);
|
||||
if (rc)
|
||||
return rc;
|
||||
|
||||
*pfn = args.pfn;
|
||||
|
||||
follow_pfnmap_end(&args);
|
||||
|
||||
return 0;
|
||||
#elif NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte
|
||||
int status = 0;
|
||||
spinlock_t *ptl;
|
||||
pte_t *ptep;
|
||||
@ -47,17 +60,40 @@ static inline int nv_follow_pfn(struct vm_area_struct *vma,
    if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
        return status;

    //
    // The first argument of follow_pte() was changed from
    // mm_struct to vm_area_struct in kernel 6.10.
    //
#if defined(NV_FOLLOW_PTE_ARG1_VMA)
    status = follow_pte(vma, address, &ptep, &ptl);
#else
    status = follow_pte(vma->vm_mm, address, &ptep, &ptl);
#endif
    if (status)
        return status;

#if defined(NV_PTEP_GET_PRESENT)
    *pfn = pte_pfn(ptep_get(ptep));
#else
    *pfn = pte_pfn(READ_ONCE(*ptep));
#endif

    // The lock is acquired inside follow_pte()
    pte_unmap_unlock(ptep, ptl);
    return 0;
#else // NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte
#else
    return -1;
#endif // NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte
#endif // NV_IS_EXPORT_SYMBOL_PRESENT_follow_pfnmap_start
}

static inline int nv_follow_pfn(struct vm_area_struct *vma,
                                unsigned long address,
                                unsigned long *pfn)
{
#if defined(NV_FOLLOW_PFN_PRESENT)
    return follow_pfn(vma, address, pfn);
#else
    return nv_follow_flavors(vma, address, pfn);
#endif
}

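A hedged usage sketch for the helper refactored above: the caller, VMA lookup, and locking shown here are illustrative assumptions; the helper itself resolves the PFN through whichever of follow_pfn(), follow_pfnmap_start(), or follow_pte() the conftest checks detected.

    // Illustrative caller, not part of the diff: resolve the PFN backing a
    // user address while holding the mm's mmap lock, which the underlying
    // follow_pte()/follow_pfnmap_start() interfaces require.
    static int example_lookup_pfn(struct mm_struct *mm, unsigned long address,
                                  unsigned long *pfn)
    {
        struct vm_area_struct *vma;
        int rc = -EINVAL;

        mmap_read_lock(mm);
        vma = find_vma(mm, address);
        if (vma != NULL && address >= vma->vm_start)
            rc = nv_follow_pfn(vma, address, pfn);
        mmap_read_unlock(mm);

        return rc;
    }
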
@ -315,6 +315,12 @@ namespace DisplayPort
|
||||
//
|
||||
bool bNoFallbackInPostLQA;
|
||||
|
||||
//
|
||||
// Set to true when we do not want DSC to be limited
|
||||
// to 16 BPP for multitile on Blackwell++
|
||||
//
|
||||
bool bDisableDscMaxBppLimit;
|
||||
|
||||
bool bReportDeviceLostBeforeNew;
|
||||
bool bDisableSSC;
|
||||
bool bEnableFastLT;
|
||||
@ -335,6 +341,8 @@ namespace DisplayPort
|
||||
|
||||
bool bForceHeadShutdownPerMonitor;
|
||||
|
||||
bool bEnableLowerBppCheckForDsc;
|
||||
|
||||
//
|
||||
// Dual SST Partner connector object pointer
|
||||
ConnectorImpl *pCoupledConnector;
|
||||
|
@ -168,6 +168,7 @@ namespace DisplayPort
|
||||
bool bIgnoreDscCap; // Ignore DSC even if sink reports DSC capability
|
||||
bool bDisableDownspread;
|
||||
bool bForceHeadShutdown;
|
||||
bool bDisableDscMaxBppLimit;
|
||||
bool bSkipCableIdCheck;
|
||||
bool bAllocateManualTimeslots;
|
||||
}_WARFlags;
|
||||
|
@ -101,7 +101,10 @@
|
||||
// Bug 5088957 : Force head shutdown in DpLib
|
||||
#define NV_DP_REGKEY_FORCE_HEAD_SHUTDOWN "DP_WAR_5088957"
|
||||
|
||||
//
|
||||
// Bug 5041041 : Enable Lower BPP check for DSC
|
||||
#define NV_DP_REGKEY_ENABLE_LOWER_BPP_CHECK_FOR_DSC "DP_ENABLE_LOWER_BPP_CHECK"
|
||||
|
||||
|
||||
// Data Base used to store all the regkey values.
|
||||
// The actual data base is declared statically in dp_evoadapter.cpp.
|
||||
// All entries set to 0 before initialized by the first EvoMainLink constructor.
|
||||
@ -141,6 +144,7 @@ struct DP_REGKEY_DATABASE
|
||||
bool bSkipZeroOuiCache;
|
||||
bool bDisable5019537Fix;
|
||||
bool bForceHeadShutdown;
|
||||
bool bEnableLowerBppCheckForDsc;
|
||||
};
|
||||
|
||||
extern struct DP_REGKEY_DATABASE dpRegkeyDatabase;
|
||||
|
@ -158,7 +158,8 @@ void DPCDHALImpl2x::parseAndReadCaps()
|
||||
_ANSI_128B_132B, _YES,
|
||||
buffer[0]);
|
||||
|
||||
if (caps2x.bDP2xChannelCodingSupported == true)
|
||||
// Read this unconditionally when the connection is tunneled
|
||||
if (caps2x.bDP2xChannelCodingSupported == true || caps.dpInTunnelingCaps.bIsSupported)
|
||||
{
|
||||
// 0x2215
|
||||
if (AuxRetry::ack == bus.read(NV_DPCD20_128B_132B_SUPPORTED_LINK_RATES, &buffer[0], 1))
|
||||
@ -264,7 +265,7 @@ void DPCDHALImpl2x::parseAndReadCaps()
|
||||
if (caps2x.dpInTunnelingCaps.bDP2xChannelCodingSupported)
|
||||
{
|
||||
if (AuxRetry::ack ==
|
||||
bus.read(NV_DPCD20_DP_TUNNELING_MAIN_LINK_CHANNEL_CODING, &byte, sizeof byte))
|
||||
bus.read(NV_DPCD20_DP_TUNNELING_128B132B_LINK_RATES, &byte, sizeof byte))
|
||||
{
|
||||
caps2x.dpInTunnelingCaps.bUHBR_10GSupported =
|
||||
FLD_TEST_DRF(_DPCD20, _DP_TUNNELING_128B132B_LINK_RATES, _10_0_GPBS_SUPPORTED, _YES, byte);
|
||||
@ -342,12 +343,18 @@ AuxRetry::status DPCDHALImpl2x::notifySDPErrDetectionCapability()
bool DPCDHALImpl2x::isDp2xChannelCodingCapable()
{
    // return false if the device does not support 128b/132b.
    if (!caps2x.bDP2xChannelCodingSupported)
        return false;

    // return false if DP-IN Tunneling is supported but not support 128b/132b.
    if (caps.dpInTunnelingCaps.bIsSupported && !caps2x.dpInTunnelingCaps.bDP2xChannelCodingSupported)
        return false;
    // However when dpTunneling is enabled, read the tunneling cap instead
    if (caps.dpInTunnelingCaps.bIsSupported)
    {
        // return false if DP-IN Tunneling is supported but not support 128b/132b.
        if (!caps2x.dpInTunnelingCaps.bDP2xChannelCodingSupported)
            return false;
    }
    else
    {
        if (!caps2x.bDP2xChannelCodingSupported)
            return false;
    }

    // return true if there is no LTTPR.
    if (!bLttprSupported || (caps.phyRepeaterCount == 0))
@ -410,6 +417,14 @@ NvU32 DPCDHALImpl2x::getUHBRSupported()
|
||||
bool bUHBR_13_5GSupported = caps2x.bUHBR_13_5GSupported;
|
||||
bool bUHBR_20GSupported = caps2x.bUHBR_20GSupported;
|
||||
|
||||
// When tunneling is supported and bw allocation is enabled, override the caps from tunneling caps
|
||||
if (caps.dpInTunnelingCaps.bIsSupported && bIsDpTunnelBwAllocationEnabled)
|
||||
{
|
||||
bUHBR_10GSupported = caps2x.dpInTunnelingCaps.bUHBR_10GSupported;
|
||||
bUHBR_13_5GSupported = caps2x.dpInTunnelingCaps.bUHBR_13_5GSupported;
|
||||
bUHBR_20GSupported = caps2x.dpInTunnelingCaps.bUHBR_20GSupported;
|
||||
}
|
||||
|
||||
if (!bIgnoreCableIdCaps)
|
||||
{
|
||||
bUHBR_10GSupported = bUHBR_10GSupported && caps2x.cableCaps.bUHBR_10GSupported;
|
||||
@ -424,13 +439,6 @@ NvU32 DPCDHALImpl2x::getUHBRSupported()
|
||||
bUHBR_20GSupported = bUHBR_20GSupported && caps2x.repeaterCaps.bUHBR_20GSupported;
|
||||
}
|
||||
|
||||
if (caps.dpInTunnelingCaps.bIsSupported && bIsDpTunnelBwAllocationEnabled)
|
||||
{
|
||||
bUHBR_10GSupported = bUHBR_10GSupported && caps2x.dpInTunnelingCaps.bUHBR_10GSupported;
|
||||
bUHBR_13_5GSupported = bUHBR_13_5GSupported && caps2x.dpInTunnelingCaps.bUHBR_13_5GSupported;
|
||||
bUHBR_20GSupported = bUHBR_20GSupported && caps2x.dpInTunnelingCaps.bUHBR_20GSupported;
|
||||
}
|
||||
|
||||
if (bUHBR_10GSupported)
|
||||
{
|
||||
uhbrCaps = FLD_SET_DRF(0073_CTRL_CMD_DP, _GET_CAPS_UHBR_SUPPORTED, _UHBR10_0, _YES, uhbrCaps);
|
||||
|
@ -185,6 +185,7 @@ void ConnectorImpl::applyRegkeyOverrides(const DP_REGKEY_DATABASE& dpRegkeyDatab
|
||||
this->bSkipZeroOuiCache = dpRegkeyDatabase.bSkipZeroOuiCache;
|
||||
this->bDisable5019537Fix = dpRegkeyDatabase.bDisable5019537Fix;
|
||||
this->bForceHeadShutdownFromRegkey = dpRegkeyDatabase.bForceHeadShutdown;
|
||||
this->bEnableLowerBppCheckForDsc = dpRegkeyDatabase.bEnableLowerBppCheckForDsc;
|
||||
}
|
||||
|
||||
void ConnectorImpl::setPolicyModesetOrderMitigation(bool enabled)
|
||||
@ -1367,12 +1368,38 @@ bool ConnectorImpl::compoundQueryAttachMST(Group * target,
|
||||
|
||||
if (compoundQueryAttachMSTIsDscPossible(target, modesetParams, pDscParams))
|
||||
{
|
||||
unsigned int forceDscBitsPerPixelX16 = pDscParams->bitsPerPixelX16;
|
||||
result = compoundQueryAttachMSTDsc(target, modesetParams, &localInfo,
|
||||
pDscParams, pErrorCode);
|
||||
if (!result)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
compoundQueryResult = compoundQueryAttachMSTGeneric(target, modesetParams, &localInfo,
|
||||
pDscParams, pErrorCode);
|
||||
//
|
||||
// compoundQueryAttachMST Generic might fail due to the insufficient bandwidth ,
|
||||
// We only check whether bpp can be fit in the available bandwidth based on the tranied link config in compoundQueryAttachMSTDsc function.
|
||||
// There might be cases where the default 10 bpp might fit in the available bandwidth based on the trained link config,
|
||||
// however, the bandwidth might be insufficient at the actual bottleneck link between source and sink to drive the mode, causing CompoundQueryAttachMSTGeneric to fail.
|
||||
// Incase of CompoundQueryAttachMSTGeneric failure, instead of returning false, check whether the mode can be supported with the max dsc compression bpp
|
||||
// and return true if it can be supported.
|
||||
|
||||
if (!compoundQueryResult && forceDscBitsPerPixelX16 == 0U && this->bEnableLowerBppCheckForDsc)
|
||||
{
|
||||
pDscParams->bitsPerPixelX16 = MAX_DSC_COMPRESSION_BPPX16;
|
||||
result = compoundQueryAttachMSTDsc(target, modesetParams, &localInfo,
|
||||
pDscParams, pErrorCode);
|
||||
if (!result)
|
||||
{
|
||||
return false;
|
||||
}
|
||||
|
||||
return compoundQueryAttachMSTGeneric(target, modesetParams, &localInfo,
|
||||
pDscParams, pErrorCode);
|
||||
}
|
||||
return compoundQueryResult;
|
||||
}
|
||||
|
||||
return compoundQueryAttachMSTGeneric(target, modesetParams, &localInfo,
|
||||
@ -1564,6 +1591,7 @@ bool ConnectorImpl::compoundQueryAttachMSTDsc(Group * target,
|
||||
warData.dpData.dpMode = DSC_DP_MST;
|
||||
warData.dpData.hBlank = modesetParams.modesetInfo.rasterWidth - modesetParams.modesetInfo.surfaceWidth;
|
||||
warData.connectorType = DSC_DP;
|
||||
warData.dpData.bDisableDscMaxBppLimit = bDisableDscMaxBppLimit;
|
||||
|
||||
//
|
||||
// Dplib needs to pass sliceCountMask to clients
|
||||
@ -1636,7 +1664,9 @@ bool ConnectorImpl::compoundQueryAttachMSTDsc(Group * target,
|
||||
localInfo->localModesetInfo.bEnableDsc = true;
|
||||
localInfo->localModesetInfo.depth = bitsPerPixelX16;
|
||||
if (modesetParams.colorFormat == dpColorFormat_YCbCr422 &&
|
||||
dev->dscCaps.dscDecoderColorFormatCaps.bYCbCrNative422)
|
||||
dev->dscCaps.dscDecoderColorFormatCaps.bYCbCrNative422 &&
|
||||
(dscInfo.gpuCaps.encoderColorFormatMask & DSC_ENCODER_COLOR_FORMAT_Y_CB_CR_NATIVE_422) &&
|
||||
(dscInfo.sinkCaps.decoderColorFormatMask & DSC_DECODER_COLOR_FORMAT_Y_CB_CR_NATIVE_422))
|
||||
{
|
||||
localInfo->localModesetInfo.colorFormat = dpColorFormat_YCbCr422_Native;
|
||||
}
|
||||
@ -1790,12 +1820,24 @@ bool ConnectorImpl::compoundQueryAttachMSTGeneric(Group * target,
|
||||
if ( tail->bandwidth.compound_query_state.timeslots_used_by_query > tail->bandwidth.compound_query_state.totalTimeSlots)
|
||||
{
|
||||
compoundQueryResult = false;
|
||||
SET_DP_IMP_ERROR(pErrorCode, DP_IMP_ERROR_INSUFFICIENT_BANDWIDTH)
|
||||
if(this->bEnableLowerBppCheckForDsc)
|
||||
{
|
||||
tail->bandwidth.compound_query_state.timeslots_used_by_query -= linkConfig->slotsForPBN(base_pbn);
|
||||
tail->bandwidth.compound_query_state.bandwidthAllocatedForIndex &= ~(1 << compoundQueryCount);
|
||||
}
|
||||
SET_DP_IMP_ERROR(pErrorCode, DP_IMP_ERROR_INSUFFICIENT_BANDWIDTH);
|
||||
}
|
||||
}
|
||||
tail = (DeviceImpl*)tail->getParent();
|
||||
}
|
||||
}
|
||||
|
||||
// If the compoundQueryResult is false, we need to reset the compoundQueryLocalLinkPBN
|
||||
if (!compoundQueryResult && this->bEnableLowerBppCheckForDsc)
|
||||
{
|
||||
compoundQueryLocalLinkPBN -= slots_pbn;
|
||||
}
|
||||
|
||||
return compoundQueryResult;
|
||||
}
|
||||
bool ConnectorImpl::compoundQueryAttachSST(Group * target,
|
||||
@ -1938,6 +1980,8 @@ bool ConnectorImpl::compoundQueryAttachSST(Group * target,
|
||||
warData.dpData.hBlank = modesetParams.modesetInfo.rasterWidth - modesetParams.modesetInfo.surfaceWidth;
|
||||
warData.dpData.dpMode = DSC_DP_SST;
|
||||
warData.connectorType = DSC_DP;
|
||||
warData.dpData.bDisableDscMaxBppLimit = bDisableDscMaxBppLimit;
|
||||
|
||||
if (main->isEDP())
|
||||
{
|
||||
warData.dpData.bIsEdp = true;
|
||||
@ -6067,7 +6111,6 @@ void ConnectorImpl::flushTimeslotsToHardware()
|
||||
|
||||
void ConnectorImpl::beforeDeleteStream(GroupImpl * group, bool forFlushMode)
|
||||
{
|
||||
|
||||
//
|
||||
// During flush entry, if the link is not trained, retrain
|
||||
// the link so that ACT can be ack'd by the sink.
|
||||
@ -6079,11 +6122,18 @@ void ConnectorImpl::beforeDeleteStream(GroupImpl * group, bool forFlushMode)
|
||||
// head is not actively driving pixels and this needs to be handled
|
||||
// differently .
|
||||
//
|
||||
if(forFlushMode && linkUseMultistream())
|
||||
if (forFlushMode && linkUseMultistream())
|
||||
{
|
||||
if(isLinkLost())
|
||||
{
|
||||
train(activeLinkConfig, false);
|
||||
if(!this->bDisable5019537Fix)
|
||||
{
|
||||
train(highestAssessedLC, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
train(activeLinkConfig, false);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -7307,8 +7357,11 @@ void ConnectorImpl::notifyShortPulse()
|
||||
{
|
||||
return;
|
||||
}
|
||||
//save the previous highest assessed LC
|
||||
|
||||
// Save the previous highest assessed LC
|
||||
LinkConfiguration previousAssessedLC = highestAssessedLC;
|
||||
// Save original active link configuration.
|
||||
LinkConfiguration originalActiveLinkConfig = activeLinkConfig;
|
||||
|
||||
if (main->isConnectorUSBTypeC() &&
|
||||
activeLinkConfig.bIs128b132bChannelCoding &&
|
||||
@ -7316,11 +7369,27 @@ void ConnectorImpl::notifyShortPulse()
|
||||
{
|
||||
if (activeLinkConfig.isValid() && enableFlush())
|
||||
{
|
||||
train(activeLinkConfig, true);
|
||||
if (!this->bDisable5019537Fix)
|
||||
{
|
||||
train(originalActiveLinkConfig, true);
|
||||
}
|
||||
else
|
||||
{
|
||||
train(activeLinkConfig, true);
|
||||
}
|
||||
disableFlush();
|
||||
}
|
||||
main->invalidateLinkRatesInFallbackTable(activeLinkConfig.peakRate);
|
||||
hal->overrideCableIdCap(activeLinkConfig.peakRate, false);
|
||||
|
||||
if (!this->bDisable5019537Fix)
|
||||
{
|
||||
main->invalidateLinkRatesInFallbackTable(originalActiveLinkConfig.peakRate);
|
||||
hal->overrideCableIdCap(originalActiveLinkConfig.peakRate, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
main->invalidateLinkRatesInFallbackTable(activeLinkConfig.peakRate);
|
||||
hal->overrideCableIdCap(activeLinkConfig.peakRate, false);
|
||||
}
|
||||
|
||||
highestAssessedLC = getMaxLinkConfig();
|
||||
|
||||
@ -7334,8 +7403,16 @@ void ConnectorImpl::notifyShortPulse()
|
||||
|
||||
if (activeLinkConfig.isValid() && enableFlush())
|
||||
{
|
||||
LinkConfiguration originalActiveLinkConfig = activeLinkConfig;
|
||||
if (!train(activeLinkConfig, false))
|
||||
bool bTrainSuccess = false;
|
||||
if (!this->bDisable5019537Fix)
|
||||
{
|
||||
bTrainSuccess = train(originalActiveLinkConfig, false);
|
||||
}
|
||||
else
|
||||
{
|
||||
bTrainSuccess = train(activeLinkConfig, false);
|
||||
}
|
||||
if (!bTrainSuccess)
|
||||
{
|
||||
//
|
||||
// If original link config could not be restored force
|
||||
@ -8210,6 +8287,7 @@ void ConnectorImpl::configInit()
|
||||
allocatedDpTunnelBwShadow = 0;
|
||||
bDP2XPreferNonDSCForLowPClk = false;
|
||||
bForceHeadShutdownPerMonitor = false;
|
||||
bDisableDscMaxBppLimit = false;
|
||||
}
|
||||
|
||||
bool ConnectorImpl::dpUpdateDscStream(Group *target, NvU32 dscBpp)
|
||||
|
@ -1713,5 +1713,10 @@ void ConnectorImpl2x::handleEdidWARs(Edid & edid, DiscoveryManager::Device & dev
|
||||
bForceHeadShutdownPerMonitor = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (edid.WARFlags.bDisableDscMaxBppLimit)
|
||||
{
|
||||
bDisableDscMaxBppLimit = true;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -104,7 +104,8 @@ const struct
|
||||
{NV_DP_REGKEY_DISABLE_DOWNSPREAD, &dpRegkeyDatabase.bDownspreadDisabled, DP_REG_VAL_BOOL},
|
||||
{NV_DP_REGKEY_SKIP_ZERO_OUI_CACHE, &dpRegkeyDatabase.bSkipZeroOuiCache, DP_REG_VAL_BOOL},
|
||||
{NV_DP_REGKEY_DISABLE_FIX_FOR_5019537, &dpRegkeyDatabase.bDisable5019537Fix, DP_REG_VAL_BOOL},
|
||||
{NV_DP_REGKEY_FORCE_HEAD_SHUTDOWN, &dpRegkeyDatabase.bForceHeadShutdown, DP_REG_VAL_BOOL}
|
||||
{NV_DP_REGKEY_FORCE_HEAD_SHUTDOWN, &dpRegkeyDatabase.bForceHeadShutdown, DP_REG_VAL_BOOL},
|
||||
{NV_DP_REGKEY_ENABLE_LOWER_BPP_CHECK_FOR_DSC, &dpRegkeyDatabase.bEnableLowerBppCheckForDsc, DP_REG_VAL_BOOL}
|
||||
};
|
||||
|
||||
EvoMainLink::EvoMainLink(EvoInterface * provider, Timer * timer) :
|
||||
|
@ -596,6 +596,11 @@ void Edid::applyEdidWorkArounds(NvU32 warFlag, const DpMonitorDenylistData *pDen
|
||||
DP_PRINTF(DP_NOTICE, "DP-WAR> Panel incorrectly exposing DSC capability. Ignoring it.");
|
||||
DP_PRINTF(DP_NOTICE, "DP-WAR> Bug 3543158");
|
||||
}
|
||||
else if (ProductID == 0x5B9A)
|
||||
{
|
||||
this->WARFlags.bDisableDscMaxBppLimit = true;
|
||||
DP_PRINTF(DP_NOTICE, "DP-WAR> Disable DSC max BPP limit of 16 for DSC.");
|
||||
}
|
||||
break;
|
||||
case 0xB306:
|
||||
if (ProductID == 0x3228)
|
||||
|
@ -36,25 +36,25 @@
|
||||
// and then checked back in. You cannot make changes to these sections without
|
||||
// corresponding changes to the buildmeister script
|
||||
#ifndef NV_BUILD_BRANCH
|
||||
#define NV_BUILD_BRANCH r572_46
|
||||
#define NV_BUILD_BRANCH r572_77
|
||||
#endif
|
||||
#ifndef NV_PUBLIC_BRANCH
|
||||
#define NV_PUBLIC_BRANCH r572_46
|
||||
#define NV_PUBLIC_BRANCH r572_77
|
||||
#endif
|
||||
|
||||
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
|
||||
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r570/r572_46-344"
|
||||
#define NV_BUILD_CHANGELIST_NUM (35599303)
|
||||
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r570/r572_77-376"
|
||||
#define NV_BUILD_CHANGELIST_NUM (35688848)
|
||||
#define NV_BUILD_TYPE "Official"
|
||||
#define NV_BUILD_NAME "rel/gpu_drv/r570/r572_46-344"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35599303)
|
||||
#define NV_BUILD_NAME "rel/gpu_drv/r570/r572_77-376"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35688848)
|
||||
|
||||
#else /* Windows builds */
|
||||
#define NV_BUILD_BRANCH_VERSION "r572_46-7"
|
||||
#define NV_BUILD_CHANGELIST_NUM (35597621)
|
||||
#define NV_BUILD_BRANCH_VERSION "r572_77-2"
|
||||
#define NV_BUILD_CHANGELIST_NUM (35681611)
|
||||
#define NV_BUILD_TYPE "Official"
|
||||
#define NV_BUILD_NAME "572.61"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35597621)
|
||||
#define NV_BUILD_NAME "572.80"
|
||||
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35681611)
|
||||
#define NV_BUILD_BRANCH_BASE_VERSION R570
|
||||
#endif
|
||||
// End buildmeister python edited section
|
||||
|
@ -4,7 +4,7 @@
|
||||
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
|
||||
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
|
||||
|
||||
#define NV_VERSION_STRING "570.124.06"
|
||||
#define NV_VERSION_STRING "570.133.07"
|
||||
|
||||
#else
|
||||
|
||||
|
@ -2347,6 +2347,7 @@ DSC_GeneratePPS
    in->bits_per_component = pModesetInfo->bitsPerComponent;
    in->linebuf_depth = MIN((pDscInfo->sinkCaps.lineBufferBitDepth), (pDscInfo->gpuCaps.lineBufferBitDepth));
    in->block_pred_enable = pDscInfo->sinkCaps.bBlockPrediction;
    in->multi_tile = (pDscInfo->gpuCaps.maxNumHztSlices > 4U) ? 1 : 0;

    switch (pModesetInfo->colorFormat)
    {
@ -2526,8 +2527,9 @@ DSC_GeneratePPS
    // because of architectural limitation we can't use bits_per_pixel
    // more than 16.
    //
    if ((pModesetInfo->bDualMode || (pDscInfo->gpuCaps.maxNumHztSlices > 4U)) &&
        (in->bits_per_pixel > 256 /*bits_per_pixel = 16*/))
    if ((pModesetInfo->bDualMode ||
         (in->multi_tile && (!pWARData || (pWARData && !pWARData->dpData.bDisableDscMaxBppLimit))))
        && (in->bits_per_pixel > 256 /*bits_per_pixel = 16*/))
    {
        ret = NVT_STATUS_INVALID_BPP;
        goto done;
@ -2547,8 +2549,9 @@ DSC_GeneratePPS
|
||||
// because of architectural limitation we can't use bits_per_pixel more
|
||||
// than 16. So forcing it to 16.
|
||||
//
|
||||
if ((pModesetInfo->bDualMode || (pDscInfo->gpuCaps.maxNumHztSlices > 4U)) &&
|
||||
(in->bits_per_pixel > 256 /*bits_per_pixel = 16*/))
|
||||
if ((pModesetInfo->bDualMode ||
|
||||
(in->multi_tile && (!pWARData || (pWARData && !pWARData->dpData.bDisableDscMaxBppLimit))))
|
||||
&& (in->bits_per_pixel > 256 /*bits_per_pixel = 16*/))
|
||||
{
|
||||
// ERROR - DSC Dual Mode, because of architectural limitation we can't use bits_per_pixel more than 16.
|
||||
// ERROR - Forcing it to 16.
|
||||
@ -2590,7 +2593,6 @@ DSC_GeneratePPS
|
||||
in->pixel_clkMHz = (NvU32)(pModesetInfo->pixelClockHz / 1000000L);
|
||||
in->dual_mode = pModesetInfo->bDualMode;
|
||||
in->drop_mode = pModesetInfo->bDropMode;
|
||||
in->multi_tile = (pDscInfo->gpuCaps.maxNumHztSlices > 4U) ? 1 : 0;
|
||||
in->slice_count_mask = pDscInfo->sinkCaps.sliceCountSupportedMask;
|
||||
in->peak_throughput_mode0 = pDscInfo->sinkCaps.peakThroughputMode0;
|
||||
in->peak_throughput_mode1 = pDscInfo->sinkCaps.peakThroughputMode1;
|
||||
|
@ -254,6 +254,7 @@ typedef struct
|
||||
DSC_DP_MODE dpMode;
|
||||
NvU32 hBlank;
|
||||
NvBool bIsEdp;
|
||||
NvBool bDisableDscMaxBppLimit;
|
||||
NvBool bIs128b132bChannelCoding;
|
||||
}dpData;
|
||||
} WAR_DATA;
|
||||
|
@@ -8230,6 +8230,12 @@ nvswitch_initialize_interrupt_tree_ls10

// NVLIPT
_nvswitch_initialize_nvlipt_interrupts_ls10(device);

// Disable non-fatal and legacy interrupts in TNVL mode
if (nvswitch_is_tnvl_mode_enabled(device))
{
nvswitch_tnvl_disable_interrupts(device);
}
}

//

@@ -1250,6 +1250,14 @@ nvswitch_tnvl_disable_interrupts_ls10
nvswitch_device *device
)
{

if (!nvswitch_is_tnvl_mode_enabled(device))
{
NVSWITCH_PRINT_SXID(device, NVSWITCH_ERR_HW_HOST_TNVL_ERROR,
"Failed to disable non-fatal/legacy interrupts. TNVL mode is not enabled\n");
return;
}

//
// In TNVL locked disable non-fatal NVLW, NPG, and legacy interrupt,
// disable additional non-fatals on those partitions.
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

@@ -3437,6 +3437,18 @@ typedef struct NV2080_CTRL_INTERNAL_FIFO_GET_NUM_SECURE_CHANNELS_PARAMS {
NvU32 maxCeSecureChannels;
} NV2080_CTRL_INTERNAL_FIFO_GET_NUM_SECURE_CHANNELS_PARAMS;

/*!
* NV2080_CTRL_CMD_INTERNAL_PERF_PFM_REQ_HNDLR_PRH_DEPENDENCY_CHECK
*
* This command checks if all the dependant modules to PRH have been initialized.
*
* Possible status values returned are:
*   NV_OK
*   NV_ERR_INVALID_STATE
*   NV_ERR_NOT_SUPPORTED
*/
#define NV2080_CTRL_CMD_INTERNAL_PERF_PFM_REQ_HNDLR_PRH_DEPENDENCY_CHECK (0x20800a18) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_INTERNAL_INTERFACE_ID << 8) | 0x18" */

/*
* NV2080_CTRL_CMD_INTERNAL_BIF_DISABLE_SYSTEM_MEMORY_ACCESS
*

@@ -3454,7 +3466,7 @@ typedef struct NV2080_CTRL_INTERNAL_FIFO_GET_NUM_SECURE_CHANNELS_PARAMS {
* NV_ERR_INVALID_ARGUMENT
* NV_ERR_NOT_SUPPORTED
*/
#define NV2080_CTRL_CMD_INTERNAL_BIF_DISABLE_SYSTEM_MEMORY_ACCESS (0x20800adb) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_INTERNAL_INTERFACE_ID << 8) | NV2080_CTRL_INTERNAL_BIF_DISABLE_SYSTEM_MEMORY_ACCESS_PARAMS_MESSAGE_ID" */
#define NV2080_CTRL_CMD_INTERNAL_BIF_DISABLE_SYSTEM_MEMORY_ACCESS (0x20800adb) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_INTERNAL_INTERFACE_ID << 8) | NV2080_CTRL_INTERNAL_BIF_DISABLE_SYSTEM_MEMORY_ACCESS_PARAMS_MESSAGE_ID" */

#define NV2080_CTRL_INTERNAL_BIF_DISABLE_SYSTEM_MEMORY_ACCESS_PARAMS_MESSAGE_ID (0xDBU)
@@ -149,8 +149,11 @@
#define RESERVED8_ERROR (153)
#define GPU_RECOVERY_ACTION_CHANGED (154)
#define NVLINK_SW_DEFINED_ERROR (155)
#define ROBUST_CHANNEL_LAST_ERROR (157)

#define RESOURCE_RETIREMENT_EVENT (156)
#define RESOURCE_RETIREMENT_FAILURE (157)
#define CHANNEL_RETIREMENT_EVENT (160)
#define CHANNEL_RETIREMENT_FAILURE (161)
#define ROBUST_CHANNEL_LAST_ERROR (161)

// Indexed CE reference
#define ROBUST_CHANNEL_CE_ERROR(x) \

@@ -159,6 +159,7 @@ NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_NOT_READY, 0x00000081, "Nvlink Fabri
NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_FAILURE, 0x00000082, "Nvlink Fabric Probe failed")
NV_STATUS_CODE(NV_ERR_GPU_MEMORY_ONLINING_FAILURE, 0x00000083, "GPU Memory Onlining failed")
NV_STATUS_CODE(NV_ERR_REDUCTION_MANAGER_NOT_AVAILABLE, 0x00000084, "Reduction Manager is not available")
NV_STATUS_CODE(NV_ERR_RESOURCE_RETIREMENT_ERROR, 0x00000086, "An error occurred while trying to retire a resource")

// Warnings:
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")
@@ -615,6 +615,25 @@ ENTRY(0x2238, 0x16B7, 0x10de, "NVIDIA A10M-5C"),
ENTRY(0x2238, 0x16B8, 0x10de, "NVIDIA A10M-10C"),
ENTRY(0x2238, 0x16B9, 0x10de, "NVIDIA A10M-20C"),
ENTRY(0x2238, 0x16E6, 0x10de, "NVIDIA A10M-1"),
ENTRY(0x230E, 0x20F5, 0x10de, "NVIDIA H20L-1-15CME"),
ENTRY(0x230E, 0x20F6, 0x10de, "NVIDIA H20L-1-15C"),
ENTRY(0x230E, 0x20F7, 0x10de, "NVIDIA H20L-1-30C"),
ENTRY(0x230E, 0x20F8, 0x10de, "NVIDIA H20L-2-30C"),
ENTRY(0x230E, 0x20F9, 0x10de, "NVIDIA H20L-3-60C"),
ENTRY(0x230E, 0x20FA, 0x10de, "NVIDIA H20L-4-60C"),
ENTRY(0x230E, 0x20FB, 0x10de, "NVIDIA H20L-7-120C"),
ENTRY(0x230E, 0x20FC, 0x10de, "NVIDIA H20L-4C"),
ENTRY(0x230E, 0x20FD, 0x10de, "NVIDIA H20L-5C"),
ENTRY(0x230E, 0x20FE, 0x10de, "NVIDIA H20L-6C"),
ENTRY(0x230E, 0x20FF, 0x10de, "NVIDIA H20L-8C"),
ENTRY(0x230E, 0x2100, 0x10de, "NVIDIA H20L-10C"),
ENTRY(0x230E, 0x2101, 0x10de, "NVIDIA H20L-12C"),
ENTRY(0x230E, 0x2102, 0x10de, "NVIDIA H20L-15C"),
ENTRY(0x230E, 0x2103, 0x10de, "NVIDIA H20L-20C"),
ENTRY(0x230E, 0x2104, 0x10de, "NVIDIA H20L-30C"),
ENTRY(0x230E, 0x2105, 0x10de, "NVIDIA H20L-40C"),
ENTRY(0x230E, 0x2106, 0x10de, "NVIDIA H20L-60C"),
ENTRY(0x230E, 0x2107, 0x10de, "NVIDIA H20L-120C"),
ENTRY(0x2321, 0x1853, 0x10de, "NVIDIA H100L-1-12CME"),
ENTRY(0x2321, 0x1854, 0x10de, "NVIDIA H100L-1-12C"),
ENTRY(0x2321, 0x1855, 0x10de, "NVIDIA H100L-1-24C"),

@@ -17,6 +17,7 @@ static inline void _get_chip_id_for_alias_pgpu(NvU32 *dev_id, NvU32 *subdev_id)
{ 0x20B7, 0x1804, 0x20B7, 0x1532 },
{ 0x20B9, 0x157F, 0x20B7, 0x1532 },
{ 0x20FD, 0x17F8, 0x20F5, 0x0 },
{ 0x230E, 0x20DF, 0x230E, 0x20DF },
{ 0x2324, 0x17A8, 0x2324, 0x17A6 },
{ 0x2329, 0x198C, 0x2329, 0x198B },
{ 0x232C, 0x2064, 0x232C, 0x2063 },
@@ -119,6 +120,13 @@ static const struct {
{0x20F610DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1094}, // GRID A800-4-20C
{0x20F610DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1095}, // GRID A800-7-40C
{0x20F610DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1091}, // GRID A800-1-10C
{0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1461}, // NVIDIA H20L-1-15CME
{0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1462}, // NVIDIA H20L-1-15C
{0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1463}, // NVIDIA H20L-1-30C
{0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_QUARTER_GPU , 1464}, // NVIDIA H20L-2-30C
{0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1465}, // NVIDIA H20L-3-60C
{0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1466}, // NVIDIA H20L-4-60C
{0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1467}, // NVIDIA H20L-7-120C
{0x232110DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1061}, // NVIDIA H100L-1-12CME
{0x232110DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1062}, // NVIDIA H100L-1-12C
{0x232110DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1063}, // NVIDIA H100L-1-24C
@@ -690,6 +690,10 @@ void nvEvo1SendHdmiInfoFrame(const NVDispEvoRec *pDispEvo,
nvkms_memcpy(&infoframe[1], &((const NvU8*) pInfoFrameHeader)[1],
headerSize - 1);

/* copy the payload, starting after the 3-byte header and checksum */
nvkms_memcpy(&infoframe[headerSize + (needChecksum ? sizeof(checksum) : 0)],
pPayload, infoframeSize - headerSize /* payload size */);

/*
* XXX Redundant since needsChecksum implies
* _HDMI_PKT_TRANSMIT_CTRL_CHKSUM_HW_EN via

@@ -705,10 +709,6 @@ void nvEvo1SendHdmiInfoFrame(const NVDispEvoRec *pDispEvo,
infoframe[headerSize] = ~checksum + 1;
}

/* copy the payload, starting after the 3-byte header and checksum */
nvkms_memcpy(&infoframe[headerSize + (needChecksum ? sizeof(checksum) : 0)],
pPayload, infoframeSize - headerSize /* payload size */);

ret = NvHdmiPkt_PacketWrite(pDevEvo->hdmiLib.handle,
pDispEvo->displayOwner,
pHeadState->activeRmId,

@@ -2521,7 +2521,8 @@ static NvBool ConstructAdvancedInfoFramePacket(
* XXX If required, add support for the large infoframe with
* multiple infoframes grouped together.
*/
nvAssert((infoframeSize + (needChecksum ? 1 : 0)) <= packetLen);
nvAssert((infoframeSize + 1 /* + HB3 */ + (needChecksum ? 1 : 0)) <=
packetLen);

pPacket[0] = hdmiPacketType; /* HB0 */
@@ -2554,10 +2555,8 @@ static NvBool ConstructAdvancedInfoFramePacket(
if (needChecksum) {
pPacket[4] = 0; /* PB0: checksum */

/*
* XXX Redundant since we always call with swChecksum=FALSE and
* _HDMI_PKT_TRANSMIT_CTRL_CHKSUM_HW_EN
*/
nvkms_memcpy(&pPacket[5], pPayload, payloadLen); /* PB1~ */

if (swChecksum) {
NvU8 checksum = 0;

@@ -2566,8 +2565,6 @@ static NvBool ConstructAdvancedInfoFramePacket(
}
pPacket[4] = ~checksum + 1;
}

nvkms_memcpy(&pPacket[5], pPayload, payloadLen); /* PB1~ */
} else {
nvAssert(!swChecksum);
nvkms_memcpy(&pPacket[4], pPayload, payloadLen); /* PB0~ */

@@ -2587,6 +2584,7 @@ static void SendHdmiInfoFrameCA(const NVDispEvoRec *pDispEvo,
NVHDMIPKT_TYPE hdmiLibType;
NVHDMIPKT_RESULT ret;
ADVANCED_INFOFRAME advancedInfoFrame = { };
NvBool swChecksum;
/*
* These structures are weird. The NVT_VIDEO_INFOFRAME,
* NVT_VENDOR_SPECIFIC_INFOFRAME,
@@ -2616,10 +2614,21 @@ static void SendHdmiInfoFrameCA(const NVDispEvoRec *pDispEvo,
advancedInfoFrame.location = INFOFRAME_CTRL_LOC_VBLANK;
advancedInfoFrame.hwChecksum = needChecksum;

// Large infoframes are incompatible with hwChecksum
nvAssert(!(advancedInfoFrame.isLargeInfoframe &&
advancedInfoFrame.hwChecksum));

// XXX WAR bug 5124145 by always computing checksum in software if needed.
swChecksum = needChecksum;

// If we need a checksum: hwChecksum, swChecksum, or both must be enabled.
nvAssert(!needChecksum ||
(advancedInfoFrame.hwChecksum || swChecksum));

if (!ConstructAdvancedInfoFramePacket(pInfoFrameHeader,
infoFrameSize,
needChecksum,
FALSE /* swChecksum */,
swChecksum,
packet,
sizeof(packet))) {
return;
@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES
* SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a

@@ -33,6 +33,7 @@
// management partition and CPU-RM/other uprocs.
//

#define NVDM_TYPE_RESET 0x4
#define NVDM_TYPE_HULK 0x11
#define NVDM_TYPE_FIRMWARE_UPDATE 0x12
#define NVDM_TYPE_PRC 0x13

@@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2000-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2000-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@@ -211,21 +211,18 @@
// to any specific hardware.
//
//
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0 0x000000C8
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_ID 7:0
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_NEXT 15:8
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_LENGTH 23:16
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_SIG_LO 31:24
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1 0x000000CC
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_SIG_HI 15:0
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_VERSION 18:16
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_PEER_CLIQUE_ID 22:19
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RELAXED_ORDERING 23:23
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RELAXED_ORDERING_DEFAULT 0x00000000
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RELAXED_ORDERING_DISABLE 0x00000001
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RSVD 31:24
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0 0x000000C8
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_ID 7:0
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_NEXT 15:8
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_LENGTH 23:16
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_SIG_LO 31:24
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1 0x000000CC
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_SIG_HI 15:0
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_VERSION 18:16
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_PEER_CLIQUE_ID 22:19
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RSVD 31:23

#define NV_PCI_VIRTUAL_P2P_APPROVAL_SIGNATURE 0x00503250
#define NV_PCI_VIRTUAL_P2P_APPROVAL_SIGNATURE 0x00503250

// Chipset-specific definitions.
// Intel SantaRosa definitions
@@ -498,6 +498,9 @@ typedef struct nv_state_t
NvU32 dispIsoStreamId;
NvU32 dispNisoStreamId;
} iommus;

/* Console is managed by drm drivers or NVKMS */
NvBool client_managed_console;
} nv_state_t;

#define NVFP_TYPE_NONE 0x0

@@ -542,9 +545,9 @@ typedef struct UvmGpuNvlinkInfo_tag *nvgpuNvlinkInfo_t;
typedef struct UvmGpuEccInfo_tag *nvgpuEccInfo_t;
typedef struct UvmGpuFaultInfo_tag *nvgpuFaultInfo_t;
typedef struct UvmGpuAccessCntrInfo_tag *nvgpuAccessCntrInfo_t;
typedef struct UvmGpuAccessCntrConfig_tag *nvgpuAccessCntrConfig_t;
typedef struct UvmGpuInfo_tag nvgpuInfo_t;
typedef struct UvmGpuClientInfo_tag nvgpuClientInfo_t;
typedef struct UvmGpuAccessCntrConfig_tag nvgpuAccessCntrConfig_t;
typedef struct UvmGpuInfo_tag nvgpuInfo_t;
typedef struct UvmGpuClientInfo_tag nvgpuClientInfo_t;
typedef struct UvmPmaAllocationOptions_tag *nvgpuPmaAllocationOptions_t;
typedef struct UvmPmaStatistics_tag *nvgpuPmaStatistics_t;
typedef struct UvmGpuMemoryInfo_tag *nvgpuMemoryInfo_t;
@@ -2398,7 +2398,7 @@ NV_STATUS NV_API_CALL rm_power_management(
// For GPU driving console, disable console access here, to ensure no console
// writes through BAR1 can interfere with physical RM's setup of BAR1
//
if (rm_get_uefi_console_status(pNv))
if (pNv->client_managed_console)
{
os_disable_console_access();
bConsoleDisabled = NV_TRUE;

@@ -5555,3 +5555,21 @@ void osAllocatedRmClient(void *pOsInfo)
if (nvfp != NULL)
nvfp->bCleanupRmapi = NV_TRUE;
}

/*!
* @brief Update variable to indicate console managed by drm driver.
*
* @param[in] OBJGPU GPU object pointer
*
* @returns void
*/
void
osDisableConsoleManagement
(
OBJGPU *pGpu
)
{
nv_state_t *nv = NV_GET_NV_STATE(pGpu);

nv->client_managed_console = NV_TRUE;
}
@@ -913,7 +913,6 @@ static void
RmDeterminePrimaryDevice(OBJGPU *pGpu)
{
nv_state_t *nv = NV_GET_NV_STATE(pGpu);
NvBool bFrameBufferConsoleDevice = NV_FALSE;

// Skip updating nv->primary_vga while RM is recovering after GPU reset
if (nv->flags & NV_FLAG_IN_RECOVERY)

@@ -946,15 +945,15 @@ RmDeterminePrimaryDevice(OBJGPU *pGpu)

//
// If GPU is driving any frame buffer console(vesafb, efifb etc)
// mark the GPU as Primary.
// mark the console as client driven and GPU as Primary.
//
bFrameBufferConsoleDevice = rm_get_uefi_console_status(nv);
nv->client_managed_console = rm_get_uefi_console_status(nv);

NV_DEV_PRINTF(NV_DBG_SETUP, nv, " is %s UEFI console device\n",
bFrameBufferConsoleDevice ? "primary" : "not primary");
nv->client_managed_console ? "primary" : "not primary");

pGpu->setProperty(pGpu, PDB_PROP_GPU_PRIMARY_DEVICE,
(bFrameBufferConsoleDevice || !!nv->primary_vga));
(nv->client_managed_console || !!nv->primary_vga));
}

static void
@@ -1839,7 +1838,7 @@ NvBool RmInitAdapter(
// For GPU driving console, disable console access here, to ensure no console
// writes through BAR1 can interfere with physical RM's setup of BAR1
//
if (rm_get_uefi_console_status(nv))
if (nv->client_managed_console)
{
os_disable_console_access();
consoleDisabled = NV_TRUE;

@@ -87,7 +87,7 @@ RmSaveDisplayState
NV2080_CTRL_CMD_INTERNAL_DISPLAY_PRE_UNIX_CONSOLE_PARAMS preUnixConsoleParams = {0};
NV2080_CTRL_CMD_INTERNAL_DISPLAY_POST_UNIX_CONSOLE_PARAMS postUnixConsoleParams = {0};

if (IS_VIRTUAL(pGpu) || pKernelDisplay == NULL)
if (IS_VIRTUAL(pGpu) || (pKernelDisplay == NULL) || nv->client_managed_console)
{
return;
}

@@ -157,20 +157,12 @@ static void RmRestoreDisplayState
NV2080_CTRL_CMD_INTERNAL_DISPLAY_PRE_UNIX_CONSOLE_PARAMS preUnixConsoleParams = {0};
NV2080_CTRL_CMD_INTERNAL_DISPLAY_POST_UNIX_CONSOLE_PARAMS postUnixConsoleParams = {0};

NV_ASSERT_OR_RETURN_VOID(pKernelDisplay != NULL);

//
// vGPU:
// Since vGPU does all real hardware management in the host,
// there is nothing to do at this point in the guest OS.
//
// Since vGPU does all real hardware management in the
// host, there is nothing to do at this point in the
// guest OS (where IS_VIRTUAL(pGpu) is true).
//
if (IS_VIRTUAL(pGpu))
if (IS_VIRTUAL(pGpu) || (pKernelDisplay == NULL) || nv->client_managed_console)
{
// we don't have VGA state that's needing to be restored.
NV_PRINTF(LEVEL_INFO, "skipping RestoreDisplayState on VGPU (0x%x)\n",
pGpu->gpuId);
return;
}
Some files were not shown because too many files have changed in this diff.