570.133.07

Author: Bernhard Stoeckner
Date:   2025-03-19 14:13:05 +01:00
Parent: 25bef4626e
Commit: c5e439fea4
GPG Key ID: 7D23DC2750FAC2E1
146 changed files with 53971 additions and 54755 deletions
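
To inspect this release bump locally, plain git against a clone of the source tree is enough; the commit and parent hashes are the ones listed above, while the clone URL below is an assumption (adjust it to wherever you track these sources):

```sh
# Clone the open GPU kernel module sources (URL assumed) and enter the tree
git clone https://github.com/NVIDIA/open-gpu-kernel-modules.git
cd open-gpu-kernel-modules

# Commit message plus per-file change summary for this release commit
git show --stat c5e439fea4

# Full README.md diff between the parent and this commit
git diff 25bef4626e c5e439fea4 -- README.md
```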

README.md (1537 lines changed)

@@ -1,7 +1,7 @@
# NVIDIA Linux Open GPU Kernel Module Source
This is the source release of the NVIDIA Linux open GPU kernel modules,
-version 570.124.06.
+version 570.133.07.
## How to Build
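
The build steps themselves sit outside this hunk. As a rough sketch only (not quoted from the diff; check the full "How to Build" section for the authoritative commands), building and installing the modules from the repository root usually looks like this:

```sh
# Build the kernel modules against the currently running kernel
make modules -j"$(nproc)"

# Install the built modules (run as root) and refresh module dependencies
sudo make modules_install -j"$(nproc)"
sudo depmod
```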
@@ -17,7 +17,7 @@ as root:
Note that the kernel modules built here must be used with GSP
firmware and user-space NVIDIA GPU driver components from a corresponding
-570.124.06 driver release. This can be achieved by installing
+570.133.07 driver release. This can be achieved by installing
the NVIDIA GPU driver from the .run file using the `--no-kernel-modules`
option. E.g.,
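
The example invocation itself is cut off by the hunk boundary. A minimal sketch of that installer call, assuming the standard .run package name for an x86_64 570.133.07 release:

```sh
# Install only the user-space driver components and GSP firmware; the
# --no-kernel-modules option skips the installer's own kernel modules so the
# ones built from this source tree are used instead.
sh ./NVIDIA-Linux-x86_64-570.133.07.run --no-kernel-modules
```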
@@ -185,7 +185,7 @@ table below).
For details on feature support and limitations, see the NVIDIA GPU driver
end user README here:
-https://us.download.nvidia.com/XFree86/Linux-x86_64/570.124.06/README/kernel_open.html
+https://us.download.nvidia.com/XFree86/Linux-x86_64/570.133.07/README/kernel_open.html
For vGPU support, please refer to the README.vgpu packaged in the vGPU Host
Package for more details.
@@ -194,761 +194,776 @@ In the below table, if three IDs are listed, the first is the PCI Device
ID, the second is the PCI Subsystem Vendor ID, and the third is the PCI
Subsystem Device ID.
| Product Name | PCI ID |
| ----------------------------------------------- | -------------- |
| NVIDIA TITAN RTX | 1E02 |
| NVIDIA GeForce RTX 2080 Ti | 1E04 |
| NVIDIA GeForce RTX 2080 Ti | 1E07 |
| NVIDIA CMP 50HX | 1E09 |
| Quadro RTX 6000 | 1E30 |
| Quadro RTX 8000 | 1E30 1028 129E |
| Quadro RTX 8000 | 1E30 103C 129E |
| Quadro RTX 8000 | 1E30 10DE 129E |
| Quadro RTX 6000 | 1E36 |
| Quadro RTX 8000 | 1E78 10DE 13D8 |
| Quadro RTX 6000 | 1E78 10DE 13D9 |
| NVIDIA GeForce RTX 2080 SUPER | 1E81 |
| NVIDIA GeForce RTX 2080 | 1E82 |
| NVIDIA GeForce RTX 2070 SUPER | 1E84 |
| NVIDIA GeForce RTX 2080 | 1E87 |
| NVIDIA GeForce RTX 2060 | 1E89 |
| NVIDIA GeForce RTX 2080 | 1E90 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1025 1375 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 08A1 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 08A2 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 08EA |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 08EB |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 08EC |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 08ED |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 08EE |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 08EF |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 093B |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1028 093C |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 103C 8572 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 103C 8573 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 103C 8602 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 103C 8606 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 103C 86C6 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 103C 86C7 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 103C 87A6 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 103C 87A7 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1043 131F |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1043 137F |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1043 141F |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1043 1751 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1458 1660 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1458 1661 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1458 1662 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1458 75A6 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1458 75A7 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1458 86A6 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1458 86A7 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1462 1274 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1462 1277 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 152D 1220 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1558 95E1 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1558 97E1 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1A58 2002 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1A58 2005 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1A58 2007 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1A58 3000 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1A58 3001 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1E90 1D05 1069 |
| NVIDIA GeForce RTX 2070 Super | 1E91 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 103C 8607 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 103C 8736 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 103C 8738 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 103C 8772 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 103C 878A |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 103C 878B |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1043 1E61 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 1511 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 75B3 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 75B4 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 76B2 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 76B3 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 78A2 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 78A3 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 86B2 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1458 86B3 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1462 12AE |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1462 12B0 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1462 12C6 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 17AA 22C3 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 17AA 22C5 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1A58 2009 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1A58 200A |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 1A58 3002 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1E91 8086 3012 |
| NVIDIA GeForce RTX 2080 Super | 1E93 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1025 1401 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1025 149C |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1028 09D2 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 103C 8607 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 103C 86C7 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 103C 8736 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 103C 8738 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 103C 8772 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 103C 87A6 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 103C 87A7 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1458 75B1 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1458 75B2 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1458 76B0 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1458 76B1 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1458 78A0 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1458 78A1 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1458 86B0 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1458 86B1 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1462 12AE |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1462 12B0 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1462 12B4 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1462 12C6 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1558 50D3 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1558 70D1 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 17AA 22C3 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 17AA 22C5 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1A58 2009 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1A58 200A |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1A58 3002 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1E93 1D05 1089 |
| Quadro RTX 5000 | 1EB0 |
| Quadro RTX 4000 | 1EB1 |
| Quadro RTX 5000 | 1EB5 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 1025 1375 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 1025 1401 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 1025 149C |
| Quadro RTX 5000 with Max-Q Design | 1EB5 1028 09C3 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 103C 8736 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 103C 8738 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 103C 8772 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 103C 8780 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 103C 8782 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 103C 8783 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 103C 8785 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 1043 1DD1 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 1462 1274 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 1462 12B0 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 1462 12C6 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 17AA 22B8 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 17AA 22BA |
| Quadro RTX 5000 with Max-Q Design | 1EB5 1A58 2005 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 1A58 2007 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 1A58 2008 |
| Quadro RTX 5000 with Max-Q Design | 1EB5 1A58 200A |
| Quadro RTX 4000 | 1EB6 |
| Quadro RTX 4000 with Max-Q Design | 1EB6 1028 09C3 |
| Quadro RTX 4000 with Max-Q Design | 1EB6 103C 8736 |
| Quadro RTX 4000 with Max-Q Design | 1EB6 103C 8738 |
| Quadro RTX 4000 with Max-Q Design | 1EB6 103C 8772 |
| Quadro RTX 4000 with Max-Q Design | 1EB6 103C 8780 |
| Quadro RTX 4000 with Max-Q Design | 1EB6 103C 8782 |
| Quadro RTX 4000 with Max-Q Design | 1EB6 103C 8783 |
| Quadro RTX 4000 with Max-Q Design | 1EB6 103C 8785 |
| Quadro RTX 4000 with Max-Q Design | 1EB6 1462 1274 |
| Quadro RTX 4000 with Max-Q Design | 1EB6 1462 1277 |
| Quadro RTX 4000 with Max-Q Design | 1EB6 1462 12B0 |
| Quadro RTX 4000 with Max-Q Design | 1EB6 1462 12C6 |
| Quadro RTX 4000 with Max-Q Design | 1EB6 17AA 22B8 |
| Quadro RTX 4000 with Max-Q Design | 1EB6 17AA 22BA |
| NVIDIA GeForce RTX 2070 SUPER | 1EC2 |
| NVIDIA GeForce RTX 2070 SUPER | 1EC7 |
| NVIDIA GeForce RTX 2080 | 1ED0 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 1025 132D |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 1028 08ED |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 1028 08EE |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 1028 08EF |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 103C 8572 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 103C 8573 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 103C 8600 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 103C 8605 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 1043 138F |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 1043 15C1 |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 17AA 3FEE |
| NVIDIA GeForce RTX 2080 with Max-Q Design | 1ED0 17AA 3FFE |
| NVIDIA GeForce RTX 2070 Super | 1ED1 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1ED1 1025 1432 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1ED1 103C 8746 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1ED1 103C 878A |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1ED1 1043 165F |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1ED1 144D C192 |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1ED1 17AA 3FCE |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1ED1 17AA 3FCF |
| NVIDIA GeForce RTX 2070 Super with Max-Q Design | 1ED1 17AA 3FD0 |
| NVIDIA GeForce RTX 2080 Super | 1ED3 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 1025 1432 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 1028 09D1 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 103C 8746 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 103C 878A |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 1043 1D61 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 1043 1E51 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 1043 1F01 |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 17AA 3FCE |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 17AA 3FCF |
| NVIDIA GeForce RTX 2080 Super with Max-Q Design | 1ED3 17AA 3FD0 |
| Quadro RTX 5000 | 1EF5 |
| NVIDIA GeForce RTX 2070 | 1F02 |
| NVIDIA GeForce RTX 2060 | 1F03 |
| NVIDIA GeForce RTX 2060 SUPER | 1F06 |
| NVIDIA GeForce RTX 2070 | 1F07 |
| NVIDIA GeForce RTX 2060 | 1F08 |
| NVIDIA GeForce GTX 1650 | 1F0A |
| NVIDIA CMP 40HX | 1F0B |
| NVIDIA GeForce RTX 2070 | 1F10 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1025 132D |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1025 1342 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 08A1 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 08A2 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 08EA |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 08EB |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 08EC |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 08ED |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 08EE |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 08EF |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 093B |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1028 093C |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 103C 8572 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 103C 8573 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 103C 8602 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 103C 8606 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1043 132F |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1043 136F |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1043 1881 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1043 1E6E |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 1658 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 1663 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 1664 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 75A4 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 75A5 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 86A4 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1458 86A5 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1462 1274 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1462 1277 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1558 95E1 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1558 97E1 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1A58 2002 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1A58 2005 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1A58 2007 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1A58 3000 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1A58 3001 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1D05 105E |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1D05 1070 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 1D05 2087 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F10 8086 2087 |
| NVIDIA GeForce RTX 2060 | 1F11 |
| NVIDIA GeForce RTX 2060 | 1F12 |
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 1028 098F |
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 103C 8741 |
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 103C 8744 |
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 103C 878E |
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 103C 880E |
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 1043 1E11 |
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 1043 1F11 |
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 1462 12D9 |
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 17AA 3801 |
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 17AA 3802 |
| NVIDIA GeForce RTX 2060 with Max-Q Design | 1F12 17AA 3803 |
| NVIDIA GeForce RTX 2070 | 1F14 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1025 1401 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1025 1432 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1025 1442 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1025 1446 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1025 147D |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1028 09E2 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1028 09F3 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 8607 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 86C6 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 86C7 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 8736 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 8738 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 8746 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 8772 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 878A |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 878B |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 87A6 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 103C 87A7 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1043 174F |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 1512 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 75B5 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 75B6 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 76B4 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 76B5 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 78A4 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 78A5 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 86B4 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1458 86B5 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1462 12AE |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1462 12B0 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1462 12C6 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1558 50D3 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1558 70D1 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1A58 200C |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1A58 2011 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F14 1A58 3002 |
| NVIDIA GeForce RTX 2060 | 1F15 |
| Quadro RTX 3000 | 1F36 |
| Quadro RTX 3000 with Max-Q Design | 1F36 1028 0990 |
| Quadro RTX 3000 with Max-Q Design | 1F36 103C 8736 |
| Quadro RTX 3000 with Max-Q Design | 1F36 103C 8738 |
| Quadro RTX 3000 with Max-Q Design | 1F36 103C 8772 |
| Quadro RTX 3000 with Max-Q Design | 1F36 1043 13CF |
| Quadro RTX 3000 with Max-Q Design | 1F36 1414 0032 |
| NVIDIA GeForce RTX 2060 SUPER | 1F42 |
| NVIDIA GeForce RTX 2060 SUPER | 1F47 |
| NVIDIA GeForce RTX 2070 | 1F50 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 1028 08ED |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 1028 08EE |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 1028 08EF |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 103C 8572 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 103C 8573 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 103C 8574 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 103C 8600 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 103C 8605 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 17AA 3FEE |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F50 17AA 3FFE |
| NVIDIA GeForce RTX 2060 | 1F51 |
| NVIDIA GeForce RTX 2070 | 1F54 |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F54 103C 878A |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F54 17AA 3FCE |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F54 17AA 3FCF |
| NVIDIA GeForce RTX 2070 with Max-Q Design | 1F54 17AA 3FD0 |
| NVIDIA GeForce RTX 2060 | 1F55 |
| Quadro RTX 3000 | 1F76 |
| Matrox D-Series D2450 | 1F76 102B 2800 |
| Matrox D-Series D2480 | 1F76 102B 2900 |
| NVIDIA GeForce GTX 1650 | 1F82 |
| NVIDIA GeForce GTX 1630 | 1F83 |
| NVIDIA GeForce GTX 1650 | 1F91 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 103C 863E |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 103C 86E7 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 103C 86E8 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1043 12CF |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1043 156F |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1414 0032 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 144D C822 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1462 127E |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1462 1281 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1462 1284 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1462 1285 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1462 129C |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 17AA 229F |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 17AA 3802 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 17AA 3806 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 17AA 3F1A |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F91 1A58 1001 |
| NVIDIA GeForce GTX 1650 Ti | 1F95 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1025 1479 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1025 147A |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1025 147B |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1025 147C |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 103C 86E7 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 103C 86E8 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 103C 8815 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1043 1DFF |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1043 1E1F |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 144D C838 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1462 12BD |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1462 12C5 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1462 12D2 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 17AA 22C0 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 17AA 22C1 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 17AA 3837 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 17AA 3F95 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1A58 1003 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1A58 1006 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1A58 1007 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1E83 3E30 |
| NVIDIA GeForce GTX 1650 | 1F96 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F96 1462 1297 |
| NVIDIA GeForce MX450 | 1F97 |
| NVIDIA GeForce MX450 | 1F98 |
| NVIDIA GeForce GTX 1650 | 1F99 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1025 1479 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1025 147A |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1025 147B |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1025 147C |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 103C 8815 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1043 13B2 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1043 1402 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1043 1902 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1462 12BD |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1462 12C5 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1462 12D2 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 17AA 22DA |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 17AA 3F93 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1E83 3E30 |
| NVIDIA GeForce MX450 | 1F9C |
| NVIDIA GeForce GTX 1650 | 1F9D |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 128D |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 130D |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 149C |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 185C |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 189C |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 12F4 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 1302 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 131B |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 1326 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 132A |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 132E |
| NVIDIA GeForce MX550 | 1F9F |
| NVIDIA GeForce MX550 | 1FA0 |
| NVIDIA T1000 | 1FB0 1028 12DB |
| NVIDIA T1000 | 1FB0 103C 12DB |
| NVIDIA T1000 | 1FB0 103C 8A80 |
| NVIDIA T1000 | 1FB0 10DE 12DB |
| NVIDIA DGX Display | 1FB0 10DE 1485 |
| NVIDIA T1000 | 1FB0 17AA 12DB |
| NVIDIA T600 | 1FB1 1028 1488 |
| NVIDIA T600 | 1FB1 103C 1488 |
| NVIDIA T600 | 1FB1 103C 8A80 |
| NVIDIA T600 | 1FB1 10DE 1488 |
| NVIDIA T600 | 1FB1 17AA 1488 |
| NVIDIA T400 | 1FB2 1028 1489 |
| NVIDIA T400 | 1FB2 103C 1489 |
| NVIDIA T400 | 1FB2 103C 8A80 |
| NVIDIA T400 | 1FB2 10DE 1489 |
| NVIDIA T400 | 1FB2 17AA 1489 |
| NVIDIA T600 Laptop GPU | 1FB6 |
| NVIDIA T550 Laptop GPU | 1FB7 |
| Quadro T2000 | 1FB8 |
| Quadro T2000 with Max-Q Design | 1FB8 1028 097E |
| Quadro T2000 with Max-Q Design | 1FB8 103C 8736 |
| Quadro T2000 with Max-Q Design | 1FB8 103C 8738 |
| Quadro T2000 with Max-Q Design | 1FB8 103C 8772 |
| Quadro T2000 with Max-Q Design | 1FB8 103C 8780 |
| Quadro T2000 with Max-Q Design | 1FB8 103C 8782 |
| Quadro T2000 with Max-Q Design | 1FB8 103C 8783 |
| Quadro T2000 with Max-Q Design | 1FB8 103C 8785 |
| Quadro T2000 with Max-Q Design | 1FB8 103C 87F0 |
| Quadro T2000 with Max-Q Design | 1FB8 1462 1281 |
| Quadro T2000 with Max-Q Design | 1FB8 1462 12BD |
| Quadro T2000 with Max-Q Design | 1FB8 17AA 22C0 |
| Quadro T2000 with Max-Q Design | 1FB8 17AA 22C1 |
| Quadro T1000 | 1FB9 |
| Quadro T1000 with Max-Q Design | 1FB9 1025 1479 |
| Quadro T1000 with Max-Q Design | 1FB9 1025 147A |
| Quadro T1000 with Max-Q Design | 1FB9 1025 147B |
| Quadro T1000 with Max-Q Design | 1FB9 1025 147C |
| Quadro T1000 with Max-Q Design | 1FB9 103C 8736 |
| Quadro T1000 with Max-Q Design | 1FB9 103C 8738 |
| Quadro T1000 with Max-Q Design | 1FB9 103C 8772 |
| Quadro T1000 with Max-Q Design | 1FB9 103C 8780 |
| Quadro T1000 with Max-Q Design | 1FB9 103C 8782 |
| Quadro T1000 with Max-Q Design | 1FB9 103C 8783 |
| Quadro T1000 with Max-Q Design | 1FB9 103C 8785 |
| Quadro T1000 with Max-Q Design | 1FB9 103C 87F0 |
| Quadro T1000 with Max-Q Design | 1FB9 1462 12BD |
| Quadro T1000 with Max-Q Design | 1FB9 17AA 22C0 |
| Quadro T1000 with Max-Q Design | 1FB9 17AA 22C1 |
| NVIDIA T600 Laptop GPU | 1FBA |
| NVIDIA T500 | 1FBB |
| NVIDIA T1200 Laptop GPU | 1FBC |
| NVIDIA GeForce GTX 1650 | 1FDD |
| NVIDIA T1000 8GB | 1FF0 1028 1612 |
| NVIDIA T1000 8GB | 1FF0 103C 1612 |
| NVIDIA T1000 8GB | 1FF0 103C 8A80 |
| NVIDIA T1000 8GB | 1FF0 10DE 1612 |
| NVIDIA T1000 8GB | 1FF0 17AA 1612 |
| NVIDIA T400 4GB | 1FF2 1028 1613 |
| NVIDIA T400 4GB | 1FF2 103C 1613 |
| NVIDIA T400E | 1FF2 103C 18FF |
| NVIDIA T400 4GB | 1FF2 103C 8A80 |
| NVIDIA T400 4GB | 1FF2 10DE 1613 |
| NVIDIA T400E | 1FF2 10DE 18FF |
| NVIDIA T400 4GB | 1FF2 17AA 1613 |
| NVIDIA T400E | 1FF2 17AA 18FF |
| Quadro T1000 | 1FF9 |
| NVIDIA A100-SXM4-40GB | 20B0 |
| NVIDIA A100-PG509-200 | 20B0 10DE 1450 |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1463 |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 147F |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1622 |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1623 |
| NVIDIA PG509-210 | 20B2 10DE 1625 |
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A7 |
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A8 |
| NVIDIA A100 80GB PCIe | 20B5 10DE 1533 |
| NVIDIA A100 80GB PCIe | 20B5 10DE 1642 |
| NVIDIA PG506-232 | 20B6 10DE 1492 |
| NVIDIA A30 | 20B7 10DE 1532 |
| NVIDIA A30 | 20B7 10DE 1804 |
| NVIDIA A30 | 20B7 10DE 1852 |
| NVIDIA A800-SXM4-40GB | 20BD 10DE 17F4 |
| NVIDIA A100-PCIE-40GB | 20F1 10DE 145F |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179C |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179D |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179E |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179F |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A0 |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A1 |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A2 |
| NVIDIA A800 80GB PCIe | 20F5 10DE 1799 |
| NVIDIA A800 80GB PCIe LC | 20F5 10DE 179A |
| NVIDIA A800 40GB Active | 20F6 1028 180A |
| NVIDIA A800 40GB Active | 20F6 103C 180A |
| NVIDIA A800 40GB Active | 20F6 10DE 180A |
| NVIDIA A800 40GB Active | 20F6 17AA 180A |
| NVIDIA AX800 | 20FD 10DE 17F8 |
| NVIDIA GeForce GTX 1660 Ti | 2182 |
| NVIDIA GeForce GTX 1660 | 2184 |
| NVIDIA GeForce GTX 1650 SUPER | 2187 |
| NVIDIA GeForce GTX 1650 | 2188 |
| NVIDIA CMP 30HX | 2189 |
| NVIDIA GeForce GTX 1660 Ti | 2191 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1028 0949 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 85FB |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 85FE |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 86D6 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 8741 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 8744 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 878D |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 87AF |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 87B3 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1043 171F |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1043 17EF |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1043 18D1 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1414 0032 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 128A |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 128B |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 12C6 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 12CB |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 12CC |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 12D9 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 17AA 380C |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 17AA 381D |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 17AA 381E |
| NVIDIA GeForce GTX 1650 Ti | 2192 |
| NVIDIA GeForce GTX 1660 SUPER | 21C4 |
| NVIDIA GeForce GTX 1660 Ti | 21D1 |
| NVIDIA GeForce RTX 3090 Ti | 2203 |
| NVIDIA GeForce RTX 3090 | 2204 |
| NVIDIA GeForce RTX 3080 | 2206 |
| NVIDIA GeForce RTX 3070 Ti | 2207 |
| NVIDIA GeForce RTX 3080 Ti | 2208 |
| NVIDIA GeForce RTX 3080 | 220A |
| NVIDIA CMP 90HX | 220D |
| NVIDIA GeForce RTX 3080 | 2216 |
| NVIDIA RTX A6000 | 2230 1028 1459 |
| NVIDIA RTX A6000 | 2230 103C 1459 |
| NVIDIA RTX A6000 | 2230 10DE 1459 |
| NVIDIA RTX A6000 | 2230 17AA 1459 |
| NVIDIA RTX A5000 | 2231 1028 147E |
| NVIDIA RTX A5000 | 2231 103C 147E |
| NVIDIA RTX A5000 | 2231 10DE 147E |
| NVIDIA RTX A5000 | 2231 17AA 147E |
| NVIDIA RTX A4500 | 2232 1028 163C |
| NVIDIA RTX A4500 | 2232 103C 163C |
| NVIDIA RTX A4500 | 2232 10DE 163C |
| NVIDIA RTX A4500 | 2232 17AA 163C |
| NVIDIA RTX A5500 | 2233 1028 165A |
| NVIDIA RTX A5500 | 2233 103C 165A |
| NVIDIA RTX A5500 | 2233 10DE 165A |
| NVIDIA RTX A5500 | 2233 17AA 165A |
| NVIDIA A40 | 2235 10DE 145A |
| NVIDIA A10 | 2236 10DE 1482 |
| NVIDIA A10G | 2237 10DE 152F |
| NVIDIA A10M | 2238 10DE 1677 |
| NVIDIA H100 NVL | 2321 10DE 1839 |
| NVIDIA H800 PCIe | 2322 10DE 17A4 |
| NVIDIA H800 | 2324 10DE 17A6 |
| NVIDIA H800 | 2324 10DE 17A8 |
| NVIDIA H20 | 2329 10DE 198B |
| NVIDIA H20 | 2329 10DE 198C |
| NVIDIA H20-3e | 232C 10DE 2063 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
| NVIDIA H100 PCIe | 2331 10DE 1626 |
| NVIDIA H200 | 2335 10DE 18BE |
| NVIDIA H200 | 2335 10DE 18BF |
| NVIDIA H100 | 2339 10DE 17FC |
| NVIDIA H800 NVL | 233A 10DE 183A |
| NVIDIA H200 NVL | 233B 10DE 1996 |
| NVIDIA GH200 120GB | 2342 10DE 16EB |
| NVIDIA GH200 120GB | 2342 10DE 1805 |
| NVIDIA GH200 480GB | 2342 10DE 1809 |
| NVIDIA GH200 144G HBM3e | 2348 10DE 18D2 |
| NVIDIA GeForce RTX 3060 Ti | 2414 |
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
| NVIDIA RTX A5500 Laptop GPU | 2438 |
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2460 |
| NVIDIA GeForce RTX 3070 Ti | 2482 |
| NVIDIA GeForce RTX 3070 | 2484 |
| NVIDIA GeForce RTX 3060 Ti | 2486 |
| NVIDIA GeForce RTX 3060 | 2487 |
| NVIDIA GeForce RTX 3070 | 2488 |
| NVIDIA GeForce RTX 3060 Ti | 2489 |
| NVIDIA CMP 70HX | 248A |
| NVIDIA GeForce RTX 3080 Laptop GPU | 249C |
| NVIDIA GeForce RTX 3060 Laptop GPU | 249C 1D05 1194 |
| NVIDIA GeForce RTX 3070 Laptop GPU | 249D |
| NVIDIA GeForce RTX 3070 Ti Laptop GPU | 24A0 |
| NVIDIA GeForce RTX 3060 Laptop GPU | 24A0 1D05 1192 |
| NVIDIA RTX A4000 | 24B0 1028 14AD |
| NVIDIA RTX A4000 | 24B0 103C 14AD |
| NVIDIA RTX A4000 | 24B0 10DE 14AD |
| NVIDIA RTX A4000 | 24B0 17AA 14AD |
| NVIDIA RTX A4000H | 24B1 10DE 1658 |
| NVIDIA RTX A5000 Laptop GPU | 24B6 |
| NVIDIA RTX A4000 Laptop GPU | 24B7 |
| NVIDIA RTX A3000 Laptop GPU | 24B8 |
| NVIDIA RTX A3000 12GB Laptop GPU | 24B9 |
| NVIDIA RTX A4500 Laptop GPU | 24BA |
| NVIDIA RTX A3000 12GB Laptop GPU | 24BB |
| NVIDIA GeForce RTX 3060 | 24C7 |
| NVIDIA GeForce RTX 3060 Ti | 24C9 |
| NVIDIA GeForce RTX 3080 Laptop GPU | 24DC |
| NVIDIA GeForce RTX 3070 Laptop GPU | 24DD |
| NVIDIA GeForce RTX 3070 Ti Laptop GPU | 24E0 |
| NVIDIA RTX A4500 Embedded GPU | 24FA |
| NVIDIA GeForce RTX 3060 | 2503 |
| NVIDIA GeForce RTX 3060 | 2504 |
| NVIDIA GeForce RTX 3050 | 2507 |
| NVIDIA GeForce RTX 3050 OEM | 2508 |
| NVIDIA GeForce RTX 3060 Laptop GPU | 2520 |
| NVIDIA GeForce RTX 3060 Laptop GPU | 2521 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2523 |
| NVIDIA RTX A2000 | 2531 1028 151D |
| NVIDIA RTX A2000 | 2531 103C 151D |
| NVIDIA RTX A2000 | 2531 10DE 151D |
| NVIDIA RTX A2000 | 2531 17AA 151D |
| NVIDIA GeForce RTX 3060 | 2544 |
| NVIDIA GeForce RTX 3060 Laptop GPU | 2560 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2563 |
| NVIDIA RTX A2000 12GB | 2571 1028 1611 |
| NVIDIA RTX A2000 12GB | 2571 103C 1611 |
| NVIDIA RTX A2000 12GB | 2571 10DE 1611 |
| NVIDIA RTX A2000 12GB | 2571 17AA 1611 |
| NVIDIA GeForce RTX 3050 | 2582 |
| NVIDIA GeForce RTX 3050 | 2584 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25A0 |
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 8928 |
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 89F9 |
| NVIDIA GeForce RTX 3060 Laptop GPU | 25A0 1D05 1196 |
| NVIDIA GeForce RTX 3050 Laptop GPU | 25A2 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25A2 1028 0BAF |
| NVIDIA GeForce RTX 3060 Laptop GPU | 25A2 1D05 1195 |
| NVIDIA GeForce RTX 3050 Laptop GPU | 25A5 |
| NVIDIA GeForce MX570 | 25A6 |
| NVIDIA GeForce RTX 2050 | 25A7 |
| NVIDIA GeForce RTX 2050 | 25A9 |
| NVIDIA GeForce MX570 A | 25AA |
| NVIDIA GeForce RTX 3050 4GB Laptop GPU | 25AB |
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25AC |
| NVIDIA GeForce RTX 2050 | 25AD |
| NVIDIA RTX A1000 | 25B0 1028 1878 |
| NVIDIA RTX A1000 | 25B0 103C 1878 |
| NVIDIA RTX A1000 | 25B0 103C 8D96 |
| NVIDIA RTX A1000 | 25B0 10DE 1878 |
| NVIDIA RTX A1000 | 25B0 17AA 1878 |
| NVIDIA RTX A400 | 25B2 1028 1879 |
| NVIDIA RTX A400 | 25B2 103C 1879 |
| NVIDIA RTX A400 | 25B2 103C 8D95 |
| NVIDIA RTX A400 | 25B2 10DE 1879 |
| NVIDIA RTX A400 | 25B2 17AA 1879 |
| NVIDIA A16 | 25B6 10DE 14A9 |
| NVIDIA A2 | 25B6 10DE 157E |
| NVIDIA RTX A2000 Laptop GPU | 25B8 |
| NVIDIA RTX A1000 Laptop GPU | 25B9 |
| NVIDIA RTX A2000 8GB Laptop GPU | 25BA |
| NVIDIA RTX A500 Laptop GPU | 25BB |
| NVIDIA RTX A1000 6GB Laptop GPU | 25BC |
| NVIDIA RTX A500 Laptop GPU | 25BD |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25E0 |
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E2 |
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E5 |
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25EC |
| NVIDIA GeForce RTX 2050 | 25ED |
| NVIDIA RTX A1000 Embedded GPU | 25F9 |
| NVIDIA RTX A2000 Embedded GPU | 25FA |
| NVIDIA RTX A500 Embedded GPU | 25FB |
| NVIDIA GeForce RTX 4090 | 2684 |
| NVIDIA GeForce RTX 4090 D | 2685 |
| NVIDIA GeForce RTX 4070 Ti SUPER | 2689 |
| NVIDIA RTX 6000 Ada Generation | 26B1 1028 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 17AA 16A1 |
| NVIDIA RTX 5000 Ada Generation | 26B2 1028 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 103C 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 10DE 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 17AA 17FA |
| NVIDIA RTX 5880 Ada Generation | 26B3 1028 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 103C 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 10DE 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 17AA 1934 |
| NVIDIA L40 | 26B5 10DE 169D |
| NVIDIA L40 | 26B5 10DE 17DA |
| NVIDIA L40S | 26B9 10DE 1851 |
| NVIDIA L40S | 26B9 10DE 18CF |
| NVIDIA L20 | 26BA 10DE 1957 |
| NVIDIA L20 | 26BA 10DE 1990 |
| NVIDIA GeForce RTX 4080 SUPER | 2702 |
| NVIDIA GeForce RTX 4080 | 2704 |
| NVIDIA GeForce RTX 4070 Ti SUPER | 2705 |
| NVIDIA GeForce RTX 4070 | 2709 |
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
| NVIDIA RTX 5000 Ada Generation Laptop GPU | 2730 |
| NVIDIA GeForce RTX 4090 Laptop GPU | 2757 |
| NVIDIA RTX 5000 Ada Generation Embedded GPU | 2770 |
| NVIDIA GeForce RTX 4070 Ti | 2782 |
| NVIDIA GeForce RTX 4070 SUPER | 2783 |
| NVIDIA GeForce RTX 4070 | 2786 |
| NVIDIA GeForce RTX 4060 Ti | 2788 |
| NVIDIA GeForce RTX 4080 Laptop GPU | 27A0 |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 1028 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 103C 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 10DE 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 17AA 16FA |
| NVIDIA RTX 4500 Ada Generation | 27B1 1028 180C |
| NVIDIA RTX 4500 Ada Generation | 27B1 103C 180C |
| NVIDIA RTX 4500 Ada Generation | 27B1 10DE 180C |
| NVIDIA RTX 4500 Ada Generation | 27B1 17AA 180C |
| NVIDIA RTX 4000 Ada Generation | 27B2 1028 181B |
| NVIDIA RTX 4000 Ada Generation | 27B2 103C 181B |
| NVIDIA RTX 4000 Ada Generation | 27B2 10DE 181B |
| NVIDIA RTX 4000 Ada Generation | 27B2 17AA 181B |
| NVIDIA L2 | 27B6 10DE 1933 |
| NVIDIA L4 | 27B8 10DE 16CA |
| NVIDIA L4 | 27B8 10DE 16EE |
| NVIDIA RTX 4000 Ada Generation Laptop GPU | 27BA |
| NVIDIA RTX 3500 Ada Generation Laptop GPU | 27BB |
| NVIDIA GeForce RTX 4080 Laptop GPU | 27E0 |
| NVIDIA RTX 3500 Ada Generation Embedded GPU | 27FB |
| NVIDIA GeForce RTX 4060 Ti | 2803 |
| NVIDIA GeForce RTX 4060 Ti | 2805 |
| NVIDIA GeForce RTX 4060 | 2808 |
| NVIDIA GeForce RTX 4070 Laptop GPU | 2820 |
| NVIDIA GeForce RTX 3050 A Laptop GPU | 2822 |
| NVIDIA RTX 3000 Ada Generation Laptop GPU | 2838 |
| NVIDIA GeForce RTX 4070 Laptop GPU | 2860 |
| NVIDIA GeForce RTX 4060 | 2882 |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
| NVIDIA GeForce RTX 3050 A Laptop GPU | 28A3 |
| NVIDIA RTX 2000 Ada Generation | 28B0 1028 1870 |
| NVIDIA RTX 2000 Ada Generation | 28B0 103C 1870 |
| NVIDIA RTX 2000E Ada Generation | 28B0 103C 1871 |
| NVIDIA RTX 2000 Ada Generation | 28B0 10DE 1870 |
| NVIDIA RTX 2000E Ada Generation | 28B0 10DE 1871 |
| NVIDIA RTX 2000 Ada Generation | 28B0 17AA 1870 |
| NVIDIA RTX 2000E Ada Generation | 28B0 17AA 1871 |
| NVIDIA RTX 2000 Ada Generation Laptop GPU | 28B8 |
| NVIDIA RTX 1000 Ada Generation Laptop GPU | 28B9 |
| NVIDIA RTX 500 Ada Generation Laptop GPU | 28BA |
| NVIDIA RTX 500 Ada Generation Laptop GPU | 28BB |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
| NVIDIA GeForce RTX 3050 A Laptop GPU | 28E3 |
| NVIDIA RTX 2000 Ada Generation Embedded GPU | 28F8 |
| NVIDIA B200 | 2901 10DE 1999 |
| NVIDIA B200 | 2901 10DE 199B |
| NVIDIA B200 | 2901 10DE 20DA |
| HGX GB200 | 2941 10DE 2046 |
| HGX GB200 | 2941 10DE 20CA |
| HGX GB200 | 2941 10DE 20D5 |
| HGX GB200 | 2941 10DE 21C9 |
| HGX GB200 | 2941 10DE 21CA |
| NVIDIA GeForce RTX 5090 | 2B85 |
| NVIDIA GeForce RTX 5090 D | 2B87 |
| NVIDIA GeForce RTX 5080 | 2C02 |
| NVIDIA GeForce RTX 5070 Ti | 2C05 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1025 147A |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1025 147B |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1025 147C |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 103C 86E7 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 103C 86E8 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 103C 8815 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1043 1DFF |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1043 1E1F |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 144D C838 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1462 12BD |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1462 12C5 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1462 12D2 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 17AA 22C0 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 17AA 22C1 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 17AA 3837 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 17AA 3F95 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1A58 1003 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1A58 1006 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1A58 1007 |
| NVIDIA GeForce GTX 1650 Ti with Max-Q Design | 1F95 1E83 3E30 |
| NVIDIA GeForce GTX 1650 | 1F96 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F96 1462 1297 |
| NVIDIA GeForce MX450 | 1F97 |
| NVIDIA GeForce MX450 | 1F98 |
| NVIDIA GeForce GTX 1650 | 1F99 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1025 1479 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1025 147A |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1025 147B |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1025 147C |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 103C 8815 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1043 13B2 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1043 1402 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1043 1902 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1462 12BD |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1462 12C5 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1462 12D2 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 17AA 22DA |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 17AA 3F93 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F99 1E83 3E30 |
| NVIDIA GeForce MX450 | 1F9C |
| NVIDIA GeForce GTX 1650 | 1F9D |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 128D |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 130D |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 149C |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 185C |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1043 189C |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 12F4 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 1302 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 131B |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 1326 |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 132A |
| NVIDIA GeForce GTX 1650 with Max-Q Design | 1F9D 1462 132E |
| NVIDIA GeForce MX550 | 1F9F |
| NVIDIA GeForce MX550 | 1FA0 |
| NVIDIA T1000 | 1FB0 1028 12DB |
| NVIDIA T1000 | 1FB0 103C 12DB |
| NVIDIA T1000 | 1FB0 103C 8A80 |
| NVIDIA T1000 | 1FB0 10DE 12DB |
| NVIDIA DGX Display | 1FB0 10DE 1485 |
| NVIDIA T1000 | 1FB0 17AA 12DB |
| NVIDIA T600 | 1FB1 1028 1488 |
| NVIDIA T600 | 1FB1 103C 1488 |
| NVIDIA T600 | 1FB1 103C 8A80 |
| NVIDIA T600 | 1FB1 10DE 1488 |
| NVIDIA T600 | 1FB1 17AA 1488 |
| NVIDIA T400 | 1FB2 1028 1489 |
| NVIDIA T400 | 1FB2 103C 1489 |
| NVIDIA T400 | 1FB2 103C 8A80 |
| NVIDIA T400 | 1FB2 10DE 1489 |
| NVIDIA T400 | 1FB2 17AA 1489 |
| NVIDIA T600 Laptop GPU | 1FB6 |
| NVIDIA T550 Laptop GPU | 1FB7 |
| Quadro T2000 | 1FB8 |
| Quadro T2000 with Max-Q Design | 1FB8 1028 097E |
| Quadro T2000 with Max-Q Design | 1FB8 103C 8736 |
| Quadro T2000 with Max-Q Design | 1FB8 103C 8738 |
| Quadro T2000 with Max-Q Design | 1FB8 103C 8772 |
| Quadro T2000 with Max-Q Design | 1FB8 103C 8780 |
| Quadro T2000 with Max-Q Design | 1FB8 103C 8782 |
| Quadro T2000 with Max-Q Design | 1FB8 103C 8783 |
| Quadro T2000 with Max-Q Design | 1FB8 103C 8785 |
| Quadro T2000 with Max-Q Design | 1FB8 103C 87F0 |
| Quadro T2000 with Max-Q Design | 1FB8 1462 1281 |
| Quadro T2000 with Max-Q Design | 1FB8 1462 12BD |
| Quadro T2000 with Max-Q Design | 1FB8 17AA 22C0 |
| Quadro T2000 with Max-Q Design | 1FB8 17AA 22C1 |
| Quadro T1000 | 1FB9 |
| Quadro T1000 with Max-Q Design | 1FB9 1025 1479 |
| Quadro T1000 with Max-Q Design | 1FB9 1025 147A |
| Quadro T1000 with Max-Q Design | 1FB9 1025 147B |
| Quadro T1000 with Max-Q Design | 1FB9 1025 147C |
| Quadro T1000 with Max-Q Design | 1FB9 103C 8736 |
| Quadro T1000 with Max-Q Design | 1FB9 103C 8738 |
| Quadro T1000 with Max-Q Design | 1FB9 103C 8772 |
| Quadro T1000 with Max-Q Design | 1FB9 103C 8780 |
| Quadro T1000 with Max-Q Design | 1FB9 103C 8782 |
| Quadro T1000 with Max-Q Design | 1FB9 103C 8783 |
| Quadro T1000 with Max-Q Design | 1FB9 103C 8785 |
| Quadro T1000 with Max-Q Design | 1FB9 103C 87F0 |
| Quadro T1000 with Max-Q Design | 1FB9 1462 12BD |
| Quadro T1000 with Max-Q Design | 1FB9 17AA 22C0 |
| Quadro T1000 with Max-Q Design | 1FB9 17AA 22C1 |
| NVIDIA T600 Laptop GPU | 1FBA |
| NVIDIA T500 | 1FBB |
| NVIDIA T1200 Laptop GPU | 1FBC |
| NVIDIA GeForce GTX 1650 | 1FDD |
| NVIDIA T1000 8GB | 1FF0 1028 1612 |
| NVIDIA T1000 8GB | 1FF0 103C 1612 |
| NVIDIA T1000 8GB | 1FF0 103C 8A80 |
| NVIDIA T1000 8GB | 1FF0 10DE 1612 |
| NVIDIA T1000 8GB | 1FF0 17AA 1612 |
| NVIDIA T400 4GB | 1FF2 1028 1613 |
| NVIDIA T400 4GB | 1FF2 103C 1613 |
| NVIDIA T400E | 1FF2 103C 18FF |
| NVIDIA T400 4GB | 1FF2 103C 8A80 |
| NVIDIA T400 4GB | 1FF2 10DE 1613 |
| NVIDIA T400E | 1FF2 10DE 18FF |
| NVIDIA T400 4GB | 1FF2 17AA 1613 |
| NVIDIA T400E | 1FF2 17AA 18FF |
| Quadro T1000 | 1FF9 |
| NVIDIA A100-SXM4-40GB | 20B0 |
| NVIDIA A100-PG509-200 | 20B0 10DE 1450 |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1463 |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 147F |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1622 |
| NVIDIA A100-SXM4-80GB | 20B2 10DE 1623 |
| NVIDIA PG509-210 | 20B2 10DE 1625 |
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A7 |
| NVIDIA A100-SXM-64GB | 20B3 10DE 14A8 |
| NVIDIA A100 80GB PCIe | 20B5 10DE 1533 |
| NVIDIA A100 80GB PCIe | 20B5 10DE 1642 |
| NVIDIA PG506-232 | 20B6 10DE 1492 |
| NVIDIA A30 | 20B7 10DE 1532 |
| NVIDIA A30 | 20B7 10DE 1804 |
| NVIDIA A30 | 20B7 10DE 1852 |
| NVIDIA A800-SXM4-40GB | 20BD 10DE 17F4 |
| NVIDIA A100-PCIE-40GB | 20F1 10DE 145F |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179B |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179C |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179D |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179E |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 179F |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A0 |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A1 |
| NVIDIA A800-SXM4-80GB | 20F3 10DE 17A2 |
| NVIDIA A800 80GB PCIe | 20F5 10DE 1799 |
| NVIDIA A800 80GB PCIe LC | 20F5 10DE 179A |
| NVIDIA A800 40GB Active | 20F6 1028 180A |
| NVIDIA A800 40GB Active | 20F6 103C 180A |
| NVIDIA A800 40GB Active | 20F6 10DE 180A |
| NVIDIA A800 40GB Active | 20F6 17AA 180A |
| NVIDIA AX800 | 20FD 10DE 17F8 |
| NVIDIA GeForce GTX 1660 Ti | 2182 |
| NVIDIA GeForce GTX 1660 | 2184 |
| NVIDIA GeForce GTX 1650 SUPER | 2187 |
| NVIDIA GeForce GTX 1650 | 2188 |
| NVIDIA CMP 30HX | 2189 |
| NVIDIA GeForce GTX 1660 Ti | 2191 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1028 0949 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 85FB |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 85FE |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 86D6 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 8741 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 8744 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 878D |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 87AF |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 103C 87B3 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1043 171F |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1043 17EF |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1043 18D1 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1414 0032 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 128A |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 128B |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 12C6 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 12CB |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 12CC |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 1462 12D9 |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 17AA 380C |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 17AA 381D |
| NVIDIA GeForce GTX 1660 Ti with Max-Q Design | 2191 17AA 381E |
| NVIDIA GeForce GTX 1650 Ti | 2192 |
| NVIDIA GeForce GTX 1660 SUPER | 21C4 |
| NVIDIA GeForce GTX 1660 Ti | 21D1 |
| NVIDIA GeForce RTX 3090 Ti | 2203 |
| NVIDIA GeForce RTX 3090 | 2204 |
| NVIDIA GeForce RTX 3080 | 2206 |
| NVIDIA GeForce RTX 3070 Ti | 2207 |
| NVIDIA GeForce RTX 3080 Ti | 2208 |
| NVIDIA GeForce RTX 3080 | 220A |
| NVIDIA CMP 90HX | 220D |
| NVIDIA GeForce RTX 3080 | 2216 |
| NVIDIA RTX A6000 | 2230 1028 1459 |
| NVIDIA RTX A6000 | 2230 103C 1459 |
| NVIDIA RTX A6000 | 2230 10DE 1459 |
| NVIDIA RTX A6000 | 2230 17AA 1459 |
| NVIDIA RTX A5000 | 2231 1028 147E |
| NVIDIA RTX A5000 | 2231 103C 147E |
| NVIDIA RTX A5000 | 2231 10DE 147E |
| NVIDIA RTX A5000 | 2231 17AA 147E |
| NVIDIA RTX A4500 | 2232 1028 163C |
| NVIDIA RTX A4500 | 2232 103C 163C |
| NVIDIA RTX A4500 | 2232 10DE 163C |
| NVIDIA RTX A4500 | 2232 17AA 163C |
| NVIDIA RTX A5500 | 2233 1028 165A |
| NVIDIA RTX A5500 | 2233 103C 165A |
| NVIDIA RTX A5500 | 2233 10DE 165A |
| NVIDIA RTX A5500 | 2233 17AA 165A |
| NVIDIA A40 | 2235 10DE 145A |
| NVIDIA A10 | 2236 10DE 1482 |
| NVIDIA A10G | 2237 10DE 152F |
| NVIDIA A10M | 2238 10DE 1677 |
| NVIDIA H100 NVL | 2321 10DE 1839 |
| NVIDIA H800 PCIe | 2322 10DE 17A4 |
| NVIDIA H800 | 2324 10DE 17A6 |
| NVIDIA H800 | 2324 10DE 17A8 |
| NVIDIA H20 | 2329 10DE 198B |
| NVIDIA H20 | 2329 10DE 198C |
| NVIDIA H20-3e | 232C 10DE 2063 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C0 |
| NVIDIA H100 80GB HBM3 | 2330 10DE 16C1 |
| NVIDIA H100 PCIe | 2331 10DE 1626 |
| NVIDIA H200 | 2335 10DE 18BE |
| NVIDIA H200 | 2335 10DE 18BF |
| NVIDIA H100 | 2339 10DE 17FC |
| NVIDIA H800 NVL | 233A 10DE 183A |
| NVIDIA H200 NVL | 233B 10DE 1996 |
| NVIDIA GH200 120GB | 2342 10DE 16EB |
| NVIDIA GH200 120GB | 2342 10DE 1805 |
| NVIDIA GH200 480GB | 2342 10DE 1809 |
| NVIDIA GH200 144G HBM3e | 2348 10DE 18D2 |
| NVIDIA GeForce RTX 3060 Ti | 2414 |
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2420 |
| NVIDIA RTX A5500 Laptop GPU | 2438 |
| NVIDIA GeForce RTX 3080 Ti Laptop GPU | 2460 |
| NVIDIA GeForce RTX 3070 Ti | 2482 |
| NVIDIA GeForce RTX 3070 | 2484 |
| NVIDIA GeForce RTX 3060 Ti | 2486 |
| NVIDIA GeForce RTX 3060 | 2487 |
| NVIDIA GeForce RTX 3070 | 2488 |
| NVIDIA GeForce RTX 3060 Ti | 2489 |
| NVIDIA CMP 70HX | 248A |
| NVIDIA GeForce RTX 3080 Laptop GPU | 249C |
| NVIDIA GeForce RTX 3060 Laptop GPU | 249C 1D05 1194 |
| NVIDIA GeForce RTX 3070 Laptop GPU | 249D |
| NVIDIA GeForce RTX 3070 Ti Laptop GPU | 24A0 |
| NVIDIA GeForce RTX 3060 Laptop GPU | 24A0 1D05 1192 |
| NVIDIA RTX A4000 | 24B0 1028 14AD |
| NVIDIA RTX A4000 | 24B0 103C 14AD |
| NVIDIA RTX A4000 | 24B0 10DE 14AD |
| NVIDIA RTX A4000 | 24B0 17AA 14AD |
| NVIDIA RTX A4000H | 24B1 10DE 1658 |
| NVIDIA RTX A5000 Laptop GPU | 24B6 |
| NVIDIA RTX A4000 Laptop GPU | 24B7 |
| NVIDIA RTX A3000 Laptop GPU | 24B8 |
| NVIDIA RTX A3000 12GB Laptop GPU | 24B9 |
| NVIDIA RTX A4500 Laptop GPU | 24BA |
| NVIDIA RTX A3000 12GB Laptop GPU | 24BB |
| NVIDIA GeForce RTX 3060 | 24C7 |
| NVIDIA GeForce RTX 3060 Ti | 24C9 |
| NVIDIA GeForce RTX 3080 Laptop GPU | 24DC |
| NVIDIA GeForce RTX 3070 Laptop GPU | 24DD |
| NVIDIA GeForce RTX 3070 Ti Laptop GPU | 24E0 |
| NVIDIA RTX A4500 Embedded GPU | 24FA |
| NVIDIA GeForce RTX 3060 | 2503 |
| NVIDIA GeForce RTX 3060 | 2504 |
| NVIDIA GeForce RTX 3050 | 2507 |
| NVIDIA GeForce RTX 3050 OEM | 2508 |
| NVIDIA GeForce RTX 3060 Laptop GPU | 2520 |
| NVIDIA GeForce RTX 3060 Laptop GPU | 2521 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2523 |
| NVIDIA RTX A2000 | 2531 1028 151D |
| NVIDIA RTX A2000 | 2531 103C 151D |
| NVIDIA RTX A2000 | 2531 10DE 151D |
| NVIDIA RTX A2000 | 2531 17AA 151D |
| NVIDIA GeForce RTX 3060 | 2544 |
| NVIDIA GeForce RTX 3060 Laptop GPU | 2560 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 2563 |
| NVIDIA RTX A2000 12GB | 2571 1028 1611 |
| NVIDIA RTX A2000 12GB | 2571 103C 1611 |
| NVIDIA RTX A2000 12GB | 2571 10DE 1611 |
| NVIDIA RTX A2000 12GB | 2571 17AA 1611 |
| NVIDIA GeForce RTX 3050 | 2582 |
| NVIDIA GeForce RTX 3050 | 2584 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25A0 |
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 8928 |
| NVIDIA GeForce RTX 3050Ti Laptop GPU | 25A0 103C 89F9 |
| NVIDIA GeForce RTX 3060 Laptop GPU | 25A0 1D05 1196 |
| NVIDIA GeForce RTX 3050 Laptop GPU | 25A2 |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25A2 1028 0BAF |
| NVIDIA GeForce RTX 3060 Laptop GPU | 25A2 1D05 1195 |
| NVIDIA GeForce RTX 3050 Laptop GPU | 25A5 |
| NVIDIA GeForce MX570 | 25A6 |
| NVIDIA GeForce RTX 2050 | 25A7 |
| NVIDIA GeForce RTX 2050 | 25A9 |
| NVIDIA GeForce MX570 A | 25AA |
| NVIDIA GeForce RTX 3050 4GB Laptop GPU | 25AB |
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25AC |
| NVIDIA GeForce RTX 2050 | 25AD |
| NVIDIA RTX A1000 | 25B0 1028 1878 |
| NVIDIA RTX A1000 | 25B0 103C 1878 |
| NVIDIA RTX A1000 | 25B0 103C 8D96 |
| NVIDIA RTX A1000 | 25B0 10DE 1878 |
| NVIDIA RTX A1000 | 25B0 17AA 1878 |
| NVIDIA RTX A400 | 25B2 1028 1879 |
| NVIDIA RTX A400 | 25B2 103C 1879 |
| NVIDIA RTX A400 | 25B2 103C 8D95 |
| NVIDIA RTX A400 | 25B2 10DE 1879 |
| NVIDIA RTX A400 | 25B2 17AA 1879 |
| NVIDIA A16 | 25B6 10DE 14A9 |
| NVIDIA A2 | 25B6 10DE 157E |
| NVIDIA RTX A2000 Laptop GPU | 25B8 |
| NVIDIA RTX A1000 Laptop GPU | 25B9 |
| NVIDIA RTX A2000 8GB Laptop GPU | 25BA |
| NVIDIA RTX A500 Laptop GPU | 25BB |
| NVIDIA RTX A1000 6GB Laptop GPU | 25BC |
| NVIDIA RTX A500 Laptop GPU | 25BD |
| NVIDIA GeForce RTX 3050 Ti Laptop GPU | 25E0 |
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E2 |
| NVIDIA GeForce RTX 3050 Laptop GPU | 25E5 |
| NVIDIA GeForce RTX 3050 6GB Laptop GPU | 25EC |
| NVIDIA GeForce RTX 2050 | 25ED |
| NVIDIA RTX A1000 Embedded GPU | 25F9 |
| NVIDIA RTX A2000 Embedded GPU | 25FA |
| NVIDIA RTX A500 Embedded GPU | 25FB |
| NVIDIA GeForce RTX 4090 | 2684 |
| NVIDIA GeForce RTX 4090 D | 2685 |
| NVIDIA GeForce RTX 4070 Ti SUPER | 2689 |
| NVIDIA RTX 6000 Ada Generation | 26B1 1028 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 103C 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 10DE 16A1 |
| NVIDIA RTX 6000 Ada Generation | 26B1 17AA 16A1 |
| NVIDIA RTX 5000 Ada Generation | 26B2 1028 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 103C 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 10DE 17FA |
| NVIDIA RTX 5000 Ada Generation | 26B2 17AA 17FA |
| NVIDIA RTX 5880 Ada Generation | 26B3 1028 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 103C 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 10DE 1934 |
| NVIDIA RTX 5880 Ada Generation | 26B3 17AA 1934 |
| NVIDIA L40 | 26B5 10DE 169D |
| NVIDIA L40 | 26B5 10DE 17DA |
| NVIDIA L40S | 26B9 10DE 1851 |
| NVIDIA L40S | 26B9 10DE 18CF |
| NVIDIA L20 | 26BA 10DE 1957 |
| NVIDIA L20 | 26BA 10DE 1990 |
| NVIDIA GeForce RTX 4080 SUPER | 2702 |
| NVIDIA GeForce RTX 4080 | 2704 |
| NVIDIA GeForce RTX 4070 Ti SUPER | 2705 |
| NVIDIA GeForce RTX 4070 | 2709 |
| NVIDIA GeForce RTX 4090 Laptop GPU | 2717 |
| NVIDIA RTX 5000 Ada Generation Laptop GPU | 2730 |
| NVIDIA GeForce RTX 4090 Laptop GPU | 2757 |
| NVIDIA RTX 5000 Ada Generation Embedded GPU | 2770 |
| NVIDIA GeForce RTX 4070 Ti | 2782 |
| NVIDIA GeForce RTX 4070 SUPER | 2783 |
| NVIDIA GeForce RTX 4070 | 2786 |
| NVIDIA GeForce RTX 4060 Ti | 2788 |
| NVIDIA GeForce RTX 4080 Laptop GPU | 27A0 |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 1028 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 103C 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 10DE 16FA |
| NVIDIA RTX 4000 SFF Ada Generation | 27B0 17AA 16FA |
| NVIDIA RTX 4500 Ada Generation | 27B1 1028 180C |
| NVIDIA RTX 4500 Ada Generation | 27B1 103C 180C |
| NVIDIA RTX 4500 Ada Generation | 27B1 10DE 180C |
| NVIDIA RTX 4500 Ada Generation | 27B1 17AA 180C |
| NVIDIA RTX 4000 Ada Generation | 27B2 1028 181B |
| NVIDIA RTX 4000 Ada Generation | 27B2 103C 181B |
| NVIDIA RTX 4000 Ada Generation | 27B2 10DE 181B |
| NVIDIA RTX 4000 Ada Generation | 27B2 17AA 181B |
| NVIDIA L2 | 27B6 10DE 1933 |
| NVIDIA L4 | 27B8 10DE 16CA |
| NVIDIA L4 | 27B8 10DE 16EE |
| NVIDIA RTX 4000 Ada Generation Laptop GPU | 27BA |
| NVIDIA RTX 3500 Ada Generation Laptop GPU | 27BB |
| NVIDIA GeForce RTX 4080 Laptop GPU | 27E0 |
| NVIDIA RTX 3500 Ada Generation Embedded GPU | 27FB |
| NVIDIA GeForce RTX 4060 Ti | 2803 |
| NVIDIA GeForce RTX 4060 Ti | 2805 |
| NVIDIA GeForce RTX 4060 | 2808 |
| NVIDIA GeForce RTX 4070 Laptop GPU | 2820 |
| NVIDIA GeForce RTX 3050 A Laptop GPU | 2822 |
| NVIDIA RTX 3000 Ada Generation Laptop GPU | 2838 |
| NVIDIA GeForce RTX 4070 Laptop GPU | 2860 |
| NVIDIA GeForce RTX 4060 | 2882 |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28A0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28A1 |
| NVIDIA GeForce RTX 3050 A Laptop GPU | 28A3 |
| NVIDIA RTX 2000 Ada Generation | 28B0 1028 1870 |
| NVIDIA RTX 2000 Ada Generation | 28B0 103C 1870 |
| NVIDIA RTX 2000E Ada Generation | 28B0 103C 1871 |
| NVIDIA RTX 2000 Ada Generation | 28B0 10DE 1870 |
| NVIDIA RTX 2000E Ada Generation | 28B0 10DE 1871 |
| NVIDIA RTX 2000 Ada Generation | 28B0 17AA 1870 |
| NVIDIA RTX 2000E Ada Generation | 28B0 17AA 1871 |
| NVIDIA RTX 2000 Ada Generation Laptop GPU | 28B8 |
| NVIDIA RTX 1000 Ada Generation Laptop GPU | 28B9 |
| NVIDIA RTX 500 Ada Generation Laptop GPU | 28BA |
| NVIDIA RTX 500 Ada Generation Laptop GPU | 28BB |
| NVIDIA GeForce RTX 4060 Laptop GPU | 28E0 |
| NVIDIA GeForce RTX 4050 Laptop GPU | 28E1 |
| NVIDIA GeForce RTX 3050 A Laptop GPU | 28E3 |
| NVIDIA RTX 2000 Ada Generation Embedded GPU | 28F8 |
| NVIDIA B200 | 2901 10DE 1999 |
| NVIDIA B200 | 2901 10DE 199B |
| NVIDIA B200 | 2901 10DE 20DA |
| NVIDIA HGX GB200 | 2941 10DE 2046 |
| NVIDIA HGX GB200 | 2941 10DE 20CA |
| NVIDIA HGX GB200 | 2941 10DE 20D5 |
| NVIDIA HGX GB200 | 2941 10DE 21C9 |
| NVIDIA HGX GB200 | 2941 10DE 21CA |
| NVIDIA GeForce RTX 5090 | 2B85 |
| NVIDIA GeForce RTX 5090 D | 2B87 |
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 1028 204B |
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 103C 204B |
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 10DE 204B |
| NVIDIA RTX PRO 6000 Blackwell Workstation Edition | 2BB1 17AA 204B |
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 1028 204C |
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 103C 204C |
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 10DE 204C |
| NVIDIA RTX PRO 6000 Blackwell Max-Q Workstation Edition | 2BB4 17AA 204C |
| NVIDIA GeForce RTX 5080 | 2C02 |
| NVIDIA GeForce RTX 5070 Ti | 2C05 |
| NVIDIA GeForce RTX 5090 Laptop GPU | 2C18 |
| NVIDIA GeForce RTX 5080 Laptop GPU | 2C19 |
| NVIDIA GeForce RTX 5090 Laptop GPU | 2C58 |
| NVIDIA GeForce RTX 5080 Laptop GPU | 2C59 |
| NVIDIA GeForce RTX 5070 | 2F04 |
| NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F18 |
| NVIDIA GeForce RTX 5070 Ti Laptop GPU | 2F58 |


@ -86,7 +86,7 @@ EXTRA_CFLAGS += -I$(src)/common/inc
EXTRA_CFLAGS += -I$(src)
EXTRA_CFLAGS += -Wall $(DEFINES) $(INCLUDES) -Wno-cast-qual -Wno-format-extra-args
EXTRA_CFLAGS += -D__KERNEL__ -DMODULE -DNVRM
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.124.06\"
EXTRA_CFLAGS += -DNV_VERSION_STRING=\"570.133.07\"
ifneq ($(SYSSRCHOST1X),)
EXTRA_CFLAGS += -I$(SYSSRCHOST1X)


@ -498,6 +498,9 @@ typedef struct nv_state_t
NvU32 dispIsoStreamId;
NvU32 dispNisoStreamId;
} iommus;
/* Console is managed by drm drivers or NVKMS */
NvBool client_managed_console;
} nv_state_t;
#define NVFP_TYPE_NONE 0x0
@ -542,9 +545,9 @@ typedef struct UvmGpuNvlinkInfo_tag *nvgpuNvlinkInfo_t;
typedef struct UvmGpuEccInfo_tag *nvgpuEccInfo_t;
typedef struct UvmGpuFaultInfo_tag *nvgpuFaultInfo_t;
typedef struct UvmGpuAccessCntrInfo_tag *nvgpuAccessCntrInfo_t;
typedef struct UvmGpuAccessCntrConfig_tag *nvgpuAccessCntrConfig_t;
typedef struct UvmGpuInfo_tag nvgpuInfo_t;
typedef struct UvmGpuClientInfo_tag nvgpuClientInfo_t;
typedef struct UvmGpuAccessCntrConfig_tag nvgpuAccessCntrConfig_t;
typedef struct UvmGpuInfo_tag nvgpuInfo_t;
typedef struct UvmGpuClientInfo_tag nvgpuClientInfo_t;
typedef struct UvmPmaAllocationOptions_tag *nvgpuPmaAllocationOptions_t;
typedef struct UvmPmaStatistics_tag *nvgpuPmaStatistics_t;
typedef struct UvmGpuMemoryInfo_tag *nvgpuMemoryInfo_t;


@ -1056,7 +1056,7 @@ NV_STATUS nvUvmInterfaceDestroyAccessCntrInfo(uvmGpuDeviceHandle device,
*/
NV_STATUS nvUvmInterfaceEnableAccessCntr(uvmGpuDeviceHandle device,
UvmGpuAccessCntrInfo *pAccessCntrInfo,
UvmGpuAccessCntrConfig *pAccessCntrConfig);
const UvmGpuAccessCntrConfig *pAccessCntrConfig);
/*******************************************************************************
nvUvmInterfaceDisableAccessCntr


@ -1103,24 +1103,9 @@ typedef enum
UVM_ACCESS_COUNTER_GRANULARITY_16G = 4,
} UVM_ACCESS_COUNTER_GRANULARITY;
typedef enum
{
UVM_ACCESS_COUNTER_USE_LIMIT_NONE = 1,
UVM_ACCESS_COUNTER_USE_LIMIT_QTR = 2,
UVM_ACCESS_COUNTER_USE_LIMIT_HALF = 3,
UVM_ACCESS_COUNTER_USE_LIMIT_FULL = 4,
} UVM_ACCESS_COUNTER_USE_LIMIT;
typedef struct UvmGpuAccessCntrConfig_tag
{
NvU32 mimcGranularity;
NvU32 momcGranularity;
NvU32 mimcUseLimit;
NvU32 momcUseLimit;
NvU32 granularity;
NvU32 threshold;
} UvmGpuAccessCntrConfig;
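For orientation, a minimal sketch of how the simplified interface fits together after this change; it is not taken from the patch, the device/info handles are assumed to come from the usual nvUvmInterface init path, and the threshold value is arbitrary:

    /* Sketch only: enable access counters with the reduced config. */
    static NV_STATUS example_enable_access_counters(uvmGpuDeviceHandle device,
                                                    UvmGpuAccessCntrInfo *info)
    {
        UvmGpuAccessCntrConfig config = {0};

        config.granularity = UVM_ACCESS_COUNTER_GRANULARITY_16G;
        config.threshold   = 256;   /* hypothetical notification threshold */

        /* The callee now only reads the config (const-qualified above). */
        return nvUvmInterfaceEnableAccessCntr(device, info, &config);
    }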


@ -159,6 +159,7 @@ NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_NOT_READY, 0x00000081, "Nvlink Fabri
NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_FAILURE, 0x00000082, "Nvlink Fabric Probe failed")
NV_STATUS_CODE(NV_ERR_GPU_MEMORY_ONLINING_FAILURE, 0x00000083, "GPU Memory Onlining failed")
NV_STATUS_CODE(NV_ERR_REDUCTION_MANAGER_NOT_AVAILABLE, 0x00000084, "Reduction Manager is not available")
NV_STATUS_CODE(NV_ERR_RESOURCE_RETIREMENT_ERROR, 0x00000086, "An error occurred while trying to retire a resource")
// Warnings:
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")


@ -81,7 +81,7 @@ NV_STATUS NV_API_CALL rm_gpu_ops_has_pending_non_replayable_faults(nvidia_stack
NV_STATUS NV_API_CALL rm_gpu_ops_init_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_destroy_access_cntr_info(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_own_access_cntr_intr(nvidia_stack_t *, nvgpuSessionHandle_t, nvgpuAccessCntrInfo_t, NvBool);
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, nvgpuAccessCntrConfig_t);
NV_STATUS NV_API_CALL rm_gpu_ops_enable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t, const nvgpuAccessCntrConfig_t *);
NV_STATUS NV_API_CALL rm_gpu_ops_disable_access_cntr(nvidia_stack_t *, nvgpuDeviceHandle_t, nvgpuAccessCntrInfo_t);
NV_STATUS NV_API_CALL rm_gpu_ops_set_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t, NvU64, unsigned, NvBool, NvU32);
NV_STATUS NV_API_CALL rm_gpu_ops_unset_page_directory (nvidia_stack_t *, nvgpuAddressSpaceHandle_t);


@ -5289,6 +5289,45 @@ compile_test() {
compile_check_conftest "$CODE" "NV_FOLLOW_PFN_PRESENT" "" "functions"
;;
follow_pte_arg_vma)
#
# Determine if the first argument of follow_pte is
# mm_struct or vm_area_struct.
#
# The first argument was changed from mm_struct to vm_area_struct by
# commit 29ae7d96d166 ("mm: pass VMA instead of MM to follow_pte()")
#
CODE="
#include <linux/mm.h>
typeof(follow_pte) conftest_follow_pte_has_vma_arg;
int conftest_follow_pte_has_vma_arg(struct vm_area_struct *vma,
unsigned long address,
pte_t **ptep,
spinlock_t **ptl) {
return 0;
}"
compile_check_conftest "$CODE" "NV_FOLLOW_PTE_ARG1_VMA" "" "types"
;;
ptep_get)
#
# Determine if ptep_get() is present.
#
# ptep_get() was added by commit 481e980a7c19
# ("mm: Allow arches to provide ptep_get()")
#
CODE="
#include <linux/mm.h>
void conftest_ptep_get(void) {
ptep_get();
}"
compile_check_conftest "$CODE" "NV_PTEP_GET_PRESENT" "" "functions"
;;
drm_plane_atomic_check_has_atomic_state_arg)
#
# Determine if drm_plane_helper_funcs::atomic_check takes 'state'
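The follow_pte_arg_vma and ptep_get probes above only define feature macros (NV_FOLLOW_PTE_ARG1_VMA, NV_PTEP_GET_PRESENT). As a rough illustration of how such conftest results are typically consumed, a compile-time wrapper along these lines would pick the right accessor; the helper name is hypothetical and not part of this patch:

    /* Illustration only; assumes <linux/mm.h>. */
    static inline pte_t nv_ptep_get_value(pte_t *ptep)
    {
    #if defined(NV_PTEP_GET_PRESENT)
        return ptep_get(ptep);   /* kernel provides the accessor */
    #else
        return *ptep;            /* older kernels: plain dereference */
    #endif
    }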


@ -59,7 +59,6 @@ NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_host.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_mmu.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_fault_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_volta_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_access_counter_buffer.c
NVIDIA_UVM_SOURCES += nvidia-uvm/uvm_turing_fault_buffer.c


@ -240,7 +240,7 @@ static void uvm_release_deferred(void *data)
// Since this function is only scheduled to run when uvm_release() fails
// to trylock-acquire the pm.lock, the following acquisition attempt
// is expected to block this thread, and cause it to remain blocked until
// uvm_resume() releases the lock. As a result, the deferred release
// uvm_resume() releases the lock. As a result, the deferred release
// kthread queue may stall for long periods of time.
uvm_down_read(&g_uvm_global.pm.lock);
@ -292,14 +292,14 @@ static int uvm_release(struct inode *inode, struct file *filp)
// Because the kernel discards the status code returned from this release
// callback, early exit in case of a pm.lock acquisition failure is not
// an option. Instead, the teardown work normally performed synchronously
// an option. Instead, the teardown work normally performed synchronously
// needs to be scheduled to run after uvm_resume() releases the lock.
if (uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
uvm_va_space_destroy(va_space);
uvm_up_read(&g_uvm_global.pm.lock);
}
else {
// Remove references to this inode from the address_space. This isn't
// Remove references to this inode from the address_space. This isn't
// strictly necessary, as any CPU mappings of this file have already
// been destroyed, and va_space->mapping won't be used again. Still,
// the va_space survives the inode if its destruction is deferred, in
@ -867,8 +867,8 @@ static int uvm_mmap(struct file *filp, struct vm_area_struct *vma)
}
// If the PM lock cannot be acquired, disable the VMA and report success
// to the caller. The caller is expected to determine whether the
// map operation succeeded via an ioctl() call. This is necessary to
// to the caller. The caller is expected to determine whether the
// map operation succeeded via an ioctl() call. This is necessary to
// safely handle MAP_FIXED, which needs to complete atomically to prevent
// the loss of the virtual address range.
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock)) {
@ -1233,19 +1233,8 @@ static int uvm_init(void)
goto error;
}
pr_info("Loaded the UVM driver, major device number %d.\n", MAJOR(g_uvm_base_dev));
if (uvm_enable_builtin_tests)
pr_info("Built-in UVM tests are enabled. This is a security risk.\n");
// After Open RM is released, both the enclosing "#if" and this comment
// block should be removed, because the uvm_hmm_is_enabled_system_wide()
// check is both necessary and sufficient for reporting functionality.
// Until that time, however, we need to avoid advertisting UVM's ability to
// enable HMM functionality.
if (uvm_hmm_is_enabled_system_wide())
UVM_INFO_PRINT("HMM (Heterogeneous Memory Management) is enabled in the UVM driver.\n");
UVM_INFO_PRINT("Built-in UVM tests are enabled. This is a security risk.\n");
return 0;
@ -1274,8 +1263,6 @@ static void uvm_exit(void)
uvm_global_exit();
uvm_test_unload_state_exit();
pr_info("Unloaded the UVM driver.\n");
}
static void __exit uvm_exit_entry(void)


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2024 NVIDIA Corporation
Copyright (c) 2021-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -38,12 +38,10 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_ada_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) *
8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Ada covers 128 TB and that's the minimum size
@ -82,8 +80,6 @@ void uvm_hal_ada_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2018-2024 NVIDIA Corporation
Copyright (c) 2018-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -38,12 +38,10 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_ampere_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
(sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Ampere covers 128 TB and that's the minimum
@ -86,8 +84,6 @@ void uvm_hal_ampere_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2023 NVIDIA Corporation
Copyright (c) 2024-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -139,9 +139,9 @@ static void flush_tlb_va_region(uvm_gpu_va_space_t *gpu_va_space,
uvm_ats_fault_invalidate_t *ats_invalidate;
if (client_type == UVM_FAULT_CLIENT_TYPE_GPC)
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.replayable.ats_invalidate;
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.replayable.ats_invalidate;
else
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer_info.non_replayable.ats_invalidate;
ats_invalidate = &gpu_va_space->gpu->parent->fault_buffer.non_replayable.ats_invalidate;
if (!ats_invalidate->tlb_batch_pending) {
uvm_tlb_batch_begin(&gpu_va_space->page_tables, &ats_invalidate->tlb_batch);


@ -38,12 +38,10 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_blackwell_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
(sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Blackwell covers 64 PB and that's the minimum
@ -85,8 +83,6 @@ void uvm_hal_blackwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -110,16 +110,22 @@ typedef enum
bool uvm_channel_pool_is_p2p(uvm_channel_pool_t *pool)
{
uvm_channel_manager_t *manager = pool->manager;
uvm_gpu_t *gpu = manager->gpu;
uvm_gpu_id_t id;
if (manager->pool_to_use.default_for_type[UVM_CHANNEL_TYPE_GPU_TO_GPU] == pool)
return true;
for_each_gpu_id_in_mask(id, &manager->gpu->peer_info.peer_gpu_mask) {
if (manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(id)] == pool)
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
if (manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(id)] == pool) {
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return true;
}
}
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return false;
}
@ -1974,6 +1980,7 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
{
uvm_channel_pool_t *pool;
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu = channel_manager->gpu;
uvm_gpu_id_t gpu_id;
DECLARE_BITMAP(suspended_pools, UVM_COPY_ENGINE_COUNT_MAX);
@ -1981,7 +1988,9 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
// Use bitmap to track which were suspended.
bitmap_zero(suspended_pools, channel_manager->num_channel_pools);
for_each_gpu_id_in_mask(gpu_id, &channel_manager->gpu->peer_info.peer_gpu_mask) {
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for_each_gpu_id_in_mask(gpu_id, &gpu->peer_info.peer_gpu_mask) {
pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
if (pool && !test_bit(uvm_channel_pool_index_in_channel_manager(pool), suspended_pools)) {
status = channel_pool_suspend_p2p(pool);
@ -2014,6 +2023,7 @@ NV_STATUS uvm_channel_manager_suspend_p2p(uvm_channel_manager_t *channel_manager
void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager)
{
uvm_channel_pool_t *pool;
uvm_gpu_t *gpu = channel_manager->gpu;
uvm_gpu_id_t gpu_id;
DECLARE_BITMAP(resumed_pools, UVM_COPY_ENGINE_COUNT_MAX);
@ -2021,7 +2031,9 @@ void uvm_channel_manager_resume_p2p(uvm_channel_manager_t *channel_manager)
// Use bitmap to track which were suspended.
bitmap_zero(resumed_pools, channel_manager->num_channel_pools);
for_each_gpu_id_in_mask(gpu_id, &channel_manager->gpu->peer_info.peer_gpu_mask) {
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
for_each_gpu_id_in_mask(gpu_id, &gpu->peer_info.peer_gpu_mask) {
pool = channel_manager->pool_to_use.gpu_to_gpu[uvm_id_gpu_index(gpu_id)];
if (pool && !test_and_set_bit(uvm_channel_pool_index_in_channel_manager(pool), resumed_pools))
channel_pool_resume_p2p(pool);
@ -3243,9 +3255,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
manager->conf.num_gpfifo_entries = UVM_CHANNEL_NUM_GPFIFO_ENTRIES_DEFAULT;
if (manager->conf.num_gpfifo_entries != uvm_channel_num_gpfifo_entries) {
pr_info("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
uvm_channel_num_gpfifo_entries,
manager->conf.num_gpfifo_entries);
UVM_INFO_PRINT("Invalid value for uvm_channel_num_gpfifo_entries = %u, using %u instead\n",
uvm_channel_num_gpfifo_entries,
manager->conf.num_gpfifo_entries);
}
// 2- Allocation locations
@ -3285,9 +3297,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
pushbuffer_loc_value = uvm_channel_pushbuffer_loc;
if (!is_string_valid_location(pushbuffer_loc_value)) {
pushbuffer_loc_value = UVM_CHANNEL_PUSHBUFFER_LOC_DEFAULT;
pr_info("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
uvm_channel_pushbuffer_loc,
pushbuffer_loc_value);
UVM_INFO_PRINT("Invalid value for uvm_channel_pushbuffer_loc = %s, using %s instead\n",
uvm_channel_pushbuffer_loc,
pushbuffer_loc_value);
}
// Override the default value if requested by the user
@ -3297,8 +3309,8 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
// so force the location to sys for now.
// TODO: Bug 2904133: Remove the following "if" after the bug is fixed.
if (NVCPU_IS_AARCH64) {
pr_info("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
pushbuffer_loc_value);
UVM_INFO_PRINT("uvm_channel_pushbuffer_loc = %s is not supported on AARCH64, using sys instead\n",
pushbuffer_loc_value);
manager->conf.pushbuffer_loc = UVM_BUFFER_LOCATION_SYS;
}
else {
@ -3310,8 +3322,9 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
// Only support the knobs for GPFIFO/GPPut on Volta+
if (!gpu->parent->gpfifo_in_vidmem_supported) {
if (manager->conf.gpput_loc == UVM_BUFFER_LOCATION_SYS) {
pr_info("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s instead\n",
buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
UVM_INFO_PRINT("CAUTION: allocating GPPut in sysmem is NOT supported and may crash the system, using %s "
"instead\n",
buffer_location_to_string(UVM_BUFFER_LOCATION_DEFAULT));
}
manager->conf.gpfifo_loc = UVM_BUFFER_LOCATION_DEFAULT;
@ -3323,17 +3336,17 @@ static void init_channel_manager_conf(uvm_channel_manager_t *manager)
gpfifo_loc_value = uvm_channel_gpfifo_loc;
if (!is_string_valid_location(gpfifo_loc_value)) {
gpfifo_loc_value = UVM_CHANNEL_GPFIFO_LOC_DEFAULT;
pr_info("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
uvm_channel_gpfifo_loc,
gpfifo_loc_value);
UVM_INFO_PRINT("Invalid value for uvm_channel_gpfifo_loc = %s, using %s instead\n",
uvm_channel_gpfifo_loc,
gpfifo_loc_value);
}
gpput_loc_value = uvm_channel_gpput_loc;
if (!is_string_valid_location(gpput_loc_value)) {
gpput_loc_value = UVM_CHANNEL_GPPUT_LOC_DEFAULT;
pr_info("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
uvm_channel_gpput_loc,
gpput_loc_value);
UVM_INFO_PRINT("Invalid value for uvm_channel_gpput_loc = %s, using %s instead\n",
uvm_channel_gpput_loc,
gpput_loc_value);
}
// On coherent platforms where the GPU does not cache sysmem but the CPU


@ -57,6 +57,7 @@ enum {
// NULL.
void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);
// Long prefix - typically for debugging and tests.
#define UVM_PRINT_FUNC_PREFIX(func, prefix, fmt, ...) \
func(prefix "%s:%u %s[pid:%d]" fmt, \
kbasename(__FILE__), \
@ -65,10 +66,15 @@ void uvm_uuid_string(char *buffer, const NvProcessorUuid *uuid);
current->pid, \
##__VA_ARGS__)
// Short prefix - typically for information.
#define UVM_PRINT_FUNC_SHORT_PREFIX(func, prefix, fmt, ...) \
func(prefix fmt, ##__VA_ARGS__)
// No prefix - used by kernel panic messages.
#define UVM_PRINT_FUNC(func, fmt, ...) \
UVM_PRINT_FUNC_PREFIX(func, "", fmt, ##__VA_ARGS__)
// Check whether UVM_{ERR,DBG,INFO)_PRINT* should be enabled
// Check whether UVM_{ERR,DBG)_PRINT* should be enabled.
bool uvm_debug_prints_enabled(void);
// A printing helper like UVM_PRINT_FUNC_PREFIX that only prints if
@ -80,10 +86,10 @@ bool uvm_debug_prints_enabled(void);
} \
} while (0)
#define UVM_ASSERT_PRINT(fmt, ...) \
#define UVM_ERR_PRINT_ALWAYS(fmt, ...) \
UVM_PRINT_FUNC_PREFIX(printk, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ASSERT_PRINT_RL(fmt, ...) \
#define UVM_ERR_PRINT_ALWAYS_RL(fmt, ...) \
UVM_PRINT_FUNC_PREFIX(printk_ratelimited, KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ERR_PRINT(fmt, ...) \
@ -95,13 +101,16 @@ bool uvm_debug_prints_enabled(void);
#define UVM_DBG_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_DBG_PRINT_RL(fmt, ...) \
#define UVM_DBG_PRINT_RL(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk_ratelimited, KERN_DEBUG NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
// UVM_INFO_PRINT prints in all modes (including in the release mode.) It is
// used for relaying driver-level information, rather than detailed debugging
// information; therefore, it does not add the "pretty long prefix".
#define UVM_INFO_PRINT(fmt, ...) \
UVM_PRINT_FUNC_PREFIX_CHECK(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
UVM_PRINT_FUNC_SHORT_PREFIX(printk, KERN_INFO NVIDIA_UVM_PRETTY_PRINTING_PREFIX, " " fmt, ##__VA_ARGS__)
#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
#define UVM_ERR_PRINT_NV_STATUS(msg, rmStatus, ...) \
UVM_ERR_PRINT("ERROR: %s : " msg "\n", nvstatusToString(rmStatus), ##__VA_ARGS__)
#define UVM_PANIC() UVM_PRINT_FUNC(panic, "\n")
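As a rough illustration of the long and short prefix styles above (assuming NVIDIA_UVM_PRETTY_PRINTING_PREFIX expands to "nvidia-uvm: "; the file, line, and pid values are placeholders):

    UVM_ERR_PRINT("channel allocation failed\n");
    /* when debug prints are enabled:
       nvidia-uvm: uvm_channel.c:210 channel_create[pid:4242] channel allocation failed */

    UVM_INFO_PRINT("Access counters enabled.\n");
    /* nvidia-uvm: Access counters enabled. */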
@ -134,13 +143,13 @@ void on_uvm_test_fail(void);
// Unlike on_uvm_test_fail it provides 'panic' coverity semantics
void on_uvm_assert(void);
#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
do { \
if (unlikely(!(expr))) { \
UVM_ASSERT_PRINT("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
dump_stack(); \
on_uvm_assert(); \
} \
#define _UVM_ASSERT_MSG(expr, cond, fmt, ...) \
do { \
if (unlikely(!(expr))) { \
UVM_ERR_PRINT_ALWAYS("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
dump_stack(); \
on_uvm_assert(); \
} \
} while (0)
// Prevent function calls in expr and the print argument list from being
@ -151,7 +160,8 @@ void on_uvm_assert(void);
UVM_NO_PRINT(fmt, ##__VA_ARGS__); \
} while (0)
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity builds
// UVM_ASSERT and UVM_ASSERT_MSG are only enabled on non-release and Coverity
// builds.
#if UVM_IS_DEBUG() || defined __COVERITY__
#define UVM_ASSERT_MSG(expr, fmt, ...) _UVM_ASSERT_MSG(expr, #expr, ": " fmt, ##__VA_ARGS__)
#define UVM_ASSERT(expr) _UVM_ASSERT_MSG(expr, #expr, "\n")
@ -174,16 +184,16 @@ extern bool uvm_release_asserts_set_global_error_for_tests;
// Given these are enabled for release builds, we need to be more cautious than
// in UVM_ASSERT(). Use a ratelimited print and only dump the stack if a module
// param is enabled.
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
do { \
if (uvm_release_asserts && unlikely(!(expr))) { \
UVM_ASSERT_PRINT_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
if (uvm_release_asserts_dump_stack) \
dump_stack(); \
on_uvm_assert(); \
} \
#define _UVM_ASSERT_MSG_RELEASE(expr, cond, fmt, ...) \
do { \
if (uvm_release_asserts && unlikely(!(expr))) { \
UVM_ERR_PRINT_ALWAYS_RL("Assert failed, condition %s not true" fmt, cond, ##__VA_ARGS__); \
if (uvm_release_asserts_set_global_error || uvm_release_asserts_set_global_error_for_tests) \
uvm_global_set_fatal_error(NV_ERR_INVALID_STATE); \
if (uvm_release_asserts_dump_stack) \
dump_stack(); \
on_uvm_assert(); \
} \
} while (0)
#define UVM_ASSERT_MSG_RELEASE(expr, fmt, ...) _UVM_ASSERT_MSG_RELEASE(expr, #expr, ": " fmt, ##__VA_ARGS__)
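A brief usage sketch with hypothetical values: at a call site the release-build assert reads like the debug-only one, but it prints through the ratelimited UVM_ERR_PRINT_ALWAYS_RL and only dumps the stack when the corresponding module parameter is set:

    UVM_ASSERT_MSG_RELEASE(entry_count <= max_entries,
                           "entry_count %u exceeds max %u\n",
                           entry_count,
                           max_entries);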


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2024 NVIDIA Corporation
Copyright (c) 2021-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -532,7 +532,7 @@ NV_STATUS uvm_conf_computing_fault_decrypt(uvm_parent_gpu_t *parent_gpu,
{
NV_STATUS status;
NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;
// There is no dedicated lock for the CSL context associated with replayable
// faults. The mutual exclusion required by the RM CSL API is enforced by
@ -571,7 +571,7 @@ void uvm_conf_computing_fault_increment_decrypt_iv(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status;
NvU32 fault_entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
UvmCslContext *csl_context = &parent_gpu->fault_buffer_info.rm_info.replayable.cslCtx;
UvmCslContext *csl_context = &parent_gpu->fault_buffer.rm_info.replayable.cslCtx;
// See comment in uvm_conf_computing_fault_decrypt
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -93,11 +93,11 @@ typedef struct uvm_service_block_context_struct uvm_service_block_context_t;
typedef struct uvm_ats_fault_invalidate_struct uvm_ats_fault_invalidate_t;
typedef struct uvm_replayable_fault_buffer_info_struct uvm_replayable_fault_buffer_info_t;
typedef struct uvm_non_replayable_fault_buffer_info_struct uvm_non_replayable_fault_buffer_info_t;
typedef struct uvm_replayable_fault_buffer_struct uvm_replayable_fault_buffer_t;
typedef struct uvm_non_replayable_fault_buffer_struct uvm_non_replayable_fault_buffer_t;
typedef struct uvm_access_counter_buffer_entry_struct uvm_access_counter_buffer_entry_t;
typedef struct uvm_access_counter_buffer_struct uvm_access_counter_buffer_t;
typedef struct uvm_access_counter_service_batch_context_struct uvm_access_counter_service_batch_context_t;
typedef struct uvm_pmm_sysmem_mappings_struct uvm_pmm_sysmem_mappings_t;
typedef struct uvm_reverse_map_struct uvm_reverse_map_t;


@ -194,6 +194,12 @@ NV_STATUS uvm_global_init(void)
goto error;
}
status = uvm_access_counters_init();
if (status != NV_OK) {
UVM_ERR_PRINT("uvm_access_counters_init failed: %s\n", nvstatusToString(status));
goto error;
}
// This sets up the ISR (interrupt service routine), by hooking into RM's
// top-half ISR callback. As soon as this call completes, GPU interrupts
// will start arriving, so it's important to be prepared to receive
@ -224,8 +230,8 @@ void uvm_global_exit(void)
nv_kthread_q_stop(&g_uvm_global.deferred_release_q);
uvm_unregister_callbacks();
uvm_access_counters_exit();
uvm_service_block_context_exit();
uvm_perf_heuristics_exit();
uvm_perf_events_exit();
uvm_migrate_exit();
@ -287,7 +293,7 @@ static NV_STATUS uvm_suspend(void)
// * Flush relevant kthread queues (bottom half, etc.)
// Some locks acquired by this function, such as pm.lock, are released
// by uvm_resume(). This is contrary to the lock tracking code's
// by uvm_resume(). This is contrary to the lock tracking code's
// expectations, so lock tracking is disabled.
uvm_thread_context_lock_disable_tracking();
@ -304,7 +310,7 @@ static NV_STATUS uvm_suspend(void)
gpu = uvm_gpu_get(gpu_id);
// Since fault buffer state may be lost across sleep cycles, UVM must
// ensure any outstanding replayable faults are dismissed. The RM
// ensure any outstanding replayable faults are dismissed. The RM
// guarantees that all user channels have been preempted before
// uvm_suspend() is called, which implies that no user channels can be
// stalled on faults when this point is reached.
@ -330,7 +336,7 @@ static NV_STATUS uvm_suspend(void)
}
// Acquire each VA space's lock in write mode to lock out VMA open and
// release callbacks. These entry points do not have feasible early exit
// release callbacks. These entry points do not have feasible early exit
// options, and so aren't suitable for synchronization with pm.lock.
uvm_mutex_lock(&g_uvm_global.va_spaces.lock);
@ -360,7 +366,7 @@ static NV_STATUS uvm_resume(void)
g_uvm_global.pm.is_suspended = false;
// Some locks released by this function, such as pm.lock, were acquired
// by uvm_suspend(). This is contrary to the lock tracking code's
// by uvm_suspend(). This is contrary to the lock tracking code's
// expectations, so lock tracking is disabled.
uvm_thread_context_lock_disable_tracking();
@ -392,7 +398,7 @@ static NV_STATUS uvm_resume(void)
uvm_thread_context_lock_enable_tracking();
// Force completion of any release callbacks successfully queued for
// deferred completion while suspended. The deferred release
// deferred completion while suspended. The deferred release
// queue is not guaranteed to remain empty following this flush since
// some threads that failed to acquire pm.lock in uvm_release() may
// not have scheduled their handlers yet.
@ -424,7 +430,8 @@ void uvm_global_set_fatal_error_impl(NV_STATUS error)
}
else {
UVM_ERR_PRINT("Encountered a global fatal error: %s after a global error has been already set: %s\n",
nvstatusToString(error), nvstatusToString(previous_error));
nvstatusToString(error),
nvstatusToString(previous_error));
}
nvUvmInterfaceReportFatalError(error);


@ -538,7 +538,9 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
NvU64 num_pages_in;
NvU64 num_pages_out;
NvU64 mapped_cpu_pages_size;
NvU32 get, put;
NvU32 get;
NvU32 put;
NvU32 i;
unsigned int cpu;
UVM_SEQ_OR_DBG_PRINT(s, "GPU %s\n", uvm_gpu_name(gpu));
@ -608,19 +610,19 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
gpu->parent->isr.replayable_faults.stats.cpu_exec_count[cpu]);
}
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_buffer_entries %u\n",
gpu->parent->fault_buffer_info.replayable.max_faults);
gpu->parent->fault_buffer.replayable.max_faults);
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_cached_get %u\n",
gpu->parent->fault_buffer_info.replayable.cached_get);
gpu->parent->fault_buffer.replayable.cached_get);
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_cached_put %u\n",
gpu->parent->fault_buffer_info.replayable.cached_put);
gpu->parent->fault_buffer.replayable.cached_put);
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_get %u\n",
gpu->parent->fault_buffer_hal->read_get(gpu->parent));
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_put %u\n",
gpu->parent->fault_buffer_hal->read_put(gpu->parent));
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_fault_batch_size %u\n",
gpu->parent->fault_buffer_info.max_batch_size);
gpu->parent->fault_buffer.max_batch_size);
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_replay_policy %s\n",
uvm_perf_fault_replay_policy_string(gpu->parent->fault_buffer_info.replayable.replay_policy));
uvm_perf_fault_replay_policy_string(gpu->parent->fault_buffer.replayable.replay_policy));
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults_num_faults %llu\n",
gpu->parent->stats.num_replayable_faults);
}
@ -634,32 +636,35 @@ static void gpu_info_print_common(uvm_gpu_t *gpu, struct seq_file *s)
gpu->parent->isr.non_replayable_faults.stats.cpu_exec_count[cpu]);
}
UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults_buffer_entries %u\n",
gpu->parent->fault_buffer_info.non_replayable.max_faults);
gpu->parent->fault_buffer.non_replayable.max_faults);
UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults_num_faults %llu\n",
gpu->parent->stats.num_non_replayable_faults);
}
if (gpu->parent->isr.access_counters.handling_ref_count > 0) {
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_bh %llu\n",
gpu->parent->isr.access_counters.stats.bottom_half_count);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_bh/cpu\n");
for_each_cpu(cpu, &gpu->parent->isr.access_counters.stats.cpus_used_mask) {
UVM_SEQ_OR_DBG_PRINT(s, " cpu%02u %llu\n",
cpu,
gpu->parent->isr.access_counters.stats.cpu_exec_count[cpu]);
for (i = 0; i < gpu_info->accessCntrBufferCount; i++) {
if (gpu->parent->access_counters_supported && gpu->parent->isr.access_counters[i].handling_ref_count > 0) {
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_notif_buffer_index %u\n", i);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_bh %llu\n",
gpu->parent->isr.access_counters[i].stats.bottom_half_count);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_bh/cpu\n");
for_each_cpu(cpu, &gpu->parent->isr.access_counters[i].stats.cpus_used_mask) {
UVM_SEQ_OR_DBG_PRINT(s, " cpu%02u %llu\n",
cpu,
gpu->parent->isr.access_counters[i].stats.cpu_exec_count[cpu]);
}
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_buffer_entries %u\n",
gpu->parent->access_counter_buffer[i].max_notifications);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_get %u\n",
gpu->parent->access_counter_buffer[i].cached_get);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_cached_put %u\n",
gpu->parent->access_counter_buffer[i].cached_put);
get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferGet);
put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer[i].rm_info.pAccessCntrBufferPut);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_get %u\n", get);
UVM_SEQ_OR_DBG_PRINT(s, " access_counters_put %u\n", put);
}
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_buffer_entries %u\n",
gpu->parent->access_counter_buffer_info.max_notifications);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_cached_get %u\n",
gpu->parent->access_counter_buffer_info.cached_get);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_cached_put %u\n",
gpu->parent->access_counter_buffer_info.cached_put);
get = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer_info.rm_info.pAccessCntrBufferGet);
put = UVM_GPU_READ_ONCE(*gpu->parent->access_counter_buffer_info.rm_info.pAccessCntrBufferPut);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_get %u\n", get);
UVM_SEQ_OR_DBG_PRINT(s, "access_counters_put %u\n", put);
}
num_pages_out = atomic64_read(&gpu->parent->stats.num_pages_out);
@ -694,18 +699,18 @@ gpu_fault_stats_print_common(uvm_parent_gpu_t *parent_gpu, struct seq_file *s)
UVM_SEQ_OR_DBG_PRINT(s, "replayable_faults %llu\n", parent_gpu->stats.num_replayable_faults);
UVM_SEQ_OR_DBG_PRINT(s, "duplicates %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_duplicate_faults);
parent_gpu->fault_buffer.replayable.stats.num_duplicate_faults);
UVM_SEQ_OR_DBG_PRINT(s, "faults_by_access_type:\n");
UVM_SEQ_OR_DBG_PRINT(s, " prefetch %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_prefetch_faults);
parent_gpu->fault_buffer.replayable.stats.num_prefetch_faults);
UVM_SEQ_OR_DBG_PRINT(s, " read %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_read_faults);
parent_gpu->fault_buffer.replayable.stats.num_read_faults);
UVM_SEQ_OR_DBG_PRINT(s, " write %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_write_faults);
parent_gpu->fault_buffer.replayable.stats.num_write_faults);
UVM_SEQ_OR_DBG_PRINT(s, " atomic %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_atomic_faults);
num_pages_out = atomic64_read(&parent_gpu->fault_buffer_info.replayable.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->fault_buffer_info.replayable.stats.num_pages_in);
parent_gpu->fault_buffer.replayable.stats.num_atomic_faults);
num_pages_out = atomic64_read(&parent_gpu->fault_buffer.replayable.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->fault_buffer.replayable.stats.num_pages_in);
UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
@ -713,25 +718,25 @@ gpu_fault_stats_print_common(uvm_parent_gpu_t *parent_gpu, struct seq_file *s)
(num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
UVM_SEQ_OR_DBG_PRINT(s, "replays:\n");
UVM_SEQ_OR_DBG_PRINT(s, " start %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_replays);
parent_gpu->fault_buffer.replayable.stats.num_replays);
UVM_SEQ_OR_DBG_PRINT(s, " start_ack_all %llu\n",
parent_gpu->fault_buffer_info.replayable.stats.num_replays_ack_all);
parent_gpu->fault_buffer.replayable.stats.num_replays_ack_all);
UVM_SEQ_OR_DBG_PRINT(s, "non_replayable_faults %llu\n", parent_gpu->stats.num_non_replayable_faults);
UVM_SEQ_OR_DBG_PRINT(s, "faults_by_access_type:\n");
UVM_SEQ_OR_DBG_PRINT(s, " read %llu\n",
parent_gpu->fault_buffer_info.non_replayable.stats.num_read_faults);
parent_gpu->fault_buffer.non_replayable.stats.num_read_faults);
UVM_SEQ_OR_DBG_PRINT(s, " write %llu\n",
parent_gpu->fault_buffer_info.non_replayable.stats.num_write_faults);
parent_gpu->fault_buffer.non_replayable.stats.num_write_faults);
UVM_SEQ_OR_DBG_PRINT(s, " atomic %llu\n",
parent_gpu->fault_buffer_info.non_replayable.stats.num_atomic_faults);
parent_gpu->fault_buffer.non_replayable.stats.num_atomic_faults);
UVM_SEQ_OR_DBG_PRINT(s, "faults_by_addressing:\n");
UVM_SEQ_OR_DBG_PRINT(s, " virtual %llu\n",
parent_gpu->stats.num_non_replayable_faults -
parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults);
parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults);
UVM_SEQ_OR_DBG_PRINT(s, " physical %llu\n",
parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults);
num_pages_out = atomic64_read(&parent_gpu->fault_buffer_info.non_replayable.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->fault_buffer_info.non_replayable.stats.num_pages_in);
parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults);
num_pages_out = atomic64_read(&parent_gpu->fault_buffer.non_replayable.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->fault_buffer.non_replayable.stats.num_pages_in);
UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
@ -743,16 +748,25 @@ static void gpu_access_counters_print_common(uvm_parent_gpu_t *parent_gpu, struc
{
NvU64 num_pages_in;
NvU64 num_pages_out;
NvU32 i;
UVM_ASSERT(uvm_procfs_is_debug_enabled());
num_pages_out = atomic64_read(&parent_gpu->access_counter_buffer_info.stats.num_pages_out);
num_pages_in = atomic64_read(&parent_gpu->access_counter_buffer_info.stats.num_pages_in);
UVM_SEQ_OR_DBG_PRINT(s, "migrations:\n");
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_out %llu (%llu MB)\n", num_pages_out,
(num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
// procfs files are created before gpu_init_isr, so we need to check whether
// the access_counter_buffer has been allocated.
if (parent_gpu->access_counter_buffer) {
for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++) {
uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[i];
num_pages_out = atomic64_read(&access_counters->stats.num_pages_out);
num_pages_in = atomic64_read(&access_counters->stats.num_pages_in);
UVM_SEQ_OR_DBG_PRINT(s, "migrations - buffer index %u:\n", i);
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_in %llu (%llu MB)\n", num_pages_in,
(num_pages_in * (NvU64)PAGE_SIZE) / (1024u * 1024u));
UVM_SEQ_OR_DBG_PRINT(s, " num_pages_out %llu (%llu MB)\n", num_pages_out,
(num_pages_out * (NvU64)PAGE_SIZE) / (1024u * 1024u));
}
}
}
// This function converts an index of 2D array of size [N x N] into an index
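// [Editor's note: illustrative sketch, not part of this commit.] The comment
// above refers to flattening a symmetric [N x N] pair table into a 1D array.
// One common way to index the strictly-upper triangle (N * (N - 1) / 2
// entries) is shown below; the driver's actual formula may differ, and the
// names here (pair_table_index, n) are hypothetical.
#include <assert.h>

static unsigned pair_table_index(unsigned i, unsigned j, unsigned n)
{
    // The table is symmetric, so order the pair such that i < j.
    if (i > j) {
        unsigned tmp = i;
        i = j;
        j = tmp;
    }
    assert(i < j && j < n);

    // Rows 0..i-1 contribute (n - 1) + (n - 2) + ... + (n - i) entries.
    return i * (2 * n - i - 1) / 2 + (j - i - 1);
}

// Example with n = 4: (0,1)->0, (0,2)->1, (0,3)->2, (1,2)->3, (1,3)->4, (2,3)->5.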
@ -892,7 +906,7 @@ static int nv_procfs_read_gpu_info(struct seq_file *s, void *v)
uvm_gpu_t *gpu = (uvm_gpu_t *)s->private;
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
return -EAGAIN;
return -EAGAIN;
gpu_info_print_common(gpu, s);
@ -911,7 +925,7 @@ static int nv_procfs_read_gpu_fault_stats(struct seq_file *s, void *v)
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)s->private;
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
return -EAGAIN;
return -EAGAIN;
gpu_fault_stats_print_common(parent_gpu, s);
@ -930,7 +944,7 @@ static int nv_procfs_read_gpu_access_counters(struct seq_file *s, void *v)
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)s->private;
if (!uvm_down_read_trylock(&g_uvm_global.pm.lock))
return -EAGAIN;
return -EAGAIN;
gpu_access_counters_print_common(parent_gpu, s);
@ -1182,7 +1196,7 @@ static NV_STATUS alloc_parent_gpu(const NvProcessorUuid *gpu_uuid,
uvm_uuid_copy(&parent_gpu->uuid, gpu_uuid);
uvm_sema_init(&parent_gpu->isr.replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_sema_init(&parent_gpu->isr.non_replayable_faults.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_sema_init(&parent_gpu->isr.access_counters.service_lock, 1, UVM_LOCK_ORDER_ISR);
uvm_mutex_init(&parent_gpu->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);
uvm_spin_lock_irqsave_init(&parent_gpu->isr.interrupts_lock, UVM_LOCK_ORDER_LEAF);
uvm_spin_lock_init(&parent_gpu->instance_ptr_table_lock, UVM_LOCK_ORDER_LEAF);
uvm_rb_tree_init(&parent_gpu->instance_ptr_table);
@ -1221,7 +1235,7 @@ static uvm_gpu_t *alloc_gpu(uvm_parent_gpu_t *parent_gpu, uvm_gpu_id_t gpu_id)
// Initialize enough of the gpu struct for remove_gpu to be called
gpu->magic = UVM_GPU_MAGIC_VALUE;
uvm_spin_lock_init(&gpu->peer_info.peer_gpus_lock, UVM_LOCK_ORDER_LEAF);
uvm_spin_lock_init(&gpu->peer_info.peer_gpu_lock, UVM_LOCK_ORDER_LEAF);
sub_processor_index = uvm_id_sub_processor_index(gpu_id);
parent_gpu->gpus[sub_processor_index] = gpu;
@ -1545,12 +1559,6 @@ static NV_STATUS init_gpu(uvm_gpu_t *gpu, const UvmGpuInfo *gpu_info)
return status;
}
status = uvm_pmm_sysmem_mappings_init(gpu, &gpu->pmm_reverse_sysmem_mappings);
if (status != NV_OK) {
UVM_ERR_PRINT("CPU PMM MMIO initialization failed: %s, GPU %s\n", nvstatusToString(status), uvm_gpu_name(gpu));
return status;
}
uvm_pmm_gpu_device_p2p_init(gpu);
status = init_semaphore_pools(gpu);
@ -1616,7 +1624,7 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
// trackers.
if (sync_replay_tracker) {
uvm_parent_gpu_replayable_faults_isr_lock(parent_gpu);
status = uvm_tracker_wait(&parent_gpu->fault_buffer_info.replayable.replay_tracker);
status = uvm_tracker_wait(&parent_gpu->fault_buffer.replayable.replay_tracker);
uvm_parent_gpu_replayable_faults_isr_unlock(parent_gpu);
if (status != NV_OK)
@ -1627,7 +1635,7 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
// VA block trackers, too.
if (sync_clear_faulted_tracker) {
uvm_parent_gpu_non_replayable_faults_isr_lock(parent_gpu);
status = uvm_tracker_wait(&parent_gpu->fault_buffer_info.non_replayable.clear_faulted_tracker);
status = uvm_tracker_wait(&parent_gpu->fault_buffer.non_replayable.clear_faulted_tracker);
uvm_parent_gpu_non_replayable_faults_isr_unlock(parent_gpu);
if (status != NV_OK)
@ -1635,13 +1643,20 @@ static void sync_parent_gpu_trackers(uvm_parent_gpu_t *parent_gpu,
}
// Sync the access counter clear tracker too.
if (parent_gpu->access_counters_supported) {
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
status = uvm_tracker_wait(&parent_gpu->access_counter_buffer_info.clear_tracker);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
if (parent_gpu->access_counters_supported && parent_gpu->access_counter_buffer) {
NvU32 notif_buf_index;
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = &parent_gpu->access_counter_buffer[notif_buf_index];
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
if (access_counters->rm_info.accessCntrBufferHandle != 0) {
uvm_access_counters_isr_lock(access_counters);
status = uvm_tracker_wait(&access_counters->clear_tracker);
uvm_access_counters_isr_unlock(access_counters);
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
}
}
}
}
@ -1680,15 +1695,11 @@ static void deinit_parent_gpu(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(uvm_rb_tree_empty(&parent_gpu->instance_ptr_table));
UVM_ASSERT(uvm_rb_tree_empty(&parent_gpu->tsg_table));
// Access counters should have been disabled when the GPU is no longer
// registered in any VA space.
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
deinit_parent_procfs_files(parent_gpu);
// Return ownership to RM
uvm_parent_gpu_deinit_isr(parent_gpu);
deinit_parent_procfs_files(parent_gpu);
uvm_pmm_devmem_deinit(parent_gpu);
uvm_ats_remove_gpu(parent_gpu);
@ -1746,8 +1757,6 @@ static void deinit_gpu(uvm_gpu_t *gpu)
uvm_pmm_gpu_device_p2p_deinit(gpu);
uvm_pmm_sysmem_mappings_deinit(&gpu->pmm_reverse_sysmem_mappings);
uvm_pmm_gpu_deinit(&gpu->pmm);
if (gpu->rm_address_space != 0)
@ -1794,14 +1803,14 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
switch (fault_entry->fault_access_type)
{
case UVM_FAULT_ACCESS_TYPE_READ:
++parent_gpu->fault_buffer_info.non_replayable.stats.num_read_faults;
++parent_gpu->fault_buffer.non_replayable.stats.num_read_faults;
break;
case UVM_FAULT_ACCESS_TYPE_WRITE:
++parent_gpu->fault_buffer_info.non_replayable.stats.num_write_faults;
++parent_gpu->fault_buffer.non_replayable.stats.num_write_faults;
break;
case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
++parent_gpu->fault_buffer_info.non_replayable.stats.num_atomic_faults;
++parent_gpu->fault_buffer.non_replayable.stats.num_atomic_faults;
break;
default:
UVM_ASSERT_MSG(false, "Invalid access type for non-replayable faults\n");
@ -1809,7 +1818,7 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
}
if (!fault_entry->is_virtual)
++parent_gpu->fault_buffer_info.non_replayable.stats.num_physical_faults;
++parent_gpu->fault_buffer.non_replayable.stats.num_physical_faults;
++parent_gpu->stats.num_non_replayable_faults;
@ -1821,23 +1830,23 @@ static void update_stats_parent_gpu_fault_instance(uvm_parent_gpu_t *parent_gpu,
switch (fault_entry->fault_access_type)
{
case UVM_FAULT_ACCESS_TYPE_PREFETCH:
++parent_gpu->fault_buffer_info.replayable.stats.num_prefetch_faults;
++parent_gpu->fault_buffer.replayable.stats.num_prefetch_faults;
break;
case UVM_FAULT_ACCESS_TYPE_READ:
++parent_gpu->fault_buffer_info.replayable.stats.num_read_faults;
++parent_gpu->fault_buffer.replayable.stats.num_read_faults;
break;
case UVM_FAULT_ACCESS_TYPE_WRITE:
++parent_gpu->fault_buffer_info.replayable.stats.num_write_faults;
++parent_gpu->fault_buffer.replayable.stats.num_write_faults;
break;
case UVM_FAULT_ACCESS_TYPE_ATOMIC_WEAK:
case UVM_FAULT_ACCESS_TYPE_ATOMIC_STRONG:
++parent_gpu->fault_buffer_info.replayable.stats.num_atomic_faults;
++parent_gpu->fault_buffer.replayable.stats.num_atomic_faults;
break;
default:
break;
}
if (is_duplicate || fault_entry->filtered)
++parent_gpu->fault_buffer_info.replayable.stats.num_duplicate_faults;
++parent_gpu->fault_buffer.replayable.stats.num_duplicate_faults;
++parent_gpu->stats.num_replayable_faults;
}
@ -1901,21 +1910,29 @@ static void update_stats_migration_cb(uvm_perf_event_t event_id, uvm_perf_event_
if (gpu_dst) {
atomic64_add(pages, &gpu_dst->parent->stats.num_pages_in);
if (is_replayable_fault)
atomic64_add(pages, &gpu_dst->parent->fault_buffer_info.replayable.stats.num_pages_in);
else if (is_non_replayable_fault)
atomic64_add(pages, &gpu_dst->parent->fault_buffer_info.non_replayable.stats.num_pages_in);
else if (is_access_counter)
atomic64_add(pages, &gpu_dst->parent->access_counter_buffer_info.stats.num_pages_in);
if (is_replayable_fault) {
atomic64_add(pages, &gpu_dst->parent->fault_buffer.replayable.stats.num_pages_in);
}
else if (is_non_replayable_fault) {
atomic64_add(pages, &gpu_dst->parent->fault_buffer.non_replayable.stats.num_pages_in);
}
else if (is_access_counter) {
NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
atomic64_add(pages, &gpu_dst->parent->access_counter_buffer[index].stats.num_pages_in);
}
}
if (gpu_src) {
atomic64_add(pages, &gpu_src->parent->stats.num_pages_out);
if (is_replayable_fault)
atomic64_add(pages, &gpu_src->parent->fault_buffer_info.replayable.stats.num_pages_out);
else if (is_non_replayable_fault)
atomic64_add(pages, &gpu_src->parent->fault_buffer_info.non_replayable.stats.num_pages_out);
else if (is_access_counter)
atomic64_add(pages, &gpu_src->parent->access_counter_buffer_info.stats.num_pages_out);
if (is_replayable_fault) {
atomic64_add(pages, &gpu_src->parent->fault_buffer.replayable.stats.num_pages_out);
}
else if (is_non_replayable_fault) {
atomic64_add(pages, &gpu_src->parent->fault_buffer.non_replayable.stats.num_pages_out);
}
else if (is_access_counter) {
NvU32 index = event_data->migration.make_resident_context->access_counters_buffer_index;
atomic64_add(pages, &gpu_src->parent->access_counter_buffer[index].stats.num_pages_out);
}
}
}
@ -1929,8 +1946,9 @@ static void uvm_param_conf(void)
}
else {
if (strcmp(uvm_peer_copy, UVM_PARAM_PEER_COPY_PHYSICAL) != 0) {
pr_info("Invalid value for uvm_peer_copy = %s, using %s instead.\n",
uvm_peer_copy, UVM_PARAM_PEER_COPY_PHYSICAL);
UVM_INFO_PRINT("Invalid value for uvm_peer_copy = %s, using %s instead.\n",
uvm_peer_copy,
UVM_PARAM_PEER_COPY_PHYSICAL);
}
g_uvm_global.peer_copy_mode = UVM_GPU_PEER_COPY_MODE_PHYSICAL;
@ -2397,6 +2415,7 @@ static NV_STATUS peers_init(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *pe
{
NV_STATUS status;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
UVM_ASSERT(peer_caps->ref_count == 0);
status = parent_peers_retain(gpu0->parent, gpu1->parent);
@ -2419,25 +2438,13 @@ static NV_STATUS peers_init(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *pe
UVM_ASSERT(uvm_gpu_get(gpu0->id) == gpu0);
UVM_ASSERT(uvm_gpu_get(gpu1->id) == gpu1);
// In the case of NVLINK peers, this initialization will happen during
// add_gpu. As soon as the peer info table is assigned below, the access
// counter bottom half could start operating on the GPU being newly
// added and inspecting the peer caps, so all of the appropriate
// initialization must happen before this point.
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu0->peer_info.peer_gpu_lock);
uvm_processor_mask_set(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
UVM_ASSERT(gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] == NULL);
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = gpu1;
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu0->peer_info.peer_gpu_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpu_lock);
uvm_processor_mask_set(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
UVM_ASSERT(gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] == NULL);
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = gpu0;
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu1->peer_info.peer_gpu_lock);
return NV_OK;
@ -2465,18 +2472,18 @@ static NV_STATUS peers_retain(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1)
static void peers_destroy(uvm_gpu_t *gpu0, uvm_gpu_t *gpu1, uvm_gpu_peer_t *peer_caps)
{
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
uvm_mmu_destroy_peer_identity_mappings(gpu0, gpu1);
uvm_mmu_destroy_peer_identity_mappings(gpu1, gpu0);
uvm_spin_lock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu0->peer_info.peer_gpu_lock);
uvm_processor_mask_clear(&gpu0->peer_info.peer_gpu_mask, gpu1->id);
gpu0->peer_info.peer_gpus[uvm_id_gpu_index(gpu1->id)] = NULL;
uvm_spin_unlock(&gpu0->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu0->peer_info.peer_gpu_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpus_lock);
uvm_spin_lock(&gpu1->peer_info.peer_gpu_lock);
uvm_processor_mask_clear(&gpu1->peer_info.peer_gpu_mask, gpu0->id);
gpu1->peer_info.peer_gpus[uvm_id_gpu_index(gpu0->id)] = NULL;
uvm_spin_unlock(&gpu1->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu1->peer_info.peer_gpu_lock);
// Flush the access counter buffer to avoid getting stale notifications for
// accesses to GPUs to which peer access is being disabled. This is also
@ -2690,7 +2697,7 @@ static void remove_gpu(uvm_gpu_t *gpu)
uvm_processor_mask_clear(&g_uvm_global.retained_gpus, gpu->id);
// If the parent is being freed, stop scheduling new bottom halves and
// update relevant software state. Else flush any pending bottom halves
// update relevant software state. Else flush any pending bottom halves
// before continuing.
if (free_parent)
uvm_parent_gpu_disable_isr(parent_gpu);
@ -2713,6 +2720,7 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
const UvmGpuInfo *gpu_info,
const UvmGpuPlatformInfo *gpu_platform_info,
uvm_parent_gpu_t *parent_gpu,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status;
@ -2725,6 +2733,9 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
status = alloc_parent_gpu(gpu_uuid, uvm_parent_gpu_id_from_gpu_id(gpu_id), &parent_gpu);
if (status != NV_OK)
return status;
if (uvm_enable_builtin_tests)
parent_gpu->test = *parent_gpu_error;
}
gpu = alloc_gpu(parent_gpu, gpu_id);
@ -2794,7 +2805,7 @@ static NV_STATUS add_gpu(const NvProcessorUuid *gpu_uuid,
// Clear the interrupt bit and force the re-evaluation of the interrupt
// condition to ensure that we don't miss any pending interrupt
parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
parent_gpu->fault_buffer_info.replayable.cached_get);
parent_gpu->fault_buffer.replayable.cached_get);
}
// Access counters are enabled on demand
@ -2837,6 +2848,7 @@ error:
// the partition.
static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status = NV_OK;
@ -2888,7 +2900,7 @@ static NV_STATUS gpu_retain_by_uuid_locked(const NvProcessorUuid *gpu_uuid,
if (status != NV_OK)
goto error_unregister;
status = add_gpu(gpu_uuid, gpu_id, gpu_info, &gpu_platform_info, parent_gpu, &gpu);
status = add_gpu(gpu_uuid, gpu_id, gpu_info, &gpu_platform_info, parent_gpu, parent_gpu_error, &gpu);
if (status != NV_OK)
goto error_unregister;
}
@ -2913,11 +2925,12 @@ error_free_gpu_info:
NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out)
{
NV_STATUS status;
uvm_mutex_lock(&g_uvm_global.global_lock);
status = gpu_retain_by_uuid_locked(gpu_uuid, user_rm_device, gpu_out);
status = gpu_retain_by_uuid_locked(gpu_uuid, user_rm_device, parent_gpu_error, gpu_out);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
@ -3072,60 +3085,63 @@ bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address)
return (address.address >= gpu->parent->peer_va_base &&
address.address < (gpu->parent->peer_va_base + gpu->parent->peer_va_size));
}
} else {
}
else {
uvm_parent_gpu_t *parent_gpu;
phys_addr_t phys_addr;
if (uvm_aperture_is_peer(address.aperture)) {
bool is_peer = true;
uvm_parent_processor_mask_t parent_gpus;
uvm_parent_gpu_t *parent_peer_gpu;
// Local EGM accesses don't go over NVLINK
if (gpu->parent->egm.enabled && address.aperture == gpu->parent->egm.local_peer_id)
return false;
// EGM uses peer IDs but they are different from VIDMEM peer IDs.
// Check if the address aperture is an EGM aperture.
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
uvm_parent_gpus_from_processor_mask(&parent_gpus, &gpu->peer_info.peer_gpu_mask);
uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);
for_each_parent_gpu_in_mask(parent_peer_gpu, &parent_gpus) {
uvm_aperture_t egm_peer_aperture;
if (!parent_peer_gpu->egm.enabled)
continue;
egm_peer_aperture = uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu);
if (address.aperture == egm_peer_aperture) {
is_peer = false;
break;
}
// EGM uses peer IDs but they are different from VIDMEM peer
// IDs.
// Check if the address aperture is an EGM aperture.
// We should not use remote EGM addresses internally until
// NVLINK STO handling is updated to handle EGM.
// TODO: Bug: 5068688 [UVM] Detect STO and prevent data leaks
// when accessing EGM memory
// TODO: Bug: 5007527 [UVM] Extend STO recovery to EGM enabled
// systems
UVM_ASSERT(address.aperture != uvm_gpu_egm_peer_aperture(gpu->parent, parent_peer_gpu));
}
uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return true;
} else if (address.aperture == UVM_APERTURE_SYS) {
bool is_peer = false;
// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);
// Exposed coherent vidmem can be accessed via sys aperture
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;
if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
is_peer = true;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return is_peer;
}
if (address.aperture != UVM_APERTURE_SYS)
return false;
// GPU uses DMA addresses, which might be translated by IOMMU/SMMU,
// either inline, or via ATS.
phys_addr = dma_to_phys(&gpu->parent->pci_dev->dev, (dma_addr_t)address.address);
// Exposed coherent vidmem can be accessed via sys aperture
uvm_spin_lock_irqsave(&g_uvm_global.gpu_table_lock);
for_each_parent_gpu(parent_gpu) {
if (parent_gpu == gpu->parent)
continue;
if (phys_addr >= parent_gpu->system_bus.memory_window_start &&
phys_addr <= parent_gpu->system_bus.memory_window_end) {
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
return true;
}
}
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
UVM_ASSERT(address.aperture == UVM_APERTURE_VID);
}
return false;
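// [Editor's note: illustrative sketch, not part of this commit.] The sysmem
// branch above reduces to a range test of the translated physical address
// against every other GPU's exposed coherent memory window. A stand-alone
// version of that test, with hypothetical type and field names:
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>

struct coherent_memory_window {
    uint64_t start;
    uint64_t end;   // inclusive upper bound, matching the <= comparison above
};

static bool phys_addr_is_peer_vidmem(uint64_t phys_addr,
                                     const struct coherent_memory_window *windows,
                                     size_t window_count,
                                     size_t self_index)
{
    size_t i;

    for (i = 0; i < window_count; i++) {
        if (i == self_index)
            continue;   // skip the window of the GPU doing the access

        if (phys_addr >= windows[i].start && phys_addr <= windows[i].end)
            return true;
    }

    return false;
}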
@ -3141,49 +3157,6 @@ uvm_aperture_t uvm_get_page_tree_location(const uvm_parent_gpu_t *parent_gpu)
return UVM_APERTURE_DEFAULT;
}
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr)
{
uvm_processor_id_t id = UVM_ID_INVALID;
// TODO: Bug 1899622: On P9 systems with multiple CPU sockets, SYS aperture
// is also reported for accesses to remote GPUs connected to a different CPU
// NUMA domain. We will need to determine the actual processor id using the
// reported physical address.
if (addr.aperture == UVM_APERTURE_SYS)
return UVM_ID_CPU;
else if (addr.aperture == UVM_APERTURE_VID)
return gpu->id;
uvm_spin_lock(&gpu->peer_info.peer_gpus_lock);
for_each_gpu_id_in_mask(id, &gpu->peer_info.peer_gpu_mask) {
uvm_gpu_t *other_gpu = gpu->peer_info.peer_gpus[uvm_id_gpu_index(id)];
UVM_ASSERT(other_gpu);
UVM_ASSERT(!uvm_gpus_are_smc_peers(gpu, other_gpu));
if (uvm_parent_gpus_are_nvswitch_connected(gpu->parent, other_gpu->parent)) {
// NVSWITCH connected systems use an extended physical address to
// map to peers. Find the physical memory 'slot' containing the
// given physical address to find the peer gpu that owns the
// physical address
NvU64 fabric_window_end = other_gpu->parent->nvswitch_info.fabric_memory_window_start +
other_gpu->mem_info.max_allocatable_address;
if (other_gpu->parent->nvswitch_info.fabric_memory_window_start <= addr.address &&
fabric_window_end >= addr.address)
break;
}
else if (uvm_gpu_peer_aperture(gpu, other_gpu) == addr.aperture) {
break;
}
}
uvm_spin_unlock(&gpu->peer_info.peer_gpus_lock);
return id;
}
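// [Editor's note: illustrative sketch, not part of this commit.] The removed
// helper above resolves an NVSWITCH extended (fabric) physical address to the
// GPU owning it with an inclusive window test per peer. A stand-alone version
// of that test, with hypothetical names:
#include <stdbool.h>
#include <stdint.h>

struct fabric_window {
    uint64_t fabric_memory_window_start;
    uint64_t max_allocatable_address;   // largest valid offset within the window
};

static bool addr_in_fabric_window(uint64_t addr, const struct fabric_window *w)
{
    uint64_t window_end = w->fabric_memory_window_start + w->max_allocatable_address;

    return addr >= w->fabric_memory_window_start && addr <= window_end;
}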
static NvU64 instance_ptr_to_key(uvm_gpu_phys_address_t instance_ptr)
{
NvU64 key;
@ -3570,20 +3543,19 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare
*out_va_space = NULL;
*out_gpu = NULL;
UVM_ASSERT(entry->address.is_virtual);
uvm_spin_lock(&parent_gpu->instance_ptr_table_lock);
user_channel = instance_ptr_to_user_channel(parent_gpu, entry->virtual_info.instance_ptr);
user_channel = instance_ptr_to_user_channel(parent_gpu, entry->instance_ptr);
if (!user_channel) {
status = NV_ERR_INVALID_CHANNEL;
goto exit_unlock;
}
if (!user_channel->in_subctx) {
UVM_ASSERT_MSG(entry->virtual_info.ve_id == 0,
UVM_ASSERT_MSG(entry->ve_id == 0,
"Access counter packet contains SubCTX %u for channel not in subctx\n",
entry->virtual_info.ve_id);
entry->ve_id);
gpu_va_space = user_channel->gpu_va_space;
UVM_ASSERT(uvm_gpu_va_space_state(gpu_va_space) == UVM_GPU_VA_SPACE_STATE_ACTIVE);
@ -3591,7 +3563,7 @@ NV_STATUS uvm_parent_gpu_access_counter_entry_to_va_space(uvm_parent_gpu_t *pare
*out_gpu = gpu_va_space->gpu;
}
else {
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->virtual_info.ve_id);
gpu_va_space = user_channel_and_subctx_to_gpu_va_space(user_channel, entry->ve_id);
if (gpu_va_space) {
*out_va_space = gpu_va_space->va_space;
*out_gpu = gpu_va_space->gpu;

View File

@ -189,6 +189,9 @@ struct uvm_service_block_context_struct
// Prefetch temporary state.
uvm_perf_prefetch_bitmap_tree_t prefetch_bitmap_tree;
// Access counters notification buffer index.
NvU32 access_counters_buffer_index;
};
typedef struct
@ -197,8 +200,8 @@ typedef struct
{
struct
{
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned region
// of a SAM VMA. Used for batching ATS faults in a vma.
// Mask of prefetch faulted pages in a UVM_VA_BLOCK_SIZE aligned
// region of a SAM VMA. Used for batching ATS faults in a vma.
uvm_page_mask_t prefetch_only_fault_mask;
// Mask of read faulted pages in a UVM_VA_BLOCK_SIZE aligned region
@ -350,7 +353,7 @@ typedef struct
// entries from the GPU buffer
NvU32 max_batch_size;
struct uvm_replayable_fault_buffer_info_struct
struct uvm_replayable_fault_buffer_struct
{
// Maximum number of faults entries that can be stored in the buffer
NvU32 max_faults;
@ -414,7 +417,7 @@ typedef struct
uvm_ats_fault_invalidate_t ats_invalidate;
} replayable;
struct uvm_non_replayable_fault_buffer_info_struct
struct uvm_non_replayable_fault_buffer_struct
{
// Maximum number of fault entries that can be stored in the buffer
NvU32 max_faults;
@ -468,7 +471,7 @@ typedef struct
// Timestamp when prefetch faults were disabled last time
NvU64 disable_prefetch_faults_timestamp;
} uvm_fault_buffer_info_t;
} uvm_fault_buffer_t;
struct uvm_access_counter_service_batch_context_struct
{
@ -476,30 +479,14 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 num_cached_notifications;
struct
{
uvm_access_counter_buffer_entry_t **notifications;
uvm_access_counter_buffer_entry_t **notifications;
NvU32 num_notifications;
NvU32 num_notifications;
// Boolean used to avoid sorting the fault batch by instance_ptr if we
// determine at fetch time that all the access counter notifications in
// the batch report the same instance_ptr
bool is_single_instance_ptr;
} virt;
struct
{
uvm_access_counter_buffer_entry_t **notifications;
uvm_reverse_map_t *translations;
NvU32 num_notifications;
// Boolean used to avoid sorting the fault batch by aperture if we
// determine at fetch time that all the access counter notifications in
// the batch report the same aperture
bool is_single_aperture;
} phys;
// Boolean used to avoid sorting the notification batch by instance_ptr if we
// determine at fetch time that all the access counter notifications in
// the batch report the same instance_ptr
bool is_single_instance_ptr;
// Helper page mask to compute the accessed pages within a VA block
uvm_page_mask_t accessed_pages;
@ -514,31 +501,15 @@ struct uvm_access_counter_service_batch_context_struct
NvU32 batch_id;
};
typedef struct
struct uvm_access_counter_buffer_struct
{
// Values used to configure access counters in RM
struct
{
UVM_ACCESS_COUNTER_GRANULARITY granularity;
UVM_ACCESS_COUNTER_USE_LIMIT use_limit;
} rm;
uvm_parent_gpu_t *parent_gpu;
// The following values are precomputed by the access counter notification
// handling code. See comments for UVM_MAX_TRANSLATION_SIZE in
// uvm_gpu_access_counters.c for more details.
NvU64 translation_size;
NvU64 translations_per_counter;
NvU64 sub_granularity_region_size;
NvU64 sub_granularity_regions_per_translation;
} uvm_gpu_access_counter_type_config_t;
typedef struct
{
UvmGpuAccessCntrInfo rm_info;
// Access counters may have multiple notification buffers.
NvU32 index;
NvU32 max_notifications;
NvU32 max_batch_size;
@ -560,10 +531,22 @@ typedef struct
// may override it to try different configuration values.
struct
{
uvm_gpu_access_counter_type_config_t mimc;
uvm_gpu_access_counter_type_config_t momc;
// Values used to configure access counters in RM
struct
{
UVM_ACCESS_COUNTER_GRANULARITY granularity;
} rm;
NvU32 threshold;
// The following values are precomputed by the access counter
// notification handling code. See comments for UVM_MAX_TRANSLATION_SIZE
// in uvm_gpu_access_counters.c for more details.
NvU64 translation_size;
NvU64 sub_granularity_region_size;
NvU64 sub_granularity_regions_per_translation;
NvU32 threshold;
} current_config;
// Access counter statistics
@ -575,7 +558,7 @@ typedef struct
} stats;
// Ignoring access counters means that notifications are left in the HW
// buffer without being serviced. Requests to ignore access counters
// buffer without being serviced. Requests to ignore access counters
// are counted since the suspend path inhibits access counter interrupts,
// and the resume path needs to know whether to reenable them.
NvU32 notifications_ignored_count;
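// [Editor's note: illustrative sketch, not part of this commit.] A minimal
// model of the counting scheme described above: each "ignore" request bumps a
// counter and the first one inhibits servicing; each resume drops the counter
// and only the last one re-enables servicing. Names below are hypothetical.
#include <stdint.h>

struct notif_ignore_model {
    uint32_t notifications_ignored_count;
    int servicing_enabled;
};

static void model_ignore_notifications(struct notif_ignore_model *m)
{
    if (m->notifications_ignored_count++ == 0)
        m->servicing_enabled = 0;   // first request inhibits servicing
}

static void model_resume_notifications(struct notif_ignore_model *m)
{
    if (--m->notifications_ignored_count == 0)
        m->servicing_enabled = 1;   // last resume re-enables servicing
}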
@ -583,13 +566,25 @@ typedef struct
// Context structure used to service a GPU access counter batch
uvm_access_counter_service_batch_context_t batch_service_context;
// VA space that reconfigured the access counters configuration, if any.
// Used in builtin tests only, to avoid reconfigurations from different
// processes
//
// Locking: both readers and writers must hold the access counters ISR lock
uvm_va_space_t *reconfiguration_owner;
} uvm_access_counter_buffer_info_t;
struct
{
// VA space that reconfigured the access counters configuration, if any.
// Used in builtin tests only, to avoid reconfigurations from different
// processes.
//
// Locking: both readers and writers must hold the access counters ISR
// lock.
uvm_va_space_t *reconfiguration_owner;
// The service access counters loop breaks after processing the first
// batch. It will be retriggered if there are pending notifications, but
// it releases the ISR service lock to check certain races that would be
// difficult to hit otherwise.
bool one_iteration_per_batch;
NvU32 sleep_per_iteration_us;
} test;
};
typedef struct
{
@ -745,15 +740,11 @@ struct uvm_gpu_struct
struct
{
// Mask of peer_gpus set
// Mask of peer_gpus set.
uvm_processor_mask_t peer_gpu_mask;
// lazily-populated array of peer GPUs, indexed by the peer's GPU index
uvm_gpu_t *peer_gpus[UVM_ID_MAX_GPUS];
// Leaf spinlock used to synchronize access to the peer_gpus table so
// that it can be safely accessed from the access counters bottom half
uvm_spinlock_t peer_gpus_lock;
// Leaf spinlock used to synchronize access to peer_gpu_mask.
uvm_spinlock_t peer_gpu_lock;
} peer_info;
// Maximum number of subcontexts supported
@ -828,14 +819,6 @@ struct uvm_gpu_struct
uvm_bit_locks_t bitlocks;
} sysmem_mappings;
// Reverse lookup table used to query the user mapping associated with a
// sysmem (DMA) physical address.
//
// The system memory mapping information referred to by this field is
// different from that of sysmem_mappings, because it relates to user
// mappings (instead of kernel), and it is used in most configurations.
uvm_pmm_sysmem_mappings_t pmm_reverse_sysmem_mappings;
struct
{
uvm_conf_computing_dma_buffer_pool_t dma_buffer_pool;
@ -957,6 +940,16 @@ struct uvm_gpu_struct
uvm_mutex_t device_p2p_lock;
};
typedef struct
{
bool access_counters_alloc_buffer;
bool access_counters_alloc_block_context;
bool isr_access_counters_alloc;
bool isr_access_counters_alloc_stats_cpu;
bool access_counters_batch_context_notifications;
bool access_counters_batch_context_notification_cache;
} uvm_test_parent_gpu_inject_error_t;
// In order to support SMC/MIG GPU partitions, we split UVM GPUs into two
// parts: parent GPUs (uvm_parent_gpu_t) which represent unique PCIe devices
// (including VFs), and sub/child GPUs (uvm_gpu_t) which represent individual
@ -965,8 +958,8 @@ struct uvm_gpu_struct
struct uvm_parent_gpu_struct
{
// Reference count for how many places are holding on to a parent GPU
// (internal to the UVM driver). This includes any GPUs we know about, not
// just GPUs that are registered with a VA space. Most GPUs end up being
// (internal to the UVM driver). This includes any GPUs we know about, not
// just GPUs that are registered with a VA space. Most GPUs end up being
// registered, but there are brief periods when they are not registered,
// such as during interrupt handling, and in add_gpu() or remove_gpu().
nv_kref_t gpu_kref;
@ -976,7 +969,7 @@ struct uvm_parent_gpu_struct
uvm_gpu_t *gpus[UVM_PARENT_ID_MAX_SUB_PROCESSORS];
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
// Bitmap of valid child entries in the gpus[] table. Used to retrieve a
// usable child GPU in bottom-halves.
DECLARE_BITMAP(valid_gpus, UVM_PARENT_ID_MAX_SUB_PROCESSORS);
@ -1079,11 +1072,6 @@ struct uvm_parent_gpu_struct
bool access_counters_supported;
// If this is true, physical address based access counter notifications are
// potentially generated. If false, only virtual address based notifications
// are generated (assuming access_counters_supported is true too).
bool access_counters_can_use_physical_addresses;
bool fault_cancel_va_supported;
// True if the GPU has hardware support for scoped atomics
@ -1205,17 +1193,17 @@ struct uvm_parent_gpu_struct
// Interrupt handling state and locks
uvm_isr_info_t isr;
// Fault buffer info. This is only valid if supports_replayable_faults is
// set to true.
uvm_fault_buffer_info_t fault_buffer_info;
// This is only valid if supports_replayable_faults is set to true.
uvm_fault_buffer_t fault_buffer;
// PMM lazy free processing queue.
// TODO: Bug 3881835: revisit whether to use nv_kthread_q_t or workqueue.
nv_kthread_q_t lazy_free_q;
// Access counter buffer info. This is only valid if
// supports_access_counters is set to true.
uvm_access_counter_buffer_info_t access_counter_buffer_info;
// This is only valid if supports_access_counters is set to true. This array
// has rm_info.accessCntrBufferCount entries.
uvm_access_counter_buffer_t *access_counter_buffer;
uvm_mutex_t access_counters_enablement_lock;
// Number of uTLBs per GPC. This information is only valid on Pascal+ GPUs.
NvU32 utlb_per_gpc_count;
@ -1348,6 +1336,8 @@ struct uvm_parent_gpu_struct
// GPUs.
NvU64 base_address;
} egm;
uvm_test_parent_gpu_inject_error_t test;
};
static const char *uvm_parent_gpu_name(uvm_parent_gpu_t *parent_gpu)
@ -1395,10 +1385,10 @@ typedef struct
// detected to be PCIe peers and uvm_gpu_retain_pcie_peer_access() was
// called.
//
// - The peer_gpus_lock is held on one of the GPUs. In this case, the other
// GPU must be read from the original GPU's peer_gpus table. The fields
// will not change while the lock is held, but they may no longer be valid
// because the other GPU might be in teardown.
// - The peer_gpu_lock is held on one of the GPUs. In this case, the other
// GPU must be looked up via the original GPU's peer_gpu_mask.
// The fields will not change while the lock is held, but they may no
// longer be valid because the other GPU might be in teardown.
// This field is used to determine when this struct has been initialized
// (ref_count != 0). NVLink peers are initialized at GPU registration time.
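// [Editor's note: illustrative sketch, not part of this commit.] The locking
// rule above, reduced to its core: take the leaf lock, test the peer's bit in
// the mask, and only rely on peer state while the lock is held; once the lock
// is dropped the peer may be torn down. Types and names are simplified
// stand-ins (a userspace pthread spinlock instead of uvm_spinlock_t).
#include <pthread.h>
#include <stdbool.h>

#define MAX_PEER_GPUS 32

struct peer_info_model {
    pthread_spinlock_t peer_gpu_lock;
    bool peer_gpu_mask[MAX_PEER_GPUS];
};

static bool gpu_has_peer(struct peer_info_model *info, unsigned peer_index)
{
    bool present;

    pthread_spin_lock(&info->peer_gpu_lock);
    present = info->peer_gpu_mask[peer_index];
    pthread_spin_unlock(&info->peer_gpu_lock);

    // "present" reflects the state at the time of the check only; the peer
    // can still be unregistered after the lock is released, exactly as the
    // comment above warns.
    return present;
}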
@ -1510,7 +1500,7 @@ uvm_gpu_t *uvm_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid(const NvProcessorUuid *gpu_uuid);
// Like uvm_parent_gpu_get_by_uuid(), but this variant does not assertion-check
// that the caller is holding the global_lock. This is a narrower-purpose
// that the caller is holding the global_lock. This is a narrower-purpose
// function, and is only intended for use by the top-half ISR, or other very
// limited cases.
uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_uuid);
@ -1521,6 +1511,7 @@ uvm_parent_gpu_t *uvm_parent_gpu_get_by_uuid_locked(const NvProcessorUuid *gpu_u
// LOCKING: Takes and releases the global lock for the caller.
NV_STATUS uvm_gpu_retain_by_uuid(const NvProcessorUuid *gpu_uuid,
const uvm_rm_user_object_t *user_rm_device,
const uvm_test_parent_gpu_inject_error_t *parent_gpu_error,
uvm_gpu_t **gpu_out);
// Retain a gpu which is known to already be retained. Does NOT require the
@ -1578,10 +1569,6 @@ uvm_gpu_address_t uvm_gpu_peer_copy_address(uvm_gpu_t *owning_gpu, NvU64 address
// The two GPUs must have different parents.
NvU64 uvm_gpu_peer_ref_count(const uvm_gpu_t *gpu0, const uvm_gpu_t *gpu1);
// Get the processor id accessible by the given GPU for the given physical
// address.
uvm_processor_id_t uvm_gpu_get_processor_id_by_address(uvm_gpu_t *gpu, uvm_gpu_phys_address_t addr);
// Get the EGM aperture for local_gpu to use to map memory resident on the CPU
// NUMA node that remote_gpu is attached to.
// Note that local_gpu can be equal to remote_gpu when memory is resident in
@ -1655,7 +1642,8 @@ static uvm_gpu_identity_mapping_t *uvm_gpu_get_peer_mapping(uvm_gpu_t *gpu, uvm_
// Check whether the provided address points to peer memory:
// * Physical address using one of the PEER apertures
// * Physical address using SYS aperture that belongs to an exposed coherent memory
// * Physical address using SYS aperture that belongs to an exposed coherent
// memory
// * Virtual address in the region [peer_va_base, peer_va_base + peer_va_size)
bool uvm_gpu_address_is_peer(uvm_gpu_t *gpu, uvm_gpu_address_t address);
@ -1684,8 +1672,8 @@ NV_STATUS uvm_gpu_check_nvlink_error(uvm_gpu_t *gpu);
// Check for NVLINK errors without calling into RM
//
// Calling into RM is problematic in many places; this check is always safe to
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK error
// and it's required to call uvm_gpu_check_nvlink_error() to be sure.
// do. Returns NV_WARN_MORE_PROCESSING_REQUIRED if there might be an NVLINK
// error and it's required to call uvm_gpu_check_nvlink_error() to be sure.
NV_STATUS uvm_gpu_check_nvlink_error_no_rm(uvm_gpu_t *gpu);
// Map size bytes of contiguous sysmem on the GPU for physical access

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -31,7 +31,6 @@
#include "uvm_va_block.h"
#include "uvm_va_range.h"
#include "uvm_va_space_mm.h"
#include "uvm_pmm_sysmem.h"
#include "uvm_perf_module.h"
#include "uvm_ats.h"
#include "uvm_ats_faults.h"
@ -45,7 +44,6 @@
#define UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR 0x1
#define UVM_ACCESS_COUNTER_ACTION_TARGETED_CLEAR 0x2
#define UVM_ACCESS_COUNTER_PHYS_ON_MANAGED 0x4
// Each page in a tracked physical range may belong to a different VA Block. We
// preallocate an array of reverse map translations. However, access counter
@ -56,27 +54,18 @@
#define UVM_MAX_TRANSLATION_SIZE (2 * 1024 * 1024ULL)
#define UVM_SUB_GRANULARITY_REGIONS 32
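// [Editor's note: illustrative sketch, not part of this commit.] What the two
// constants above amount to, assuming a 4 KiB base page size: one 2 MiB
// translation covers 512 pages and splits into 32 sub-granularity regions of
// 64 KiB (16 pages) each.
#include <stdio.h>

int main(void)
{
    const unsigned long long max_translation_size = 2 * 1024 * 1024ULL;  // UVM_MAX_TRANSLATION_SIZE
    const unsigned long long sub_regions = 32;                           // UVM_SUB_GRANULARITY_REGIONS
    const unsigned long long page_size = 4096;                           // assumed 4 KiB pages

    printf("pages per translation:  %llu\n", max_translation_size / page_size);               // 512
    printf("sub-region size:        %llu bytes\n", max_translation_size / sub_regions);       // 65536
    printf("pages per sub-region:   %llu\n", max_translation_size / sub_regions / page_size); // 16
    return 0;
}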
static unsigned g_uvm_access_counter_threshold;
// Per-VA space access counters information
typedef struct
{
// VA space-specific configuration settings. These override the global
// settings
struct
{
atomic_t enable_mimc_migrations;
atomic_t enable_momc_migrations;
} params;
atomic_t enable_migrations;
uvm_va_space_t *va_space;
} va_space_access_counters_info_t;
// Enable/disable access-counter-guided migrations
//
static int uvm_perf_access_counter_mimc_migration_enable = -1;
static int uvm_perf_access_counter_momc_migration_enable = -1;
static int uvm_perf_access_counter_migration_enable = -1;
// Number of entries that are fetched from the GPU access counter notification
// buffer and serviced in batch
@ -86,13 +75,9 @@ static unsigned uvm_perf_access_counter_batch_count = UVM_PERF_ACCESS_COUNTER_BA
static unsigned uvm_perf_access_counter_threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT;
// Module parameters for the tunables
module_param(uvm_perf_access_counter_mimc_migration_enable, int, S_IRUGO);
MODULE_PARM_DESC(uvm_perf_access_counter_mimc_migration_enable,
"Whether MIMC access counters will trigger migrations."
"Valid values: <= -1 (default policy), 0 (off), >= 1 (on)");
module_param(uvm_perf_access_counter_momc_migration_enable, int, S_IRUGO);
MODULE_PARM_DESC(uvm_perf_access_counter_momc_migration_enable,
"Whether MOMC access counters will trigger migrations."
module_param(uvm_perf_access_counter_migration_enable, int, S_IRUGO);
MODULE_PARM_DESC(uvm_perf_access_counter_migration_enable,
"Whether access counters will trigger migrations."
"Valid values: <= -1 (default policy), 0 (off), >= 1 (on)");
module_param(uvm_perf_access_counter_batch_count, uint, S_IRUGO);
module_param(uvm_perf_access_counter_threshold, uint, S_IRUGO);
@ -100,7 +85,7 @@ MODULE_PARM_DESC(uvm_perf_access_counter_threshold,
"Number of remote accesses on a region required to trigger a notification."
"Valid values: [1, 65535]");
static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
static void access_counter_buffer_flush_locked(uvm_access_counter_buffer_t *access_counters,
uvm_gpu_buffer_flush_mode_t flush_mode);
static uvm_perf_module_event_callback_desc_t g_callbacks_access_counters[] = {};
@ -108,6 +93,15 @@ static uvm_perf_module_event_callback_desc_t g_callbacks_access_counters[] = {};
// Performance heuristics module for access_counters
static uvm_perf_module_t g_module_access_counters;
// Locking: default config values are defined in uvm_access_counters_init() at
// module init time, before any GPU is registered. After initialization, it is
// only consumed/read.
static UvmGpuAccessCntrConfig g_default_config =
{
.granularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
.threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_DEFAULT,
};
// Get the access counters tracking struct for the given VA space if it exists.
// This information is allocated at VA space creation and freed during VA space
// destruction.
@ -126,31 +120,35 @@ static va_space_access_counters_info_t *va_space_access_counters_info_get(uvm_va
return va_space_access_counters;
}
static uvm_access_counter_buffer_t *parent_gpu_access_counter_buffer_get(uvm_parent_gpu_t *parent_gpu,
NvU32 notif_buf_index)
{
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
UVM_ASSERT(parent_gpu->access_counter_buffer);
return &parent_gpu->access_counter_buffer[notif_buf_index];
}
static uvm_access_counter_buffer_t *parent_gpu_access_counter_buffer_get_or_null(uvm_parent_gpu_t *parent_gpu,
NvU32 notif_buf_index)
{
if (parent_gpu->access_counter_buffer)
return parent_gpu_access_counter_buffer_get(parent_gpu, notif_buf_index);
return NULL;
}
// Whether access counter migrations are enabled or not. The policy is as
// follows:
// - MIMC migrations are disabled by default on all non-ATS systems.
// - MOMC migrations are disabled by default on all systems
// - Migrations are disabled by default on all non-ATS systems.
// - Users can override this policy by specifying on/off
static bool is_migration_enabled(uvm_access_counter_type_t type)
static bool is_migration_enabled(void)
{
int val;
if (type == UVM_ACCESS_COUNTER_TYPE_MIMC) {
val = uvm_perf_access_counter_mimc_migration_enable;
}
else {
val = uvm_perf_access_counter_momc_migration_enable;
UVM_ASSERT(type == UVM_ACCESS_COUNTER_TYPE_MOMC);
}
if (val == 0)
if (uvm_perf_access_counter_migration_enable == 0)
return false;
else if (val > 0)
else if (uvm_perf_access_counter_migration_enable > 0)
return true;
if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
return false;
if (UVM_ATS_SUPPORTED())
return g_uvm_global.ats.supported;
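// [Editor's note: illustrative sketch, not part of this commit.] The policy
// above is the usual tri-state module-parameter pattern: negative selects the
// built-in default, zero forces off, positive forces on. A stand-alone version
// with the ATS query reduced to a plain flag (names are hypothetical):
#include <stdbool.h>

static bool migration_policy(int param, bool ats_supported_and_enabled)
{
    if (param == 0)
        return false;                    // explicitly disabled by the user
    if (param > 0)
        return true;                     // explicitly enabled by the user

    // param < 0: default policy, enable only on ATS-capable systems
    return ats_supported_and_enabled;
}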
@ -173,11 +171,9 @@ static va_space_access_counters_info_t *va_space_access_counters_info_create(uvm
va_space_access_counters,
UVM_PERF_MODULE_TYPE_ACCESS_COUNTERS);
// Snap the access_counters parameters so that they can be tuned per VA space
atomic_set(&va_space_access_counters->params.enable_mimc_migrations,
is_migration_enabled(UVM_ACCESS_COUNTER_TYPE_MIMC));
atomic_set(&va_space_access_counters->params.enable_momc_migrations,
is_migration_enabled(UVM_ACCESS_COUNTER_TYPE_MOMC));
// Snap the access_counters parameters so that they can be tuned per VA
// space
atomic_set(&va_space_access_counters->enable_migrations, is_migration_enabled());
va_space_access_counters->va_space = va_space;
}
@ -220,22 +216,23 @@ static NV_STATUS config_granularity_to_bytes(UVM_ACCESS_COUNTER_GRANULARITY gran
return NV_OK;
}
// Clear the access counter notifications and add it to the per-GPU clear
// tracker.
// Clear the access counter notifications and add it to the per-GPU
// per-notification-buffer clear tracker.
static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,
uvm_access_counter_buffer_t *access_counters,
uvm_access_counter_buffer_entry_t **notification_start,
NvU32 num_notifications)
{
NvU32 i;
NV_STATUS status;
uvm_push_t push;
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
status = uvm_push_begin(gpu->channel_manager, UVM_CHANNEL_TYPE_MEMOPS, &push, "Clear access counter batch");
if (status != NV_OK) {
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s\n",
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n",
nvstatusToString(status),
uvm_gpu_name(gpu));
uvm_gpu_name(gpu),
access_counters->index);
return status;
}
@ -249,21 +246,22 @@ static NV_STATUS access_counter_clear_notifications(uvm_gpu_t *gpu,
return uvm_tracker_add_push_safe(&access_counters->clear_tracker, &push);
}
// Clear all access counters and add the operation to the per-GPU clear tracker
static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu)
// Clear all access counters and add the operation to the per-GPU
// per-notification-buffer clear tracker
static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu, uvm_access_counter_buffer_t *access_counters)
{
NV_STATUS status;
uvm_push_t push;
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
status = uvm_push_begin(gpu->channel_manager,
UVM_CHANNEL_TYPE_MEMOPS,
&push,
"Clear access counter: all");
if (status != NV_OK) {
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s\n",
UVM_ERR_PRINT("Error creating push to clear access counters: %s, GPU %s, notif buf index %u\n",
nvstatusToString(status),
uvm_gpu_name(gpu));
uvm_gpu_name(gpu),
access_counters->index);
return status;
}
@ -272,34 +270,27 @@ static NV_STATUS access_counter_clear_all(uvm_gpu_t *gpu)
uvm_push_end(&push);
uvm_tracker_remove_completed(&access_counters->clear_tracker);
return uvm_tracker_add_push_safe(&access_counters->clear_tracker, &push);
}
static const uvm_gpu_access_counter_type_config_t *
get_config_for_type(const uvm_access_counter_buffer_info_t *access_counters, uvm_access_counter_type_t counter_type)
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
return counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC? &(access_counters)->current_config.mimc :
&(access_counters)->current_config.momc;
}
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(parent_gpu, index);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->access_counters_supported);
// Fast path 1: we left some notifications unserviced in the buffer in the last pass
if (parent_gpu->access_counter_buffer_info.cached_get != parent_gpu->access_counter_buffer_info.cached_put)
// Fast path 1: we left some notifications unserviced in the buffer in the
// last pass
if (access_counters->cached_get != access_counters->cached_put)
return true;
// Fast path 2: read the valid bit of the notification buffer entry pointed by the cached get pointer
if (!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu,
parent_gpu->access_counter_buffer_info.cached_get)) {
// Slow path: read the put pointer from the GPU register via BAR0 over PCIe
parent_gpu->access_counter_buffer_info.cached_put =
UVM_GPU_READ_ONCE(*parent_gpu->access_counter_buffer_info.rm_info.pAccessCntrBufferPut);
// Fast path 2: read the valid bit of the notification buffer entry pointed to
// by the cached get pointer
if (!parent_gpu->access_counter_buffer_hal->entry_is_valid(access_counters, access_counters->cached_get)) {
// Slow path: read the put pointer from the GPU register via BAR0 over
// PCIe
access_counters->cached_put = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferPut);
// No interrupt pending
if (parent_gpu->access_counter_buffer_info.cached_get == parent_gpu->access_counter_buffer_info.cached_put)
if (access_counters->cached_get == access_counters->cached_put)
return false;
}
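// [Editor's note: illustrative sketch, not part of this commit.] The pending
// check above follows a standard consumer-side ring-buffer pattern: compare
// the cached GET and PUT indices first and only fall back to the expensive
// register read when the cheap check says "empty". Simplified stand-in below;
// the HAL valid-bit probe is omitted.
#include <stdbool.h>
#include <stdint.h>

struct ring_model {
    uint32_t cached_get;
    uint32_t cached_put;
    volatile const uint32_t *hw_put;   // stands in for pAccessCntrBufferPut
};

static bool ring_has_pending_entries(struct ring_model *ring)
{
    // Fast path: entries left over from the previous servicing pass.
    if (ring->cached_get != ring->cached_put)
        return true;

    // Slow path: refresh the cached PUT index from the hardware register.
    ring->cached_put = *ring->hw_put;

    return ring->cached_get != ring->cached_put;
}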
@ -308,73 +299,65 @@ bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu)
// Initialize the configuration and pre-compute some required values for the
// given access counter type
static void init_access_counter_types_config(const UvmGpuAccessCntrConfig *config,
uvm_access_counter_type_t counter_type,
uvm_gpu_access_counter_type_config_t *counter_type_config)
static void init_access_counter_config(const UvmGpuAccessCntrConfig *config,
uvm_access_counter_buffer_t *access_counters)
{
NV_STATUS status;
NvU64 tracking_size = 0;
UVM_ACCESS_COUNTER_GRANULARITY granularity = counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC? config->mimcGranularity:
config->momcGranularity;
UVM_ACCESS_COUNTER_USE_LIMIT use_limit = counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC? config->mimcUseLimit:
config->momcUseLimit;
counter_type_config->rm.granularity = granularity;
counter_type_config->rm.use_limit = use_limit;
access_counters->current_config.rm.granularity = config->granularity;
// Precompute the maximum size to use in reverse map translations and the
// number of translations that are required per access counter notification.
status = config_granularity_to_bytes(granularity, &tracking_size);
status = config_granularity_to_bytes(config->granularity, &tracking_size);
UVM_ASSERT(status == NV_OK);
// sub_granularity field is only filled for tracking granularities larger
// than 64K
if (granularity == UVM_ACCESS_COUNTER_GRANULARITY_64K)
counter_type_config->sub_granularity_region_size = tracking_size;
if (config->granularity == UVM_ACCESS_COUNTER_GRANULARITY_64K)
access_counters->current_config.sub_granularity_region_size = tracking_size;
else
counter_type_config->sub_granularity_region_size = tracking_size / UVM_SUB_GRANULARITY_REGIONS;
access_counters->current_config.sub_granularity_region_size = tracking_size / UVM_SUB_GRANULARITY_REGIONS;
counter_type_config->translation_size = min(UVM_MAX_TRANSLATION_SIZE, tracking_size);
counter_type_config->translations_per_counter =
max(counter_type_config->translation_size / UVM_MAX_TRANSLATION_SIZE, 1ULL);
counter_type_config->sub_granularity_regions_per_translation =
max(counter_type_config->translation_size / counter_type_config->sub_granularity_region_size, 1ULL);
UVM_ASSERT(counter_type_config->sub_granularity_regions_per_translation <= UVM_SUB_GRANULARITY_REGIONS);
access_counters->current_config.translation_size = min(UVM_MAX_TRANSLATION_SIZE, tracking_size);
access_counters->current_config.sub_granularity_regions_per_translation =
max(access_counters->current_config.translation_size / access_counters->current_config.sub_granularity_region_size,
1ULL);
UVM_ASSERT(access_counters->current_config.sub_granularity_regions_per_translation <= UVM_SUB_GRANULARITY_REGIONS);
}
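// [Editor's note: illustrative worked examples, not part of this commit.]
// Assuming the byte sizes conventionally associated with the granularity
// settings, the precomputation above works out to:
//
//   granularity = 2 MiB:   tracking_size                           = 2 MiB
//                          sub_granularity_region_size             = 2 MiB / 32 = 64 KiB
//                          translation_size                        = min(2 MiB, 2 MiB) = 2 MiB
//                          sub_granularity_regions_per_translation = 2 MiB / 64 KiB = 32
//
//   granularity = 64 KiB:  tracking_size                           = 64 KiB
//                          sub_granularity_region_size             = 64 KiB (no sub-regions)
//                          translation_size                        = min(2 MiB, 64 KiB) = 64 KiB
//                          sub_granularity_regions_per_translation = 1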
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
static NvU32 access_counters_max_batch_size(const uvm_access_counter_buffer_t *access_counters)
{
NvU32 max_batch_size = 0;
// Check provided module parameter value
max_batch_size = max(uvm_perf_access_counter_batch_count, (NvU32)UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN);
max_batch_size = min(max_batch_size, access_counters->max_notifications);
return max_batch_size;
}
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
{
NV_STATUS status = NV_OK;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
NvU64 granularity_bytes = 0;
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(parent_gpu, notif_buf_index);
uvm_access_counter_service_batch_context_t *batch_context;
if (uvm_perf_access_counter_threshold < UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN) {
g_uvm_access_counter_threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN;
pr_info("Value %u too small for uvm_perf_access_counter_threshold, using %u instead\n",
uvm_perf_access_counter_threshold,
g_uvm_access_counter_threshold);
}
else if (uvm_perf_access_counter_threshold > UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX) {
g_uvm_access_counter_threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX;
pr_info("Value %u too large for uvm_perf_access_counter_threshold, using %u instead\n",
uvm_perf_access_counter_threshold,
g_uvm_access_counter_threshold);
}
else {
g_uvm_access_counter_threshold = uvm_perf_access_counter_threshold;
}
access_counters->parent_gpu = parent_gpu;
access_counters->index = notif_buf_index;
batch_context = &access_counters->batch_service_context;
uvm_assert_mutex_locked(&g_uvm_global.global_lock);
UVM_ASSERT(parent_gpu->access_counter_buffer_hal != NULL);
status = uvm_rm_locked_call(nvUvmInterfaceInitAccessCntrInfo(parent_gpu->rm_device,
&access_counters->rm_info,
0));
notif_buf_index));
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to init notify buffer info from RM: %s, GPU %s\n",
UVM_ERR_PRINT("Failed to init notify buffer from RM: %s, GPU %s, notif buf index %u\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu));
uvm_parent_gpu_name(parent_gpu),
notif_buf_index);
// nvUvmInterfaceInitAccessCntrInfo may leave fields in rm_info
// populated when it returns an error. Set the buffer handle to zero as
@ -387,32 +370,28 @@ NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(access_counters->rm_info.bufferSize %
parent_gpu->access_counter_buffer_hal->entry_size(parent_gpu) == 0);
status = config_granularity_to_bytes(UVM_PERF_ACCESS_COUNTER_GRANULARITY, &granularity_bytes);
UVM_ASSERT(status == NV_OK);
if (granularity_bytes > UVM_MAX_TRANSLATION_SIZE)
UVM_ASSERT(granularity_bytes % UVM_MAX_TRANSLATION_SIZE == 0);
parent_gpu->access_counter_buffer_info.notifications_ignored_count = 0;
parent_gpu->access_counter_buffer_info.reconfiguration_owner = NULL;
access_counters->notifications_ignored_count = 0;
access_counters->test.reconfiguration_owner = NULL;
uvm_tracker_init(&access_counters->clear_tracker);
access_counters->max_notifications = parent_gpu->access_counter_buffer_info.rm_info.bufferSize /
access_counters->max_notifications = access_counters->rm_info.bufferSize /
parent_gpu->access_counter_buffer_hal->entry_size(parent_gpu);
// Check provided module parameter value
access_counters->max_batch_size = max(uvm_perf_access_counter_batch_count,
(NvU32)UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN);
access_counters->max_batch_size = min(access_counters->max_batch_size,
access_counters->max_notifications);
access_counters->max_batch_size = access_counters_max_batch_size(access_counters);
if (access_counters->max_batch_size != uvm_perf_access_counter_batch_count) {
pr_info("Invalid uvm_perf_access_counter_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_access_counter_batch_count,
UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN,
access_counters->max_notifications,
access_counters->max_batch_size);
UVM_INFO_PRINT("Invalid uvm_perf_access_counter_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u "
"instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_access_counter_batch_count,
UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN,
access_counters->max_notifications,
access_counters->max_batch_size);
}
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_batch_context_notification_cache) {
status = NV_ERR_NO_MEMORY;
goto fail;
}
batch_context->notification_cache = uvm_kvmalloc_zero(access_counters->max_notifications *
@ -422,23 +401,14 @@ NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
goto fail;
}
batch_context->virt.notifications = uvm_kvmalloc_zero(access_counters->max_notifications *
sizeof(*batch_context->virt.notifications));
if (!batch_context->virt.notifications) {
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_batch_context_notifications) {
status = NV_ERR_NO_MEMORY;
goto fail;
}
batch_context->phys.notifications = uvm_kvmalloc_zero(access_counters->max_notifications *
sizeof(*batch_context->phys.notifications));
if (!batch_context->phys.notifications) {
status = NV_ERR_NO_MEMORY;
goto fail;
}
batch_context->phys.translations = uvm_kvmalloc_zero((UVM_MAX_TRANSLATION_SIZE / PAGE_SIZE) *
sizeof(*batch_context->phys.translations));
if (!batch_context->phys.translations) {
batch_context->notifications = uvm_kvmalloc_zero(access_counters->max_notifications *
sizeof(*batch_context->notifications));
if (!batch_context->notifications) {
status = NV_ERR_NO_MEMORY;
goto fail;
}
@ -446,35 +416,39 @@ NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu)
return NV_OK;
fail:
uvm_parent_gpu_deinit_access_counters(parent_gpu);
uvm_parent_gpu_deinit_access_counters(parent_gpu, notif_buf_index);
return status;
}
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
{
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get_or_null(parent_gpu,
notif_buf_index);
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
// Access counters should have been disabled when the GPU is no longer
// registered in any VA space.
if (parent_gpu->isr.access_counters) {
UVM_ASSERT_MSG(parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count == 0,
"notif buf index: %u\n",
notif_buf_index);
}
if (access_counters->rm_info.accessCntrBufferHandle) {
if (access_counters && access_counters->rm_info.accessCntrBufferHandle) {
NV_STATUS status = uvm_rm_locked_call(nvUvmInterfaceDestroyAccessCntrInfo(parent_gpu->rm_device,
&access_counters->rm_info));
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
UVM_ASSERT(status == NV_OK);
access_counters->rm_info.accessCntrBufferHandle = 0;
uvm_tracker_deinit(&access_counters->clear_tracker);
}
uvm_kvfree(batch_context->notification_cache);
uvm_kvfree(batch_context->virt.notifications);
uvm_kvfree(batch_context->phys.notifications);
uvm_kvfree(batch_context->phys.translations);
batch_context->notification_cache = NULL;
batch_context->virt.notifications = NULL;
batch_context->phys.notifications = NULL;
batch_context->phys.translations = NULL;
uvm_kvfree(batch_context->notification_cache);
uvm_kvfree(batch_context->notifications);
batch_context->notification_cache = NULL;
batch_context->notifications = NULL;
}
}
bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
@ -485,30 +459,31 @@ bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
if (parent_gpu->rm_info.isSimulated)
return true;
return is_migration_enabled(UVM_ACCESS_COUNTER_TYPE_MIMC) || is_migration_enabled(UVM_ACCESS_COUNTER_TYPE_MOMC);
return is_migration_enabled();
}
// This function enables access counters with the given configuration and takes
// ownership from RM. The function also stores the new configuration within the
// uvm_gpu_t struct.
static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, UvmGpuAccessCntrConfig *config)
static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, NvU32 index, const UvmGpuAccessCntrConfig *config)
{
NV_STATUS status, disable_status;
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent, index);
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters[index].service_lock));
status = uvm_rm_locked_call(nvUvmInterfaceEnableAccessCntr(gpu->parent->rm_device,
&access_counters->rm_info,
config));
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to enable access counter notification from RM: %s, GPU %s\n",
nvstatusToString(status), uvm_gpu_name(gpu));
UVM_ERR_PRINT("Failed to enable access counter notification from RM: %s, GPU %s notif buf index %u\n",
nvstatusToString(status),
uvm_gpu_name(gpu),
index);
return status;
}
status = access_counter_clear_all(gpu);
status = access_counter_clear_all(gpu, access_counters);
if (status != NV_OK)
goto error;
@ -520,12 +495,11 @@ static NV_STATUS access_counters_take_ownership(uvm_gpu_t *gpu, UvmGpuAccessCntr
// taken control of the notify buffer since the GPU was initialized. Then
// flush old notifications. This will update the cached_put pointer.
access_counters->cached_get = UVM_GPU_READ_ONCE(*access_counters->rm_info.pAccessCntrBufferGet);
access_counter_buffer_flush_locked(gpu->parent, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
access_counter_buffer_flush_locked(access_counters, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
access_counters->current_config.threshold = config->threshold;
init_access_counter_types_config(config, UVM_ACCESS_COUNTER_TYPE_MIMC, &access_counters->current_config.mimc);
init_access_counter_types_config(config, UVM_ACCESS_COUNTER_TYPE_MOMC, &access_counters->current_config.momc);
init_access_counter_config(config, access_counters);
return NV_OK;
@ -539,15 +513,14 @@ error:
// If ownership is yielded as part of reconfiguration, the access counters
// handling refcount may not be 0
static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu)
static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
NV_STATUS status;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(parent_gpu, index);
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[index].service_lock));
// Wait for any pending clear operation befor releasing ownership
// Wait for any pending clear operation before releasing ownership
status = uvm_tracker_wait(&access_counters->clear_tracker);
if (status != NV_OK)
UVM_ASSERT(status == uvm_global_get_status());
@ -559,100 +532,180 @@ static void access_counters_yield_ownership(uvm_parent_gpu_t *parent_gpu)
// Increment the refcount of access counter enablement. If this is the first
// reference, enable the HW feature.
static NV_STATUS gpu_access_counters_enable(uvm_gpu_t *gpu, UvmGpuAccessCntrConfig *config)
static NV_STATUS gpu_access_counters_enable(uvm_gpu_t *gpu,
uvm_access_counter_buffer_t *access_counters,
const UvmGpuAccessCntrConfig *config)
{
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters.service_lock));
NvU32 notif_buf_index = access_counters->index;
UVM_ASSERT(gpu->parent->access_counters_supported);
UVM_ASSERT(gpu->parent->access_counter_buffer_info.rm_info.accessCntrBufferHandle);
UVM_ASSERT(uvm_sem_is_locked(&gpu->parent->isr.access_counters[notif_buf_index].service_lock));
UVM_ASSERT(access_counters->rm_info.accessCntrBufferHandle);
// There cannot be a concurrent modification of the handling count, since
// the only two writes of that field happen in the enable/disable functions
// and those are protected by the access counters ISR lock.
if (gpu->parent->isr.access_counters.handling_ref_count == 0) {
NV_STATUS status = access_counters_take_ownership(gpu, config);
if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0) {
NV_STATUS status = access_counters_take_ownership(gpu, notif_buf_index, config);
if (status != NV_OK)
return status;
}
++gpu->parent->isr.access_counters.handling_ref_count;
++gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count;
return NV_OK;
}
// Decrement the refcount of access counter enablement. If this is the last
// reference, disable the HW feature.
static void parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu)
static void access_counters_disable(uvm_access_counter_buffer_t *access_counters)
{
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count > 0);
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
if (--parent_gpu->isr.access_counters.handling_ref_count == 0)
access_counters_yield_ownership(parent_gpu);
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));
UVM_ASSERT_MSG(parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0,
"notif buf index: %u\n",
notif_buf_index);
if (--parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count == 0)
access_counters_yield_ownership(parent_gpu, notif_buf_index);
}
// Invoked during registration of the GPU in the VA space
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
{
NV_STATUS status;
NvU32 notif_buf_index;
UVM_ASSERT(gpu->parent->access_counters_supported);
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
if (uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
status = NV_OK;
}
else {
UvmGpuAccessCntrConfig default_config =
{
.mimcGranularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
.momcGranularity = UVM_PERF_ACCESS_COUNTER_GRANULARITY,
.mimcUseLimit = UVM_ACCESS_COUNTER_USE_LIMIT_FULL,
.momcUseLimit = UVM_ACCESS_COUNTER_USE_LIMIT_FULL,
.threshold = g_uvm_access_counter_threshold,
};
status = gpu_access_counters_enable(gpu, &default_config);
for (notif_buf_index = 0; notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount; notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
notif_buf_index);
uvm_access_counters_isr_lock(access_counters);
status = gpu_access_counters_enable(gpu, access_counters, &g_default_config);
// If this is the first reference taken on access counters, dropping
// the ISR lock will enable interrupts.
uvm_access_counters_isr_unlock(access_counters);
if (status != NV_OK)
goto cleanup;
}
// No VA space lock is currently held, so the mask is atomically
// modified to protect from concurrent enablement of access counters in
// another GPU
if (status == NV_OK)
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
// another GPU.
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
}
// If this is the first reference taken on access counters, dropping the
// ISR lock will enable interrupts.
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
return status;
cleanup:
// The "notif_buf_index" notification buffer is already disabled since it
// failed, we disable all prior to notif_buf_index.
while (notif_buf_index-- != 0) {
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
notif_buf_index);
uvm_access_counters_isr_lock(access_counters);
access_counters_disable(access_counters);
uvm_access_counters_isr_unlock(access_counters);
}
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
return status;
}
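A minimal, self-contained sketch of the enable/unwind pattern used above: every
notification buffer reported by RM is enabled in order, and a failure at index
i tears the already-enabled buffers [0, i) back down in reverse. enable_one(),
disable_one(), the simulated failure and the refcount array are hypothetical
placeholders standing in for gpu_access_counters_enable(),
access_counters_disable() and the per-buffer handling_ref_count; they are not
driver APIs.

#include <stdio.h>

static int g_refcount[4];

/* Stand-in for gpu_access_counters_enable(): in the driver, the first
 * reference on a buffer takes ownership of it from RM. */
static int enable_one(unsigned i)
{
    if (i == 2)                     /* simulate a failure on the third buffer */
        return -1;
    ++g_refcount[i];
    return 0;
}

/* Stand-in for access_counters_disable(): dropping the last reference yields
 * ownership back to RM. */
static void disable_one(unsigned i)
{
    --g_refcount[i];
}

static int enable_all_buffers(unsigned count)
{
    unsigned i;
    int status = 0;

    for (i = 0; i < count; i++) {
        status = enable_one(i);
        if (status != 0)
            goto cleanup;
    }

    return 0;

cleanup:
    /* Buffer i already failed to enable; unwind buffers [0, i) in reverse. */
    while (i-- != 0)
        disable_one(i);

    return status;
}

int main(void)
{
    int status = enable_all_buffers(4);

    /* Prints "status -1, refcounts 0 0 0 0": nothing stays half-enabled. */
    printf("status %d, refcounts %d %d %d %d\n",
           status, g_refcount[0], g_refcount[1], g_refcount[2], g_refcount[3]);
    return 0;
}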
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu,
uvm_va_space_t *va_space)
static void access_counters_disable_notif_buffer(uvm_access_counter_buffer_t *access_counters,
uvm_gpu_t *gpu,
uvm_va_space_t *va_space)
{
UVM_ASSERT(parent_gpu->access_counters_supported);
NvU32 notif_buf_index = access_counters->index;
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
uvm_access_counters_isr_lock(access_counters);
if (uvm_parent_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors,
parent_gpu->id)) {
parent_gpu_access_counters_disable(parent_gpu);
access_counters_disable(access_counters);
// If this is VA space reconfigured access counters, clear the
// ownership to allow for other processes to invoke the reconfiguration
if (parent_gpu->access_counter_buffer_info.reconfiguration_owner == va_space)
parent_gpu->access_counter_buffer_info.reconfiguration_owner = NULL;
// If this VA space reconfigured access counters, clear the ownership to
// allow for other processes to invoke the reconfiguration.
if (access_counters->test.reconfiguration_owner == va_space) {
access_counters->test.reconfiguration_owner = NULL;
// Reset notification service test knobs.
access_counters->max_batch_size = access_counters_max_batch_size(access_counters);
access_counters->test.one_iteration_per_batch = false;
access_counters->test.sleep_per_iteration_us = 0;
// Reset HW access counters settings to default values. A test may
// have modified them. Concurrent processes and registered VA spaces
// would maintain the modified config, undermining the correctness
// of forthcoming tests.
if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count > 0) {
NV_STATUS status;
                // Disable counters, and re-enable them with the new
                // configuration. For more details, refer to the comments in
                // uvm_test_reconfigure_access_counters().
access_counters_yield_ownership(gpu->parent, notif_buf_index);
status = access_counters_take_ownership(gpu, notif_buf_index, &g_default_config);
if (status != NV_OK) {
// Retaking ownership failed, so RM owns the interrupt.
// The state of any other VA space with access counters
// enabled is corrupt.
UVM_ASSERT_MSG(status == NV_OK,
"Access counters interrupt still owned by RM, other VA spaces may experience "
"failures");
}
}
}
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
uvm_access_counters_isr_unlock(access_counters);
}
static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space)
{
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
NvU32 notif_buf_index;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(gpu->parent->access_counters_supported);
uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
if (uvm_parent_processor_mask_test_and_clear_atomic(&va_space->access_counters_enabled_processors,
gpu->parent->id)) {
for (notif_buf_index = 0; notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount; notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
notif_buf_index);
// Disable access counters per notification buffer. If testing is
// enabled, we may reset the access counters config and testing
// knobs.
access_counters_disable_notif_buffer(access_counters, gpu, va_space);
}
}
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
}
static void write_get(uvm_access_counter_buffer_t *access_counters, NvU32 get)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));
// Write get on the GPU only if it's changed.
if (access_counters->cached_get == get)
@ -664,16 +717,16 @@ static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
UVM_GPU_WRITE_ONCE(*access_counters->rm_info.pAccessCntrBufferGet, get);
}
static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
static void access_counter_buffer_flush_locked(uvm_access_counter_buffer_t *access_counters,
uvm_gpu_buffer_flush_mode_t flush_mode)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
NvU32 get;
NvU32 put;
uvm_spin_loop_t spin;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));
// Read PUT pointer from the GPU if requested
UVM_ASSERT(flush_mode != UVM_GPU_BUFFER_FLUSH_MODE_WAIT_UPDATE_PUT);
@ -685,32 +738,39 @@ static void access_counter_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
while (get != put) {
// Wait until valid bit is set
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(access_counters, get), &spin) {
if (uvm_global_get_status() != NV_OK)
goto done;
}
parent_gpu->access_counter_buffer_hal->entry_clear_valid(parent_gpu, get);
parent_gpu->access_counter_buffer_hal->entry_clear_valid(access_counters, get);
++get;
if (get == access_counters->max_notifications)
get = 0;
}
done:
write_get(parent_gpu, get);
write_get(access_counters, get);
}
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu)
{
NvU32 notif_buf_index;
UVM_ASSERT(parent_gpu->access_counters_supported);
// Disables access counter interrupts and notification servicing
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(parent_gpu,
notif_buf_index);
if (parent_gpu->isr.access_counters.handling_ref_count > 0)
access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
// Disables access counter interrupts and notification servicing
uvm_access_counters_isr_lock(access_counters);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0)
access_counter_buffer_flush_locked(access_counters, UVM_GPU_BUFFER_FLUSH_MODE_UPDATE_PUT);
uvm_access_counters_isr_unlock(access_counters);
}
}
static inline int cmp_access_counter_instance_ptr(const uvm_access_counter_buffer_entry_t *a,
@ -718,24 +778,23 @@ static inline int cmp_access_counter_instance_ptr(const uvm_access_counter_buffe
{
int result;
result = uvm_gpu_phys_addr_cmp(a->virtual_info.instance_ptr, b->virtual_info.instance_ptr);
// On Volta+ we need to sort by {instance_ptr + subctx_id} pair since it can
// map to a different VA space
result = uvm_gpu_phys_addr_cmp(a->instance_ptr, b->instance_ptr);
// On Turing+ we need to sort by {instance_ptr + subctx_id} pair since it
// can map to a different VA space
if (result != 0)
return result;
return UVM_CMP_DEFAULT(a->virtual_info.ve_id, b->virtual_info.ve_id);
return UVM_CMP_DEFAULT(a->ve_id, b->ve_id);
}
// Sort comparator for pointers to GVA access counter notification buffer
// entries that sorts by instance pointer
static int cmp_sort_virt_notifications_by_instance_ptr(const void *_a, const void *_b)
// Sort comparator for pointers to access counter notification buffer entries
// that sorts by instance pointer and ve_id.
static int cmp_sort_notifications_by_instance_ptr(const void *_a, const void *_b)
{
const uvm_access_counter_buffer_entry_t *a = *(const uvm_access_counter_buffer_entry_t **)_a;
const uvm_access_counter_buffer_entry_t *b = *(const uvm_access_counter_buffer_entry_t **)_b;
UVM_ASSERT(a->address.is_virtual);
UVM_ASSERT(b->address.is_virtual);
return cmp_access_counter_instance_ptr(a, b);
}
@ -748,16 +807,15 @@ static inline int cmp_gpu(const uvm_gpu_t *a, const uvm_gpu_t *b)
return UVM_CMP_DEFAULT(id_a, id_b);
}
// Sort comparator for pointers to GVA access counter notification buffer
// entries that sorts by va_space, GPU ID, and fault address.
static int cmp_sort_virt_notifications_by_va_space_gpu_address(const void *_a, const void *_b)
// Sort comparator for pointers to access counter notification buffer entries
// that sorts by va_space, GPU ID, and notification address.
static int cmp_sort_notifications_by_va_space_gpu_address(const void *_a, const void *_b)
{
const uvm_access_counter_buffer_entry_t **a = (const uvm_access_counter_buffer_entry_t **)_a;
const uvm_access_counter_buffer_entry_t **b = (const uvm_access_counter_buffer_entry_t **)_b;
int result;
result = UVM_CMP_DEFAULT((*a)->virtual_info.va_space, (*b)->virtual_info.va_space);
result = UVM_CMP_DEFAULT((*a)->va_space, (*b)->va_space);
if (result != 0)
return result;
@ -765,20 +823,7 @@ static int cmp_sort_virt_notifications_by_va_space_gpu_address(const void *_a, c
if (result != 0)
return result;
return UVM_CMP_DEFAULT((*a)->address.address, (*b)->address.address);
}
// Sort comparator for pointers to GPA access counter notification buffer
// entries that sorts by physical address' aperture
static int cmp_sort_phys_notifications_by_processor_id(const void *_a, const void *_b)
{
const uvm_access_counter_buffer_entry_t *a = *(const uvm_access_counter_buffer_entry_t **)_a;
const uvm_access_counter_buffer_entry_t *b = *(const uvm_access_counter_buffer_entry_t **)_b;
UVM_ASSERT(!a->address.is_virtual);
UVM_ASSERT(!b->address.is_virtual);
return uvm_id_cmp(a->physical_info.resident_id, b->physical_info.resident_id);
return UVM_CMP_DEFAULT((*a)->address, (*b)->address);
}
typedef enum
@ -792,21 +837,20 @@ typedef enum
NOTIFICATION_FETCH_MODE_ALL,
} notification_fetch_mode_t;
static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_service_batch_context_t *batch_context,
static NvU32 fetch_access_counter_buffer_entries(uvm_access_counter_buffer_t *access_counters,
notification_fetch_mode_t fetch_mode)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
NvU32 get;
NvU32 put;
NvU32 notification_index;
uvm_access_counter_buffer_entry_t *notification_cache;
uvm_spin_loop_t spin;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
NvU32 last_instance_ptr_idx = 0;
uvm_aperture_t last_aperture = UVM_APERTURE_PEER_MAX;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));
notification_cache = batch_context->notification_cache;
@ -822,12 +866,8 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
if (get == put)
return 0;
batch_context->phys.num_notifications = 0;
batch_context->virt.num_notifications = 0;
batch_context->virt.is_single_instance_ptr = true;
batch_context->phys.is_single_aperture = true;
batch_context->num_notifications = 0;
batch_context->is_single_instance_ptr = true;
notification_index = 0;
// Parse until get != put and have enough space to cache.
@ -838,7 +878,8 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
        // We cannot just wait for the last entry (the one pointed to by put)
        // to become valid; we have to check each entry individually since
        // entries can be written out of order.
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(parent_gpu, get), &spin) {
UVM_SPIN_WHILE(!parent_gpu->access_counter_buffer_hal->entry_is_valid(access_counters, get),
&spin) {
// We have some entry to work on. Let's do the rest later.
if (fetch_mode != NOTIFICATION_FETCH_MODE_ALL && notification_index > 0)
goto done;
@ -853,54 +894,15 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
smp_mb__after_atomic();
// Got valid bit set. Let's cache.
parent_gpu->access_counter_buffer_hal->parse_entry(parent_gpu, get, current_entry);
parent_gpu->access_counter_buffer_hal->parse_entry(access_counters, get, current_entry);
if (current_entry->address.is_virtual) {
batch_context->virt.notifications[batch_context->virt.num_notifications++] = current_entry;
batch_context->notifications[batch_context->num_notifications++] = current_entry;
if (batch_context->virt.is_single_instance_ptr) {
if (batch_context->virt.num_notifications == 1) {
last_instance_ptr_idx = notification_index;
}
else if (cmp_access_counter_instance_ptr(&notification_cache[last_instance_ptr_idx],
current_entry) != 0) {
batch_context->virt.is_single_instance_ptr = false;
}
}
}
else {
NvU64 translation_size;
uvm_gpu_t *gpu;
translation_size = get_config_for_type(access_counters,
current_entry->counter_type)->translation_size;
current_entry->address.address = UVM_ALIGN_DOWN(current_entry->address.address, translation_size);
batch_context->phys.notifications[batch_context->phys.num_notifications++] = current_entry;
gpu = uvm_parent_gpu_find_first_valid_gpu(parent_gpu);
if (!gpu) {
current_entry->physical_info.resident_id = UVM_ID_INVALID;
current_entry->gpu = NULL;
}
else {
current_entry->gpu = gpu;
current_entry->physical_info.resident_id =
uvm_gpu_get_processor_id_by_address(gpu, uvm_gpu_phys_address(current_entry->address.aperture,
current_entry->address.address));
if (batch_context->phys.is_single_aperture) {
if (batch_context->phys.num_notifications == 1)
last_aperture = current_entry->address.aperture;
else if (current_entry->address.aperture != last_aperture)
batch_context->phys.is_single_aperture = false;
}
if (current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC)
UVM_ASSERT(uvm_id_equal(current_entry->physical_info.resident_id, gpu->id));
else
UVM_ASSERT(!uvm_id_equal(current_entry->physical_info.resident_id, gpu->id));
}
if (batch_context->is_single_instance_ptr) {
if (batch_context->num_notifications == 1)
last_instance_ptr_idx = notification_index;
else if (cmp_access_counter_instance_ptr(&notification_cache[last_instance_ptr_idx], current_entry) != 0)
batch_context->is_single_instance_ptr = false;
}
++notification_index;
@ -910,83 +912,69 @@ static NvU32 fetch_access_counter_buffer_entries(uvm_parent_gpu_t *parent_gpu,
}
done:
write_get(parent_gpu, get);
write_get(access_counters, get);
return notification_index;
}
static void translate_virt_notifications_instance_ptrs(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_service_batch_context_t *batch_context)
static void translate_notifications_instance_ptrs(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_service_batch_context_t *batch_context)
{
NvU32 i;
NV_STATUS status;
for (i = 0; i < batch_context->virt.num_notifications; ++i) {
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];
for (i = 0; i < batch_context->num_notifications; ++i) {
uvm_access_counter_buffer_entry_t *current_entry = batch_context->notifications[i];
if (i == 0 ||
cmp_access_counter_instance_ptr(current_entry, batch_context->virt.notifications[i - 1]) != 0) {
if (i == 0 || cmp_access_counter_instance_ptr(current_entry, batch_context->notifications[i - 1]) != 0) {
// If instance_ptr is different, make a new translation. If the
// translation fails then va_space will be NULL and the entry will
// simply be ignored in subsequent processing.
status = uvm_parent_gpu_access_counter_entry_to_va_space(parent_gpu,
current_entry,
&current_entry->virtual_info.va_space,
&current_entry->va_space,
&current_entry->gpu);
if (status != NV_OK) {
UVM_ASSERT(current_entry->virtual_info.va_space == NULL);
UVM_ASSERT(current_entry->va_space == NULL);
UVM_ASSERT(current_entry->gpu == NULL);
}
}
else {
current_entry->virtual_info.va_space = batch_context->virt.notifications[i - 1]->virtual_info.va_space;
current_entry->gpu = batch_context->virt.notifications[i - 1]->gpu;
current_entry->va_space = batch_context->notifications[i - 1]->va_space;
current_entry->gpu = batch_context->notifications[i - 1]->gpu;
}
}
}
// GVA notifications provide an instance_ptr and ve_id that can be directly
// Notifications provide an instance_ptr and ve_id that can be directly
// translated to a VA space. In order to minimize translations, we sort the
// entries by instance_ptr, va_space and notification address in that order.
static void preprocess_virt_notifications(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_service_batch_context_t *batch_context)
static void preprocess_notifications(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_service_batch_context_t *batch_context)
{
if (!batch_context->virt.is_single_instance_ptr) {
sort(batch_context->virt.notifications,
batch_context->virt.num_notifications,
sizeof(*batch_context->virt.notifications),
cmp_sort_virt_notifications_by_instance_ptr,
if (!batch_context->is_single_instance_ptr) {
sort(batch_context->notifications,
batch_context->num_notifications,
sizeof(*batch_context->notifications),
cmp_sort_notifications_by_instance_ptr,
NULL);
}
translate_virt_notifications_instance_ptrs(parent_gpu, batch_context);
translate_notifications_instance_ptrs(parent_gpu, batch_context);
sort(batch_context->virt.notifications,
batch_context->virt.num_notifications,
sizeof(*batch_context->virt.notifications),
cmp_sort_virt_notifications_by_va_space_gpu_address,
sort(batch_context->notifications,
batch_context->num_notifications,
sizeof(*batch_context->notifications),
cmp_sort_notifications_by_va_space_gpu_address,
NULL);
}
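preprocess_notifications() sorts the batch twice: first by instance pointer so
that consecutive entries can reuse the previous instance_ptr-to-VA-space
translation, then by (va_space, GPU, address) so that later servicing can walk
the batch one VA space and one block at a time. The sketch below illustrates
only the first idea, deduplicating an expensive lookup by sorting on its key;
the entry layout, translate_key() cost model and sample values are assumptions
for illustration, not the driver's data structures.

#include <stdio.h>
#include <stdlib.h>

struct entry {
    unsigned long key;      /* stands in for the instance pointer  */
    int translation;        /* stands in for the resolved VA space */
};

static int g_translations_done;

/* Pretend this lookup is expensive (in the driver it reaches into RM state). */
static int translate_key(unsigned long key)
{
    ++g_translations_done;
    return (int)(key % 7);
}

static int cmp_by_key(const void *a, const void *b)
{
    unsigned long ka = ((const struct entry *)a)->key;
    unsigned long kb = ((const struct entry *)b)->key;

    return (ka > kb) - (ka < kb);
}

int main(void)
{
    struct entry batch[] = {
        { 0x4000, 0 }, { 0x1000, 0 }, { 0x4000, 0 }, { 0x1000, 0 }, { 0x2000, 0 },
    };
    size_t i, n = sizeof(batch) / sizeof(batch[0]);

    qsort(batch, n, sizeof(batch[0]), cmp_by_key);

    /* Equal keys are now adjacent: translate only when the key changes and
     * copy the previous result otherwise. */
    for (i = 0; i < n; i++) {
        if (i == 0 || batch[i].key != batch[i - 1].key)
            batch[i].translation = translate_key(batch[i].key);
        else
            batch[i].translation = batch[i - 1].translation;
    }

    /* Prints "5 entries, 3 translations". */
    printf("%zu entries, %d translations\n", n, g_translations_done);
    return 0;
}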
// GPA notifications provide a physical address and an aperture. Sort
// accesses by aperture to try to coalesce operations on the same target
// processor.
static void preprocess_phys_notifications(uvm_access_counter_service_batch_context_t *batch_context)
{
if (!batch_context->phys.is_single_aperture) {
sort(batch_context->phys.notifications,
batch_context->phys.num_notifications,
sizeof(*batch_context->phys.notifications),
cmp_sort_phys_notifications_by_processor_id,
NULL);
}
}
static NV_STATUS notify_tools_broadcast_and_process_flags(uvm_parent_gpu_t *parent_gpu,
static NV_STATUS notify_tools_broadcast_and_process_flags(uvm_access_counter_buffer_t *access_counters,
uvm_access_counter_buffer_entry_t **notification_start,
NvU32 num_entries,
NvU32 flags)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
uvm_gpu_t *gpu = uvm_parent_gpu_find_first_valid_gpu(parent_gpu);
NV_STATUS status = NV_OK;
@ -999,19 +987,20 @@ static NV_STATUS notify_tools_broadcast_and_process_flags(uvm_parent_gpu_t *pare
NvU32 i;
for (i = 0; i < num_entries; i++)
uvm_tools_broadcast_access_counter(gpu, notification_start[i], flags & UVM_ACCESS_COUNTER_PHYS_ON_MANAGED);
uvm_tools_broadcast_access_counter(gpu, notification_start[i]);
}
UVM_ASSERT(!(flags & UVM_ACCESS_COUNTER_ACTION_TARGETED_CLEAR));
if (flags & UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR)
status = access_counter_clear_notifications(gpu, notification_start, num_entries);
status = access_counter_clear_notifications(gpu, access_counters, notification_start, num_entries);
return status;
}
static NV_STATUS notify_tools_and_process_flags(uvm_va_space_t *va_space,
uvm_gpu_t *gpu,
uvm_access_counter_buffer_t *access_counters,
NvU64 base,
uvm_access_counter_buffer_entry_t **notification_start,
NvU32 num_entries,
@ -1023,12 +1012,8 @@ static NV_STATUS notify_tools_and_process_flags(uvm_va_space_t *va_space,
if (uvm_enable_builtin_tests) {
NvU32 i;
for (i = 0; i < num_entries; i++) {
uvm_tools_record_access_counter(va_space,
gpu->id,
notification_start[i],
flags & UVM_ACCESS_COUNTER_PHYS_ON_MANAGED);
}
for (i = 0; i < num_entries; i++)
uvm_tools_record_access_counter(va_space, gpu->id, notification_start[i]);
}
if (flags & UVM_ACCESS_COUNTER_ACTION_TARGETED_CLEAR) {
@ -1042,7 +1027,7 @@ static NV_STATUS notify_tools_and_process_flags(uvm_va_space_t *va_space,
NvU32 end_index;
for (end_index = i; end_index < num_entries; end_index++) {
NvU32 mask_index = (notification_start[end_index]->address.address - base) / PAGE_SIZE;
NvU32 mask_index = (notification_start[end_index]->address - base) / PAGE_SIZE;
if (!uvm_page_mask_test(migrated_mask, mask_index))
break;
@ -1050,6 +1035,7 @@ static NV_STATUS notify_tools_and_process_flags(uvm_va_space_t *va_space,
if (end_index > start_index) {
status = access_counter_clear_notifications(gpu,
access_counters,
&notification_start[start_index],
end_index - start_index);
if (status != NV_OK)
@ -1062,7 +1048,7 @@ static NV_STATUS notify_tools_and_process_flags(uvm_va_space_t *va_space,
else if (flags & UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR) {
UVM_ASSERT(!base);
UVM_ASSERT(!migrated_mask);
status = access_counter_clear_notifications(gpu, notification_start, num_entries);
status = access_counter_clear_notifications(gpu, access_counters, notification_start, num_entries);
}
return status;
@ -1242,162 +1228,6 @@ static NV_STATUS service_va_block_locked(uvm_processor_id_t processor,
return status;
}
static void reverse_mappings_to_va_block_page_mask(uvm_va_block_t *va_block,
const uvm_reverse_map_t *reverse_mappings,
size_t num_reverse_mappings,
uvm_page_mask_t *page_mask)
{
NvU32 index;
UVM_ASSERT(page_mask);
if (num_reverse_mappings > 0)
UVM_ASSERT(reverse_mappings);
uvm_page_mask_zero(page_mask);
// Populate the mask of accessed pages within the VA Block
for (index = 0; index < num_reverse_mappings; ++index) {
const uvm_reverse_map_t *reverse_map = &reverse_mappings[index];
uvm_va_block_region_t region = reverse_map->region;
UVM_ASSERT(reverse_map->va_block == va_block);
// The VA Block could have been split since we obtained the reverse
// mappings. Clamp the region to the current VA block size, to handle
// the case in which it was split.
region.outer = min(region.outer, (uvm_page_index_t)uvm_va_block_num_cpu_pages(va_block));
region.first = min(region.first, region.outer);
uvm_page_mask_region_fill(page_mask, region);
}
}
static NV_STATUS service_phys_single_va_block(uvm_access_counter_service_batch_context_t *batch_context,
const uvm_access_counter_buffer_entry_t *current_entry,
const uvm_reverse_map_t *reverse_mappings,
size_t num_reverse_mappings,
NvU32 *out_flags)
{
uvm_gpu_t *gpu = current_entry->gpu;
size_t index;
uvm_va_block_t *va_block = reverse_mappings[0].va_block;
uvm_va_space_t *va_space = NULL;
struct mm_struct *mm = NULL;
NV_STATUS status = NV_OK;
const uvm_processor_id_t processor = current_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC?
gpu->id: UVM_ID_CPU;
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
UVM_ASSERT(num_reverse_mappings > 0);
uvm_mutex_lock(&va_block->lock);
va_space = uvm_va_block_get_va_space_maybe_dead(va_block);
uvm_mutex_unlock(&va_block->lock);
if (va_space) {
uvm_va_block_retry_t va_block_retry;
va_space_access_counters_info_t *va_space_access_counters;
uvm_service_block_context_t *service_context = &batch_context->block_service_context;
uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
// If an mm is registered with the VA space, we have to retain it
// in order to lock it before locking the VA space.
mm = uvm_va_space_mm_retain_lock(va_space);
uvm_va_space_down_read(va_space);
// Re-check that the VA block is valid after taking the VA block lock.
if (uvm_va_block_is_dead(va_block))
goto done;
va_space_access_counters = va_space_access_counters_info_get(va_space);
if (UVM_ID_IS_CPU(processor) && !atomic_read(&va_space_access_counters->params.enable_momc_migrations))
goto done;
if (!UVM_ID_IS_CPU(processor) && !atomic_read(&va_space_access_counters->params.enable_mimc_migrations))
goto done;
service_context->operation = UVM_SERVICE_OPERATION_ACCESS_COUNTERS;
service_context->num_retries = 0;
uvm_va_block_context_init(service_context->block_context, mm);
if (uvm_va_block_is_hmm(va_block))
uvm_hmm_migrate_begin_wait(va_block);
uvm_mutex_lock(&va_block->lock);
reverse_mappings_to_va_block_page_mask(va_block, reverse_mappings, num_reverse_mappings, accessed_pages);
status = UVM_VA_BLOCK_RETRY_LOCKED(va_block,
&va_block_retry,
service_va_block_locked(processor,
va_block,
&va_block_retry,
service_context,
accessed_pages));
uvm_mutex_unlock(&va_block->lock);
if (uvm_va_block_is_hmm(va_block)) {
uvm_hmm_migrate_finish(va_block);
// If the pages could not be migrated, no need to try again,
// this is best effort only.
if (status == NV_WARN_MORE_PROCESSING_REQUIRED || status == NV_WARN_MISMATCHED_TARGET)
status = NV_OK;
}
if (status == NV_OK)
*out_flags |= UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
}
done:
if (va_space) {
uvm_va_space_up_read(va_space);
uvm_va_space_mm_release_unlock(va_space, mm);
}
// Drop the refcounts taken by the reverse map translation routines
for (index = 0; index < num_reverse_mappings; ++index)
uvm_va_block_release(va_block);
return status;
}
static NV_STATUS service_phys_va_blocks(uvm_access_counter_service_batch_context_t *batch_context,
const uvm_access_counter_buffer_entry_t *current_entry,
const uvm_reverse_map_t *reverse_mappings,
size_t num_reverse_mappings,
NvU32 *out_flags)
{
NV_STATUS status = NV_OK;
size_t index;
*out_flags &= ~UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
for (index = 0; index < num_reverse_mappings; ++index) {
NvU32 out_flags_local = 0;
status = service_phys_single_va_block(batch_context,
current_entry,
reverse_mappings + index,
1,
&out_flags_local);
if (status != NV_OK)
break;
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR) == 0);
*out_flags |= out_flags_local;
}
// In the case of failure, drop the refcounts for the remaining reverse mappings
while (++index < num_reverse_mappings)
uvm_va_block_release(reverse_mappings[index].va_block);
return status;
}
// Iterate over all regions set in the given sub_granularity mask
#define for_each_sub_granularity_region(region_start, region_end, sub_granularity, num_regions) \
for ((region_start) = find_first_bit(&(sub_granularity), (num_regions)), \
@ -1406,189 +1236,6 @@ static NV_STATUS service_phys_va_blocks(uvm_access_counter_service_batch_context
(region_start) = find_next_bit(&(sub_granularity), (num_regions), (region_end) + 1), \
(region_end) = find_next_zero_bit(&(sub_granularity), (num_regions), (region_start) + 1))
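The for_each_sub_granularity_region() macro above walks runs of consecutive set
bits in the sub_granularity mask, yielding half-open [region_start, region_end)
ranges. The standalone sketch below reproduces that iteration;
find_next_bit_sketch() and find_next_zero_bit_sketch() are simplified
single-word stand-ins for the kernel's bitops helpers, and the 8-region mask
value is made up for illustration.

#include <stdio.h>

/* Both helpers return `size` when no matching bit is found at or after
 * `start`, mirroring the kernel helpers' behavior for a single word. */
static unsigned find_next_bit_sketch(unsigned long mask, unsigned size, unsigned start)
{
    unsigned i;

    for (i = start; i < size; i++) {
        if (mask & (1UL << i))
            return i;
    }
    return size;
}

static unsigned find_next_zero_bit_sketch(unsigned long mask, unsigned size, unsigned start)
{
    unsigned i;

    for (i = start; i < size; i++) {
        if (!(mask & (1UL << i)))
            return i;
    }
    return size;
}

int main(void)
{
    /* Bits 2-4 and 7 set: two runs of "accessed" sub-granularity regions. */
    unsigned long sub_granularity = 0x9c;
    unsigned num_regions = 8;
    unsigned region_start, region_end;

    /* Prints "[2, 5)" and "[7, 8)". */
    for (region_start = find_next_bit_sketch(sub_granularity, num_regions, 0),
         region_end = find_next_zero_bit_sketch(sub_granularity, num_regions, region_start + 1);
         region_start < num_regions;
         region_start = find_next_bit_sketch(sub_granularity, num_regions, region_end + 1),
         region_end = find_next_zero_bit_sketch(sub_granularity, num_regions, region_start + 1))
        printf("accessed region [%u, %u)\n", region_start, region_end);

    return 0;
}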
static bool are_reverse_mappings_on_single_block(const uvm_reverse_map_t *reverse_mappings, size_t num_reverse_mappings)
{
size_t index;
uvm_va_block_t *prev_va_block = NULL;
for (index = 0; index < num_reverse_mappings; ++index) {
uvm_va_block_t *va_block = reverse_mappings[index].va_block;
UVM_ASSERT(va_block);
if (prev_va_block && prev_va_block != va_block)
return false;
prev_va_block = va_block;
}
return true;
}
// Service the given translation range. It will return the count of the reverse
// mappings found during servicing in num_reverse_mappings, even if the function
// doesn't return NV_OK.
static NV_STATUS service_phys_notification_translation(uvm_gpu_t *resident_gpu,
uvm_access_counter_service_batch_context_t *batch_context,
const uvm_gpu_access_counter_type_config_t *config,
const uvm_access_counter_buffer_entry_t *current_entry,
NvU64 address,
unsigned long sub_granularity,
size_t *num_reverse_mappings,
NvU32 *out_flags)
{
uvm_gpu_t *gpu = current_entry->gpu;
NV_STATUS status;
NvU32 region_start, region_end;
*num_reverse_mappings = 0;
// Get the reverse_map translations for all the regions set in the
// sub_granularity field of the counter.
for_each_sub_granularity_region(region_start,
region_end,
sub_granularity,
config->sub_granularity_regions_per_translation) {
NvU64 local_address = address + region_start * config->sub_granularity_region_size;
NvU32 local_translation_size = (region_end - region_start) * config->sub_granularity_region_size;
uvm_reverse_map_t *local_reverse_mappings = batch_context->phys.translations + *num_reverse_mappings;
// Obtain the virtual addresses of the pages within the reported
// DMA range
if (resident_gpu) {
*num_reverse_mappings += uvm_pmm_gpu_phys_to_virt(&resident_gpu->pmm,
local_address,
local_translation_size,
local_reverse_mappings);
}
else {
*num_reverse_mappings += uvm_pmm_sysmem_mappings_dma_to_virt(&gpu->pmm_reverse_sysmem_mappings,
local_address,
local_translation_size,
local_reverse_mappings,
local_translation_size / PAGE_SIZE);
}
}
if (*num_reverse_mappings == 0)
return NV_OK;
// Service all the translations
if (are_reverse_mappings_on_single_block(batch_context->phys.translations, *num_reverse_mappings)) {
status = service_phys_single_va_block(batch_context,
current_entry,
batch_context->phys.translations,
*num_reverse_mappings,
out_flags);
}
else {
status = service_phys_va_blocks(batch_context,
current_entry,
batch_context->phys.translations,
*num_reverse_mappings,
out_flags);
}
return status;
}
static NV_STATUS service_phys_notification(uvm_access_counter_service_batch_context_t *batch_context,
uvm_access_counter_buffer_entry_t *current_entry)
{
NvU64 address;
NvU64 translation_index;
uvm_gpu_t *gpu = current_entry->gpu;
uvm_parent_gpu_t *parent_gpu = gpu->parent;
uvm_access_counter_buffer_info_t *access_counters = &parent_gpu->access_counter_buffer_info;
uvm_access_counter_type_t counter_type = current_entry->counter_type;
const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters, counter_type);
unsigned long sub_granularity;
size_t total_reverse_mappings = 0;
uvm_gpu_t *resident_gpu = NULL;
NV_STATUS status = NV_OK;
NvU32 flags = 0;
address = current_entry->address.address;
UVM_ASSERT(address % config->translation_size == 0);
sub_granularity = current_entry->sub_granularity;
if (config->rm.granularity == UVM_ACCESS_COUNTER_GRANULARITY_64K)
sub_granularity = 1;
if (UVM_ID_IS_GPU(current_entry->physical_info.resident_id)) {
resident_gpu = uvm_gpu_get(current_entry->physical_info.resident_id);
UVM_ASSERT(resident_gpu != NULL);
if (gpu != resident_gpu && uvm_parent_gpus_are_nvswitch_connected(gpu->parent, resident_gpu->parent)) {
UVM_ASSERT(address >= resident_gpu->parent->nvswitch_info.fabric_memory_window_start);
address -= resident_gpu->parent->nvswitch_info.fabric_memory_window_start;
}
// On P9 systems, the CPU accesses the reserved heap on vidmem via
// coherent NVLINK mappings. This can trigger notifications that
// fall outside of the allocatable address range. We just drop
// them.
if (address >= resident_gpu->mem_info.max_allocatable_address)
goto out;
}
for (translation_index = 0; translation_index < config->translations_per_counter; ++translation_index) {
size_t num_reverse_mappings;
NvU32 out_flags_local = 0;
status = service_phys_notification_translation(resident_gpu,
batch_context,
config,
current_entry,
address,
sub_granularity,
&num_reverse_mappings,
&out_flags_local);
total_reverse_mappings += num_reverse_mappings;
UVM_ASSERT((out_flags_local & ~UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR) == 0);
flags |= out_flags_local;
if (status != NV_OK)
break;
address += config->translation_size;
sub_granularity = sub_granularity >> config->sub_granularity_regions_per_translation;
}
if (uvm_enable_builtin_tests)
flags |= ((total_reverse_mappings != 0) ? UVM_ACCESS_COUNTER_PHYS_ON_MANAGED : 0);
out:
notify_tools_broadcast_and_process_flags(parent_gpu, &current_entry, 1, flags);
return status;
}
// TODO: Bug 2018899: Add statistics for dropped access counter notifications
static NV_STATUS service_phys_notifications(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_service_batch_context_t *batch_context)
{
NvU32 i;
uvm_access_counter_buffer_entry_t **notifications = batch_context->phys.notifications;
UVM_ASSERT(parent_gpu->access_counters_can_use_physical_addresses);
preprocess_phys_notifications(batch_context);
for (i = 0; i < batch_context->phys.num_notifications; ++i) {
NV_STATUS status;
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
if (!UVM_ID_IS_VALID(current_entry->physical_info.resident_id))
continue;
status = service_phys_notification(batch_context, current_entry);
if (status != NV_OK)
return status;
}
return NV_OK;
}
static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
uvm_va_block_t *va_block,
uvm_processor_id_t processor,
@ -1617,6 +1264,7 @@ static NV_STATUS service_notification_va_block_helper(struct mm_struct *mm,
static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
uvm_va_block_t *va_block,
const uvm_access_counter_buffer_t *access_counters,
uvm_va_block_context_t *va_block_context,
uvm_page_mask_t *accessed_pages,
const uvm_access_counter_buffer_entry_t *current_entry)
@ -1627,18 +1275,15 @@ static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
uvm_processor_id_t resident_id;
uvm_page_index_t page_index;
uvm_gpu_t *gpu = gpu_va_space->gpu;
const uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
const uvm_gpu_access_counter_type_config_t *config = get_config_for_type(access_counters,
UVM_ACCESS_COUNTER_TYPE_MIMC);
config_granularity_to_bytes(config->rm.granularity, &granularity);
config_granularity_to_bytes(access_counters->current_config.rm.granularity, &granularity);
// Granularities other than 2MB can only be enabled by UVM tests. Do nothing
// in that case.
if (granularity != UVM_PAGE_SIZE_2M)
return;
addr = current_entry->address.address;
addr = current_entry->address;
uvm_assert_rwsem_locked(&gpu_va_space->va_space->lock);
uvm_assert_mutex_locked(&va_block->lock);
@ -1665,8 +1310,8 @@ static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
NvU32 region_start;
NvU32 region_end;
unsigned long sub_granularity = current_entry->sub_granularity;
NvU32 num_regions = config->sub_granularity_regions_per_translation;
NvU32 num_sub_pages = config->sub_granularity_region_size / PAGE_SIZE;
NvU32 num_regions = access_counters->current_config.sub_granularity_regions_per_translation;
NvU32 num_sub_pages = access_counters->current_config.sub_granularity_region_size / PAGE_SIZE;
uvm_page_mask_t *resident_mask = uvm_va_block_resident_mask_get(va_block, resident_id, NUMA_NO_NODE);
UVM_ASSERT(num_sub_pages >= 1);
@ -1686,12 +1331,12 @@ static void expand_notification_block(uvm_gpu_va_space_t *gpu_va_space,
}
}
static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_space,
struct mm_struct *mm,
uvm_va_block_t *va_block,
uvm_access_counter_service_batch_context_t *batch_context,
NvU32 index,
NvU32 *out_index)
static NV_STATUS service_notifications_in_block(uvm_gpu_va_space_t *gpu_va_space,
struct mm_struct *mm,
uvm_access_counter_buffer_t *access_counters,
uvm_va_block_t *va_block,
NvU32 index,
NvU32 *out_index)
{
NvU32 i;
NvU32 flags = 0;
@ -1699,12 +1344,13 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
NV_STATUS flags_status;
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
uvm_page_mask_t *accessed_pages = &batch_context->accessed_pages;
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
uvm_access_counter_buffer_entry_t **notifications = batch_context->notifications;
uvm_service_block_context_t *service_context = &batch_context->block_service_context;
UVM_ASSERT(va_block);
UVM_ASSERT(index < batch_context->virt.num_notifications);
UVM_ASSERT(index < batch_context->num_notifications);
uvm_assert_rwsem_locked(&va_space->lock);
@ -1714,15 +1360,16 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
uvm_mutex_lock(&va_block->lock);
for (i = index; i < batch_context->virt.num_notifications; i++) {
for (i = index; i < batch_context->num_notifications; i++) {
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
NvU64 address = current_entry->address.address;
NvU64 address = current_entry->address;
if (current_entry->virtual_info.va_space != va_space || current_entry->gpu != gpu || address > va_block->end)
if (current_entry->va_space != va_space || current_entry->gpu != gpu || address > va_block->end)
break;
expand_notification_block(gpu_va_space,
va_block,
access_counters,
batch_context->block_service_context.block_context,
accessed_pages,
current_entry);
@ -1733,6 +1380,8 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
    // At least one notification should have been processed.
UVM_ASSERT(index < *out_index);
batch_context->block_service_context.access_counters_buffer_index = access_counters->index;
status = service_notification_va_block_helper(mm, va_block, gpu->id, batch_context);
uvm_mutex_unlock(&va_block->lock);
@ -1742,6 +1391,7 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
flags_status = notify_tools_and_process_flags(va_space,
gpu,
access_counters,
0,
&notifications[index],
*out_index - index,
@ -1754,11 +1404,11 @@ static NV_STATUS service_virt_notifications_in_block(uvm_gpu_va_space_t *gpu_va_
return status;
}
static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
struct mm_struct *mm,
uvm_access_counter_service_batch_context_t *batch_context,
NvU32 index,
NvU32 *out_index)
static NV_STATUS service_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
struct mm_struct *mm,
uvm_access_counter_buffer_t *access_counters,
NvU32 index,
NvU32 *out_index)
{
NvU32 i;
@ -1770,15 +1420,16 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
struct vm_area_struct *vma = NULL;
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
uvm_ats_fault_context_t *ats_context = &batch_context->ats_context;
uvm_access_counter_buffer_entry_t **notifications = batch_context->virt.notifications;
uvm_access_counter_buffer_entry_t **notifications = batch_context->notifications;
UVM_ASSERT(index < batch_context->virt.num_notifications);
UVM_ASSERT(index < batch_context->num_notifications);
uvm_assert_mmap_lock_locked(mm);
uvm_assert_rwsem_locked(&va_space->lock);
address = notifications[index]->address.address;
address = notifications[index]->address;
vma = find_vma_intersection(mm, address, address + 1);
if (!vma) {
@ -1786,6 +1437,7 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
// notifications when a new VMA is allocated in this range.
status = notify_tools_and_process_flags(va_space,
gpu,
access_counters,
0,
&notifications[index],
1,
@ -1800,11 +1452,11 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
uvm_page_mask_zero(&ats_context->access_counters.accessed_mask);
for (i = index; i < batch_context->virt.num_notifications; i++) {
for (i = index; i < batch_context->num_notifications; i++) {
uvm_access_counter_buffer_entry_t *current_entry = notifications[i];
address = current_entry->address.address;
address = current_entry->address;
if (current_entry->virtual_info.va_space != va_space || current_entry->gpu != gpu || address >= end)
if (current_entry->va_space != va_space || current_entry->gpu != gpu || address >= end)
break;
uvm_page_mask_set(&ats_context->access_counters.accessed_mask, (address - base) / PAGE_SIZE);
@ -1822,6 +1474,7 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
flags_status = notify_tools_and_process_flags(va_space,
gpu,
access_counters,
base,
&notifications[index],
*out_index - index,
@ -1834,17 +1487,19 @@ static NV_STATUS service_virt_notification_ats(uvm_gpu_va_space_t *gpu_va_space,
return status;
}
static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_space,
struct mm_struct *mm,
uvm_access_counter_service_batch_context_t *batch_context,
NvU32 index,
NvU32 *out_index)
// TODO: Bug 2018899: Add statistics for dropped access counter notifications
static NV_STATUS service_notifications_batch(uvm_gpu_va_space_t *gpu_va_space,
struct mm_struct *mm,
uvm_access_counter_buffer_t *access_counters,
NvU32 index,
NvU32 *out_index)
{
NV_STATUS status;
uvm_va_range_t *va_range;
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[index];
NvU64 address = current_entry->address.address;
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
uvm_access_counter_buffer_entry_t *current_entry = batch_context->notifications[index];
NvU64 address = current_entry->address;
UVM_ASSERT(va_space);
@ -1864,26 +1519,32 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
uvm_va_range_managed_t *managed_range = uvm_va_range_to_managed_or_null(va_range);
if (managed_range) {
size_t index = uvm_va_range_block_index(managed_range, address);
size_t block_index = uvm_va_range_block_index(managed_range, address);
va_block = uvm_va_range_block(managed_range, index);
va_block = uvm_va_range_block(managed_range, block_index);
// If the va_range is a managed range, the notification belongs to a
// recently freed va_range if va_block is NULL. If va_block is not
// NULL, service_virt_notifications_in_block will process flags.
// NULL, service_notifications_in_block will process flags.
// Clear the notification entry to continue receiving notifications
// when a new va_range is allocated in that region.
flags = UVM_ACCESS_COUNTER_ACTION_BATCH_CLEAR;
}
if (va_block) {
status = service_virt_notifications_in_block(gpu_va_space, mm, va_block, batch_context, index, out_index);
status = service_notifications_in_block(gpu_va_space,
mm,
access_counters,
va_block,
index,
out_index);
}
else {
status = notify_tools_and_process_flags(va_space,
gpu_va_space->gpu,
access_counters,
0,
batch_context->virt.notifications,
batch_context->notifications,
1,
flags,
NULL);
@ -1891,7 +1552,7 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
}
}
else if (uvm_ats_can_service_faults(gpu_va_space, mm)) {
status = service_virt_notification_ats(gpu_va_space, mm, batch_context, index, out_index);
status = service_notification_ats(gpu_va_space, mm, access_counters, index, out_index);
}
else {
NvU32 flags;
@ -1918,8 +1579,9 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
// in the batch.
status = notify_tools_and_process_flags(va_space,
gpu_va_space->gpu,
access_counters,
0,
batch_context->virt.notifications,
batch_context->notifications,
1,
flags,
NULL);
@ -1930,8 +1592,7 @@ static NV_STATUS service_virt_notifications_batch(uvm_gpu_va_space_t *gpu_va_spa
return status;
}
static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
uvm_access_counter_service_batch_context_t *batch_context)
static NV_STATUS service_notifications(uvm_access_counter_buffer_t *access_counters)
{
NvU32 i = 0;
NV_STATUS status = NV_OK;
@ -1940,24 +1601,25 @@ static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
uvm_va_space_t *prev_va_space = NULL;
uvm_gpu_t *prev_gpu = NULL;
uvm_gpu_va_space_t *gpu_va_space = NULL;
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
uvm_access_counter_service_batch_context_t *batch_context = &access_counters->batch_service_context;
// TODO: Bug 4299018 : Add support for virtual access counter migrations on
// 4K page sizes.
if (PAGE_SIZE == UVM_PAGE_SIZE_4K) {
return notify_tools_broadcast_and_process_flags(parent_gpu,
batch_context->virt.notifications,
batch_context->virt.num_notifications,
return notify_tools_broadcast_and_process_flags(access_counters,
batch_context->notifications,
batch_context->num_notifications,
0);
}
preprocess_virt_notifications(parent_gpu, batch_context);
preprocess_notifications(parent_gpu, batch_context);
while (i < batch_context->virt.num_notifications) {
uvm_access_counter_buffer_entry_t *current_entry = batch_context->virt.notifications[i];
va_space = current_entry->virtual_info.va_space;
while (i < batch_context->num_notifications) {
uvm_access_counter_buffer_entry_t *current_entry = batch_context->notifications[i];
va_space = current_entry->va_space;
if (va_space != prev_va_space) {
// New va_space detected, drop locks of the old va_space.
if (prev_va_space) {
uvm_va_space_up_read(prev_va_space);
@ -1983,13 +1645,14 @@ static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
}
if (gpu_va_space && uvm_va_space_has_access_counter_migrations(va_space)) {
status = service_virt_notifications_batch(gpu_va_space, mm, batch_context, i, &i);
status = service_notifications_batch(gpu_va_space, mm, access_counters, i, &i);
}
else {
status = notify_tools_and_process_flags(va_space,
current_entry->gpu,
access_counters,
0,
&batch_context->virt.notifications[i],
&batch_context->notifications[i],
1,
0,
NULL);
@ -1997,7 +1660,10 @@ static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
}
}
else {
status = notify_tools_broadcast_and_process_flags(parent_gpu, &batch_context->virt.notifications[i], 1, 0);
status = notify_tools_broadcast_and_process_flags(access_counters,
&batch_context->notifications[i],
1,
0);
i++;
}
@ -2013,42 +1679,46 @@ static NV_STATUS service_virt_notifications(uvm_parent_gpu_t *parent_gpu,
return status;
}
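To make the loop above easier to follow, here is a small, self-contained model of the per-VA-space grouping pattern it uses: the locks for a VA space are taken once per run of consecutive notifications that target it, not once per notification. Every name in this sketch is invented for illustration and is not part of the driver.

#include <stdio.h>

/* Toy stand-ins for the driver's notifications and VA space locking. */
struct toy_notification { int va_space_id; };

static void toy_lock(int id)   { printf("lock va_space %d\n", id); }
static void toy_unlock(int id) { printf("unlock va_space %d\n", id); }

static void toy_service_batch(const struct toy_notification *n, size_t count)
{
    int prev = -1;

    for (size_t i = 0; i < count; i++) {
        if (n[i].va_space_id != prev) {
            if (prev != -1)
                toy_unlock(prev);       /* new VA space: drop the old locks */
            toy_lock(n[i].va_space_id);
            prev = n[i].va_space_id;
        }
        printf("  service notification %zu\n", i);
    }

    if (prev != -1)
        toy_unlock(prev);               /* release the last VA space */
}

int main(void)
{
    const struct toy_notification batch[] = { {1}, {1}, {2}, {2}, {3} };

    toy_service_batch(batch, sizeof(batch) / sizeof(batch[0]));
    return 0;
}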
void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu)
void uvm_service_access_counters(uvm_access_counter_buffer_t *access_counters)
{
NV_STATUS status = NV_OK;
uvm_access_counter_service_batch_context_t *batch_context = &parent_gpu->access_counter_buffer_info.batch_service_context;
uvm_access_counter_service_batch_context_t *batch_context;
UVM_ASSERT(parent_gpu->access_counters_supported);
batch_context = &access_counters->batch_service_context;
if (parent_gpu->access_counter_buffer_info.notifications_ignored_count > 0)
if (access_counters->notifications_ignored_count > 0)
return;
while (1) {
batch_context->num_cached_notifications = fetch_access_counter_buffer_entries(parent_gpu,
batch_context,
batch_context->num_cached_notifications = fetch_access_counter_buffer_entries(access_counters,
NOTIFICATION_FETCH_MODE_BATCH_READY);
if (batch_context->num_cached_notifications == 0)
break;
++batch_context->batch_id;
if (batch_context->virt.num_notifications) {
status = service_virt_notifications(parent_gpu, batch_context);
if (batch_context->num_notifications) {
status = service_notifications(access_counters);
if (status != NV_OK)
break;
}
if (batch_context->phys.num_notifications) {
status = service_phys_notifications(parent_gpu, batch_context);
if (status != NV_OK)
if (uvm_enable_builtin_tests) {
if (access_counters->test.sleep_per_iteration_us) {
usleep_range(access_counters->test.sleep_per_iteration_us,
access_counters->test.sleep_per_iteration_us * 2);
}
if (access_counters->test.one_iteration_per_batch)
break;
}
}
if (status != NV_OK) {
UVM_DBG_PRINT("Error %s servicing access counter notifications on GPU: %s\n",
UVM_DBG_PRINT("Error %s servicing access counter notifications on GPU: %s notif buf index: %u\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu));
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
}
}
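In outline, the service entry point above drains the notification buffer batch by batch; the sketch below is a simplified restatement of the visible control flow, not additional driver code, and the sleep/one-iteration hooks only apply when builtin tests are enabled.

/*
 * uvm_service_access_counters(access_counters):
 *     if (notifications are currently being ignored)
 *         return;                                  // nothing is serviced while ignored
 *     loop:
 *         fetch a batch (NOTIFICATION_FETCH_MODE_BATCH_READY)
 *         if (the batch is empty)
 *             break;
 *         service_notifications(access_counters);  // stop on error
 *         optionally sleep, or stop after one batch (test hooks)
 */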
@ -2069,7 +1739,6 @@ NV_STATUS uvm_api_clear_all_access_counters(UVM_CLEAR_ALL_ACCESS_COUNTERS_PARAMS
uvm_va_space_down_read(va_space);
for_each_va_space_gpu(gpu, va_space) {
if (gpu->parent == parent_gpu)
continue;
@ -2081,25 +1750,28 @@ NV_STATUS uvm_api_clear_all_access_counters(UVM_CLEAR_ALL_ACCESS_COUNTERS_PARAMS
uvm_va_space_up_read(va_space);
for_each_gpu_in_mask(gpu, retained_gpus) {
NvU32 notif_buf_index;
if (!gpu->parent->access_counters_supported)
continue;
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
for (notif_buf_index = 0; notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount; notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
notif_buf_index);
uvm_access_counters_isr_lock(access_counters);
// Access counters not enabled. Nothing to clear
if (gpu->parent->isr.access_counters.handling_ref_count) {
uvm_access_counter_buffer_info_t *access_counters = &gpu->parent->access_counter_buffer_info;
// Access counters are not enabled. Nothing to clear.
if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count) {
status = access_counter_clear_all(gpu, access_counters);
if (status == NV_OK)
status = uvm_tracker_wait(&access_counters->clear_tracker);
}
status = access_counter_clear_all(gpu);
if (status == NV_OK)
status = uvm_tracker_wait(&access_counters->clear_tracker);
uvm_access_counters_isr_unlock(access_counters);
if (status != NV_OK)
break;
}
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
if (status != NV_OK)
break;
}
for_each_gpu_in_mask(gpu, retained_gpus)
@ -2121,21 +1793,12 @@ static NV_STATUS access_counters_config_from_test_params(const UVM_TEST_RECONFIG
if (params->threshold == 0 || params->threshold > g_uvm_access_counters_threshold_max)
return NV_ERR_INVALID_ARGUMENT;
if (config_granularity_to_bytes(params->mimc_granularity, &tracking_size) != NV_OK)
if (config_granularity_to_bytes(params->granularity, &tracking_size) != NV_OK)
return NV_ERR_INVALID_ARGUMENT;
if (config_granularity_to_bytes(params->momc_granularity, &tracking_size) != NV_OK)
return NV_ERR_INVALID_ARGUMENT;
// Since values for granularity/use limit are shared between tests and
// nv_uvm_types.h, the value will be checked in the call to
// nvUvmInterfaceEnableAccessCntr
config->mimcGranularity = params->mimc_granularity;
config->momcGranularity = params->momc_granularity;
config->mimcUseLimit = params->mimc_use_limit;
config->momcUseLimit = params->momc_use_limit;
// Since values for granularity are shared between tests and nv_uvm_types.h,
// the value will be checked in the call to nvUvmInterfaceEnableAccessCntr
config->granularity = params->granularity;
config->threshold = params->threshold;
return NV_OK;
@ -2145,7 +1808,40 @@ bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space)
{
va_space_access_counters_info_t *va_space_access_counters = va_space_access_counters_info_get(va_space);
return atomic_read(&va_space_access_counters->params.enable_mimc_migrations);
return atomic_read(&va_space_access_counters->enable_migrations);
}
NV_STATUS uvm_access_counters_init(void)
{
NV_STATUS status = NV_OK;
NvU64 granularity_bytes = 0;
if (uvm_perf_access_counter_threshold < UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN) {
g_default_config.threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_MIN;
UVM_INFO_PRINT("Value %u too small for uvm_perf_access_counter_threshold, using %u instead\n",
uvm_perf_access_counter_threshold,
g_default_config.threshold);
}
else if (uvm_perf_access_counter_threshold > UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX) {
g_default_config.threshold = UVM_PERF_ACCESS_COUNTER_THRESHOLD_MAX;
UVM_INFO_PRINT("Value %u too large for uvm_perf_access_counter_threshold, using %u instead\n",
uvm_perf_access_counter_threshold,
g_default_config.threshold);
}
else {
g_default_config.threshold = uvm_perf_access_counter_threshold;
}
status = config_granularity_to_bytes(g_default_config.granularity, &granularity_bytes);
UVM_ASSERT(status == NV_OK);
if (granularity_bytes > UVM_MAX_TRANSLATION_SIZE)
UVM_ASSERT(granularity_bytes % UVM_MAX_TRANSLATION_SIZE == 0);
return NV_OK;
}
void uvm_access_counters_exit(void)
{
}
NV_STATUS uvm_perf_access_counters_init(void)
@ -2203,14 +1899,110 @@ NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_E
return NV_OK;
}
static NV_STATUS test_reconfigure_access_counters_notif_buffer(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params,
uvm_gpu_t *gpu,
uvm_va_space_t *va_space,
UvmGpuAccessCntrConfig *config,
uvm_access_counter_buffer_t *access_counters)
{
NV_STATUS status = NV_OK;
uvm_va_space_t *va_space_reconfiguration_owner;
NvU32 notif_buf_index = access_counters->index;
if (params->max_batch_size > access_counters->max_notifications)
return NV_ERR_INVALID_ARGUMENT;
// ISR lock ensures that we own GET/PUT registers. It disables
// interrupts and ensures that no other thread (nor the top half) will
// be able to re-enable interrupts during reconfiguration.
uvm_access_counters_isr_lock(access_counters);
uvm_va_space_down_read_rm(va_space);
if (!uvm_processor_mask_test(&va_space->registered_gpus, gpu->id)) {
status = NV_ERR_INVALID_STATE;
goto exit_unlock;
}
// Unregistration already started. Fail to avoid an interleaving in
// which access counters end up being enabled on an unregistered GPU:
// (thread 0) uvm_va_space_unregister_gpu disables access counters.
// (thread 1) assuming no VA space lock is held yet by the
// unregistration, this function enables access counters and
// runs to completion, returning NV_OK.
// (thread 0) uvm_va_space_unregister_gpu takes the VA space lock and
// completes the unregistration.
if (uvm_processor_mask_test(&va_space->gpu_unregister_in_progress, gpu->id)) {
status = NV_ERR_INVALID_STATE;
goto exit_unlock;
}
va_space_reconfiguration_owner = access_counters->test.reconfiguration_owner;
// If any other VA space has reconfigured access counters on this GPU,
// return error to avoid overwriting its configuration.
if (va_space_reconfiguration_owner && (va_space_reconfiguration_owner != va_space)) {
status = NV_ERR_INVALID_STATE;
goto exit_unlock;
}
if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
status = gpu_access_counters_enable(gpu, access_counters, config);
if (status != NV_OK)
goto exit_unlock;
}
UVM_ASSERT_MSG(gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count > 0,
"notif buf index: %u\n",
notif_buf_index);
// Disable counters, and re-enable them with the new configuration.
// Note that we are yielding ownership even while access counters are
// enabled on the GPU. This inconsistent state is not visible to
// other threads or VA spaces because of the ISR lock, and it is
// immediately rectified by retaking ownership.
access_counters_yield_ownership(gpu->parent, notif_buf_index);
status = access_counters_take_ownership(gpu, notif_buf_index, config);
// Retaking ownership failed, so RM owns the interrupt.
if (status != NV_OK) {
// The state of any other VA space with access counters enabled is
// corrupt
// TODO: Bug 2419290: Fail reconfiguration if access
// counters are enabled on a different VA space.
if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count > 1) {
UVM_ASSERT_MSG(status == NV_OK,
"Access counters interrupt still owned by RM, other VA spaces may experience failures");
}
access_counters_disable(access_counters);
goto exit_unlock;
}
access_counters->test.reconfiguration_owner = va_space;
if (params->max_batch_size)
access_counters->max_batch_size = params->max_batch_size;
access_counters->test.one_iteration_per_batch = params->one_iteration_per_batch;
access_counters->test.sleep_per_iteration_us = params->sleep_per_iteration_us;
exit_unlock:
uvm_va_space_up_read_rm(va_space);
uvm_access_counters_isr_unlock(access_counters);
return status;
}
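For orientation, the per-buffer reconfiguration helper above follows this sequence; the sketch is a condensed restatement of the steps visible in the function, not additional driver code.

/*
 * uvm_access_counters_isr_lock(access_counters);       // own GET/PUT, keep bottom halves out
 * uvm_va_space_down_read_rm(va_space);                 // GPU must stay registered
 *     gpu_access_counters_enable(gpu, ...);            // first enable, if not yet enabled
 *     access_counters_yield_ownership(parent, index);  // hand the buffer back to RM...
 *     access_counters_take_ownership(gpu, index, cfg); // ...and retake it with the new config
 *     // on failure: access_counters_disable() so the buffer is not left half-owned
 * uvm_va_space_up_read_rm(va_space);
 * uvm_access_counters_isr_unlock(access_counters);     // re-arms interrupts if still handling
 */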
NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
{
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu = NULL;
UvmGpuAccessCntrConfig config = {0};
va_space_access_counters_info_t *va_space_access_counters;
uvm_va_space_t *va_space_reconfiguration_owner;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
NvU32 notif_buf_index;
status = access_counters_config_from_test_params(params, &config);
if (status != NV_OK)
@ -2225,90 +2017,31 @@ NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNT
goto exit_release_gpu;
}
// ISR lock ensures that we own GET/PUT registers. It disables interrupts
// and ensures that no other thread (nor the top half) will be able to
// re-enable interrupts during reconfiguration.
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
uvm_va_space_down_read_rm(va_space);
if (!uvm_processor_mask_test(&va_space->registered_gpus, gpu->id)) {
status = NV_ERR_INVALID_STATE;
goto exit_isr_unlock;
// a zero max_batch_size does not change the driver's behavior.
if (params->max_batch_size < (NvU32)UVM_PERF_ACCESS_COUNTER_BATCH_COUNT_MIN && params->max_batch_size != 0) {
status = NV_ERR_INVALID_ARGUMENT;
goto exit_release_gpu;
}
// Unregistration already started. Fail to avoid an interleaving in which
// access counters end up been enabled on an unregistered GPU:
// (thread 0) uvm_va_space_unregister_gpu disables access counters
// (thread 1) assuming no VA space lock is held yet by the unregistration,
// this function enables access counters and runs to completion,
// returning NV_OK
// (thread 0) uvm_va_space_unregister_gpu takes the VA space lock and
// completes the unregistration
if (uvm_processor_mask_test(&va_space->gpu_unregister_in_progress, gpu->id)) {
status = NV_ERR_INVALID_STATE;
goto exit_isr_unlock;
uvm_mutex_lock(&gpu->parent->access_counters_enablement_lock);
for (notif_buf_index = 0; notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount; notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
notif_buf_index);
status = test_reconfigure_access_counters_notif_buffer(params, gpu, va_space, &config, access_counters);
if (status != NV_OK)
goto exit_ac_lock;
}
va_space_access_counters = va_space_access_counters_info_get(va_space);
va_space_reconfiguration_owner = gpu->parent->access_counter_buffer_info.reconfiguration_owner;
// If any other VA space has reconfigured access counters on this GPU,
// return error to avoid overwriting its configuration.
if (va_space_reconfiguration_owner && (va_space_reconfiguration_owner != va_space)) {
status = NV_ERR_INVALID_STATE;
goto exit_isr_unlock;
}
if (!uvm_parent_processor_mask_test(&va_space->access_counters_enabled_processors, gpu->parent->id)) {
status = gpu_access_counters_enable(gpu, &config);
if (status == NV_OK)
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
else
goto exit_isr_unlock;
}
UVM_ASSERT(gpu->parent->isr.access_counters.handling_ref_count > 0);
// Disable counters, and renable with the new configuration.
// Note that we are yielding ownership even when the access counters are
// enabled in at least gpu. This inconsistent state is not visible to other
// threads or VA spaces because of the ISR lock, and it is immediately
// rectified by retaking ownership.
access_counters_yield_ownership(gpu->parent);
status = access_counters_take_ownership(gpu, &config);
// Retaking ownership failed, so RM owns the interrupt.
if (status != NV_OK) {
// The state of any other VA space with access counters enabled is
// corrupt
// TODO: Bug 2419290: Fail reconfiguration if access
// counters are enabled on a different VA space.
if (gpu->parent->isr.access_counters.handling_ref_count > 1) {
UVM_ASSERT_MSG(status == NV_OK,
"Access counters interrupt still owned by RM, other VA spaces may experience failures");
}
uvm_parent_processor_mask_clear_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
parent_gpu_access_counters_disable(gpu->parent);
goto exit_isr_unlock;
}
gpu->parent->access_counter_buffer_info.reconfiguration_owner = va_space;
uvm_va_space_up_read_rm(va_space);
uvm_va_space_down_write(va_space);
atomic_set(&va_space_access_counters->params.enable_mimc_migrations, !!params->enable_mimc_migrations);
atomic_set(&va_space_access_counters->params.enable_momc_migrations, !!params->enable_momc_migrations);
va_space_access_counters = va_space_access_counters_info_get(va_space);
atomic_set(&va_space_access_counters->enable_migrations, !!params->enable_migrations);
uvm_va_space_up_write(va_space);
exit_isr_unlock:
if (status != NV_OK)
uvm_va_space_up_read_rm(va_space);
uvm_parent_processor_mask_set_atomic(&va_space->access_counters_enabled_processors, gpu->parent->id);
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
exit_ac_lock:
uvm_mutex_unlock(&gpu->parent->access_counters_enablement_lock);
exit_release_gpu:
uvm_gpu_release(gpu);
@ -2320,17 +2053,12 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
{
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu = NULL;
uvm_access_counter_buffer_info_t *access_counters;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
NvU32 notif_buf_index;
if (params->mode >= UVM_TEST_ACCESS_COUNTER_RESET_MODE_MAX)
return NV_ERR_INVALID_ARGUMENT;
if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_TARGETED &&
params->counter_type >= UVM_TEST_ACCESS_COUNTER_TYPE_MAX) {
return NV_ERR_INVALID_ARGUMENT;
}
gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpu_uuid);
if (!gpu)
return NV_ERR_INVALID_DEVICE;
@ -2340,37 +2068,51 @@ NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *
goto exit_release_gpu;
}
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
for (notif_buf_index = 0;
notif_buf_index < gpu->parent->rm_info.accessCntrBufferCount && status == NV_OK;
notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(gpu->parent,
notif_buf_index);
// Access counters not enabled. Nothing to reset
if (gpu->parent->isr.access_counters.handling_ref_count == 0)
goto exit_isr_unlock;
uvm_access_counters_isr_lock(access_counters);
access_counters = &gpu->parent->access_counter_buffer_info;
// Access counters not enabled. Nothing to reset
if (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0)
goto exit_isr_unlock;
if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) {
status = access_counter_clear_all(gpu);
}
else {
uvm_access_counter_buffer_entry_t entry = { 0 };
uvm_access_counter_buffer_entry_t *notification = &entry;
if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) {
status = access_counter_clear_all(gpu, access_counters);
}
else {
uvm_access_counter_buffer_entry_t entry = { 0 };
uvm_access_counter_buffer_entry_t *notification = &entry;
if (params->counter_type == UVM_TEST_ACCESS_COUNTER_TYPE_MIMC)
entry.counter_type = UVM_ACCESS_COUNTER_TYPE_MIMC;
else
entry.counter_type = UVM_ACCESS_COUNTER_TYPE_MOMC;
entry.bank = params->bank;
entry.tag = params->tag;
entry.bank = params->bank;
entry.tag = params->tag;
status = access_counter_clear_notifications(gpu, access_counters, &notification, 1);
}
status = access_counter_clear_notifications(gpu, &notification, 1);
}
if (status == NV_OK)
status = uvm_tracker_wait(&access_counters->clear_tracker);
if (status == NV_OK)
status = uvm_tracker_wait(&access_counters->clear_tracker);
exit_isr_unlock:
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_access_counters_isr_unlock(access_counters);
// We only need to clear_all() once.
if (params->mode == UVM_TEST_ACCESS_COUNTER_RESET_MODE_ALL) {
NvU32 i;
// Early exit of the main loop, since we only need to clear_all()
// once. Check that all the remaining notification buffers have
// access counters in the same state.
NvBool index0_state = (gpu->parent->isr.access_counters[notif_buf_index].handling_ref_count == 0);
for (i = notif_buf_index + 1; i < gpu->parent->rm_info.accessCntrBufferCount; i++)
UVM_ASSERT((gpu->parent->isr.access_counters[i].handling_ref_count == 0) == index0_state);
break;
}
}
exit_release_gpu:
uvm_gpu_release(gpu);
@ -2381,39 +2123,44 @@ exit_release_gpu:
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore)
{
bool change_intr_state = false;
NvU32 notif_buf_index;
if (!parent_gpu->access_counters_supported)
return;
uvm_parent_gpu_access_counters_isr_lock(parent_gpu);
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
uvm_access_counter_buffer_t *access_counters = parent_gpu_access_counter_buffer_get(parent_gpu,
notif_buf_index);
uvm_access_counters_isr_lock(access_counters);
if (do_ignore) {
if (parent_gpu->access_counter_buffer_info.notifications_ignored_count++ == 0)
change_intr_state = true;
if (do_ignore) {
if (access_counters->notifications_ignored_count++ == 0)
change_intr_state = true;
}
else {
UVM_ASSERT(access_counters->notifications_ignored_count >= 1);
if (--access_counters->notifications_ignored_count == 0)
change_intr_state = true;
}
if (change_intr_state) {
// We need to avoid an interrupt storm while ignoring notifications.
// We just disable the interrupt.
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
if (do_ignore)
uvm_access_counters_intr_disable(access_counters);
else
uvm_access_counters_intr_enable(access_counters);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
if (!do_ignore)
access_counter_buffer_flush_locked(access_counters, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
}
uvm_access_counters_isr_unlock(access_counters);
}
else {
UVM_ASSERT(parent_gpu->access_counter_buffer_info.notifications_ignored_count >= 1);
if (--parent_gpu->access_counter_buffer_info.notifications_ignored_count == 0)
change_intr_state = true;
}
if (change_intr_state) {
// We need to avoid an interrupt storm while ignoring notifications. We
// just disable the interrupt.
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
if (do_ignore)
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
else
uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
if (!do_ignore)
access_counter_buffer_flush_locked(parent_gpu, UVM_GPU_BUFFER_FLUSH_MODE_CACHED_PUT);
}
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
}
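The ignore switch above is reference counted per notification buffer: interrupts are masked on the 0 to 1 transition and re-armed (with a flush) on the 1 to 0 transition. A minimal self-contained model of that counting, using invented names rather than the driver's API, is shown below.

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy model of notifications_ignored_count and the interrupt state. */
struct toy_buffer {
    unsigned ignored_count;
    bool intr_enabled;
};

static void toy_set_ignore(struct toy_buffer *buf, bool do_ignore)
{
    if (do_ignore) {
        if (buf->ignored_count++ == 0)
            buf->intr_enabled = false;      /* 0 -> 1: mask the interrupt */
    }
    else {
        assert(buf->ignored_count >= 1);
        if (--buf->ignored_count == 0)
            buf->intr_enabled = true;       /* 1 -> 0: re-arm (and flush) */
    }
}

int main(void)
{
    struct toy_buffer buf = { .ignored_count = 0, .intr_enabled = true };

    toy_set_ignore(&buf, true);             /* masked */
    toy_set_ignore(&buf, true);             /* nested ignore: still masked */
    toy_set_ignore(&buf, false);
    toy_set_ignore(&buf, false);            /* back to zero: re-armed */
    printf("interrupts enabled: %d\n", buf.intr_enabled);
    return 0;
}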
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
@ -2434,3 +2181,34 @@ NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTER
uvm_gpu_release(gpu);
return status;
}
NV_STATUS uvm_test_query_access_counters(UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS *params, struct file *filp)
{
NV_STATUS status = NV_OK;
uvm_gpu_t *gpu = NULL;
uvm_va_space_t *va_space = uvm_va_space_get(filp);
NvU32 buffer_size;
NvU32 index;
gpu = uvm_va_space_retain_gpu_by_uuid(va_space, &params->gpu_uuid);
if (!gpu)
return NV_ERR_INVALID_DEVICE;
if (!gpu->parent->access_counters_supported) {
status = NV_ERR_NOT_SUPPORTED;
goto exit_release_gpu;
}
buffer_size = gpu->parent->access_counter_buffer[0].rm_info.bufferSize;
for (index = 1; index < gpu->parent->rm_info.accessCntrBufferCount; index++)
UVM_ASSERT(gpu->parent->access_counter_buffer[index].rm_info.bufferSize == buffer_size);
params->num_notification_buffers = gpu->parent->rm_info.accessCntrBufferCount;
params->num_notification_entries = buffer_size / gpu->parent->access_counter_buffer_hal->entry_size(gpu->parent);
exit_release_gpu:
uvm_gpu_release(gpu);
return status;
}
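As a worked example of the division above (the numbers are hypothetical, not taken from any specific GPU): with a 1 MiB notification buffer and 32-byte entries,

num_notification_entries = bufferSize / entry_size = 1048576 / 32 = 32768 entries per buffer.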


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -27,11 +27,11 @@
#include "uvm_forward_decl.h"
#include "uvm_test_ioctl.h"
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu);
NV_STATUS uvm_parent_gpu_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_parent_gpu_deinit_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 index);
bool uvm_parent_gpu_access_counters_pending(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_parent_gpu_service_access_counters(uvm_parent_gpu_t *parent_gpu);
void uvm_service_access_counters(uvm_access_counter_buffer_t *access_counters);
void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
@ -46,17 +46,23 @@ void uvm_parent_gpu_access_counter_buffer_flush(uvm_parent_gpu_t *parent_gpu);
//
// When unignoring, the interrupt conditions will be re-evaluated to trigger
// processing of buffered notifications, if any exist.
//
// All of parent_gpu's notification buffers are affected.
void uvm_parent_gpu_access_counters_set_ignore(uvm_parent_gpu_t *parent_gpu, bool do_ignore);
// Return whether the VA space has access counter migrations enabled. The
// caller must ensure that the VA space cannot go away.
bool uvm_va_space_has_access_counter_migrations(uvm_va_space_t *va_space);
// Global perf initialization/cleanup functions
// Global access counters initialization/cleanup functions.
NV_STATUS uvm_access_counters_init(void);
void uvm_access_counters_exit(void);
// Global perf initialization/cleanup functions.
NV_STATUS uvm_perf_access_counters_init(void);
void uvm_perf_access_counters_exit(void);
// VA space Initialization/cleanup functions. See comments in
// VA space initialization/cleanup functions. See comments in
// uvm_perf_heuristics.h
NV_STATUS uvm_perf_access_counters_load(uvm_va_space_t *va_space);
void uvm_perf_access_counters_unload(uvm_va_space_t *va_space);
@ -72,17 +78,18 @@ bool uvm_parent_gpu_access_counters_required(const uvm_parent_gpu_t *parent_gpu)
// counters are currently enabled. The hardware notifications and interrupts on
// the GPU are enabled the first time any VA space invokes
// uvm_gpu_access_counters_enable, and disabled when the last VA space invokes
// uvm_parent_gpu_access_counters_disable().
// uvm_gpu_access_counters_disable().
//
// Locking: the VA space lock must not be held by the caller since these
// functions may take the access counters ISR lock.
NV_STATUS uvm_gpu_access_counters_enable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
void uvm_parent_gpu_access_counters_disable(uvm_parent_gpu_t *parent_gpu, uvm_va_space_t *va_space);
void uvm_gpu_access_counters_disable(uvm_gpu_t *gpu, uvm_va_space_t *va_space);
NV_STATUS uvm_test_access_counters_enabled_by_default(UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_reconfigure_access_counters(UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_reset_access_counters(UVM_TEST_RESET_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_set_ignore_access_counters(UVM_TEST_SET_IGNORE_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_query_access_counters(UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS *params, struct file *filp);
#endif // __UVM_GPU_ACCESS_COUNTERS_H__


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -154,62 +154,73 @@ static unsigned schedule_non_replayable_faults_handler(uvm_parent_gpu_t *parent_
return 1;
}
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu)
static unsigned schedule_access_counters_handler(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
{
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
// On Volta, accessCntrBufferCount is > 0, but we don't support access
// counters in UVM (access_counters_supported is cleared during HAL
// initialization). This check prevents the top-half from accessing
// unallocated memory.
if (!parent_gpu->access_counters_supported)
return 0;
if (parent_gpu->isr.is_suspended)
return 0;
if (!parent_gpu->isr.access_counters.handling_ref_count)
if (!parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count)
return 0;
if (down_trylock(&parent_gpu->isr.access_counters.service_lock.sem) != 0)
if (down_trylock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem) != 0)
return 0;
if (!uvm_parent_gpu_access_counters_pending(parent_gpu)) {
up(&parent_gpu->isr.access_counters.service_lock.sem);
if (!uvm_parent_gpu_access_counters_pending(parent_gpu, notif_buf_index)) {
up(&parent_gpu->isr.access_counters[notif_buf_index].service_lock.sem);
return 0;
}
nv_kref_get(&parent_gpu->gpu_kref);
// Interrupts need to be disabled to avoid an interrupt storm
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
uvm_access_counters_intr_disable(&parent_gpu->access_counter_buffer[notif_buf_index]);
nv_kthread_q_schedule_q_item(&parent_gpu->isr.bottom_half_q,
&parent_gpu->isr.access_counters.bottom_half_q_item);
&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item);
return 1;
}
// This is called from RM's top-half ISR (see: the nvidia_isr() function), and UVM is given a
// chance to handle the interrupt, before most of the RM processing. UVM communicates what it
// did, back to RM, via the return code:
// This is called from RM's top-half ISR (see: the nvidia_isr() function), and
// UVM is given a chance to handle the interrupt, before most of the RM
// processing. UVM communicates what it did, back to RM, via the return code:
//
// NV_OK:
// UVM handled an interrupt.
//
// NV_WARN_MORE_PROCESSING_REQUIRED:
// UVM did not schedule a bottom half, because it was unable to get the locks it
// needed, but there is still UVM work to be done. RM will return "not handled" to the
// Linux kernel, *unless* RM handled other faults in its top half. In that case, the
// fact that UVM did not handle its interrupt is lost. However, life and interrupt
// processing continues anyway: the GPU will soon raise another interrupt, because
// that's what it does when there are replayable page faults remaining (GET != PUT in
// the fault buffer).
// UVM did not schedule a bottom half, because it was unable to get the
// locks it needed, but there is still UVM work to be done. RM will
// return "not handled" to the Linux kernel, *unless* RM handled other
// faults in its top half. In that case, the fact that UVM did not
// handle its interrupt is lost. However, life and interrupt processing
// continues anyway: the GPU will soon raise another interrupt, because
// that's what it does when there are replayable page faults remaining
// (GET != PUT in the fault buffer).
//
// NV_ERR_NO_INTR_PENDING:
// UVM did not find any work to do. Currently this is handled in RM in exactly the same
// way as NV_WARN_MORE_PROCESSING_REQUIRED is handled. However, the extra precision is
// available for the future. RM's interrupt handling tends to evolve as new chips and
// new interrupts get created.
// UVM did not find any work to do. Currently this is handled in RM in
// exactly the same way as NV_WARN_MORE_PROCESSING_REQUIRED is handled.
// However, the extra precision is available for the future. RM's
// interrupt handling tends to evolve as new chips and new interrupts
// get created.
static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
{
uvm_parent_gpu_t *parent_gpu;
unsigned num_handlers_scheduled = 0;
NV_STATUS status = NV_OK;
NvU32 i;
if (!in_interrupt() && in_atomic()) {
// Early-out if we're not in interrupt context, but memory allocations
@ -243,14 +254,16 @@ static NV_STATUS uvm_isr_top_half(const NvProcessorUuid *gpu_uuid)
nv_kref_get(&parent_gpu->gpu_kref);
uvm_spin_unlock_irqrestore(&g_uvm_global.gpu_table_lock);
// Now that we got a GPU object, lock it so that it can't be removed without us noticing.
// Now that we got a GPU object, lock it so that it can't be removed without
// us noticing.
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
++parent_gpu->isr.interrupt_count;
num_handlers_scheduled += schedule_replayable_faults_handler(parent_gpu);
num_handlers_scheduled += schedule_non_replayable_faults_handler(parent_gpu);
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu);
for (i = 0; i < parent_gpu->rm_info.accessCntrBufferCount; i++)
num_handlers_scheduled += schedule_access_counters_handler(parent_gpu, i);
if (num_handlers_scheduled == 0) {
if (parent_gpu->isr.is_suspended)
@ -288,6 +301,55 @@ static NV_STATUS init_queue_on_node(nv_kthread_q_t *queue, const char *name, int
return errno_to_nv_status(nv_kthread_q_init(queue, name));
}
static NV_STATUS uvm_isr_init_access_counters(uvm_parent_gpu_t *parent_gpu, NvU32 notif_buf_index)
{
NV_STATUS status = NV_OK;
uvm_va_block_context_t *block_context;
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
uvm_sema_init(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, 1, UVM_LOCK_ORDER_ISR);
status = uvm_parent_gpu_init_access_counters(parent_gpu, notif_buf_index);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s, notif buf index: %u\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu),
notif_buf_index);
return status;
}
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_block_context)
return NV_ERR_NO_MEMORY;
block_context = uvm_va_block_context_alloc(NULL);
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->access_counter_buffer[notif_buf_index].batch_service_context.block_service_context.block_context =
block_context;
nv_kthread_q_item_init(&parent_gpu->isr.access_counters[notif_buf_index].bottom_half_q_item,
access_counters_isr_bottom_half_entry,
&parent_gpu->access_counter_buffer[notif_buf_index]);
// Access counters interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count = 0;
if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc_stats_cpu)
return NV_ERR_NO_MEMORY;
parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count) *
num_possible_cpus());
if (!parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;
return NV_OK;
}
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
@ -316,7 +378,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->fault_buffer_info.replayable.block_service_context.block_context = block_context;
parent_gpu->fault_buffer.replayable.block_service_context.block_context = block_context;
parent_gpu->isr.replayable_faults.handling = true;
@ -344,7 +406,7 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context = block_context;
parent_gpu->fault_buffer.non_replayable.block_service_context.block_context = block_context;
parent_gpu->isr.non_replayable_faults.handling = true;
@ -361,32 +423,31 @@ NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu)
}
if (parent_gpu->access_counters_supported) {
status = uvm_parent_gpu_init_access_counters(parent_gpu);
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to initialize GPU access counters: %s, GPU: %s\n",
nvstatusToString(status),
uvm_parent_gpu_name(parent_gpu));
return status;
NvU32 index_count = parent_gpu->rm_info.accessCntrBufferCount;
NvU32 notif_buf_index;
UVM_ASSERT(index_count > 0);
if (uvm_enable_builtin_tests && parent_gpu->test.access_counters_alloc_buffer)
return NV_ERR_NO_MEMORY;
parent_gpu->access_counter_buffer = uvm_kvmalloc_zero(sizeof(*parent_gpu->access_counter_buffer) *
index_count);
if (!parent_gpu->access_counter_buffer)
return NV_ERR_NO_MEMORY;
if (uvm_enable_builtin_tests && parent_gpu->test.isr_access_counters_alloc)
return NV_ERR_NO_MEMORY;
parent_gpu->isr.access_counters = uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters) * index_count);
if (!parent_gpu->isr.access_counters)
return NV_ERR_NO_MEMORY;
for (notif_buf_index = 0; notif_buf_index < index_count; notif_buf_index++) {
status = uvm_isr_init_access_counters(parent_gpu, notif_buf_index);
if (status != NV_OK)
return status;
}
block_context = uvm_va_block_context_alloc(NULL);
if (!block_context)
return NV_ERR_NO_MEMORY;
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context =
block_context;
nv_kthread_q_item_init(&parent_gpu->isr.access_counters.bottom_half_q_item,
access_counters_isr_bottom_half_entry,
parent_gpu);
// Access counters interrupts are initially disabled. They are
// dynamically enabled when the GPU is registered on a VA space.
parent_gpu->isr.access_counters.handling_ref_count = 0;
parent_gpu->isr.access_counters.stats.cpu_exec_count =
uvm_kvmalloc_zero(sizeof(*parent_gpu->isr.access_counters.stats.cpu_exec_count) * num_possible_cpus());
if (!parent_gpu->isr.access_counters.stats.cpu_exec_count)
return NV_ERR_NO_MEMORY;
}
}
@ -401,7 +462,15 @@ void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu)
void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu)
{
UVM_ASSERT(parent_gpu->isr.access_counters.handling_ref_count == 0);
NvU32 notif_buf_index;
if (parent_gpu->isr.access_counters) {
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
UVM_ASSERT_MSG(parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count == 0,
"notif buf index: %u\n",
notif_buf_index);
}
}
// Now that the GPU is safely out of the global table, lock the GPU and mark
// it as no longer handling interrupts so the top half knows not to schedule
@ -459,24 +528,38 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu)
}
if (parent_gpu->access_counters_supported) {
// It is safe to deinitialize access counters even if they have not been
// successfully initialized.
uvm_parent_gpu_deinit_access_counters(parent_gpu);
block_context =
parent_gpu->access_counter_buffer_info.batch_service_context.block_service_context.block_context;
uvm_va_block_context_free(block_context);
NvU32 notif_buf_index;
for (notif_buf_index = 0; notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount; notif_buf_index++) {
// It is safe to deinitialize access counters even if they have not
// been successfully initialized.
uvm_parent_gpu_deinit_access_counters(parent_gpu, notif_buf_index);
if (parent_gpu->access_counter_buffer) {
uvm_access_counter_buffer_t *access_counter = &parent_gpu->access_counter_buffer[notif_buf_index];
block_context = access_counter->batch_service_context.block_service_context.block_context;
uvm_va_block_context_free(block_context);
}
if (parent_gpu->isr.access_counters)
uvm_kvfree(parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count);
}
uvm_kvfree(parent_gpu->isr.access_counters);
uvm_kvfree(parent_gpu->access_counter_buffer);
}
if (parent_gpu->non_replayable_faults_supported) {
block_context = parent_gpu->fault_buffer_info.non_replayable.block_service_context.block_context;
block_context = parent_gpu->fault_buffer.non_replayable.block_service_context.block_context;
uvm_va_block_context_free(block_context);
uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
}
block_context = parent_gpu->fault_buffer_info.replayable.block_service_context.block_context;
block_context = parent_gpu->fault_buffer.replayable.block_service_context.block_context;
uvm_va_block_context_free(block_context);
uvm_kvfree(parent_gpu->isr.replayable_faults.stats.cpu_exec_count);
uvm_kvfree(parent_gpu->isr.non_replayable_faults.stats.cpu_exec_count);
uvm_kvfree(parent_gpu->isr.access_counters.stats.cpu_exec_count);
}
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu)
@ -584,25 +667,29 @@ static void non_replayable_faults_isr_bottom_half_entry(void *args)
static void access_counters_isr_bottom_half(void *args)
{
uvm_parent_gpu_t *parent_gpu = (uvm_parent_gpu_t *)args;
uvm_access_counter_buffer_t *access_counters = (uvm_access_counter_buffer_t *)args;
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
unsigned int cpu;
UVM_ASSERT(parent_gpu->access_counters_supported);
UVM_ASSERT(notif_buf_index < parent_gpu->rm_info.accessCntrBufferCount);
uvm_record_lock(&parent_gpu->isr.access_counters.service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
uvm_record_lock(&parent_gpu->isr.access_counters[notif_buf_index].service_lock, UVM_LOCK_FLAGS_MODE_SHARED);
// Multiple bottom halves for counter notifications can be running
// concurrently, but only one can be running this function for a given GPU
// since we enter with the access_counters_isr_lock held.
// concurrently, but only one per-notification-buffer (i.e.,
// notif_buf_index) can be running this function for a given GPU since we
// enter with the per-notification-buffer access_counters_isr_lock held.
cpu = get_cpu();
++parent_gpu->isr.access_counters.stats.bottom_half_count;
cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters.stats.cpus_used_mask);
++parent_gpu->isr.access_counters.stats.cpu_exec_count[cpu];
++parent_gpu->isr.access_counters[notif_buf_index].stats.bottom_half_count;
cpumask_set_cpu(cpu, &parent_gpu->isr.access_counters[notif_buf_index].stats.cpus_used_mask);
++parent_gpu->isr.access_counters[notif_buf_index].stats.cpu_exec_count[cpu];
put_cpu();
uvm_parent_gpu_service_access_counters(parent_gpu);
uvm_service_access_counters(access_counters);
uvm_parent_gpu_access_counters_isr_unlock(parent_gpu);
uvm_access_counters_isr_unlock(access_counters);
uvm_parent_gpu_kref_put(parent_gpu);
}
@ -725,7 +812,7 @@ void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu)
// clear_replayable_faults is a no-op for architectures that don't
// support pulse-based interrupts.
parent_gpu->fault_buffer_hal->clear_replayable_faults(parent_gpu,
parent_gpu->fault_buffer_info.replayable.cached_get);
parent_gpu->fault_buffer.replayable.cached_get);
}
// This unlock call has to be out-of-order unlock due to interrupts_lock
@ -751,37 +838,41 @@ void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gp
uvm_up(&parent_gpu->isr.non_replayable_faults.service_lock);
}
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters)
{
// See comments in uvm_parent_gpu_replayable_faults_isr_lock
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_parent_gpu_access_counters_intr_disable(parent_gpu);
uvm_access_counters_intr_disable(access_counters);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
uvm_down(&parent_gpu->isr.access_counters.service_lock);
uvm_down(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);
}
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_access_counter_buffer_hal_t *ac_hal = parent_gpu->access_counter_buffer_hal;
UVM_ASSERT(nv_kref_read(&parent_gpu->gpu_kref) > 0);
// See comments in uvm_parent_gpu_replayable_faults_isr_unlock
uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
uvm_parent_gpu_access_counters_intr_enable(parent_gpu);
uvm_access_counters_intr_enable(access_counters);
if (parent_gpu->isr.access_counters.handling_ref_count > 0) {
parent_gpu->access_counter_buffer_hal->clear_access_counter_notifications(parent_gpu,
parent_gpu->access_counter_buffer_info.cached_get);
}
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0)
ac_hal->clear_access_counter_notifications(access_counters, access_counters->cached_get);
// This unlock call has to be out-of-order unlock due to interrupts_lock
// still being held. Otherwise, it would result in a lock order violation.
uvm_up_out_of_order(&parent_gpu->isr.access_counters.service_lock);
uvm_up_out_of_order(&parent_gpu->isr.access_counters[notif_buf_index].service_lock);
uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
}
@ -806,8 +897,11 @@ static void uvm_parent_gpu_replayable_faults_intr_enable(uvm_parent_gpu_t *paren
parent_gpu->fault_buffer_hal->enable_replayable_faults(parent_gpu);
}
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
// The read of handling_ref_count could race with a write from
@ -815,24 +909,27 @@ void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu)
// ISR lock. But those functions are invoked with the interrupt disabled
// (disable_intr_ref_count > 0), so the check always returns false when the
// race occurs
if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(parent_gpu);
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->disable_access_counter_notifications(access_counters);
}
++parent_gpu->isr.access_counters.disable_intr_ref_count;
++parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;
}
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu)
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters)
{
uvm_parent_gpu_t *parent_gpu = access_counters->parent_gpu;
NvU32 notif_buf_index = access_counters->index;
uvm_assert_spinlock_locked(&parent_gpu->isr.interrupts_lock);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters.service_lock));
UVM_ASSERT(parent_gpu->isr.access_counters.disable_intr_ref_count > 0);
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.access_counters[notif_buf_index].service_lock));
UVM_ASSERT(parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count > 0);
--parent_gpu->isr.access_counters.disable_intr_ref_count;
--parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count;
if (parent_gpu->isr.access_counters.handling_ref_count > 0 &&
parent_gpu->isr.access_counters.disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(parent_gpu);
if (parent_gpu->isr.access_counters[notif_buf_index].handling_ref_count > 0 &&
parent_gpu->isr.access_counters[notif_buf_index].disable_intr_ref_count == 0) {
parent_gpu->access_counter_buffer_hal->enable_access_counter_notifications(access_counters);
}
}


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -70,8 +70,8 @@ typedef struct
struct
{
// Number of the bottom-half invocations for this interrupt on a GPU over
// its lifetime
// Number of the bottom-half invocations for this interrupt on a GPU
// over its lifetime.
NvU64 bottom_half_count;
// A bitmask of the CPUs on which the bottom half has executed. The
@ -110,20 +110,20 @@ typedef struct
// bottom-half per interrupt type.
nv_kthread_q_t bottom_half_q;
// Protects the state of interrupts (enabled/disabled) and whether the GPU is
// currently handling them. Taken in both interrupt and process context.
// Protects the state of interrupts (enabled/disabled) and whether the GPU
// is currently handling them. Taken in both interrupt and process context.
uvm_spinlock_irqsave_t interrupts_lock;
uvm_intr_handler_t replayable_faults;
uvm_intr_handler_t non_replayable_faults;
uvm_intr_handler_t access_counters;
uvm_intr_handler_t *access_counters;
// Kernel thread used to kill channels on fatal non-replayable faults.
// This is needed because we cannot call into RM from the bottom-half to
// avoid deadlocks.
nv_kthread_q_t kill_channel_q;
// Number of top-half ISRs called for this GPU over its lifetime
// Number of top-half ISRs called for this GPU over its lifetime.
NvU64 interrupt_count;
} uvm_isr_info_t;
@ -133,7 +133,7 @@ NV_STATUS uvm_isr_top_half_entry(const NvProcessorUuid *gpu_uuid);
// Initialize ISR handling state
NV_STATUS uvm_parent_gpu_init_isr(uvm_parent_gpu_t *parent_gpu);
// Flush any currently scheduled bottom halves. This is called during GPU
// Flush any currently scheduled bottom halves. This is called during GPU
// removal.
void uvm_parent_gpu_flush_bottom_halves(uvm_parent_gpu_t *parent_gpu);
@ -146,7 +146,7 @@ void uvm_parent_gpu_disable_isr(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
// Take parent_gpu->isr.replayable_faults.service_lock from a non-top/bottom
// half thread. This will also disable replayable page fault interrupts (if
// half thread. This will also disable replayable page fault interrupts (if
// supported by the GPU) because the top half attempts to take this lock, and we
// would cause an interrupt storm if we didn't disable them first.
//
@ -154,49 +154,48 @@ void uvm_parent_gpu_deinit_isr(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
// Unlock parent_gpu->isr.replayable_faults.service_lock. This call may
// re-enable replayable page fault interrupts. Unlike
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only called from
// re-enable replayable page fault interrupts. Unlike
// uvm_parent_gpu_replayable_faults_isr_lock(), which should only be called from
// non-top/bottom half threads, this can be called by any thread.
void uvm_parent_gpu_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// Lock/unlock routines for non-replayable faults. These do not need to prevent
// interrupt storms since the GPU fault buffers for non-replayable faults are
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
// managed by RM. Unlike uvm_parent_gpu_replayable_faults_isr_lock, no GPUs
// under the parent need to have been previously retained.
void uvm_parent_gpu_non_replayable_faults_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_non_replayable_faults_isr_unlock(uvm_parent_gpu_t *parent_gpu);
// See uvm_parent_gpu_replayable_faults_isr_lock/unlock
void uvm_parent_gpu_access_counters_isr_lock(uvm_parent_gpu_t *parent_gpu);
void uvm_parent_gpu_access_counters_isr_unlock(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_isr_lock(uvm_access_counter_buffer_t *access_counters);
void uvm_access_counters_isr_unlock(uvm_access_counter_buffer_t *access_counters);
// Increments the reference count tracking whether access counter interrupts
// should be disabled. The caller is guaranteed that access counter interrupts
// are disabled upon return. Interrupts might already be disabled prior to
// making this call. Each call is ref-counted, so this must be paired with a
// call to uvm_parent_gpu_access_counters_intr_enable().
// call to uvm_access_counters_intr_enable().
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_parent_gpu_access_counters_intr_disable(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_intr_disable(uvm_access_counter_buffer_t *access_counters);
// Decrements the reference count tracking whether access counter interrupts
// should be disabled. Only once the count reaches 0 are the HW interrupts
// actually enabled, so this call does not guarantee that the interrupts have
// been re-enabled upon return.
//
// uvm_parent_gpu_access_counters_intr_disable() must have been called prior to
// calling this function.
// uvm_access_counters_intr_disable() must have been called prior to calling
// this function.
//
// NOTE: For pulse-based interrupts, the caller is responsible for re-arming
// the interrupt.
//
// parent_gpu->isr.interrupts_lock must be held to call this function.
void uvm_parent_gpu_access_counters_intr_enable(uvm_parent_gpu_t *parent_gpu);
void uvm_access_counters_intr_enable(uvm_access_counter_buffer_t *access_counters);
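As a usage illustration of the disable/enable contract described above (a sketch only; it mirrors the ordering used by uvm_access_counters_isr_lock()/unlock(), where the buffer's ISR service_lock is held when the enable runs):

// uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
// uvm_access_counters_intr_disable(access_counters);   // ref count 0 -> 1: HW interrupt masked
// uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);
//
// ... take the buffer's service_lock and do work with the interrupt masked ...
//
// uvm_spin_lock_irqsave(&parent_gpu->isr.interrupts_lock);
// uvm_access_counters_intr_enable(access_counters);    // ref count 1 -> 0: HW interrupt re-armed
// uvm_spin_unlock_irqrestore(&parent_gpu->isr.interrupts_lock);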
// Return the first valid GPU given the parent GPU or NULL if no MIG instances
// are registered. This should only be called from bottom halves or if the
// g_uvm_global.global_lock is held so that the returned pointer remains valid.
//
uvm_gpu_t *uvm_parent_gpu_find_first_valid_gpu(uvm_parent_gpu_t *parent_gpu);
#endif // __UVM_GPU_ISR_H__


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -119,18 +119,18 @@
// calling uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults on failure.
NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
non_replayable_faults->shadow_buffer_copy = NULL;
non_replayable_faults->fault_cache = NULL;
non_replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize /
non_replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize /
parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
non_replayable_faults->shadow_buffer_copy =
uvm_kvmalloc_zero(parent_gpu->fault_buffer_info.rm_info.nonReplayable.bufferSize);
uvm_kvmalloc_zero(parent_gpu->fault_buffer.rm_info.nonReplayable.bufferSize);
if (!non_replayable_faults->shadow_buffer_copy)
return NV_ERR_NO_MEMORY;
@ -147,7 +147,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init_non_replayable_faults(uvm_parent_gpu_
void uvm_parent_gpu_fault_buffer_deinit_non_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
if (non_replayable_faults->fault_cache) {
UVM_ASSERT(uvm_tracker_is_empty(&non_replayable_faults->clear_faulted_tracker));
@ -170,7 +170,7 @@ bool uvm_parent_gpu_non_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->isr.non_replayable_faults.handling);
status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
status = nvUvmInterfaceHasPendingNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
&has_pending_faults);
UVM_ASSERT(status == NV_OK);
@ -182,14 +182,14 @@ static NV_STATUS fetch_non_replayable_fault_buffer_entries(uvm_parent_gpu_t *par
NV_STATUS status;
NvU32 i;
NvU32 entry_size = parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
char *current_hw_entry = (char *)non_replayable_faults->shadow_buffer_copy;
uvm_fault_buffer_entry_t *fault_entry = non_replayable_faults->fault_cache;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.non_replayable_faults.service_lock));
UVM_ASSERT(parent_gpu->non_replayable_faults_supported);
status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer_info.rm_info,
status = nvUvmInterfaceGetNonReplayableFaults(&parent_gpu->fault_buffer.rm_info,
current_hw_entry,
cached_faults);
@ -267,7 +267,7 @@ static NV_STATUS clear_faulted_method_on_gpu(uvm_user_channel_t *user_channel,
uvm_gpu_t *gpu = user_channel->gpu;
NV_STATUS status;
uvm_push_t push;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
UVM_ASSERT(!fault_entry->is_fatal);
@ -355,7 +355,7 @@ static NV_STATUS service_managed_fault_in_block_locked(uvm_va_block_t *va_block,
uvm_processor_id_t new_residency;
bool read_duplicate;
uvm_va_space_t *va_space = uvm_va_block_get_va_space(va_block);
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
const uvm_va_policy_t *policy;
UVM_ASSERT(!fault_entry->is_fatal);
@ -450,7 +450,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
NV_STATUS status, tracker_status;
uvm_va_block_retry_t va_block_retry;
uvm_gpu_t *gpu = fault_entry->gpu;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.non_replayable.block_service_context;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.non_replayable.block_service_context;
service_context->operation = UVM_SERVICE_OPERATION_NON_REPLAYABLE_FAULTS;
service_context->num_retries = 0;
@ -467,7 +467,7 @@ static NV_STATUS service_managed_fault_in_block(uvm_va_block_t *va_block,
service_context,
hmm_migratable));
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer_info.non_replayable.fault_service_tracker,
tracker_status = uvm_tracker_add_tracker_safe(&gpu->parent->fault_buffer.non_replayable.fault_service_tracker,
&va_block->tracker);
uvm_mutex_unlock(&va_block->lock);
@ -507,7 +507,7 @@ static void schedule_kill_channel(uvm_fault_buffer_entry_t *fault_entry, uvm_use
{
uvm_va_space_t *va_space = fault_entry->va_space;
uvm_parent_gpu_t *parent_gpu = fault_entry->gpu->parent;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
void *packet = (char *)non_replayable_faults->shadow_buffer_copy +
(fault_entry->non_replayable.buffer_index * parent_gpu->fault_buffer_hal->entry_size(parent_gpu));
@ -551,7 +551,7 @@ static NV_STATUS service_non_managed_fault(uvm_gpu_va_space_t *gpu_va_space,
{
uvm_va_space_t *va_space = gpu_va_space->va_space;
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &gpu->parent->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &gpu->parent->fault_buffer.non_replayable;
uvm_ats_fault_invalidate_t *ats_invalidate = &non_replayable_faults->ats_invalidate;
NV_STATUS status = lookup_status;
NV_STATUS fatal_fault_status = NV_ERR_INVALID_ADDRESS;
@ -649,7 +649,7 @@ static NV_STATUS service_fault_once(uvm_parent_gpu_t *parent_gpu,
struct mm_struct *mm;
uvm_gpu_va_space_t *gpu_va_space;
uvm_gpu_t *gpu;
uvm_non_replayable_fault_buffer_info_t *non_replayable_faults = &parent_gpu->fault_buffer_info.non_replayable;
uvm_non_replayable_fault_buffer_t *non_replayable_faults = &parent_gpu->fault_buffer.non_replayable;
uvm_va_block_context_t *va_block_context = non_replayable_faults->block_service_context.block_context;
status = uvm_parent_gpu_fault_entry_to_va_space(parent_gpu,
@ -757,7 +757,7 @@ exit_no_channel:
static NV_STATUS service_fault(uvm_parent_gpu_t *parent_gpu, uvm_fault_buffer_entry_t *fault_entry)
{
uvm_service_block_context_t *service_context =
&parent_gpu->fault_buffer_info.non_replayable.block_service_context;
&parent_gpu->fault_buffer.non_replayable.block_service_context;
NV_STATUS status;
bool hmm_migratable = true;
@ -794,7 +794,7 @@ void uvm_parent_gpu_service_non_replayable_fault_buffer(uvm_parent_gpu_t *parent
// non-replayable faults since getting multiple faults on the same
// memory region is not very likely
for (i = 0; i < cached_faults; ++i) {
status = service_fault(parent_gpu, &parent_gpu->fault_buffer_info.non_replayable.fault_cache[i]);
status = service_fault(parent_gpu, &parent_gpu->fault_buffer.non_replayable.fault_cache[i]);
if (status != NV_OK)
return;
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -119,7 +119,7 @@ module_param(uvm_perf_fault_coalesce, uint, S_IRUGO);
// the power management resume path.
static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
// Read the current get/put pointers, as this might not be the first time
// we take control of the fault buffer since the GPU was initialized,
@ -129,7 +129,7 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
replayable_faults->cached_put = parent_gpu->fault_buffer_hal->read_put(parent_gpu);
// (Re-)enable fault prefetching
if (parent_gpu->fault_buffer_info.prefetch_faults_enabled)
if (parent_gpu->fault_buffer.prefetch_faults_enabled)
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
else
parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
@ -140,28 +140,28 @@ static void fault_buffer_reinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
NV_STATUS status = NV_OK;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
UVM_ASSERT(parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize %
UVM_ASSERT(parent_gpu->fault_buffer.rm_info.replayable.bufferSize %
parent_gpu->fault_buffer_hal->entry_size(parent_gpu) == 0);
replayable_faults->max_faults = parent_gpu->fault_buffer_info.rm_info.replayable.bufferSize /
replayable_faults->max_faults = parent_gpu->fault_buffer.rm_info.replayable.bufferSize /
parent_gpu->fault_buffer_hal->entry_size(parent_gpu);
// Check provided module parameter value
parent_gpu->fault_buffer_info.max_batch_size = max(uvm_perf_fault_batch_count,
(NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
parent_gpu->fault_buffer_info.max_batch_size = min(parent_gpu->fault_buffer_info.max_batch_size,
replayable_faults->max_faults);
parent_gpu->fault_buffer.max_batch_size = max(uvm_perf_fault_batch_count,
(NvU32)UVM_PERF_FAULT_BATCH_COUNT_MIN);
parent_gpu->fault_buffer.max_batch_size = min(parent_gpu->fault_buffer.max_batch_size,
replayable_faults->max_faults);
if (parent_gpu->fault_buffer_info.max_batch_size != uvm_perf_fault_batch_count) {
pr_info("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_batch_count,
UVM_PERF_FAULT_BATCH_COUNT_MIN,
replayable_faults->max_faults,
parent_gpu->fault_buffer_info.max_batch_size);
if (parent_gpu->fault_buffer.max_batch_size != uvm_perf_fault_batch_count) {
UVM_INFO_PRINT("Invalid uvm_perf_fault_batch_count value on GPU %s: %u. Valid range [%u:%u] Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_batch_count,
UVM_PERF_FAULT_BATCH_COUNT_MIN,
replayable_faults->max_faults,
parent_gpu->fault_buffer.max_batch_size);
}
batch_context->fault_cache = uvm_kvmalloc_zero(replayable_faults->max_faults * sizeof(*batch_context->fault_cache));
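To make the clamping above concrete (hypothetical values): if uvm_perf_fault_batch_count is below UVM_PERF_FAULT_BATCH_COUNT_MIN, the max() raises the batch size to the minimum; if it exceeds replayable_faults->max_faults, the min() caps it at max_faults. In either case the resulting max_batch_size differs from the module parameter, so the UVM_INFO_PRINT warning fires with the valid range and the value actually used.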
@ -198,22 +198,22 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
UVM_PERF_FAULT_REPLAY_POLICY_DEFAULT;
if (replayable_faults->replay_policy != uvm_perf_fault_replay_policy) {
pr_info("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_policy,
replayable_faults->replay_policy);
UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_policy value on GPU %s: %d. Using %d instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_policy,
replayable_faults->replay_policy);
}
replayable_faults->replay_update_put_ratio = min(uvm_perf_fault_replay_update_put_ratio, 100u);
if (replayable_faults->replay_update_put_ratio != uvm_perf_fault_replay_update_put_ratio) {
pr_info("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_update_put_ratio,
replayable_faults->replay_update_put_ratio);
UVM_INFO_PRINT("Invalid uvm_perf_fault_replay_update_put_ratio value on GPU %s: %u. Using %u instead\n",
uvm_parent_gpu_name(parent_gpu),
uvm_perf_fault_replay_update_put_ratio,
replayable_faults->replay_update_put_ratio);
}
// Re-enable fault prefetching just in case it was disabled in a previous run
parent_gpu->fault_buffer_info.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;
parent_gpu->fault_buffer.prefetch_faults_enabled = parent_gpu->prefetch_fault_supported;
fault_buffer_reinit_replayable_faults(parent_gpu);
@ -222,7 +222,7 @@ static NV_STATUS fault_buffer_init_replayable_faults(uvm_parent_gpu_t *parent_gp
static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
if (batch_context->fault_cache) {
@ -230,9 +230,9 @@ static void fault_buffer_deinit_replayable_faults(uvm_parent_gpu_t *parent_gpu)
uvm_tracker_deinit(&replayable_faults->replay_tracker);
}
if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
// Re-enable prefetch faults in case we disabled them
if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer_info.prefetch_faults_enabled)
if (parent_gpu->prefetch_fault_supported && !parent_gpu->fault_buffer.prefetch_faults_enabled)
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
}
@ -252,7 +252,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->replayable_faults_supported);
status = uvm_rm_locked_call(nvUvmInterfaceInitFaultInfo(parent_gpu->rm_device,
&parent_gpu->fault_buffer_info.rm_info));
&parent_gpu->fault_buffer.rm_info));
if (status != NV_OK) {
UVM_ERR_PRINT("Failed to init fault buffer info from RM: %s, GPU %s\n",
nvstatusToString(status),
@ -262,7 +262,7 @@ NV_STATUS uvm_parent_gpu_fault_buffer_init(uvm_parent_gpu_t *parent_gpu)
// when it returns an error. Set the buffer handle to zero as it is
// used by the deinitialization logic to determine if it was correctly
// initialized.
parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
goto fail;
}
@ -304,24 +304,25 @@ void uvm_parent_gpu_fault_buffer_deinit(uvm_parent_gpu_t *parent_gpu)
fault_buffer_deinit_replayable_faults(parent_gpu);
if (parent_gpu->fault_buffer_info.rm_info.faultBufferHandle) {
if (parent_gpu->fault_buffer.rm_info.faultBufferHandle) {
status = uvm_rm_locked_call(nvUvmInterfaceOwnPageFaultIntr(parent_gpu->rm_device, NV_FALSE));
UVM_ASSERT(status == NV_OK);
uvm_rm_locked_call_void(nvUvmInterfaceDestroyFaultInfo(parent_gpu->rm_device,
&parent_gpu->fault_buffer_info.rm_info));
&parent_gpu->fault_buffer.rm_info));
parent_gpu->fault_buffer_info.rm_info.faultBufferHandle = 0;
parent_gpu->fault_buffer.rm_info.faultBufferHandle = 0;
}
}
bool uvm_parent_gpu_replayable_faults_pending(uvm_parent_gpu_t *parent_gpu)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
UVM_ASSERT(parent_gpu->replayable_faults_supported);
// Fast path 1: we left some faults unserviced in the buffer in the last pass
// Fast path 1: we left some faults unserviced in the buffer in the last
// pass
if (replayable_faults->cached_get != replayable_faults->cached_put)
return true;
@ -357,7 +358,7 @@ static NV_STATUS push_cancel_on_gpu(uvm_gpu_t *gpu,
{
NV_STATUS status;
uvm_push_t push;
uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer_info.replayable.replay_tracker;
uvm_tracker_t *replay_tracker = &gpu->parent->fault_buffer.replayable.replay_tracker;
UVM_ASSERT(tracker != NULL);
@ -443,7 +444,7 @@ static NV_STATUS cancel_fault_precise_va(uvm_fault_buffer_entry_t *fault_entry,
uvm_gpu_t *gpu = fault_entry->gpu;
uvm_gpu_phys_address_t pdb;
uvm_push_t push;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
NvU64 offset;
UVM_ASSERT(gpu->parent->replayable_faults_supported);
@ -505,7 +506,7 @@ static NV_STATUS push_replay_on_gpu(uvm_gpu_t *gpu,
{
NV_STATUS status;
uvm_push_t push;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_tracker_t *tracker = NULL;
if (batch_context)
@ -556,7 +557,7 @@ static NV_STATUS push_replay_on_parent_gpu(uvm_parent_gpu_t *parent_gpu,
static void write_get(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
@ -589,7 +590,7 @@ static NV_STATUS hw_fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu, hw_f
return NV_OK;
is_flush_mode_move = (NvBool) (flush_mode == HW_FAULT_BUFFER_FLUSH_MODE_MOVE);
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer_info.rm_info, is_flush_mode_move);
status = nvUvmInterfaceFlushReplayableFaultBuffer(&parent_gpu->fault_buffer.rm_info, is_flush_mode_move);
UVM_ASSERT(status == NV_OK);
@ -618,7 +619,7 @@ static NV_STATUS fault_buffer_flush_locked(uvm_parent_gpu_t *parent_gpu,
NvU32 get;
NvU32 put;
uvm_spin_loop_t spin;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
NV_STATUS status;
UVM_ASSERT(uvm_sem_is_locked(&parent_gpu->isr.replayable_faults.service_lock));
@ -852,7 +853,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
uvm_fault_buffer_entry_t *fault_cache;
uvm_spin_loop_t spin;
NV_STATUS status = NV_OK;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
const bool in_pascal_cancel_path = (!parent_gpu->fault_cancel_va_supported && fetch_mode == FAULT_FETCH_MODE_ALL);
const bool may_filter = uvm_perf_fault_coalesce && !in_pascal_cancel_path;
@ -887,7 +888,7 @@ static NV_STATUS fetch_fault_buffer_entries(uvm_parent_gpu_t *parent_gpu,
// Parse until get != put and have enough space to cache.
while ((get != put) &&
(fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer_info.max_batch_size)) {
(fetch_mode == FAULT_FETCH_MODE_ALL || fault_index < parent_gpu->fault_buffer.max_batch_size)) {
bool is_same_instance_ptr = true;
uvm_fault_buffer_entry_t *current_entry = &fault_cache[fault_index];
uvm_fault_utlb_info_t *current_tlb;
@ -1385,7 +1386,7 @@ static NV_STATUS service_fault_batch_block_locked(uvm_gpu_t *gpu,
uvm_page_index_t last_page_index;
NvU32 page_fault_count = 0;
uvm_range_group_range_iter_t iter;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_fault_buffer_entry_t **ordered_fault_cache = batch_context->ordered_fault_cache;
uvm_fault_buffer_entry_t *first_fault_entry = ordered_fault_cache[first_fault_index];
uvm_service_block_context_t *block_context = &replayable_faults->block_service_context;
@ -1612,7 +1613,7 @@ static NV_STATUS service_fault_batch_block(uvm_gpu_t *gpu,
NV_STATUS status;
uvm_va_block_retry_t va_block_retry;
NV_STATUS tracker_status;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_service_block_context_t *fault_block_context = &replayable_faults->block_service_context;
fault_block_context->operation = UVM_SERVICE_OPERATION_REPLAYABLE_FAULTS;
@ -1803,7 +1804,7 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
uvm_page_mask_t *prefetch_only_fault_mask = &ats_context->faults.prefetch_only_fault_mask;
uvm_gpu_t *gpu = gpu_va_space->gpu;
bool replay_per_va_block =
(gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
(gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK);
UVM_ASSERT(vma);
@ -1851,8 +1852,8 @@ static NV_STATUS service_fault_batch_ats_sub(uvm_gpu_va_space_t *gpu_va_space,
page_index = (fault_address - sub_batch_base) / PAGE_SIZE;
// Do not check for coalesced access type. If there are multiple different
// accesses to an address, we can disregard the prefetch one.
// Do not check for coalesced access type. If there are multiple
// different accesses to an address, we can disregard the prefetch one.
if ((access_type == UVM_FAULT_ACCESS_TYPE_PREFETCH) &&
(uvm_fault_access_type_mask_highest(current_entry->access_type_mask) == UVM_FAULT_ACCESS_TYPE_PREFETCH))
uvm_page_mask_set(prefetch_only_fault_mask, page_index);
@ -1956,7 +1957,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
uvm_va_block_t *va_block;
uvm_gpu_t *gpu = gpu_va_space->gpu;
uvm_va_block_context_t *va_block_context =
gpu->parent->fault_buffer_info.replayable.block_service_context.block_context;
gpu->parent->fault_buffer.replayable.block_service_context.block_context;
uvm_fault_buffer_entry_t *current_entry = batch_context->ordered_fault_cache[fault_index];
struct mm_struct *mm = va_block_context->mm;
NvU64 fault_address = current_entry->fault_address;
@ -1985,7 +1986,7 @@ static NV_STATUS service_fault_batch_dispatch(uvm_va_space_t *va_space,
NvU64 outer = ~0ULL;
UVM_ASSERT(replay_per_va_block ==
(gpu->parent->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));
(gpu->parent->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK));
// Limit outer to the minimum of next va_range.start and first
// fault_address' next UVM_GMMU_ATS_GRANULARITY alignment so that it's
@ -2046,8 +2047,8 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
uvm_gpu_t *gpu = batch_context->fatal_gpu;
uvm_gpu_va_space_t *gpu_va_space = NULL;
struct mm_struct *mm;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer_info.replayable.block_service_context;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
uvm_service_block_context_t *service_context = &gpu->parent->fault_buffer.replayable.block_service_context;
uvm_va_block_context_t *va_block_context = service_context->block_context;
UVM_ASSERT(va_space);
@ -2155,7 +2156,7 @@ static NV_STATUS service_fault_batch_for_cancel(uvm_fault_service_batch_context_
++i;
}
else {
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer_info.replayable.ats_invalidate;
uvm_ats_fault_invalidate_t *ats_invalidate = &gpu->parent->fault_buffer.replayable.ats_invalidate;
NvU32 block_faults;
const bool hmm_migratable = true;
@ -2236,12 +2237,12 @@ static NV_STATUS service_fault_batch(uvm_parent_gpu_t *parent_gpu,
NvU32 i;
uvm_va_space_t *va_space = NULL;
uvm_gpu_va_space_t *prev_gpu_va_space = NULL;
uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer_info.replayable.ats_invalidate;
uvm_ats_fault_invalidate_t *ats_invalidate = &parent_gpu->fault_buffer.replayable.ats_invalidate;
struct mm_struct *mm = NULL;
const bool replay_per_va_block = service_mode != FAULT_SERVICE_MODE_CANCEL &&
parent_gpu->fault_buffer_info.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
parent_gpu->fault_buffer.replayable.replay_policy == UVM_PERF_FAULT_REPLAY_POLICY_BLOCK;
uvm_service_block_context_t *service_context =
&parent_gpu->fault_buffer_info.replayable.block_service_context;
&parent_gpu->fault_buffer.replayable.block_service_context;
uvm_va_block_context_t *va_block_context = service_context->block_context;
bool hmm_migratable = true;
@ -2711,8 +2712,9 @@ static void cancel_fault_batch(uvm_parent_gpu_t *parent_gpu,
// 5- Fetch all faults from buffer
// 6- Check what uTLBs are in lockdown mode and can be cancelled
// 7- Preprocess faults (order per va_space, fault address, access type)
// 8- Service all non-fatal faults and mark all non-serviceable faults as fatal
// 6.1- If fatal faults are not found, we are done
// 8- Service all non-fatal faults and mark all non-serviceable faults as
// fatal.
// 8.1- If fatal faults are not found, we are done
// 9- Search for a uTLB which can be targeted for cancel, as described in
// try_to_cancel_utlbs. If found, cancel it.
// END LOOP
@ -2726,14 +2728,14 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
{
NV_STATUS status;
NV_STATUS tracker_status;
uvm_replayable_fault_buffer_info_t *replayable_faults = &gpu->parent->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &gpu->parent->fault_buffer.replayable;
bool first = true;
UVM_ASSERT(gpu->parent->replayable_faults_supported);
// 1) Disable prefetching to avoid new requests keep coming and flooding
// the buffer
if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
if (gpu->parent->fault_buffer.prefetch_faults_enabled)
gpu->parent->arch_hal->disable_prefetch_faults(gpu->parent);
while (1) {
@ -2847,7 +2849,7 @@ static NV_STATUS cancel_faults_precise_tlb(uvm_gpu_t *gpu, uvm_fault_service_bat
}
// 10) Re-enable prefetching
if (gpu->parent->fault_buffer_info.prefetch_faults_enabled)
if (gpu->parent->fault_buffer.prefetch_faults_enabled)
gpu->parent->arch_hal->enable_prefetch_faults(gpu->parent);
if (status == NV_OK)
@ -2884,16 +2886,16 @@ static void enable_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu,
// comment in mark_fault_invalid_prefetch(..).
// Some tests rely on this logic (and ratio) to correctly disable prefetch
// fault reporting. If the logic changes, the tests will have to be changed.
if (parent_gpu->fault_buffer_info.prefetch_faults_enabled &&
if (parent_gpu->fault_buffer.prefetch_faults_enabled &&
uvm_perf_reenable_prefetch_faults_lapse_msec > 0 &&
((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer_info.max_batch_size * 2) ||
((batch_context->num_invalid_prefetch_faults * 3 > parent_gpu->fault_buffer.max_batch_size * 2) ||
(uvm_enable_builtin_tests &&
parent_gpu->rm_info.isSimulated &&
batch_context->num_invalid_prefetch_faults > 5))) {
uvm_parent_gpu_disable_prefetch_faults(parent_gpu);
}
else if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp;
else if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
NvU64 lapse = NV_GETTIME() - parent_gpu->fault_buffer.disable_prefetch_faults_timestamp;
// Reenable prefetch faults after some time
if (lapse > ((NvU64)uvm_perf_reenable_prefetch_faults_lapse_msec * (1000 * 1000)))
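Reading the threshold above with hypothetical numbers: for a max_batch_size of 256, prefetch fault reporting is disabled once a batch carries more than 170 invalid prefetch notifications (171 * 3 = 513 > 512 = 256 * 2), or more than 5 on a simulated GPU with built-in tests enabled. It is re-enabled once the time since disable_prefetch_faults_timestamp exceeds uvm_perf_reenable_prefetch_faults_lapse_msec, converted to nanoseconds by the * (1000 * 1000) factor.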
@ -2907,7 +2909,7 @@ void uvm_parent_gpu_service_replayable_faults(uvm_parent_gpu_t *parent_gpu)
NvU32 num_batches = 0;
NvU32 num_throttled = 0;
NV_STATUS status = NV_OK;
uvm_replayable_fault_buffer_info_t *replayable_faults = &parent_gpu->fault_buffer_info.replayable;
uvm_replayable_fault_buffer_t *replayable_faults = &parent_gpu->fault_buffer.replayable;
uvm_fault_service_batch_context_t *batch_context = &replayable_faults->batch_service_context;
UVM_ASSERT(parent_gpu->replayable_faults_supported);
@ -3030,9 +3032,9 @@ void uvm_parent_gpu_enable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
if (!parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
if (!parent_gpu->fault_buffer.prefetch_faults_enabled) {
parent_gpu->arch_hal->enable_prefetch_faults(parent_gpu);
parent_gpu->fault_buffer_info.prefetch_faults_enabled = true;
parent_gpu->fault_buffer.prefetch_faults_enabled = true;
}
}
@ -3041,10 +3043,10 @@ void uvm_parent_gpu_disable_prefetch_faults(uvm_parent_gpu_t *parent_gpu)
UVM_ASSERT(parent_gpu->isr.replayable_faults.handling);
UVM_ASSERT(parent_gpu->prefetch_fault_supported);
if (parent_gpu->fault_buffer_info.prefetch_faults_enabled) {
if (parent_gpu->fault_buffer.prefetch_faults_enabled) {
parent_gpu->arch_hal->disable_prefetch_faults(parent_gpu);
parent_gpu->fault_buffer_info.prefetch_faults_enabled = false;
parent_gpu->fault_buffer_info.disable_prefetch_faults_timestamp = NV_GETTIME();
parent_gpu->fault_buffer.prefetch_faults_enabled = false;
parent_gpu->fault_buffer.disable_prefetch_faults_timestamp = NV_GETTIME();
}
}

View File

@ -217,7 +217,6 @@ static uvm_hal_class_ops_t host_table[] =
.clear_faulted_channel_method = uvm_hal_maxwell_host_clear_faulted_channel_method_unsupported,
.clear_faulted_channel_register = uvm_hal_maxwell_host_clear_faulted_channel_register_unsupported,
.access_counter_clear_all = uvm_hal_maxwell_access_counter_clear_all_unsupported,
.access_counter_clear_type = uvm_hal_maxwell_access_counter_clear_type_unsupported,
.access_counter_clear_targeted = uvm_hal_maxwell_access_counter_clear_targeted_unsupported,
.get_time = uvm_hal_maxwell_get_time,
}
@ -254,9 +253,6 @@ static uvm_hal_class_ops_t host_table[] =
.replay_faults = uvm_hal_volta_replay_faults,
.cancel_faults_va = uvm_hal_volta_cancel_faults_va,
.clear_faulted_channel_method = uvm_hal_volta_host_clear_faulted_channel_method,
.access_counter_clear_all = uvm_hal_volta_access_counter_clear_all,
.access_counter_clear_type = uvm_hal_volta_access_counter_clear_type,
.access_counter_clear_targeted = uvm_hal_volta_access_counter_clear_targeted,
.semaphore_timestamp = uvm_hal_volta_host_semaphore_timestamp,
}
},
@ -271,6 +267,8 @@ static uvm_hal_class_ops_t host_table[] =
.tlb_invalidate_all = uvm_hal_turing_host_tlb_invalidate_all,
.tlb_invalidate_va = uvm_hal_turing_host_tlb_invalidate_va,
.tlb_invalidate_test = uvm_hal_turing_host_tlb_invalidate_test,
.access_counter_clear_all = uvm_hal_turing_access_counter_clear_all,
.access_counter_clear_targeted = uvm_hal_turing_access_counter_clear_targeted,
}
},
{
@ -537,22 +535,19 @@ static uvm_hal_class_ops_t access_counter_buffer_table[] =
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GP100,
.u.access_counter_buffer_ops = {
.enable_access_counter_notifications = uvm_hal_volta_enable_access_counter_notifications,
.disable_access_counter_notifications = uvm_hal_volta_disable_access_counter_notifications,
.clear_access_counter_notifications = uvm_hal_volta_clear_access_counter_notifications,
.parse_entry = uvm_hal_volta_access_counter_buffer_parse_entry,
.entry_is_valid = uvm_hal_volta_access_counter_buffer_entry_is_valid,
.entry_clear_valid = uvm_hal_volta_access_counter_buffer_entry_clear_valid,
.entry_size = uvm_hal_volta_access_counter_buffer_entry_size,
}
.u.access_counter_buffer_ops = {}
},
{
.id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_TU100,
.parent_id = NV2080_CTRL_MC_ARCH_INFO_ARCHITECTURE_GV100,
.u.access_counter_buffer_ops = {
.enable_access_counter_notifications = uvm_hal_turing_enable_access_counter_notifications,
.disable_access_counter_notifications = uvm_hal_turing_disable_access_counter_notifications,
.clear_access_counter_notifications = uvm_hal_turing_clear_access_counter_notifications,
.parse_entry = uvm_hal_turing_access_counter_buffer_parse_entry,
.entry_is_valid = uvm_hal_turing_access_counter_buffer_entry_is_valid,
.entry_clear_valid = uvm_hal_turing_access_counter_buffer_entry_clear_valid,
.entry_size = uvm_hal_turing_access_counter_buffer_entry_size,
}
},
{
@ -843,10 +838,8 @@ static void hal_override_properties(uvm_parent_gpu_t *parent_gpu)
// Computing.
//
// TODO: Bug 200692962: Add support for access counters in vGPU
if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled) {
if ((parent_gpu->virt_mode != UVM_VIRT_MODE_NONE) || g_uvm_global.conf_computing_enabled)
parent_gpu->access_counters_supported = false;
parent_gpu->access_counters_can_use_physical_addresses = false;
}
}
void uvm_hal_init_properties(uvm_parent_gpu_t *parent_gpu)
@ -1042,36 +1035,15 @@ void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry)
UVM_DBG_PRINT(" timestamp: %llu\n", entry->timestamp);
}
const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type)
{
BUILD_BUG_ON(UVM_ACCESS_COUNTER_TYPE_MAX != 2);
switch (access_counter_type) {
UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MIMC);
UVM_ENUM_STRING_CASE(UVM_ACCESS_COUNTER_TYPE_MOMC);
UVM_ENUM_STRING_DEFAULT();
}
}
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry)
{
if (!entry->address.is_virtual) {
UVM_DBG_PRINT("physical address: {0x%llx:%s}\n",
entry->address.address,
uvm_aperture_string(entry->address.aperture));
}
else {
UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address.address);
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n",
entry->virtual_info.instance_ptr.address,
uvm_aperture_string(entry->virtual_info.instance_ptr.aperture));
UVM_DBG_PRINT(" mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->virtual_info.mmu_engine_type));
UVM_DBG_PRINT(" mmu_engine_id %u\n", entry->virtual_info.mmu_engine_id);
UVM_DBG_PRINT(" ve_id %u\n", entry->virtual_info.ve_id);
}
UVM_DBG_PRINT(" is_virtual %u\n", entry->address.is_virtual);
UVM_DBG_PRINT(" counter_type %s\n", uvm_access_counter_type_string(entry->counter_type));
UVM_DBG_PRINT("virtual address: 0x%llx\n", entry->address);
UVM_DBG_PRINT(" instance_ptr {0x%llx:%s}\n",
entry->instance_ptr.address,
uvm_aperture_string(entry->instance_ptr.aperture));
UVM_DBG_PRINT(" mmu_engine_type %s\n", uvm_mmu_engine_type_string(entry->mmu_engine_type));
UVM_DBG_PRINT(" mmu_engine_id %u\n", entry->mmu_engine_id);
UVM_DBG_PRINT(" ve_id %u\n", entry->ve_id);
UVM_DBG_PRINT(" counter_value %u\n", entry->counter_value);
UVM_DBG_PRINT(" subgranularity 0x%08x\n", entry->sub_granularity);
UVM_DBG_PRINT(" bank %u\n", entry->bank);

View File

@ -686,54 +686,52 @@ void uvm_hal_print_fault_entry(const uvm_fault_buffer_entry_t *entry);
void uvm_hal_print_access_counter_buffer_entry(const uvm_access_counter_buffer_entry_t *entry);
// Access counters
typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_parent_gpu_t *parent_gpu, NvU32 get);
typedef void (*uvm_hal_enable_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters);
typedef void (*uvm_hal_disable_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters);
typedef void (*uvm_hal_clear_access_counter_notifications_t)(uvm_access_counter_buffer_t *access_counters, NvU32 get);
// Parse the entry on the given buffer index. This also clears the valid bit of
// the entry in the buffer.
typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_parent_gpu_t *parent_gpu,
typedef void (*uvm_hal_access_counter_buffer_parse_entry_t)(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_parent_gpu_t *parent_gpu, NvU32 index);
typedef bool (*uvm_hal_access_counter_buffer_entry_is_valid_t)(uvm_access_counter_buffer_t *access_counters,
NvU32 index);
typedef void (*uvm_hal_access_counter_buffer_entry_clear_valid_t)(uvm_access_counter_buffer_t *access_counters,
NvU32 index);
typedef NvU32 (*uvm_hal_access_counter_buffer_entry_size_t)(uvm_parent_gpu_t *parent_gpu);
typedef void (*uvm_hal_access_counter_clear_all_t)(uvm_push_t *push);
typedef void (*uvm_hal_access_counter_clear_type_t)(uvm_push_t *push, uvm_access_counter_type_t type);
typedef void (*uvm_hal_access_counter_clear_targeted_t)(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 get);
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index);
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index);
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index);
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push);
void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type);
void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index);
NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_volta_access_counter_clear_all(uvm_push_t *push);
void uvm_hal_volta_access_counter_clear_type(uvm_push_t *push, uvm_access_counter_type_t type);
void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get);
void uvm_hal_turing_enable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_turing_disable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters);
void uvm_hal_turing_clear_access_counter_notifications(uvm_access_counter_buffer_t *access_counters, NvU32 get);
void uvm_hal_turing_access_counter_buffer_parse_entry(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry);
bool uvm_hal_turing_access_counter_buffer_entry_is_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index);
void uvm_hal_turing_access_counter_buffer_entry_clear_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index);
NvU32 uvm_hal_turing_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu);
void uvm_hal_turing_access_counter_clear_all(uvm_push_t *push);
void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry);
// The source and destination addresses must be 16-byte aligned. Note that the
// best performance is achieved with 256-byte alignment. The decrypt size must
@ -786,7 +784,6 @@ struct uvm_host_hal_struct
uvm_hal_host_clear_faulted_channel_method_t clear_faulted_channel_method;
uvm_hal_host_clear_faulted_channel_register_t clear_faulted_channel_register;
uvm_hal_access_counter_clear_all_t access_counter_clear_all;
uvm_hal_access_counter_clear_type_t access_counter_clear_type;
uvm_hal_access_counter_clear_targeted_t access_counter_clear_targeted;
uvm_hal_get_time_t get_time;
};

View File

@ -471,69 +471,34 @@ static uvm_membar_t uvm_membar_max(uvm_membar_t membar_1, uvm_membar_t membar_2)
return max(membar_1, membar_2);
}
typedef enum
{
UVM_ACCESS_COUNTER_TYPE_MIMC = 0,
UVM_ACCESS_COUNTER_TYPE_MOMC,
UVM_ACCESS_COUNTER_TYPE_MAX,
} uvm_access_counter_type_t;
const char *uvm_access_counter_type_string(uvm_access_counter_type_t access_counter_type);
struct uvm_access_counter_buffer_entry_struct
{
// Whether this counter refers to outbound accesses to remote GPUs or
// sysmem (MIMC), or it refers to inbound accesses from CPU or a non-peer
// GPU (whose accesses are routed through the CPU, too) to vidmem (MOMC)
uvm_access_counter_type_t counter_type;
// Address of the region for which a notification was sent
uvm_gpu_address_t address;
NvU64 address;
union
{
// These fields are only valid if address.is_virtual is true
struct
{
// Instance pointer of one of the channels in the TSG that triggered
// the notification.
uvm_gpu_phys_address_t instance_ptr;
// Instance pointer of one of the channels in the TSG that triggered
// the notification.
uvm_gpu_phys_address_t instance_ptr;
uvm_mmu_engine_type_t mmu_engine_type;
uvm_mmu_engine_type_t mmu_engine_type;
NvU32 mmu_engine_id;
NvU32 mmu_engine_id;
// Identifier of the subcontext that performed the memory accesses
// that triggered the notification. This value, combined with the
// instance_ptr, is needed to obtain the GPU VA space of the process
// that triggered the notification.
NvU32 ve_id;
// Identifier of the subcontext that performed the memory accesses
// that triggered the notification. This value, combined with the
// instance_ptr, is needed to obtain the GPU VA space of the process
// that triggered the notification.
NvU32 ve_id;
// VA space for the address that triggered the notification
uvm_va_space_t *va_space;
} virtual_info;
// VA space for the address that triggered the notification
uvm_va_space_t *va_space;
// These fields are only valid if address.is_virtual is false
struct
{
// Processor id where data is resident
//
// Although this information is not tied to a VA space, we can use
// a regular processor id because P2P is not allowed between
// partitioned GPUs.
uvm_processor_id_t resident_id;
} physical_info;
};
// This is the GPU that triggered the notification. Note that physical
// address based notifications are only supported on non-MIG-capable GPUs.
// This is the GPU that triggered the notification.
uvm_gpu_t *gpu;
// Number of times the tracked region was accessed since the last time it
// was cleared. Counter values saturate at the maximum value supported by
// the GPU (2^16 - 1 in Volta)
// the GPU (2^16 - 1 on Turing)
NvU32 counter_value;
// When the granularity of the tracked regions is greater than 64KB, the

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -1602,7 +1602,7 @@ static NV_STATUS hmm_va_block_cpu_page_populate(uvm_va_block_t *va_block,
return status;
}
status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, chunk, page_index);
status = uvm_va_block_map_cpu_chunk_on_gpus(va_block, chunk);
if (status != NV_OK) {
uvm_cpu_chunk_remove_from_block(va_block, page_to_nid(page), page_index);
uvm_cpu_chunk_free(chunk);

View File

@ -50,12 +50,10 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_hopper_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount *
parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.maxGpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 <<
(sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Hopper covers 64 PB and that's the minimum
@ -99,8 +97,6 @@ void uvm_hal_hopper_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2020 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -111,13 +111,13 @@ void uvm_kvmalloc_exit(void)
return;
if (atomic_long_read(&g_uvm_leak_checker.bytes_allocated) > 0) {
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "Memory leak of %lu bytes detected.%s\n",
atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
UVM_INFO_PRINT("!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
UVM_INFO_PRINT("Memory leak of %lu bytes detected.%s\n",
atomic_long_read(&g_uvm_leak_checker.bytes_allocated),
uvm_leak_checker < UVM_KVMALLOC_LEAK_CHECK_ORIGIN ?
" insmod with uvm_leak_checker=2 for detailed information." :
"");
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX "!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
UVM_INFO_PRINT("!!!!!!!!!!!!!!!!!!!!!!!!!!!!\n");
if (g_uvm_global.unload_state.ptr)
*g_uvm_global.unload_state.ptr |= UVM_TEST_UNLOAD_STATE_MEMORY_LEAK;
@ -129,12 +129,12 @@ void uvm_kvmalloc_exit(void)
uvm_rb_tree_for_each_safe(node, next, &g_uvm_leak_checker.allocation_info) {
uvm_kvmalloc_info_t *info = container_of(node, uvm_kvmalloc_info_t, node);
printk(KERN_ERR NVIDIA_UVM_PRETTY_PRINTING_PREFIX " Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
uvm_kvsize((void *)((uintptr_t)info->node.key)),
kbasename(info->file),
info->line,
info->function,
info->node.key);
UVM_INFO_PRINT(" Leaked %zu bytes from %s:%d:%s (0x%llx)\n",
uvm_kvsize((void *)((uintptr_t)info->node.key)),
kbasename(info->file),
info->line,
info->function,
info->node.key);
// Free so we don't keep eating up memory while debugging. Note that
// this also removes the entry from the table, frees info, and drops

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -27,12 +27,13 @@
const char *uvm_lock_order_to_string(uvm_lock_order_t lock_order)
{
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 36);
BUILD_BUG_ON(UVM_LOCK_ORDER_COUNT != 37);
switch (lock_order) {
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_INVALID);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL_PM);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_GLOBAL);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_ACCESS_COUNTERS);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_ISR);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_MMAP_LOCK);
UVM_ENUM_STRING_CASE(UVM_LOCK_ORDER_VA_SPACES_LIST);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2022 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -69,6 +69,17 @@
//
// This should be taken whenever global GPU state might need to be modified.
//
// - Access counters VA space enablement state lock
// Order: UVM_LOCK_ORDER_ACCESS_COUNTERS
// Exclusive lock (mutex)
//
// This protects VA space state associated with access counters enablement.
// Blackwell+ GPUs may have multiple access counters notification buffers
// and their "atomic" enablement is protected by this lock.
//
// This should be taken whenever VA space access counters state might need
// to be modified.
//
// - GPU ISR lock
// Order: UVM_LOCK_ORDER_ISR
// Exclusive lock (mutex) per gpu
@ -487,6 +498,7 @@ typedef enum
UVM_LOCK_ORDER_INVALID = 0,
UVM_LOCK_ORDER_GLOBAL_PM,
UVM_LOCK_ORDER_GLOBAL,
UVM_LOCK_ORDER_ACCESS_COUNTERS,
UVM_LOCK_ORDER_ISR,
UVM_LOCK_ORDER_MMAP_LOCK,
UVM_LOCK_ORDER_VA_SPACES_LIST,
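A minimal sketch of how the new order is meant to be used (the field name access_counters_enablement_lock is hypothetical, and this assumes the uvm_mutex_init()/uvm_mutex_lock() helpers declared elsewhere in uvm_lock.h):

    // Hypothetical VA space member protected at the new lock order.
    uvm_mutex_init(&va_space->access_counters_enablement_lock, UVM_LOCK_ORDER_ACCESS_COUNTERS);

    // Per the enum order above, this mutex may be taken after the global lock
    // and before any per-GPU ISR lock.
    uvm_mutex_lock(&va_space->access_counters_enablement_lock);
    // ... enable or disable access counters across the GPU's notification buffers ...
    uvm_mutex_unlock(&va_space->access_counters_enablement_lock);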
@ -742,7 +754,8 @@ bool __uvm_locking_initialized(void);
ret; \
})
// Helper for calling a UVM-RM interface function that returns void with lock recording
// Helper for calling a UVM-RM interface function that returns void with lock
// recording
#define uvm_rm_locked_call_void(call) ({ \
uvm_record_lock_rm_all(); \
call; \
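Usage of this helper is unchanged; it simply records the RM locks around a void UVM-RM interface call, as seen elsewhere in this commit, e.g.:

    uvm_rm_locked_call_void(nvUvmInterfaceDestroyFaultInfo(parent_gpu->rm_device,
                                                           &parent_gpu->fault_buffer.rm_info));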

View File

@ -63,8 +63,6 @@ void uvm_hal_maxwell_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = false;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = false;
parent_gpu->scoped_atomics_supported = false;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021 NVIDIA Corporation
Copyright (c) 2021-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -24,25 +24,29 @@
#include "uvm_gpu.h"
#include "uvm_hal.h"
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
void uvm_hal_maxwell_enable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters)
{
UVM_ASSERT_MSG(false,
"enable_access_counter_notifications is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"enable_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
}
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu)
void uvm_hal_maxwell_disable_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters)
{
UVM_ASSERT_MSG(false,
"disable_access_counter_notifications is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"disable_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
}
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 get)
void uvm_hal_maxwell_clear_access_counter_notifications_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 get)
{
UVM_ASSERT_MSG(false,
"clear_access_counter_notifications is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"clear_access_counter_notifications is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
}
NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gpu_t *parent_gpu)
@ -53,26 +57,31 @@ NvU32 uvm_hal_maxwell_access_counter_buffer_entry_size_unsupported(uvm_parent_gp
return 0;
}
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
bool uvm_hal_maxwell_access_counter_buffer_entry_is_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index)
{
UVM_ASSERT_MSG(false,
"access_counter_buffer_entry_is_valid is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"access_counter_buffer_entry_is_valid is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
return false;
}
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_parent_gpu_t *parent_gpu, NvU32 index)
void uvm_hal_maxwell_access_counter_buffer_entry_clear_valid_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index)
{
UVM_ASSERT_MSG(false,
"access_counter_buffer_entry_clear_valid is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"access_counter_buffer_entry_clear_valid is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
}
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_parent_gpu_t *parent_gpu,
void uvm_hal_maxwell_access_counter_buffer_parse_entry_unsupported(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry)
{
UVM_ASSERT_MSG(false,
"access_counter_buffer_parse_entry is not supported on GPU: %s.\n",
uvm_parent_gpu_name(parent_gpu));
"access_counter_buffer_parse_entry is not supported on GPU: %s notif buf index: %u.\n",
uvm_parent_gpu_name(access_counters->parent_gpu),
access_counters->index);
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2022 NVIDIA Corporation
Copyright (c) 2021-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -330,11 +330,6 @@ void uvm_hal_maxwell_access_counter_clear_all_unsupported(uvm_push_t *push)
UVM_ASSERT_MSG(false, "host access_counter_clear_all called on Maxwell GPU\n");
}
void uvm_hal_maxwell_access_counter_clear_type_unsupported(uvm_push_t *push, uvm_access_counter_type_t type)
{
UVM_ASSERT_MSG(false, "host access_counter_clear_type called on Maxwell GPU\n");
}
void uvm_hal_maxwell_access_counter_clear_targeted_unsupported(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry)
{

View File

@ -582,7 +582,7 @@ static NV_STATUS uvm_migrate_ranges(uvm_va_space_t *va_space,
managed_range_last = managed_range;
// For UVM-Lite GPUs, the CUDA driver may suballocate a single
// managed_range into many range groups. For this reason, we iterate
// managed_range into many range groups. For this reason, we iterate
// over each managed_range first then through the range groups within.
uvm_range_group_for_each_migratability_in(&iter,
va_space,
@ -865,9 +865,9 @@ NV_STATUS uvm_migrate_init(void)
else {
g_uvm_perf_migrate_cpu_preunmap_size = UVM_VA_BLOCK_SIZE << UVM_PERF_MIGRATE_CPU_PREUNMAP_BLOCK_ORDER_DEFAULT;
pr_info("Invalid value %u for uvm_perf_migrate_cpu_preunmap_block_order. Using %u instead\n",
uvm_perf_migrate_cpu_preunmap_block_order,
UVM_PERF_MIGRATE_CPU_PREUNMAP_BLOCK_ORDER_DEFAULT);
UVM_INFO_PRINT("Invalid value %u for uvm_perf_migrate_cpu_preunmap_block_order. Using %u instead\n",
uvm_perf_migrate_cpu_preunmap_block_order,
UVM_PERF_MIGRATE_CPU_PREUNMAP_BLOCK_ORDER_DEFAULT);
}
}
@ -909,14 +909,13 @@ NV_STATUS uvm_api_migrate(UVM_MIGRATE_PARAMS *params, struct file *filp)
if ((params->flags & UVM_MIGRATE_FLAGS_TEST_ALL) && !uvm_enable_builtin_tests) {
UVM_INFO_PRINT("Test flag set for UVM_MIGRATE. Did you mean to insmod with uvm_enable_builtin_tests=1?\n");
return NV_ERR_INVALID_ARGUMENT;
}
gpus_to_check_for_nvlink_errors = uvm_processor_mask_cache_alloc();
if (!gpus_to_check_for_nvlink_errors)
return NV_ERR_NO_MEMORY;
uvm_processor_mask_zero(gpus_to_check_for_nvlink_errors);
// mmap_lock will be needed if we have to create CPU mappings

View File

@ -90,9 +90,9 @@ NV_STATUS uvm_mmu_init(void)
page_table_aperture = UVM_APERTURE_SYS;
}
else {
pr_info("Invalid uvm_page_table_location %s. Using %s instead.\n",
uvm_page_table_location,
uvm_aperture_string(page_table_aperture));
UVM_INFO_PRINT("Invalid uvm_page_table_location %s. Using %s instead.\n",
uvm_page_table_location,
uvm_aperture_string(page_table_aperture));
}
return NV_OK;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -40,10 +40,10 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_pascal_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Pascal covers 128 TB and that's the minimum
@ -92,8 +92,6 @@ void uvm_hal_pascal_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = false;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = false;
parent_gpu->scoped_atomics_supported = false;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -44,8 +44,8 @@ void uvm_hal_pascal_enable_replayable_faults(uvm_parent_gpu_t *parent_gpu)
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->fault_buffer_info.rm_info.replayable.pPmcIntrEnSet;
mask = parent_gpu->fault_buffer_info.rm_info.replayable.replayableFaultMask;
reg = parent_gpu->fault_buffer.rm_info.replayable.pPmcIntrEnSet;
mask = parent_gpu->fault_buffer.rm_info.replayable.replayableFaultMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
@ -55,33 +55,33 @@ void uvm_hal_pascal_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu)
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->fault_buffer_info.rm_info.replayable.pPmcIntrEnClear;
mask = parent_gpu->fault_buffer_info.rm_info.replayable.replayableFaultMask;
reg = parent_gpu->fault_buffer.rm_info.replayable.pPmcIntrEnClear;
mask = parent_gpu->fault_buffer.rm_info.replayable.replayableFaultMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
NvU32 uvm_hal_pascal_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu)
{
NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferPut);
UVM_ASSERT(put < parent_gpu->fault_buffer_info.replayable.max_faults);
NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferPut);
UVM_ASSERT(put < parent_gpu->fault_buffer.replayable.max_faults);
return put;
}
NvU32 uvm_hal_pascal_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu)
{
NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet);
UVM_ASSERT(get < parent_gpu->fault_buffer_info.replayable.max_faults);
NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet);
UVM_ASSERT(get < parent_gpu->fault_buffer.replayable.max_faults);
return get;
}
void uvm_hal_pascal_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet, index);
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet, index);
}
static uvm_fault_access_type_t get_fault_access_type(const NvU32 *fault_entry)
@ -189,9 +189,9 @@ static NvU32 *get_fault_buffer_entry(uvm_parent_gpu_t *parent_gpu, NvU32 index)
fault_buffer_entry_b069_t *buffer_start;
NvU32 *fault_entry;
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
buffer_start = (fault_buffer_entry_b069_t *)parent_gpu->fault_buffer_info.rm_info.replayable.bufferAddress;
buffer_start = (fault_buffer_entry_b069_t *)parent_gpu->fault_buffer.rm_info.replayable.bufferAddress;
fault_entry = (NvU32 *)&buffer_start[index];
return fault_entry;
@ -205,10 +205,10 @@ static UvmFaultMetadataPacket *get_fault_buffer_entry_metadata(uvm_parent_gpu_t
{
UvmFaultMetadataPacket *fault_entry_metadata;
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
fault_entry_metadata = parent_gpu->fault_buffer_info.rm_info.replayable.bufferMetadata;
fault_entry_metadata = parent_gpu->fault_buffer.rm_info.replayable.bufferMetadata;
UVM_ASSERT(fault_entry_metadata != NULL);
return fault_entry_metadata + index;
@ -267,7 +267,7 @@ NV_STATUS uvm_hal_pascal_fault_buffer_parse_replayable_entry(uvm_parent_gpu_t *p
// Compute global uTLB id
utlb_id = buffer_entry->fault_source.gpc_id * parent_gpu->utlb_per_gpc_count + gpc_utlb_id;
UVM_ASSERT(utlb_id < parent_gpu->fault_buffer_info.replayable.utlb_count);
UVM_ASSERT(utlb_id < parent_gpu->fault_buffer.replayable.utlb_count);
buffer_entry->fault_source.utlb_id = utlb_id;


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2023 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -21,7 +21,6 @@
*******************************************************************************/
// For Pascal, UVM page tree 'depth' maps to hardware as follows:
//
// UVM depth HW level VA bits
@ -377,7 +376,7 @@ uvm_mmu_mode_hal_t *uvm_hal_mmu_mode_pascal(NvU64 big_page_size)
static void mmu_set_prefetch_faults(uvm_parent_gpu_t *parent_gpu, bool enable)
{
volatile NvU32 *prefetch_ctrl = parent_gpu->fault_buffer_info.rm_info.replayable.pPrefetchCtrl;
volatile NvU32 *prefetch_ctrl = parent_gpu->fault_buffer.rm_info.replayable.pPrefetchCtrl;
// A null prefetch control mapping indicates that UVM should toggle the
// register's value using the RM API, instead of performing a direct access.
@ -388,7 +387,7 @@ static void mmu_set_prefetch_faults(uvm_parent_gpu_t *parent_gpu, bool enable)
// Computing.
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
status = nvUvmInterfaceTogglePrefetchFaults(&parent_gpu->fault_buffer_info.rm_info, (NvBool)enable);
status = nvUvmInterfaceTogglePrefetchFaults(&parent_gpu->fault_buffer.rm_info, (NvBool)enable);
UVM_ASSERT(status == NV_OK);
}


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -512,8 +512,9 @@ NV_STATUS uvm_perf_prefetch_init(void)
g_uvm_perf_prefetch_threshold = uvm_perf_prefetch_threshold;
}
else {
pr_info("Invalid value %u for uvm_perf_prefetch_threshold. Using %u instead\n",
uvm_perf_prefetch_threshold, UVM_PREFETCH_THRESHOLD_DEFAULT);
UVM_INFO_PRINT("Invalid value %u for uvm_perf_prefetch_threshold. Using %u instead\n",
uvm_perf_prefetch_threshold,
UVM_PREFETCH_THRESHOLD_DEFAULT);
g_uvm_perf_prefetch_threshold = UVM_PREFETCH_THRESHOLD_DEFAULT;
}
@ -523,8 +524,9 @@ NV_STATUS uvm_perf_prefetch_init(void)
g_uvm_perf_prefetch_min_faults = uvm_perf_prefetch_min_faults;
}
else {
pr_info("Invalid value %u for uvm_perf_prefetch_min_faults. Using %u instead\n",
uvm_perf_prefetch_min_faults, UVM_PREFETCH_MIN_FAULTS_DEFAULT);
UVM_INFO_PRINT("Invalid value %u for uvm_perf_prefetch_min_faults. Using %u instead\n",
uvm_perf_prefetch_min_faults,
UVM_PREFETCH_MIN_FAULTS_DEFAULT);
g_uvm_perf_prefetch_min_faults = UVM_PREFETCH_MIN_FAULTS_DEFAULT;
}


@ -338,28 +338,28 @@ static unsigned g_uvm_perf_thrashing_max_resets;
// parameter _d. The user value is read from _v, and the final value is stored
// in a variable named g_##_v, so it must be declared, too. Only unsigned
// parameters are supported.
#define INIT_THRASHING_PARAMETER_MIN_MAX(_v, _d, _mi, _ma) \
do { \
unsigned v = (_v); \
unsigned d = (_d); \
unsigned mi = (_mi); \
unsigned ma = (_ma); \
\
BUILD_BUG_ON(sizeof(_v) > sizeof(unsigned)); \
BUILD_BUG_ON(THRASHING_PARAMETER_IS_SIGNED(_v)); \
\
UVM_ASSERT(mi <= ma); \
UVM_ASSERT(d >= mi); \
UVM_ASSERT(d <= ma); \
\
if (v >= mi && v <= ma) { \
g_##_v = v; \
} \
else { \
pr_info("Invalid value %u for " #_v ". Using %u instead\n", v, d); \
\
g_##_v = d; \
} \
#define INIT_THRASHING_PARAMETER_MIN_MAX(_v, _d, _mi, _ma) \
do { \
unsigned v = (_v); \
unsigned d = (_d); \
unsigned mi = (_mi); \
unsigned ma = (_ma); \
\
BUILD_BUG_ON(sizeof(_v) > sizeof(unsigned)); \
BUILD_BUG_ON(THRASHING_PARAMETER_IS_SIGNED(_v)); \
\
UVM_ASSERT(mi <= ma); \
UVM_ASSERT(d >= mi); \
UVM_ASSERT(d <= ma); \
\
if (v >= mi && v <= ma) { \
g_##_v = v; \
} \
else { \
UVM_INFO_PRINT("Invalid value %u for " #_v ". Using %u instead\n", v, d); \
\
g_##_v = d; \
} \
} while (0)
#define INIT_THRASHING_PARAMETER(v, d) INIT_THRASHING_PARAMETER_MIN_MAX(v, d, 0u, UINT_MAX)
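
The macro above validates a user-supplied module parameter against a [min, max] range and falls back to the default when the value is out of range. A minimal usage sketch follows; the parameter name, default, and bounds are illustrative only and are not taken from this change.

static unsigned uvm_perf_thrashing_example = 60;   // hypothetical module parameter
static unsigned g_uvm_perf_thrashing_example;      // validated copy; must be declared

static void example_init_thrashing_parameters(void)
{
    // Keeps the user value if it lies within [1, 100]; otherwise logs via
    // UVM_INFO_PRINT and uses the default of 60. The result is stored in
    // g_uvm_perf_thrashing_example.
    INIT_THRASHING_PARAMETER_MIN_MAX(uvm_perf_thrashing_example, 60u, 1u, 100u);
}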


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -31,21 +31,14 @@ static int uvm_cpu_chunk_allocation_sizes = UVM_CPU_CHUNK_SIZES;
module_param(uvm_cpu_chunk_allocation_sizes, uint, S_IRUGO | S_IWUSR);
MODULE_PARM_DESC(uvm_cpu_chunk_allocation_sizes, "OR'ed value of all CPU chunk allocation sizes.");
static struct kmem_cache *g_reverse_page_map_cache __read_mostly;
NV_STATUS uvm_pmm_sysmem_init(void)
{
g_reverse_page_map_cache = NV_KMEM_CACHE_CREATE("uvm_pmm_sysmem_page_reverse_map_t",
uvm_reverse_map_t);
if (!g_reverse_page_map_cache)
return NV_ERR_NO_MEMORY;
// Ensure that only supported CPU chunk sizes are enabled.
uvm_cpu_chunk_allocation_sizes &= UVM_CPU_CHUNK_SIZES;
if (!uvm_cpu_chunk_allocation_sizes || !(uvm_cpu_chunk_allocation_sizes & PAGE_SIZE)) {
pr_info("Invalid value for uvm_cpu_chunk_allocation_sizes = 0x%x, using 0x%llx instead\n",
uvm_cpu_chunk_allocation_sizes,
UVM_CPU_CHUNK_SIZES);
UVM_INFO_PRINT("Invalid value for uvm_cpu_chunk_allocation_sizes = 0x%x, using 0x%llx instead\n",
uvm_cpu_chunk_allocation_sizes,
UVM_CPU_CHUNK_SIZES);
uvm_cpu_chunk_allocation_sizes = UVM_CPU_CHUNK_SIZES;
}
@ -54,387 +47,11 @@ NV_STATUS uvm_pmm_sysmem_init(void)
void uvm_pmm_sysmem_exit(void)
{
kmem_cache_destroy_safe(&g_reverse_page_map_cache);
}
NV_STATUS uvm_pmm_sysmem_mappings_init(uvm_gpu_t *gpu, uvm_pmm_sysmem_mappings_t *sysmem_mappings)
{
memset(sysmem_mappings, 0, sizeof(*sysmem_mappings));
sysmem_mappings->gpu = gpu;
uvm_mutex_init(&sysmem_mappings->reverse_map_lock, UVM_LOCK_ORDER_LEAF);
uvm_init_radix_tree_preloadable(&sysmem_mappings->reverse_map_tree);
return NV_OK;
}
void uvm_pmm_sysmem_mappings_deinit(uvm_pmm_sysmem_mappings_t *sysmem_mappings)
{
if (sysmem_mappings->gpu) {
UVM_ASSERT_MSG(radix_tree_empty(&sysmem_mappings->reverse_map_tree),
"radix_tree not empty for GPU %s\n",
uvm_gpu_name(sysmem_mappings->gpu));
}
sysmem_mappings->gpu = NULL;
}
// TODO: Bug 1995015: use a more efficient data structure for
// physically-contiguous allocations.
NV_STATUS uvm_pmm_sysmem_mappings_add_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 virt_addr,
NvU64 region_size,
uvm_va_block_t *va_block,
uvm_processor_id_t owner)
{
NV_STATUS status = NV_OK;
uvm_reverse_map_t *new_reverse_map;
NvU64 key;
const NvU64 base_key = dma_addr / PAGE_SIZE;
const NvU32 num_pages = region_size / PAGE_SIZE;
uvm_page_index_t page_index;
UVM_ASSERT(va_block);
UVM_ASSERT(!uvm_va_block_is_dead(va_block));
UVM_ASSERT(IS_ALIGNED(dma_addr, region_size));
UVM_ASSERT(IS_ALIGNED(virt_addr, region_size));
UVM_ASSERT(region_size <= UVM_VA_BLOCK_SIZE);
UVM_ASSERT(is_power_of_2(region_size));
UVM_ASSERT(uvm_va_block_contains_address(va_block, virt_addr));
UVM_ASSERT(uvm_va_block_contains_address(va_block, virt_addr + region_size - 1));
uvm_assert_mutex_locked(&va_block->lock);
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
return NV_OK;
new_reverse_map = nv_kmem_cache_zalloc(g_reverse_page_map_cache, NV_UVM_GFP_FLAGS);
if (!new_reverse_map)
return NV_ERR_NO_MEMORY;
page_index = uvm_va_block_cpu_page_index(va_block, virt_addr);
new_reverse_map->va_block = va_block;
new_reverse_map->region = uvm_va_block_region(page_index, page_index + num_pages);
new_reverse_map->owner = owner;
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
for (key = base_key; key < base_key + num_pages; ++key) {
int ret = radix_tree_insert(&sysmem_mappings->reverse_map_tree, key, new_reverse_map);
if (ret != 0) {
NvU64 remove_key;
for (remove_key = base_key; remove_key < key; ++remove_key)
(void)radix_tree_delete(&sysmem_mappings->reverse_map_tree, remove_key);
kmem_cache_free(g_reverse_page_map_cache, new_reverse_map);
status = errno_to_nv_status(ret);
break;
}
}
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
// The assert is added for Coverity's sake. It is equivalent to adding
// assert(num_pages > 0) before the loop. However, Coverity is not able to
// deduce that the loop has to execute at least once from num_pages > 0.
UVM_ASSERT(key != base_key || status != NV_OK);
return status;
}
static void pmm_sysmem_mappings_remove_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
bool check_mapping)
{
uvm_reverse_map_t *reverse_map;
NvU64 key;
const NvU64 base_key = dma_addr / PAGE_SIZE;
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
return;
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
reverse_map = radix_tree_delete(&sysmem_mappings->reverse_map_tree, base_key);
if (check_mapping)
UVM_ASSERT(reverse_map);
if (!reverse_map) {
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
return;
}
uvm_assert_mutex_locked(&reverse_map->va_block->lock);
for (key = base_key + 1; key < base_key + uvm_va_block_region_num_pages(reverse_map->region); ++key) {
uvm_reverse_map_t *curr_reverse_map = radix_tree_delete(&sysmem_mappings->reverse_map_tree, key);
UVM_ASSERT(curr_reverse_map == reverse_map);
}
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
kmem_cache_free(g_reverse_page_map_cache, reverse_map);
}
void uvm_pmm_sysmem_mappings_remove_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr)
{
pmm_sysmem_mappings_remove_gpu_mapping(sysmem_mappings, dma_addr, true);
}
void uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr)
{
pmm_sysmem_mappings_remove_gpu_mapping(sysmem_mappings, dma_addr, false);
}
void uvm_pmm_sysmem_mappings_reparent_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
uvm_va_block_t *va_block)
{
NvU64 virt_addr;
uvm_reverse_map_t *reverse_map;
const NvU64 base_key = dma_addr / PAGE_SIZE;
uvm_page_index_t new_start_page;
UVM_ASSERT(PAGE_ALIGNED(dma_addr));
UVM_ASSERT(va_block);
UVM_ASSERT(!uvm_va_block_is_dead(va_block));
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
return;
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, base_key);
UVM_ASSERT(reverse_map);
// Compute virt address by hand since the old VA block may be messed up
// during split
virt_addr = reverse_map->va_block->start + reverse_map->region.first * PAGE_SIZE;
new_start_page = uvm_va_block_cpu_page_index(va_block, virt_addr);
reverse_map->region = uvm_va_block_region(new_start_page,
new_start_page + uvm_va_block_region_num_pages(reverse_map->region));
reverse_map->va_block = va_block;
UVM_ASSERT(uvm_va_block_contains_address(va_block, uvm_reverse_map_start(reverse_map)));
UVM_ASSERT(uvm_va_block_contains_address(va_block, uvm_reverse_map_end(reverse_map)));
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
}
NV_STATUS uvm_pmm_sysmem_mappings_split_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 new_region_size)
{
uvm_reverse_map_t *orig_reverse_map;
const NvU64 base_key = dma_addr / PAGE_SIZE;
const size_t num_pages = new_region_size / PAGE_SIZE;
size_t old_num_pages;
size_t subregion, num_subregions;
uvm_reverse_map_t **new_reverse_maps;
UVM_ASSERT(IS_ALIGNED(dma_addr, new_region_size));
UVM_ASSERT(new_region_size <= UVM_VA_BLOCK_SIZE);
UVM_ASSERT(is_power_of_2(new_region_size));
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
return NV_OK;
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
orig_reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, base_key);
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
// We can access orig_reverse_map outside the tree lock because we hold the
// VA block lock so we cannot have concurrent modifications in the tree for
// the mappings of the chunks that belong to that VA block.
UVM_ASSERT(orig_reverse_map);
UVM_ASSERT(orig_reverse_map->va_block);
uvm_assert_mutex_locked(&orig_reverse_map->va_block->lock);
old_num_pages = uvm_va_block_region_num_pages(orig_reverse_map->region);
UVM_ASSERT(num_pages < old_num_pages);
num_subregions = old_num_pages / num_pages;
new_reverse_maps = uvm_kvmalloc_zero(sizeof(*new_reverse_maps) * (num_subregions - 1));
if (!new_reverse_maps)
return NV_ERR_NO_MEMORY;
// Allocate the descriptors for the new subregions
for (subregion = 1; subregion < num_subregions; ++subregion) {
uvm_reverse_map_t *new_reverse_map = nv_kmem_cache_zalloc(g_reverse_page_map_cache, NV_UVM_GFP_FLAGS);
uvm_page_index_t page_index = orig_reverse_map->region.first + num_pages * subregion;
if (new_reverse_map == NULL) {
// On error, free the previously-created descriptors
while (--subregion != 0)
kmem_cache_free(g_reverse_page_map_cache, new_reverse_maps[subregion - 1]);
uvm_kvfree(new_reverse_maps);
return NV_ERR_NO_MEMORY;
}
new_reverse_map->va_block = orig_reverse_map->va_block;
new_reverse_map->region = uvm_va_block_region(page_index, page_index + num_pages);
new_reverse_map->owner = orig_reverse_map->owner;
new_reverse_maps[subregion - 1] = new_reverse_map;
}
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
for (subregion = 1; subregion < num_subregions; ++subregion) {
NvU64 key;
for (key = base_key + num_pages * subregion; key < base_key + num_pages * (subregion + 1); ++key) {
void **slot = radix_tree_lookup_slot(&sysmem_mappings->reverse_map_tree, key);
UVM_ASSERT(slot);
UVM_ASSERT(radix_tree_deref_slot(slot) == orig_reverse_map);
NV_RADIX_TREE_REPLACE_SLOT(&sysmem_mappings->reverse_map_tree, slot, new_reverse_maps[subregion - 1]);
}
}
orig_reverse_map->region = uvm_va_block_region(orig_reverse_map->region.first,
orig_reverse_map->region.first + num_pages);
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
uvm_kvfree(new_reverse_maps);
return NV_OK;
}
void uvm_pmm_sysmem_mappings_merge_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 new_region_size)
{
uvm_reverse_map_t *first_reverse_map;
uvm_page_index_t running_page_index;
NvU64 key;
const NvU64 base_key = dma_addr / PAGE_SIZE;
const size_t num_pages = new_region_size / PAGE_SIZE;
size_t num_mapping_pages;
UVM_ASSERT(IS_ALIGNED(dma_addr, new_region_size));
UVM_ASSERT(new_region_size <= UVM_VA_BLOCK_SIZE);
UVM_ASSERT(is_power_of_2(new_region_size));
if (!sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses)
return;
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
// Find the first mapping in the region
first_reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, base_key);
UVM_ASSERT(first_reverse_map);
num_mapping_pages = uvm_va_block_region_num_pages(first_reverse_map->region);
UVM_ASSERT(num_pages >= num_mapping_pages);
UVM_ASSERT(IS_ALIGNED(base_key, num_mapping_pages));
// The region in the tree matches the size of the merged region, just return
if (num_pages == num_mapping_pages)
goto unlock_no_update;
// Otherwise update the rest of slots to point at the same reverse map
// descriptor
key = base_key + uvm_va_block_region_num_pages(first_reverse_map->region);
running_page_index = first_reverse_map->region.outer;
while (key < base_key + num_pages) {
uvm_reverse_map_t *reverse_map = NULL;
void **slot = radix_tree_lookup_slot(&sysmem_mappings->reverse_map_tree, key);
size_t slot_index;
UVM_ASSERT(slot);
reverse_map = radix_tree_deref_slot(slot);
UVM_ASSERT(reverse_map);
UVM_ASSERT(reverse_map != first_reverse_map);
UVM_ASSERT(reverse_map->va_block == first_reverse_map->va_block);
UVM_ASSERT(uvm_id_equal(reverse_map->owner, first_reverse_map->owner));
UVM_ASSERT(reverse_map->region.first == running_page_index);
NV_RADIX_TREE_REPLACE_SLOT(&sysmem_mappings->reverse_map_tree, slot, first_reverse_map);
num_mapping_pages = uvm_va_block_region_num_pages(reverse_map->region);
UVM_ASSERT(IS_ALIGNED(key, num_mapping_pages));
UVM_ASSERT(key + num_mapping_pages <= base_key + num_pages);
for (slot_index = 1; slot_index < num_mapping_pages; ++slot_index) {
slot = radix_tree_lookup_slot(&sysmem_mappings->reverse_map_tree, key + slot_index);
UVM_ASSERT(slot);
UVM_ASSERT(reverse_map == radix_tree_deref_slot(slot));
NV_RADIX_TREE_REPLACE_SLOT(&sysmem_mappings->reverse_map_tree, slot, first_reverse_map);
}
key += num_mapping_pages;
running_page_index = reverse_map->region.outer;
kmem_cache_free(g_reverse_page_map_cache, reverse_map);
}
// Grow the first mapping to cover the whole region
first_reverse_map->region.outer = first_reverse_map->region.first + num_pages;
unlock_no_update:
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
}
size_t uvm_pmm_sysmem_mappings_dma_to_virt(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 region_size,
uvm_reverse_map_t *out_mappings,
size_t max_out_mappings)
{
NvU64 key;
size_t num_mappings = 0;
const NvU64 base_key = dma_addr / PAGE_SIZE;
NvU32 num_pages = region_size / PAGE_SIZE;
UVM_ASSERT(region_size >= PAGE_SIZE);
UVM_ASSERT(PAGE_ALIGNED(region_size));
UVM_ASSERT(sysmem_mappings->gpu->parent->access_counters_can_use_physical_addresses);
UVM_ASSERT(max_out_mappings > 0);
uvm_mutex_lock(&sysmem_mappings->reverse_map_lock);
key = base_key;
do {
uvm_reverse_map_t *reverse_map = radix_tree_lookup(&sysmem_mappings->reverse_map_tree, key);
if (reverse_map) {
size_t num_chunk_pages = uvm_va_block_region_num_pages(reverse_map->region);
NvU32 page_offset = key & (num_chunk_pages - 1);
NvU32 num_mapping_pages = min(num_pages, (NvU32)num_chunk_pages - page_offset);
// Sysmem mappings are removed during VA block destruction.
// Therefore, we can safely retain the VA blocks as long as they
// are in the reverse map and we hold the reverse map lock.
uvm_va_block_retain(reverse_map->va_block);
out_mappings[num_mappings] = *reverse_map;
out_mappings[num_mappings].region.first += page_offset;
out_mappings[num_mappings].region.outer = out_mappings[num_mappings].region.first + num_mapping_pages;
if (++num_mappings == max_out_mappings)
break;
num_pages -= num_mapping_pages;
key += num_mapping_pages;
}
else {
--num_pages;
++key;
}
}
while (num_pages > 0);
uvm_mutex_unlock(&sysmem_mappings->reverse_map_lock);
return num_mappings;
}
uvm_chunk_sizes_mask_t uvm_cpu_chunk_get_allocation_sizes(void)
{
return uvm_cpu_chunk_allocation_sizes & UVM_CPU_CHUNK_SIZES;
}
static void uvm_cpu_chunk_set_size(uvm_cpu_chunk_t *chunk, uvm_chunk_size_t size)


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -30,96 +30,12 @@
#include "uvm_lock.h"
#include "uvm_pmm_gpu.h"
// Module to handle per-GPU user mappings to sysmem physical memory. Notably,
// this implements a reverse map of the DMA address to {va_block, virt_addr}.
// This is required by the GPU access counters feature since they may provide a
// physical address in the notification packet (GPA notifications). We use the
// table to obtain the VAs of the memory regions being accessed remotely. The
// reverse map is implemented by a radix tree, which is indexed using the
// DMA address. For now, only PAGE_SIZE translations are supported (i.e. no
// big/huge pages).
//
// TODO: Bug 1995015: add support for physically-contiguous mappings.
struct uvm_pmm_sysmem_mappings_struct
{
uvm_gpu_t *gpu;
struct radix_tree_root reverse_map_tree;
uvm_mutex_t reverse_map_lock;
};
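
As a quick illustration of the keying scheme described in the comment above (not part of this change): every PAGE_SIZE page of a registered DMA region gets its own radix tree slot, and all slots of a region point at the same uvm_reverse_map_t descriptor. The helper names below are hypothetical; the arithmetic mirrors the base_key/num_pages computation in uvm_pmm_sysmem.c.

static inline unsigned long example_reverse_map_first_key(NvU64 dma_addr)
{
    // One radix tree key per system page of DMA address space
    return (unsigned long)(dma_addr / PAGE_SIZE);
}

static inline size_t example_reverse_map_num_keys(NvU64 region_size)
{
    // e.g., a 64K region on a 4K-page system spans 16 consecutive keys
    return (size_t)(region_size / PAGE_SIZE);
}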
// Global initialization/exit functions that need to be called during driver
// initialization/tear-down. These are needed to allocate/free global internal
// data structures.
NV_STATUS uvm_pmm_sysmem_init(void);
void uvm_pmm_sysmem_exit(void);
// Initialize per-GPU sysmem mapping tracking
NV_STATUS uvm_pmm_sysmem_mappings_init(uvm_gpu_t *gpu, uvm_pmm_sysmem_mappings_t *sysmem_mappings);
// Destroy per-GPU sysmem mapping tracking. The caller must ensure that all the
// mappings have been removed before calling this function.
void uvm_pmm_sysmem_mappings_deinit(uvm_pmm_sysmem_mappings_t *sysmem_mappings);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// dma_addr -> {va_block, virt_addr} mapping is inserted in the reverse map.
NV_STATUS uvm_pmm_sysmem_mappings_add_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 virt_addr,
NvU64 region_size,
uvm_va_block_t *va_block,
uvm_processor_id_t owner);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// entries for the physical region starting at dma_addr are removed from the
// reverse map.
void uvm_pmm_sysmem_mappings_remove_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr);
// Like uvm_pmm_sysmem_mappings_remove_gpu_mapping but it doesn't assert if the
// mapping doesn't exist. See uvm_va_block_evict_chunks for more information.
void uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(uvm_pmm_sysmem_mappings_t *sysmem_mappings, NvU64 dma_addr);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// mapping for the region starting at dma_addr is updated with va_block.
// This is required on VA block split.
void uvm_pmm_sysmem_mappings_reparent_gpu_mapping(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
uvm_va_block_t *va_block);
// If the GPU used to initialize sysmem_mappings supports access counters, the
// mapping for the region starting at dma_addr is split into regions of
// new_region_size. new_region_size must be a power of two and smaller than the
// previously-registered size.
NV_STATUS uvm_pmm_sysmem_mappings_split_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 new_region_size);
// If the GPU used to initialize sysmem_mappings supports access counters, all
// the mappings within the region [dma_addr, dma_addr + new_region_size) are
// merged into a single mapping. new_region_size must be a power of two. The
// whole region must be previously populated with mappings and all of them must
// have the same VA block and processor owner.
void uvm_pmm_sysmem_mappings_merge_gpu_mappings(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 new_region_size);
// Obtain the {va_block, virt_addr} information for the mappings in the given
// [dma_addr:dma_addr + region_size) range. dma_addr and region_size must be
// page-aligned.
//
// Valid translations are written to out_mappings sequentially (there are no
// gaps). At most max_out_mappings entries are written. The caller is required to
// provide enough entries in out_mappings.
//
// The VA block in each returned translation entry is retained, and it is up to
// the caller to release it.
size_t uvm_pmm_sysmem_mappings_dma_to_virt(uvm_pmm_sysmem_mappings_t *sysmem_mappings,
NvU64 dma_addr,
NvU64 region_size,
uvm_reverse_map_t *out_mappings,
size_t max_out_mappings);
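
A minimal end-to-end sketch of the API above, modeled on the uvm_test_pmm_sysmem tests removed in the same change. The function and variable names are hypothetical; dma_addr is assumed to be PAGE_SIZE-aligned and the GPU is assumed to support access counters with physical addresses (otherwise the add/remove calls are no-ops).

static NV_STATUS example_reverse_map_lifecycle(uvm_pmm_sysmem_mappings_t *mappings,
                                               uvm_va_block_t *va_block,
                                               NvU64 dma_addr)
{
    uvm_reverse_map_t translation;
    size_t num_translations;
    NV_STATUS status;

    // Register a one-page mapping; the VA block lock must be held.
    uvm_mutex_lock(&va_block->lock);
    status = uvm_pmm_sysmem_mappings_add_gpu_mapping(mappings,
                                                     dma_addr,
                                                     va_block->start,
                                                     PAGE_SIZE,
                                                     va_block,
                                                     UVM_ID_CPU);
    uvm_mutex_unlock(&va_block->lock);
    if (status != NV_OK)
        return status;

    // Translate the DMA address back to {va_block, virt_addr}. Every returned
    // entry retains its VA block, so it has to be released afterwards.
    num_translations = uvm_pmm_sysmem_mappings_dma_to_virt(mappings,
                                                           dma_addr,
                                                           PAGE_SIZE,
                                                           &translation,
                                                           1);
    if (num_translations == 1)
        uvm_va_block_release(translation.va_block);

    // Tear the mapping down again.
    uvm_mutex_lock(&va_block->lock);
    uvm_pmm_sysmem_mappings_remove_gpu_mapping(mappings, dma_addr);
    uvm_mutex_unlock(&va_block->lock);

    return NV_OK;
}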
#define UVM_CPU_CHUNK_SIZES (UVM_PAGE_SIZE_2M | UVM_PAGE_SIZE_64K | PAGE_SIZE)
typedef enum
@ -425,9 +341,9 @@ void uvm_cpu_chunk_mark_clean(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_inde
bool uvm_cpu_chunk_is_dirty(uvm_cpu_chunk_t *chunk, uvm_page_index_t page_index);
static NV_STATUS uvm_test_get_cpu_chunk_allocation_sizes(UVM_TEST_GET_CPU_CHUNK_ALLOC_SIZES_PARAMS *params,
struct file *filp)
{
params->alloc_size_mask = (NvU32)uvm_cpu_chunk_get_allocation_sizes();
return NV_OK;
}
#endif


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -35,544 +35,6 @@
#include "uvm_push.h"
#include "uvm_processors.h"
// Pre-allocated array used for dma-to-virt translations
static uvm_reverse_map_t g_sysmem_translations[PAGES_PER_UVM_VA_BLOCK];
// We use our own separate reverse map to easily specify contiguous DMA
// address ranges
static uvm_pmm_sysmem_mappings_t g_reverse_map;
// Check that the DMA addresses in the range defined by
// [base_dma_addr:base_dma_addr + uvm_va_block_size(va_block)] and page_mask
// are registered in the reverse map, using one call per entry. The returned
// virtual addresses must belong to va_block. The function assumes a 1:1
// dma-to-virt mapping for the whole VA block
static NV_STATUS check_reverse_map_block_page(uvm_va_block_t *va_block,
NvU64 base_dma_addr,
const uvm_page_mask_t *page_mask)
{
uvm_page_index_t page_index;
for_each_va_block_page(page_index, va_block) {
size_t num_pages;
memset(g_sysmem_translations, 0, sizeof(g_sysmem_translations));
num_pages = uvm_pmm_sysmem_mappings_dma_to_virt(&g_reverse_map,
base_dma_addr + page_index * PAGE_SIZE,
PAGE_SIZE,
g_sysmem_translations,
PAGES_PER_UVM_VA_BLOCK);
if (!page_mask || uvm_page_mask_test(page_mask, page_index)) {
TEST_CHECK_RET(num_pages == 1);
TEST_CHECK_RET(g_sysmem_translations[0].va_block == va_block);
TEST_CHECK_RET(nv_kref_read(&va_block->kref) >= 2);
TEST_CHECK_RET(uvm_reverse_map_start(&g_sysmem_translations[0]) == uvm_va_block_cpu_page_address(va_block, page_index));
TEST_CHECK_RET(uvm_va_block_region_num_pages(g_sysmem_translations[0].region) == 1);
TEST_CHECK_RET(UVM_ID_IS_CPU(g_sysmem_translations[0].owner));
uvm_va_block_release(g_sysmem_translations[0].va_block);
}
else {
TEST_CHECK_RET(num_pages == 0);
}
}
return NV_OK;
}
// Check that the DMA addresses in the range defined by
// [base_dma_addr:base_dma_addr + uvm_va_block_size(va_block)] and page_mask
// are registered in the reverse map, using a single translation call. The
// returned virtual addresses must belong to va_block. The function assumes a
// 1:1 dma-to-virt mapping for the whole VA block
static NV_STATUS check_reverse_map_block_batch(uvm_va_block_t *va_block,
NvU64 base_dma_addr,
const uvm_page_mask_t *page_mask)
{
size_t num_translations;
size_t num_pages;
size_t reverse_map_index;
memset(g_sysmem_translations, 0, sizeof(g_sysmem_translations));
num_translations = uvm_pmm_sysmem_mappings_dma_to_virt(&g_reverse_map,
base_dma_addr,
uvm_va_block_size(va_block),
g_sysmem_translations,
PAGES_PER_UVM_VA_BLOCK);
if (num_translations == 0 && page_mask)
TEST_CHECK_RET(uvm_page_mask_empty(page_mask));
num_pages = 0;
for (reverse_map_index = 0; reverse_map_index < num_translations; ++reverse_map_index) {
uvm_reverse_map_t *reverse_map = &g_sysmem_translations[reverse_map_index];
size_t num_reverse_map_pages = uvm_va_block_region_num_pages(reverse_map->region);
num_pages += num_reverse_map_pages;
TEST_CHECK_RET(reverse_map->va_block == va_block);
TEST_CHECK_RET(nv_kref_read(&va_block->kref) >= 2);
uvm_va_block_release(reverse_map->va_block);
TEST_CHECK_RET(UVM_ID_IS_CPU(reverse_map->owner));
}
if (page_mask)
TEST_CHECK_RET(num_pages == uvm_page_mask_weight(page_mask));
else
TEST_CHECK_RET(num_pages == uvm_va_block_num_cpu_pages(va_block));
return NV_OK;
}
// Check that the DMA addresses for all the CPU pages of the two given VA blocks
// are registered in the reverse map, using a single translation call. The
// returned virtual addresses must belong to one of the blocks. The function
// assumes a 1:1 dma-to-virt mapping for each VA block and that va_block1 is
// mapped behind va_block0.
static NV_STATUS check_reverse_map_two_blocks_batch(NvU64 base_dma_addr,
uvm_va_block_t *va_block0,
uvm_va_block_t *va_block1)
{
size_t num_pages;
size_t num_translations;
size_t reverse_map_index;
memset(g_sysmem_translations, 0, sizeof(g_sysmem_translations));
num_translations = uvm_pmm_sysmem_mappings_dma_to_virt(&g_reverse_map,
base_dma_addr,
UVM_VA_BLOCK_SIZE,
g_sysmem_translations,
PAGES_PER_UVM_VA_BLOCK);
TEST_CHECK_RET(num_translations == 2);
num_pages = 0;
for (reverse_map_index = 0; reverse_map_index < num_translations; ++reverse_map_index) {
uvm_va_block_t *block;
uvm_reverse_map_t *reverse_map = &g_sysmem_translations[reverse_map_index];
NvU64 virt_addr = uvm_reverse_map_start(reverse_map);
size_t num_reverse_map_pages = uvm_va_block_region_num_pages(reverse_map->region);
if (reverse_map_index == 0)
block = va_block0;
else
block = va_block1;
TEST_CHECK_RET(reverse_map->va_block == block);
TEST_CHECK_RET(nv_kref_read(&block->kref) >= 2);
uvm_va_block_release(reverse_map->va_block);
TEST_CHECK_RET(num_reverse_map_pages == uvm_va_block_num_cpu_pages(block));
TEST_CHECK_RET(virt_addr == block->start);
TEST_CHECK_RET(UVM_ID_IS_CPU(reverse_map->owner));
num_pages += num_reverse_map_pages;
}
TEST_CHECK_RET(num_pages == uvm_va_block_num_cpu_pages(va_block0) + uvm_va_block_num_cpu_pages(va_block1));
return NV_OK;
}
static const NvU64 g_base_dma_addr = UVM_VA_BLOCK_SIZE;
// This function adds the mappings for all the subregions in va_block defined
// by page_mask. g_base_dma_addr is used as the base DMA address for the whole
// VA block.
static NV_STATUS test_pmm_sysmem_reverse_map_single(uvm_va_block_t *va_block,
uvm_page_mask_t *page_mask,
uvm_chunk_size_t split_size,
bool merge)
{
NV_STATUS status = NV_OK;
uvm_va_block_region_t subregion;
TEST_CHECK_RET(is_power_of_2(split_size));
TEST_CHECK_RET(split_size >= PAGE_SIZE);
for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
TEST_CHECK_RET(is_power_of_2(uvm_va_block_region_size(subregion)));
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
g_base_dma_addr + subregion.first * PAGE_SIZE,
va_block->start + subregion.first * PAGE_SIZE,
uvm_va_block_region_size(subregion),
va_block,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block->lock);
if (status != NV_OK)
return status;
}
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
if (split_size != UVM_CHUNK_SIZE_MAX) {
for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
TEST_CHECK_RET(uvm_va_block_region_size(subregion) > split_size);
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_split_gpu_mappings(&g_reverse_map,
g_base_dma_addr + subregion.first * PAGE_SIZE,
split_size);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(status == NV_OK);
}
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
}
if (split_size != UVM_CHUNK_SIZE_MAX && merge) {
for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&g_reverse_map,
g_base_dma_addr + subregion.first * PAGE_SIZE,
uvm_va_block_region_size(subregion));
}
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
}
for_each_va_block_subregion_in_mask(subregion, page_mask, uvm_va_block_region_from_block(va_block)) {
NvU64 subregion_dma_addr = g_base_dma_addr + subregion.first * PAGE_SIZE;
if (split_size == UVM_CHUNK_SIZE_MAX || merge) {
uvm_mutex_lock(&va_block->lock);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, subregion_dma_addr);
uvm_mutex_unlock(&va_block->lock);
}
else {
size_t chunk;
size_t num_chunks = uvm_va_block_region_size(subregion) / split_size;
TEST_CHECK_RET(num_chunks > 1);
uvm_mutex_lock(&va_block->lock);
for (chunk = 0; chunk < num_chunks; ++chunk)
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, subregion_dma_addr + chunk * split_size);
uvm_mutex_unlock(&va_block->lock);
}
}
uvm_page_mask_zero(page_mask);
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, page_mask) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, page_mask) == NV_OK);
return status;
}
static uvm_page_mask_t g_page_mask;
static NV_STATUS test_pmm_sysmem_reverse_map_single_whole(uvm_va_space_t *va_space, NvU64 addr)
{
NV_STATUS status;
uvm_va_block_t *va_block;
const bool merge_array[] = {false, true};
const uvm_chunk_size_t chunk_split_array[] = { UVM_CHUNK_SIZE_4K, UVM_CHUNK_SIZE_64K, UVM_CHUNK_SIZE_MAX };
unsigned merge_index;
unsigned chunk_split_index;
status = uvm_va_block_find(va_space, addr, &va_block);
if (status != NV_OK)
return status;
TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block)));
for (merge_index = 0; merge_index < ARRAY_SIZE(merge_array); ++merge_index) {
for (chunk_split_index = 0; chunk_split_index < ARRAY_SIZE(chunk_split_array); ++chunk_split_index) {
// The reverse map has PAGE_SIZE granularity
if (chunk_split_array[chunk_split_index] < PAGE_SIZE)
continue;
uvm_page_mask_region_fill(&g_page_mask, uvm_va_block_region_from_block(va_block));
TEST_CHECK_RET(test_pmm_sysmem_reverse_map_single(va_block,
&g_page_mask,
chunk_split_array[chunk_split_index],
merge_array[merge_index]) == NV_OK);
}
}
return status;
}
static NV_STATUS test_pmm_sysmem_reverse_map_single_pattern(uvm_va_space_t *va_space, NvU64 addr)
{
NV_STATUS status;
uvm_va_block_t *va_block;
uvm_page_index_t page_index;
status = uvm_va_block_find(va_space, addr, &va_block);
if (status != NV_OK)
return status;
uvm_page_mask_zero(&g_page_mask);
for_each_va_block_page(page_index, va_block) {
if (page_index % 2 == 0)
uvm_page_mask_set(&g_page_mask, page_index);
}
return test_pmm_sysmem_reverse_map_single(va_block, &g_page_mask, UVM_CHUNK_SIZE_MAX, false);
}
// This function assumes that addr points at a VA range containing 4 VA blocks,
// each of size UVM_VA_BLOCK_SIZE / 4.
static NV_STATUS test_pmm_sysmem_reverse_map_many_blocks(uvm_va_space_t *va_space, NvU64 addr)
{
NV_STATUS status;
uvm_va_block_t *va_block0;
uvm_va_block_t *va_block1;
NvU64 base_dma_addr0;
NvU64 base_dma_addr1;
status = uvm_va_block_find(va_space, addr + UVM_VA_BLOCK_SIZE / 4, &va_block0);
if (status != NV_OK)
return status;
status = uvm_va_block_find(va_space, addr + 3 * UVM_VA_BLOCK_SIZE / 4, &va_block1);
if (status != NV_OK)
return status;
TEST_CHECK_RET(va_block0 != va_block1);
base_dma_addr0 = g_base_dma_addr + uvm_va_block_size(va_block0);
base_dma_addr1 = base_dma_addr0 + uvm_va_block_size(va_block0);
TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block0)));
TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block1)));
uvm_mutex_lock(&va_block0->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
base_dma_addr0,
va_block0->start,
uvm_va_block_size(va_block0),
va_block0,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block0->lock);
TEST_CHECK_RET(status == NV_OK);
uvm_mutex_lock(&va_block1->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
base_dma_addr1,
va_block1->start,
uvm_va_block_size(va_block1),
va_block1,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block1->lock);
// Check each VA block individually
if (status == NV_OK) {
TEST_CHECK_GOTO(check_reverse_map_block_page(va_block0, base_dma_addr0, NULL) == NV_OK, error);
TEST_CHECK_GOTO(check_reverse_map_block_batch(va_block0, base_dma_addr0, NULL) == NV_OK, error);
TEST_CHECK_GOTO(check_reverse_map_block_page(va_block1, base_dma_addr1, NULL) == NV_OK, error);
TEST_CHECK_GOTO(check_reverse_map_block_batch(va_block1, base_dma_addr1, NULL) == NV_OK, error);
// Check both VA blocks at the same time
TEST_CHECK_GOTO(check_reverse_map_two_blocks_batch(g_base_dma_addr, va_block0, va_block1) == NV_OK, error);
error:
uvm_mutex_lock(&va_block1->lock);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, base_dma_addr1);
uvm_mutex_unlock(&va_block1->lock);
}
uvm_mutex_lock(&va_block0->lock);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, base_dma_addr0);
uvm_mutex_unlock(&va_block0->lock);
return status;
}
// This function registers a non-uniform distribution of chunks (mixing 4K and 64K chunks)
// and merges them back to verify that the logic is working.
static NV_STATUS test_pmm_sysmem_reverse_map_merge(uvm_va_space_t *va_space, NvU64 addr)
{
NV_STATUS status = NV_OK;
uvm_va_block_t *va_block;
const unsigned chunks_64k_pos[] =
{
16,
64,
96,
192,
208,
224,
288,
320,
384,
480
};
uvm_page_index_t page_index;
unsigned i;
if (PAGE_SIZE != UVM_PAGE_SIZE_4K)
return NV_OK;
status = uvm_va_block_find(va_space, addr, &va_block);
if (status != NV_OK)
return status;
TEST_CHECK_RET(uvm_va_block_size(va_block) == UVM_VA_BLOCK_SIZE);
page_index = 0;
for (i = 0; i < ARRAY_SIZE(chunks_64k_pos); ++i) {
// Fill with 4K mappings until the next 64K mapping
while (page_index < chunks_64k_pos[i]) {
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
g_base_dma_addr + page_index * PAGE_SIZE,
uvm_va_block_cpu_page_address(va_block, page_index),
PAGE_SIZE,
va_block,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(status == NV_OK);
++page_index;
}
// Register the 64K mapping
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
g_base_dma_addr + page_index * PAGE_SIZE,
uvm_va_block_cpu_page_address(va_block, page_index),
UVM_CHUNK_SIZE_64K,
va_block,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(status == NV_OK);
page_index += UVM_PAGE_SIZE_64K / PAGE_SIZE;
}
// Fill the tail with 4K mappings, too
while (page_index < PAGES_PER_UVM_VA_BLOCK) {
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
g_base_dma_addr + page_index * PAGE_SIZE,
uvm_va_block_cpu_page_address(va_block, page_index),
PAGE_SIZE,
va_block,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(status == NV_OK);
++page_index;
}
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, NULL) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, NULL) == NV_OK);
uvm_mutex_lock(&va_block->lock);
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&g_reverse_map,
g_base_dma_addr,
uvm_va_block_size(va_block));
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(check_reverse_map_block_page(va_block, g_base_dma_addr, NULL) == NV_OK);
TEST_CHECK_RET(check_reverse_map_block_batch(va_block, g_base_dma_addr, NULL) == NV_OK);
uvm_mutex_lock(&va_block->lock);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, g_base_dma_addr);
uvm_mutex_unlock(&va_block->lock);
return status;
}
static NV_STATUS test_pmm_sysmem_reverse_map_remove_on_eviction(uvm_va_space_t *va_space, NvU64 addr)
{
uvm_va_block_t *va_block;
NV_STATUS status = uvm_va_block_find(va_space, addr, &va_block);
if (status != NV_OK)
return status;
TEST_CHECK_RET(is_power_of_2(uvm_va_block_size(va_block)));
uvm_mutex_lock(&va_block->lock);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&g_reverse_map,
g_base_dma_addr,
addr,
uvm_va_block_size(va_block),
va_block,
UVM_ID_CPU);
uvm_mutex_unlock(&va_block->lock);
uvm_mutex_lock(&va_block->lock);
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&g_reverse_map, g_base_dma_addr);
uvm_mutex_unlock(&va_block->lock);
TEST_CHECK_RET(status == NV_OK);
uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(&g_reverse_map, g_base_dma_addr);
uvm_pmm_sysmem_mappings_remove_gpu_mapping_on_eviction(&g_reverse_map, g_base_dma_addr);
return NV_OK;
}
static NV_STATUS test_pmm_sysmem_reverse_map(uvm_va_space_t *va_space, NvU64 addr1, NvU64 addr2)
{
NV_STATUS status = NV_OK;
uvm_gpu_t *volta_gpu = NULL;
uvm_gpu_t *gpu;
// Find a GPU with support for access counters with physical address
// notifications, since that support is required to add and remove entries in
// the reverse map.
for_each_va_space_gpu(gpu, va_space) {
if (gpu->parent->access_counters_can_use_physical_addresses) {
// Initialize the reverse map.
status = uvm_pmm_sysmem_mappings_init(gpu, &g_reverse_map);
if (status != NV_OK)
return status;
volta_gpu = gpu;
break;
}
}
if (!volta_gpu)
return NV_ERR_INVALID_DEVICE;
status = test_pmm_sysmem_reverse_map_single_whole(va_space, addr1);
if (status == NV_OK)
status = test_pmm_sysmem_reverse_map_single_pattern(va_space, addr1);
if (status == NV_OK)
status = test_pmm_sysmem_reverse_map_many_blocks(va_space, addr2);
if (status == NV_OK)
status = test_pmm_sysmem_reverse_map_merge(va_space, addr1);
if (status == NV_OK)
status = test_pmm_sysmem_reverse_map_remove_on_eviction(va_space, addr1);
uvm_pmm_sysmem_mappings_deinit(&g_reverse_map);
return status;
}
NV_STATUS uvm_test_pmm_sysmem(UVM_TEST_PMM_SYSMEM_PARAMS *params, struct file *filp)
{
NV_STATUS status;
uvm_va_space_t *va_space;
va_space = uvm_va_space_get(filp);
// Take the global lock to avoid interference from different instances of
// the test, since we use a bunch of global variables.
uvm_mutex_lock(&g_uvm_global.global_lock);
uvm_va_space_down_write(va_space);
status = test_pmm_sysmem_reverse_map(va_space, params->range_address1, params->range_address2);
uvm_va_space_up_write(va_space);
uvm_mutex_unlock(&g_uvm_global.global_lock);
return status;
}
static NV_STATUS cpu_chunk_map_on_cpu(uvm_cpu_chunk_t *chunk, void **cpu_addr)
{
struct page **pages;


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -144,6 +144,9 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
}
if (gpu->parent->replayable_faults_supported) {
UVM_ASSERT(gpu->parent->isr.access_counters);
UVM_ASSERT(gpu->parent->access_counter_buffer);
uvm_parent_gpu_replayable_faults_isr_lock(gpu->parent);
status = uvm_test_verify_bh_affinity(&gpu->parent->isr.replayable_faults,
gpu->parent->closest_cpu_numa_node);
@ -161,10 +164,11 @@ static NV_STATUS uvm_test_numa_check_affinity(UVM_TEST_NUMA_CHECK_AFFINITY_PARAM
}
if (gpu->parent->access_counters_supported) {
uvm_parent_gpu_access_counters_isr_lock(gpu->parent);
status = uvm_test_verify_bh_affinity(&gpu->parent->isr.access_counters,
// We only need to test one notification buffer, we pick index 0.
uvm_access_counters_isr_lock(&gpu->parent->access_counter_buffer[0]);
status = uvm_test_verify_bh_affinity(&gpu->parent->isr.access_counters[0],
gpu->parent->closest_cpu_numa_node);
uvm_parent_gpu_access_counters_isr_unlock(gpu->parent);
uvm_access_counters_isr_unlock(&gpu->parent->access_counter_buffer[0]);
}
}
@ -311,7 +315,6 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_DISABLE_NVLINK_PEER_ACCESS, uvm_test_disable_nvlink_peer_access);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_GET_PAGE_THRASHING_POLICY, uvm_test_get_page_thrashing_policy);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SET_PAGE_THRASHING_POLICY, uvm_test_set_page_thrashing_policy);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_PMM_SYSMEM, uvm_test_pmm_sysmem);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_PMM_REVERSE_MAP, uvm_test_pmm_reverse_map);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_VA_SPACE_MM_RETAIN, uvm_test_va_space_mm_retain);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_PMM_CHUNK_WITH_ELEVATED_PAGE, uvm_test_pmm_chunk_with_elevated_page);
@ -350,6 +353,7 @@ long uvm_test_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_INJECT_TOOLS_EVENT_V2, uvm_test_inject_tools_event_v2);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_SET_P2P_SUSPENDED, uvm_test_set_p2p_suspended);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_INJECT_NVLINK_ERROR, uvm_test_inject_nvlink_error);
UVM_ROUTE_CMD_STACK_INIT_CHECK(UVM_TEST_QUERY_ACCESS_COUNTERS, uvm_test_query_access_counters);
}
return -EINVAL;


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -28,8 +28,7 @@
#include "uvm_common.h"
#include "uvm_test_ioctl.h"
// Unlike UVM_INFO_PRINT, this prints on release builds
#define UVM_TEST_PRINT(fmt, ...) UVM_PRINT_FUNC(pr_info, " " fmt, ##__VA_ARGS__)
#define UVM_TEST_PRINT UVM_ERR_PRINT_ALWAYS
// WARNING: This macro will return out of the current scope
#define TEST_CHECK_RET(cond) \
@ -160,30 +159,35 @@ NV_STATUS uvm_test_range_group_tree(UVM_TEST_RANGE_GROUP_TREE_PARAMS *params, st
NV_STATUS uvm_test_range_group_range_info(UVM_TEST_RANGE_GROUP_RANGE_INFO_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_range_group_range_count(UVM_TEST_RANGE_GROUP_RANGE_COUNT_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_get_prefetch_faults_reenable_lapse(UVM_TEST_GET_PREFETCH_FAULTS_REENABLE_LAPSE_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_set_prefetch_faults_reenable_lapse(UVM_TEST_SET_PREFETCH_FAULTS_REENABLE_LAPSE_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_get_prefetch_faults_reenable_lapse(UVM_TEST_GET_PREFETCH_FAULTS_REENABLE_LAPSE_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_set_prefetch_faults_reenable_lapse(UVM_TEST_SET_PREFETCH_FAULTS_REENABLE_LAPSE_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_check_channel_va_space(UVM_TEST_CHECK_CHANNEL_VA_SPACE_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_sysmem(UVM_TEST_PMM_SYSMEM_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_reverse_map(UVM_TEST_PMM_REVERSE_MAP_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_chunk_with_elevated_page(UVM_TEST_PMM_CHUNK_WITH_ELEVATED_PAGE_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_chunk_with_elevated_page(UVM_TEST_PMM_CHUNK_WITH_ELEVATED_PAGE_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_va_space_inject_error(UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_get_gpu_time(UVM_TEST_GET_GPU_TIME_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_release_free_root_chunks(UVM_TEST_PMM_RELEASE_FREE_ROOT_CHUNKS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_pmm_release_free_root_chunks(UVM_TEST_PMM_RELEASE_FREE_ROOT_CHUNKS_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_drain_replayable_faults(UVM_TEST_DRAIN_REPLAYABLE_FAULTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_va_space_add_dummy_thread_contexts(UVM_TEST_VA_SPACE_ADD_DUMMY_THREAD_CONTEXTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_va_space_remove_dummy_thread_contexts(UVM_TEST_VA_SPACE_REMOVE_DUMMY_THREAD_CONTEXTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_va_space_add_dummy_thread_contexts(UVM_TEST_VA_SPACE_ADD_DUMMY_THREAD_CONTEXTS_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_va_space_remove_dummy_thread_contexts(UVM_TEST_VA_SPACE_REMOVE_DUMMY_THREAD_CONTEXTS_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_thread_context_sanity(UVM_TEST_THREAD_CONTEXT_SANITY_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_thread_context_perf(UVM_TEST_THREAD_CONTEXT_PERF_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_tools_flush_replay_events(UVM_TEST_TOOLS_FLUSH_REPLAY_EVENTS_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_register_unload_state_buffer(UVM_TEST_REGISTER_UNLOAD_STATE_BUFFER_PARAMS *params,
struct file *filp);
NV_STATUS uvm_test_rb_tree_directed(UVM_TEST_RB_TREE_DIRECTED_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_rb_tree_random(UVM_TEST_RB_TREE_RANDOM_PARAMS *params, struct file *filp);
NV_STATUS uvm_test_sec2_sanity(UVM_TEST_SEC2_SANITY_PARAMS *params, struct file *filp);


@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVidia Corporation
Copyright (c) 2015-2025 NVidia Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -926,31 +926,38 @@ typedef struct
// Change configuration of access counters. This call will disable access
// counters and reenable them using the new configuration. All previous
// notifications will be lost
// notifications will be lost.
//
// The reconfiguration affects all VA spaces that rely on the access
// counters information for the same GPU. To avoid conflicting configurations,
// only one VA space is allowed to reconfigure the GPU at a time.
//
// When the VA space that performed the reconfiguration is destroyed, the
// bottom-half control settings are reset.
//
// Error returns:
// NV_ERR_INVALID_STATE
// - The GPU has already been reconfigured in a different VA space
// - The GPU has already been reconfigured in a different VA space.
#define UVM_TEST_RECONFIGURE_ACCESS_COUNTERS UVM_TEST_IOCTL_BASE(56)
typedef struct
{
NvProcessorUuid gpu_uuid; // In
// Type UVM_ACCESS_COUNTER_GRANULARITY from nv_uvm_types.h
NvU32 mimc_granularity; // In
NvU32 momc_granularity; // In
// Type UVM_ACCESS_COUNTER_USE_LIMIT from nv_uvm_types.h
NvU32 mimc_use_limit; // In
NvU32 momc_use_limit; // In
NvU32 granularity; // In
NvU32 threshold; // In
NvBool enable_mimc_migrations; // In
NvBool enable_momc_migrations; // In
NvBool enable_migrations; // In
// Settings to control how notifications are serviced by the access counters
// bottom-half. These settings help tests to exercise races in the driver,
// e.g., unregister a GPU while (valid) pending notifications remain in the
// notification buffer.
//
// A max_batch_size of 0 does not change the driver's behavior.
NvU32 max_batch_size; // In
NvBool one_iteration_per_batch; // In
NvU32 sleep_per_iteration_us; // In
NV_STATUS rmStatus; // Out
} UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS;
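
A hedged user-space sketch of the reworked parameter layout. It assumes the usual UVM test convention of passing the command value and the params struct to ioctl() on the UVM device file descriptor (requires <sys/ioctl.h>); the granularity and threshold values are placeholders rather than recommendations, and the function name is made up.

static int example_reconfigure_access_counters(int uvm_fd, const NvProcessorUuid *gpu_uuid)
{
    UVM_TEST_RECONFIGURE_ACCESS_COUNTERS_PARAMS params = {0};

    params.gpu_uuid = *gpu_uuid;
    params.granularity = 0;               // placeholder: a UVM_ACCESS_COUNTER_GRANULARITY value
    params.threshold = 256;               // example notification threshold
    params.enable_migrations = NV_TRUE;
    params.max_batch_size = 0;            // 0 keeps the driver's default batching
    params.one_iteration_per_batch = NV_FALSE;
    params.sleep_per_iteration_us = 0;

    if (ioctl(uvm_fd, UVM_TEST_RECONFIGURE_ACCESS_COUNTERS, &params) != 0)
        return -1;

    return params.rmStatus == NV_OK ? 0 : -1;
}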
@ -962,13 +969,6 @@ typedef enum
UVM_TEST_ACCESS_COUNTER_RESET_MODE_MAX
} UVM_TEST_ACCESS_COUNTER_RESET_MODE;
typedef enum
{
UVM_TEST_ACCESS_COUNTER_TYPE_MIMC = 0,
UVM_TEST_ACCESS_COUNTER_TYPE_MOMC,
UVM_TEST_ACCESS_COUNTER_TYPE_MAX
} UVM_TEST_ACCESS_COUNTER_TYPE;
// Clear the contents of the access counters. This call supports different
// modes for targeted/global resets.
#define UVM_TEST_RESET_ACCESS_COUNTERS UVM_TEST_IOCTL_BASE(57)
@ -979,9 +979,6 @@ typedef struct
// Type UVM_TEST_ACCESS_COUNTER_RESET_MODE
NvU32 mode; // In
// Type UVM_TEST_ACCESS_COUNTER_TYPE
NvU32 counter_type; // In
NvU32 bank; // In
NvU32 tag; // In
NV_STATUS rmStatus; // Out
@ -1061,14 +1058,6 @@ typedef struct
NV_STATUS rmStatus; // Out
} UVM_TEST_SET_PAGE_THRASHING_POLICY_PARAMS;
#define UVM_TEST_PMM_SYSMEM UVM_TEST_IOCTL_BASE(64)
typedef struct
{
NvU64 range_address1 NV_ALIGN_BYTES(8); // In
NvU64 range_address2 NV_ALIGN_BYTES(8); // In
NV_STATUS rmStatus; // Out
} UVM_TEST_PMM_SYSMEM_PARAMS;
#define UVM_TEST_PMM_REVERSE_MAP UVM_TEST_IOCTL_BASE(65)
typedef struct
{
@ -1142,18 +1131,46 @@ typedef struct
NV_STATUS rmStatus; // Out
} UVM_TEST_ACCESS_COUNTERS_ENABLED_BY_DEFAULT_PARAMS;
// Inject an error into the VA space
// Inject an error into the VA space or into a to-be registered GPU.
//
// If migrate_vma_allocation_fail_nth is greater than 0, the nth page
// allocation within migrate_vma will fail.
//
// If va_block_allocation_fail_nth is greater than 0, the nth call to
// uvm_va_block_find_create() will fail with NV_ERR_NO_MEMORY.
//
// If gpu_access_counters_alloc_buffer is set, the parent_gpu's access counters
// buffer allocation will fail with NV_ERR_NO_MEMORY.
//
// If gpu_access_counters_alloc_block_context is set, the access counters
// buffer's block_context allocation will fail with NV_ERR_NO_MEMORY.
//
// If gpu_isr_access_counters_alloc is set, the ISR access counters allocation
// will fail with NV_ERR_NO_MEMORY.
//
// If gpu_isr_access_counters_alloc_stats_cpu is set, the ISR access counters
// buffer's stats_cpu allocation will fail with NV_ERR_NO_MEMORY.
//
// If access_counters_batch_context_notifications is set, the access counters
// batch_context's notifications allocation will fail with NV_ERR_NO_MEMORY.
//
// If access_counters_batch_context_notification_cache is set, the access
// counters batch_context's notification cache allocation will fail with
// NV_ERR_NO_MEMORY.
//
// Note that only one of the gpu_* or access_counters_* settings can be selected
// at a time.
#define UVM_TEST_VA_SPACE_INJECT_ERROR UVM_TEST_IOCTL_BASE(72)
typedef struct
{
NvU32 migrate_vma_allocation_fail_nth; // In
NvU32 va_block_allocation_fail_nth; // In
NvBool gpu_access_counters_alloc_buffer; // In
NvBool gpu_access_counters_alloc_block_context; // In
NvBool gpu_isr_access_counters_alloc; // In
NvBool gpu_isr_access_counters_alloc_stats_cpu; // In
NvBool access_counters_batch_context_notifications; // In
NvBool access_counters_batch_context_notification_cache; // In
NV_STATUS rmStatus; // Out
} UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS;
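
For illustration, a sketch that arms a single injection point before the next GPU registration, honoring the one-flag-at-a-time rule noted above. The ioctl convention and file-descriptor handling are the same assumptions as in the previous sketch.

static int example_inject_access_counters_alloc_error(int uvm_fd)
{
    UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS params = {0};

    // Only one gpu_*/access_counters_* knob may be set per call: make the
    // parent GPU's access counters buffer allocation fail with
    // NV_ERR_NO_MEMORY on the next GPU registration.
    params.gpu_access_counters_alloc_buffer = NV_TRUE;

    if (ioctl(uvm_fd, UVM_TEST_VA_SPACE_INJECT_ERROR, &params) != 0)
        return -1;

    return params.rmStatus == NV_OK ? 0 : -1;
}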
@ -1505,6 +1522,16 @@ typedef struct
NV_STATUS rmStatus; // Out
} UVM_TEST_INJECT_NVLINK_ERROR_PARAMS;
#define UVM_TEST_QUERY_ACCESS_COUNTERS UVM_TEST_IOCTL_BASE(109)
typedef struct
{
NvProcessorUuid gpu_uuid; // In
NvU8 num_notification_buffers; // Out
NvU32 num_notification_entries; // Out
NV_STATUS rmStatus; // Out
} UVM_TEST_QUERY_ACCESS_COUNTERS_PARAMS;
#ifdef __cplusplus
}
#endif


@ -1305,8 +1305,7 @@ void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_c
void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
uvm_gpu_id_t gpu_id,
const uvm_access_counter_buffer_entry_t *buffer_entry,
bool on_managed_phys)
const uvm_access_counter_buffer_entry_t *buffer_entry)
{
uvm_down_read(&va_space->tools.lock);
@ -1318,18 +1317,10 @@ void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
info->eventType = UvmEventTypeTestAccessCounter;
info->srcIndex = uvm_parent_id_value_from_processor_id(gpu_id);
info->address = buffer_entry->address.address;
info->isVirtual = buffer_entry->address.is_virtual? 1: 0;
if (buffer_entry->address.is_virtual) {
info->instancePtr = buffer_entry->virtual_info.instance_ptr.address;
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
info->veId = buffer_entry->virtual_info.ve_id;
}
else {
info->aperture = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
}
info->isFromCpu = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0;
info->physOnManaged = on_managed_phys? 1 : 0;
info->address = buffer_entry->address;
info->instancePtr = buffer_entry->instance_ptr.address;
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->instance_ptr.aperture];
info->veId = buffer_entry->ve_id;
info->value = buffer_entry->counter_value;
info->subGranularity = buffer_entry->sub_granularity;
info->bank = buffer_entry->bank;
@ -1345,18 +1336,10 @@ void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
info->eventType = UvmEventTypeTestAccessCounter;
info->srcIndex = uvm_id_value(gpu_id);
info->address = buffer_entry->address.address;
info->isVirtual = buffer_entry->address.is_virtual? 1: 0;
if (buffer_entry->address.is_virtual) {
info->instancePtr = buffer_entry->virtual_info.instance_ptr.address;
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->virtual_info.instance_ptr.aperture];
info->veId = buffer_entry->virtual_info.ve_id;
}
else {
info->aperture = g_hal_to_tools_aperture_table[buffer_entry->address.aperture];
}
info->isFromCpu = buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MOMC? 1: 0;
info->physOnManaged = on_managed_phys? 1 : 0;
info->address = buffer_entry->address;
info->instancePtr = buffer_entry->instance_ptr.address;
info->instancePtrAperture = g_hal_to_tools_aperture_table[buffer_entry->instance_ptr.aperture];
info->veId = buffer_entry->ve_id;
info->value = buffer_entry->counter_value;
info->subGranularity = buffer_entry->sub_granularity;
info->bank = buffer_entry->bank;
@ -1368,18 +1351,13 @@ void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
uvm_up_read(&va_space->tools.lock);
}
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *buffer_entry,
bool on_managed_phys)
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu, const uvm_access_counter_buffer_entry_t *buffer_entry)
{
uvm_va_space_t *va_space;
uvm_down_read(&g_tools_va_space_list_lock);
list_for_each_entry(va_space, &g_tools_va_space_list, tools.node) {
uvm_tools_record_access_counter(va_space,
gpu->id,
buffer_entry,
on_managed_phys);
uvm_tools_record_access_counter(va_space, gpu->id, buffer_entry);
}
uvm_up_read(&g_tools_va_space_list_lock);
}

View File

@ -111,14 +111,11 @@ void uvm_tools_broadcast_replay(uvm_gpu_t *gpu, uvm_push_t *push, NvU32 batch_id
void uvm_tools_broadcast_replay_sync(uvm_gpu_t *gpu, NvU32 batch_id, uvm_fault_client_type_t client_type);
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu,
const uvm_access_counter_buffer_entry_t *buffer_entry,
bool on_managed_phys);
void uvm_tools_broadcast_access_counter(uvm_gpu_t *gpu, const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_tools_record_access_counter(uvm_va_space_t *va_space,
uvm_gpu_id_t gpu_id,
const uvm_access_counter_buffer_entry_t *buffer_entry,
bool on_managed_phys);
const uvm_access_counter_buffer_entry_t *buffer_entry);
void uvm_tools_test_hmm_split_invalidate(uvm_va_space_t *va_space);

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2017-2024 NVIDIA Corporation
Copyright (c) 2017-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -37,10 +37,10 @@ void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_turing_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Turing covers 128 TB and that's the minimum
@ -79,8 +79,6 @@ void uvm_hal_turing_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = false;
parent_gpu->fault_cancel_va_supported = true;
parent_gpu->scoped_atomics_supported = true;

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -25,42 +25,174 @@
#include "uvm_global.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "clc365.h"
static void clear_access_counter_notifications_interrupt(uvm_parent_gpu_t *parent_gpu)
typedef struct {
NvU8 bufferEntry[NVC365_NOTIFY_BUF_SIZE];
} access_counter_buffer_entry_c365_t;
void uvm_hal_turing_enable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters)
{
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->access_counter_buffer_info.rm_info.pHubIntr;
mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;
reg = access_counters->rm_info.pHubIntrEnSet;
mask = access_counters->rm_info.accessCounterMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
void uvm_hal_turing_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu)
static void clear_access_counter_notifications_interrupt(uvm_access_counter_buffer_t *access_counters)
{
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->access_counter_buffer_info.rm_info.pHubIntrEnClear;
mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;
reg = access_counters->rm_info.pHubIntr;
mask = access_counters->rm_info.accessCounterMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
void uvm_hal_turing_disable_access_counter_notifications(uvm_access_counter_buffer_t *access_counters)
{
volatile NvU32 *reg;
NvU32 mask;
reg = access_counters->rm_info.pHubIntrEnClear;
mask = access_counters->rm_info.accessCounterMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
wmb();
// See the comment in uvm_hal_turing_disable_replayable_faults
clear_access_counter_notifications_interrupt(parent_gpu);
clear_access_counter_notifications_interrupt(access_counters);
}
void uvm_hal_turing_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get)
void uvm_hal_turing_clear_access_counter_notifications(uvm_access_counter_buffer_t *access_counters, NvU32 get)
{
clear_access_counter_notifications_interrupt(parent_gpu);
clear_access_counter_notifications_interrupt(access_counters);
wmb();
// Write GET to force the re-evaluation of the interrupt condition after the
// interrupt bit has been cleared.
UVM_GPU_WRITE_ONCE(*parent_gpu->access_counter_buffer_info.rm_info.pAccessCntrBufferGet, get);
UVM_GPU_WRITE_ONCE(*access_counters->rm_info.pAccessCntrBufferGet, get);
}
NvU32 uvm_hal_turing_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu)
{
return NVC365_NOTIFY_BUF_SIZE;
}
static uvm_aperture_t get_access_counter_inst_aperture(NvU32 *access_counter_entry)
{
NvU32 hw_aperture_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_APERTURE);
switch (hw_aperture_value) {
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_VID_MEM:
return UVM_APERTURE_VID;
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_COHERENT:
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_NONCOHERENT:
return UVM_APERTURE_SYS;
}
UVM_ASSERT_MSG(false, "Invalid inst aperture value: %d\n", hw_aperture_value);
return UVM_APERTURE_MAX;
}
static NvU64 get_address(uvm_parent_gpu_t *parent_gpu, NvU32 *access_counter_entry)
{
NvU64 address;
NvU64 addr_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_HI);
NvU64 addr_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_LO);
NvU32 addr_type_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_TYPE);
UVM_ASSERT(addr_type_value == NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GVA);
address = addr_lo + (addr_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, ADDR_LO));
address = uvm_parent_gpu_canonical_address(parent_gpu, address);
return address;
}
static NvU32 *get_access_counter_buffer_entry(uvm_access_counter_buffer_t *access_counters, NvU32 index)
{
access_counter_buffer_entry_c365_t *buffer_start;
NvU32 *access_counter_entry;
UVM_ASSERT(index < access_counters->max_notifications);
buffer_start = (access_counter_buffer_entry_c365_t *)access_counters->rm_info.bufferAddress;
access_counter_entry = (NvU32 *)&buffer_start[index];
return access_counter_entry;
}
bool uvm_hal_turing_access_counter_buffer_entry_is_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index)
{
NvU32 *access_counter_entry;
bool is_valid;
access_counter_entry = get_access_counter_buffer_entry(access_counters, index);
is_valid = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID);
return is_valid;
}
void uvm_hal_turing_access_counter_buffer_entry_clear_valid(uvm_access_counter_buffer_t *access_counters, NvU32 index)
{
NvU32 *access_counter_entry;
access_counter_entry = get_access_counter_buffer_entry(access_counters, index);
WRITE_HWCONST_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID, FALSE);
}
void uvm_hal_turing_access_counter_buffer_parse_entry(uvm_access_counter_buffer_t *access_counters,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry)
{
NvU32 *access_counter_entry;
NvU64 inst_hi, inst_lo;
// Valid bit must be set before this function is called
UVM_ASSERT(uvm_hal_turing_access_counter_buffer_entry_is_valid(access_counters, index));
access_counter_entry = get_access_counter_buffer_entry(access_counters, index);
UVM_ASSERT(READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, TYPE) != NVC365_NOTIFY_BUF_ENTRY_TYPE_CPU);
buffer_entry->address = get_address(access_counters->parent_gpu, access_counter_entry);
inst_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_HI);
inst_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_LO);
buffer_entry->instance_ptr.address = inst_lo + (inst_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, INST_LO));
// HW value contains the 4K page number. Shift to build the full address
buffer_entry->instance_ptr.address <<= 12;
buffer_entry->instance_ptr.aperture = get_access_counter_inst_aperture(access_counter_entry);
buffer_entry->mmu_engine_id = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, MMU_ENGINE_ID);
buffer_entry->mmu_engine_type = UVM_MMU_ENGINE_TYPE_GRAPHICS;
// MMU engine id aligns with the fault buffer packets. Therefore, we reuse
// the helper to compute the VE ID from the fault buffer class.
buffer_entry->ve_id = access_counters->parent_gpu->fault_buffer_hal->get_ve_id(buffer_entry->mmu_engine_id,
buffer_entry->mmu_engine_type);
buffer_entry->counter_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, COUNTER_VAL);
buffer_entry->sub_granularity = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, SUB_GRANULARITY);
buffer_entry->bank = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, BANK);
buffer_entry->tag = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, NOTIFY_TAG);
// Automatically clear valid bit for the entry in the access counter buffer
uvm_hal_turing_access_counter_buffer_entry_clear_valid(access_counters, index);
}
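As a quick illustration of the HI:LO concatenation and the 4K-page shift performed above, here is a small standalone sketch; the 32-bit ADDR_LO width is only an example, since the real field widths come from the clc365.h HWSIZE_MW() values.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    const unsigned addr_lo_width = 32;   // example width only
    uint64_t addr_hi = 0x1;              // upper address bits from the entry
    uint64_t addr_lo = 0x12345000;       // lower address bits from the entry
    uint64_t inst_page = 0xABCDE;        // instance pointer as a 4K page number

    // Concatenate HI:LO exactly like get_address() does.
    uint64_t address = addr_lo + (addr_hi << addr_lo_width);

    // Shift the 4K page number by 12 to get a byte address, as in
    // uvm_hal_turing_access_counter_buffer_parse_entry().
    uint64_t instance_ptr = inst_page << 12;

    printf("address      = 0x%llx\n", (unsigned long long)address);      // 0x112345000
    printf("instance_ptr = 0x%llx\n", (unsigned long long)instance_ptr); // 0xabcde000
    return 0;
}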

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2021-2024 NVIDIA Corporation
Copyright (c) 2021-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -32,8 +32,8 @@ static void clear_replayable_faults_interrupt(uvm_parent_gpu_t *parent_gpu)
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->fault_buffer_info.rm_info.replayable.pPmcIntr;
mask = parent_gpu->fault_buffer_info.rm_info.replayable.replayableFaultMask;
reg = parent_gpu->fault_buffer.rm_info.replayable.pPmcIntr;
mask = parent_gpu->fault_buffer.rm_info.replayable.replayableFaultMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
@ -54,8 +54,8 @@ void uvm_hal_turing_disable_replayable_faults(uvm_parent_gpu_t *parent_gpu)
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->fault_buffer_info.rm_info.replayable.pPmcIntrEnClear;
mask = parent_gpu->fault_buffer_info.rm_info.replayable.replayableFaultMask;
reg = parent_gpu->fault_buffer.rm_info.replayable.pPmcIntrEnClear;
mask = parent_gpu->fault_buffer.rm_info.replayable.replayableFaultMask;
UVM_GPU_WRITE_ONCE(*reg, mask);

View File

@ -361,3 +361,24 @@ void uvm_hal_turing_host_tlb_invalidate_test(uvm_push_t *push,
if (params->membar == UvmInvalidateTlbMemBarLocal)
uvm_push_get_gpu(push)->parent->host_hal->membar_gpu(push);
}
void uvm_hal_turing_access_counter_clear_all(uvm_push_t *push)
{
NV_PUSH_4U(C46F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, 0,
MEM_OP_D, HWCONST(C46F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
HWCONST(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, ALL));
}
void uvm_hal_turing_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry)
{
NV_PUSH_4U(C46F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, HWVALUE(C46F, MEM_OP_C, ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG, buffer_entry->tag),
MEM_OP_D, HWCONST(C46F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
HWCONST(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, TARGETED) |
HWCONST(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_TYPE, MIMC) |
HWVALUE(C46F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_BANK, buffer_entry->bank));
}
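A hypothetical sketch of how these clear methods might be driven from a push; the channel setup and the host_hal hook names shown here follow the usual UVM pattern but are assumptions, not part of this change.

// Sketch only: assumes access_counter_clear_all is reachable through the
// host HAL and that a MEMOPS channel push is appropriate for MEM_OP methods.
static NV_STATUS example_clear_all_access_counters(uvm_gpu_t *gpu)
{
    uvm_push_t push;
    NV_STATUS status;

    status = uvm_push_begin(gpu->channel_manager,
                            UVM_CHANNEL_TYPE_MEMOPS,
                            &push,
                            "Clear all access counters");
    if (status != NV_OK)
        return status;

    // Emits the ACCESS_COUNTER_CLR/ALL MEM_OP shown above.
    gpu->parent->host_hal->access_counter_clear_all(&push);

    return uvm_push_end_and_wait(&push);
}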

View File

@ -1323,14 +1323,11 @@ typedef struct
NvU8 aperture;
NvU8 instancePtrAperture;
NvU8 isVirtual;
NvU8 isFromCpu;
NvU8 padding8bits;
NvU8 veId;
// The physical access counter notification was triggered on a managed
// memory region. This is not set for virtual access counter notifications.
NvU8 physOnManaged;
NvU16 padding16bits;
NvU32 value;
NvU32 subGranularity;
@ -1348,26 +1345,21 @@ typedef struct
// data in a queue.
//
NvU8 eventType;
// See uvm_access_counter_buffer_entry_t for details
NvU8 aperture;
NvU8 instancePtrAperture;
NvU8 isVirtual;
NvU8 isFromCpu;
NvU8 veId;
// The physical access counter notification was triggered on a managed
// memory region. This is not set for virtual access counter notifications.
NvU8 physOnManaged;
//
// This structure is shared between UVM kernel and tools.
// Manually padding the structure so that compiler options like pragma pack
// or malign-double will have no effect on the field offsets
//
NvU8 padding8bits;
NvU16 padding16bits;
NvU16 srcIndex; // index of the gpu that received the access counter
// notification
NvU16 padding16bits;
NvU32 value;
NvU32 subGranularity;
NvU32 tag;

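The manual-padding note above is easiest to see with a small, purely hypothetical struct: every field is explicitly sized, explicit padding keeps alignment stable, and compile-time checks catch any layout drift between the kernel and tools builds.

#include <stddef.h>
#include <stdint.h>

// Hypothetical event record, not the real UvmEventTestAccessCounterInfo.
typedef struct {
    uint8_t  eventType;
    uint8_t  aperture;
    uint8_t  instancePtrAperture;
    uint8_t  veId;
    uint16_t srcIndex;
    uint16_t padding16bits;   // keeps the next 32-bit field naturally aligned
    uint32_t value;
} example_event_t;

// Both sides can verify the shared layout at compile time.
_Static_assert(offsetof(example_event_t, srcIndex) == 4, "unexpected srcIndex offset");
_Static_assert(sizeof(example_event_t) == 12, "unexpected struct size");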
View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -664,10 +664,11 @@ static void uvm_va_block_cpu_clear_resident_region(uvm_va_block_t *va_block, int
block_update_cpu_resident_mask(va_block);
}
// Clear residency bits from any/all processors that might have had pages resident.
// Note that both the destination processor and any CPU NUMA nodes where pages are
// migrating to need to be skipped as the block logic sets the new page residency
// before clearing the old ones (see uvm_va_block_make_resident_finish()).
// Clear residency bits from any/all processors that might have had pages
// resident. Note that both the destination processor and any CPU NUMA nodes
// to which pages are migrating need to be skipped, as the block logic sets
// the new page residency before clearing the old ones
// (see uvm_va_block_make_resident_finish()).
static void uvm_va_block_cpu_clear_resident_all_chunks(uvm_va_block_t *va_block,
uvm_va_block_context_t *va_block_context,
uvm_page_mask_t *page_mask)
@ -1328,40 +1329,18 @@ static void cpu_chunk_remove_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk, uvm_gpu_
if (gpu_mapping_addr == 0)
return;
uvm_pmm_sysmem_mappings_remove_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings, gpu_mapping_addr);
uvm_cpu_chunk_unmap_gpu(chunk, gpu);
}
static NV_STATUS cpu_chunk_add_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk,
uvm_va_block_t *block,
uvm_page_index_t page_index,
uvm_gpu_t *gpu)
static NV_STATUS cpu_chunk_add_sysmem_gpu_mapping(uvm_cpu_chunk_t *chunk, uvm_gpu_t *gpu)
{
NV_STATUS status;
uvm_chunk_size_t chunk_size;
// When the Confidential Computing feature is enabled the transfers don't
// use the DMA mapping of CPU chunks (since it's protected memory), but
// the DMA address of the unprotected dma buffer.
if (g_uvm_global.conf_computing_enabled)
return NV_OK;
status = uvm_cpu_chunk_map_gpu(chunk, gpu);
if (status != NV_OK)
return status;
chunk_size = uvm_cpu_chunk_get_size(chunk);
status = uvm_pmm_sysmem_mappings_add_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings,
uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu),
uvm_va_block_cpu_page_address(block, page_index),
chunk_size,
block,
UVM_ID_CPU);
if (status != NV_OK)
uvm_cpu_chunk_unmap_gpu(chunk, gpu);
return status;
return uvm_cpu_chunk_map_gpu(chunk, gpu);
}
static void block_gpu_unmap_phys_all_cpu_pages(uvm_va_block_t *block, uvm_gpu_t *gpu)
@ -1393,7 +1372,7 @@ static NV_STATUS block_gpu_map_phys_all_cpu_pages(uvm_va_block_t *block, uvm_gpu
uvm_id_value(gpu->id),
uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu));
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, block, page_index, gpu);
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, gpu);
if (status != NV_OK)
goto error;
}
@ -1468,14 +1447,10 @@ void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *block,
}
}
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *block,
uvm_cpu_chunk_t *chunk,
uvm_page_index_t page_index)
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *block, uvm_cpu_chunk_t *chunk)
{
NV_STATUS status;
uvm_gpu_id_t id;
uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(chunk);
uvm_va_block_region_t chunk_region = uvm_va_block_chunk_region(block, chunk_size, page_index);
// We can't iterate over va_space->registered_gpus because we might be
// on the eviction path, which does not have the VA space lock held. We have
@ -1489,7 +1464,7 @@ NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *block,
continue;
gpu = uvm_gpu_get(id);
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, block, chunk_region.first, gpu);
status = cpu_chunk_add_sysmem_gpu_mapping(chunk, gpu);
if (status != NV_OK)
goto error;
}
@ -1756,7 +1731,7 @@ static NV_STATUS block_populate_overlapping_cpu_chunks(uvm_va_block_t *block,
// before mapping.
chunk_ptr = split_chunks[i];
split_chunks[i] = NULL;
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk_ptr, running_page_index);
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk_ptr);
if (status != NV_OK)
goto done;
}
@ -1793,7 +1768,7 @@ static NV_STATUS block_populate_overlapping_cpu_chunks(uvm_va_block_t *block,
// before mapping.
chunk_ptr = small_chunks[j];
small_chunks[j] = NULL;
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk_ptr, running_page_index);
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk_ptr);
if (status != NV_OK)
goto done;
}
@ -1860,7 +1835,7 @@ static NV_STATUS block_add_cpu_chunk(uvm_va_block_t *block,
if (status != NV_OK)
goto out;
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk, page_index);
status = uvm_va_block_map_cpu_chunk_on_gpus(block, chunk);
if (status != NV_OK) {
uvm_cpu_chunk_remove_from_block(block, uvm_cpu_chunk_get_numa_node(chunk), page_index);
goto out;
@ -3155,8 +3130,8 @@ static NV_STATUS block_populate_pages(uvm_va_block_t *block,
uvm_page_mask_or(pages_staged, pages_staged, scratch_page_mask);
}
// 2. Remove any pages in pages_staged that are on any resident processor
// dest_id can copy from.
// 2. Remove any pages in pages_staged that are on any resident
// processor dest_id can copy from.
if (uvm_processor_mask_and(tmp_processor_mask, can_copy_from_processors, &block->resident)) {
for_each_id_in_mask(id, tmp_processor_mask) {
id_resident_mask = uvm_va_block_resident_mask_get(block, id, NUMA_NO_NODE);
@ -3210,14 +3185,21 @@ static uvm_gpu_chunk_t *block_phys_page_chunk(uvm_va_block_t *block, block_phys_
return chunk;
}
typedef enum {
REMOTE_EGM_ALLOWED = 0,
REMOTE_EGM_NOT_ALLOWED = 1,
} remote_egm_mode_t;
// Get the physical GPU address of a block's page from the POV of the specified
// GPU. This is the address that should be used for making PTEs for the
// specified GPU.
static uvm_gpu_phys_address_t block_phys_page_address(uvm_va_block_t *block,
block_phys_page_t block_page,
uvm_gpu_t *gpu)
uvm_gpu_t *gpu,
remote_egm_mode_t egm_mode)
{
uvm_va_block_gpu_state_t *accessing_gpu_state = uvm_va_block_gpu_state_get(block, gpu->id);
bool allow_remote_egm = egm_mode == REMOTE_EGM_ALLOWED;
size_t chunk_offset;
uvm_gpu_chunk_t *chunk;
@ -3231,7 +3213,7 @@ static uvm_gpu_phys_address_t block_phys_page_address(uvm_va_block_t *block,
uvm_va_space_t *va_space = uvm_va_block_get_va_space(block);
uvm_parent_gpu_t *routing_gpu = uvm_va_space_get_egm_routing_gpu(va_space, gpu, block_page.nid);
if (routing_gpu) {
if (routing_gpu && (allow_remote_egm || routing_gpu == gpu->parent)) {
struct page *page = uvm_cpu_chunk_get_cpu_page(block, chunk, block_page.page_index);
phys_addr = page_to_phys(page);
@ -3296,9 +3278,14 @@ static uvm_gpu_address_t block_phys_page_copy_address(uvm_va_block_t *block,
// CPU and local GPU accesses can rely on block_phys_page_address, but the
// resulting physical address may need to be converted into virtual.
if (UVM_ID_IS_CPU(block_page.processor) || uvm_id_equal(block_page.processor, gpu->id)) {
uvm_gpu_phys_address_t phys_addr = block_phys_page_address(block, block_page, gpu);
// Do not use remote EGM addresses internally until
// NVLINK STO handling is updated to handle EGM.
// TODO: Bug: 5068688 [UVM] Detect STO and prevent data leaks
// when accessing EGM memory
// TODO: Bug: 5007527 [UVM] Extend STO recovery to EGM enabled
// systems
uvm_gpu_phys_address_t phys_addr = block_phys_page_address(block, block_page, gpu, REMOTE_EGM_NOT_ALLOWED);
// EGM mappings use physical addresses with a PEER aperture.
if (uvm_aperture_is_peer(phys_addr.aperture)) {
UVM_ASSERT(block_check_egm_peer(uvm_va_block_get_va_space(block), gpu, block_page.nid, phys_addr));
return uvm_gpu_address_from_phys(phys_addr);
@ -3334,7 +3321,7 @@ uvm_gpu_phys_address_t uvm_va_block_res_phys_page_address(uvm_va_block_t *va_blo
UVM_ASSERT(nid != NUMA_NO_NODE);
}
return block_phys_page_address(va_block, block_phys_page(residency, nid, page_index), gpu);
return block_phys_page_address(va_block, block_phys_page(residency, nid, page_index), gpu, REMOTE_EGM_ALLOWED);
}
uvm_gpu_phys_address_t uvm_va_block_gpu_phys_page_address(uvm_va_block_t *va_block,
@ -3949,9 +3936,9 @@ static NV_STATUS block_copy_pages(uvm_va_block_t *va_block,
UVM_ASSERT(uvm_cpu_chunk_get_size(src_chunk) >= uvm_va_block_region_size(region));
UVM_ASSERT(uvm_va_block_region_size(region) <= uvm_cpu_chunk_get_size(dst_chunk));
// CPU-to-CPU copies using memcpy() don't have any inherent ordering with
// copies using GPU CEs. So, we have to make sure that all previously
// submitted work is complete.
// CPU-to-CPU copies using memcpy() don't have any inherent ordering
// with copies using GPU CEs. So, we have to make sure that all
// previously submitted work is complete.
status = uvm_tracker_wait(&va_block->tracker);
if (status != NV_OK)
return status;
@ -4204,9 +4191,9 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
uvm_processor_mask_set(&block_context->make_resident.all_involved_processors, copying_gpu->id);
// This function is called just once per VA block and needs to
// receive the "main" cause for the migration (it mainly checks if
// we are in the eviction path). Therefore, we pass cause instead
// of contig_cause
// receive the "main" cause for the migration (it mainly checks
// if we are in the eviction path). Therefore, we pass cause
// instead of contig_cause.
uvm_tools_record_block_migration_begin(block,
&push,
dst_id,
@ -4233,8 +4220,8 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
contig_cause = page_cause;
if (block_copy_should_use_push(block, &copy_state)) {
// When CC is enabled, transfers between GPU and CPU don't rely on
// any GPU mapping of CPU chunks, physical or virtual.
// When CC is enabled, transfers between GPU and CPU don't rely
// on any GPU mapping of CPU chunks, physical or virtual.
if (UVM_ID_IS_CPU(src_id) && g_uvm_global.conf_computing_enabled)
can_cache_src_phys_addr = false;
@ -4244,8 +4231,8 @@ static NV_STATUS block_copy_resident_pages_between(uvm_va_block_t *block,
// Computing the physical address is a non-trivial operation and
// seems to be a performance limiter on systems with 2 or more
// NVLINK links. Therefore, for physically-contiguous block
// storage, we cache the start address and compute the page address
// using the page index.
// storage, we cache the start address and compute the page
// address using the page index.
if (can_cache_src_phys_addr) {
copy_state.src.gpu_address = block_phys_page_copy_address(block,
block_phys_page(src_id,
@ -5187,12 +5174,13 @@ NV_STATUS uvm_va_block_make_resident_read_duplicate(uvm_va_block_t *va_block,
if (!scratch_residency_mask)
return NV_ERR_NO_MEMORY;
// We cannot read-duplicate on different CPU NUMA nodes since there is only one
// CPU page table. So, the page has to migrate from the source NUMA node to the
// destination one.
// We cannot read-duplicate on different CPU NUMA nodes since there is only
// one CPU page table. So, the page has to migrate from the source NUMA node
// to the destination one.
// In order to correctly map pages on the destination NUMA node, all pages
// resident on other NUMA nodes have to be unmapped. Otherwise, their WRITE
// permission will be revoked but they'll remain mapped on the source NUMA node.
// permission will be revoked but they'll remain mapped on the source NUMA
// node.
if (uvm_processor_mask_test(&va_block->resident, UVM_ID_CPU) &&
UVM_ID_IS_CPU(va_block_context->make_resident.dest_id)) {
uvm_page_mask_t *dest_nid_resident = uvm_va_block_resident_mask_get(va_block,
@ -5623,7 +5611,8 @@ static bool block_check_mappings_page(uvm_va_block_t *block,
}
// atomic mappings from GPUs with disabled system-wide atomics are treated
// as write mappings. Therefore, we remove them from the atomic mappings mask
// as write mappings. Therefore, we remove them from the atomic mappings
// mask
uvm_processor_mask_and(atomic_mappings, atomic_mappings, &va_space->system_wide_atomics_enabled_processors);
if (!uvm_processor_mask_empty(read_mappings)) {
@ -5696,7 +5685,8 @@ static bool block_check_mappings_page(uvm_va_block_t *block,
*residency_has_native_atomics->bitmap,
*va_space->system_wide_atomics_enabled_processors.bitmap);
// Only one processor outside of the native group can have atomics enabled
// Only one processor outside of the native group can have atomics
// enabled
UVM_ASSERT_MSG(uvm_processor_mask_get_count(atomic_mappings) == 1,
"Too many atomics mappings to %s from processors with non-native atomics\n"
"Resident: 0x%lx - Mappings R: 0x%lx W: 0x%lx A: 0x%lx -"
@ -5714,9 +5704,9 @@ static bool block_check_mappings_page(uvm_va_block_t *block,
non_native_atomics = &mapping_masks->non_native_atomics;
// One or more processors within the native group have atomics enabled.
// All processors outside of that group may have write but not atomic
// permissions.
// One or more processors within the native group have atomics
// enabled. All processors outside of that group may have write but
// not atomic permissions.
uvm_processor_mask_andnot(non_native_atomics, atomic_mappings, residency_has_native_atomics);
UVM_ASSERT_MSG(uvm_processor_mask_empty(non_native_atomics),
@ -6143,7 +6133,10 @@ static void block_gpu_pte_write_4k(uvm_va_block_t *block,
if (page_index >= contig_region.outer || nid != contig_nid) {
contig_region = block_phys_contig_region(block, page_index, resident_id, nid);
contig_addr = block_phys_page_address(block, block_phys_page(resident_id, nid, contig_region.first), gpu);
contig_addr = block_phys_page_address(block,
block_phys_page(resident_id, nid, contig_region.first),
gpu,
REMOTE_EGM_ALLOWED);
page_addr = contig_addr;
contig_nid = nid;
}
@ -6368,7 +6361,10 @@ static void block_gpu_pte_write_big(uvm_va_block_t *block,
if (big_region.first >= contig_region.outer || nid != contig_nid) {
contig_region = block_phys_contig_region(block, big_region.first, resident_id, nid);
contig_addr = block_phys_page_address(block, block_phys_page(resident_id, nid, contig_region.first), gpu);
contig_addr = block_phys_page_address(block,
block_phys_page(resident_id, nid, contig_region.first),
gpu,
REMOTE_EGM_ALLOWED);
page_addr = contig_addr;
contig_nid = nid;
}
@ -6520,7 +6516,7 @@ static void block_gpu_pte_write_2m(uvm_va_block_t *block,
block_mark_cpu_page_dirty(block, 0, nid);
}
page_addr = block_phys_page_address(block, block_phys_page(resident_id, nid, 0), gpu);
page_addr = block_phys_page_address(block, block_phys_page(resident_id, nid, 0), gpu, REMOTE_EGM_ALLOWED);
pte_val = tree->hal->make_pte(page_addr.aperture, page_addr.address, new_prot, pte_flags);
uvm_pte_batch_write_pte(pte_batch, pte_addr, pte_val, pte_size);
@ -10037,16 +10033,8 @@ static NV_STATUS block_split_cpu_chunk_one(uvm_va_block_t *block, uvm_page_index
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(block, nid, page_index);
uvm_chunk_size_t chunk_size = uvm_cpu_chunk_get_size(chunk);
uvm_chunk_size_t new_size;
uvm_gpu_t *gpu;
NvU64 gpu_mapping_addr;
uvm_processor_mask_t *gpu_split_mask;
uvm_gpu_id_t id;
NV_STATUS status;
gpu_split_mask = uvm_processor_mask_cache_alloc();
if (!gpu_split_mask)
return NV_ERR_NO_MEMORY;
if (chunk_size == UVM_CHUNK_SIZE_2M)
new_size = UVM_CHUNK_SIZE_64K;
else
@ -10054,45 +10042,11 @@ static NV_STATUS block_split_cpu_chunk_one(uvm_va_block_t *block, uvm_page_index
UVM_ASSERT(IS_ALIGNED(chunk_size, new_size));
uvm_processor_mask_zero(gpu_split_mask);
for_each_gpu_id(id) {
if (!uvm_va_block_gpu_state_get(block, id))
continue;
gpu = uvm_gpu_get(id);
// If the parent chunk has not been mapped, there is nothing to split.
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
if (gpu_mapping_addr == 0)
continue;
status = uvm_pmm_sysmem_mappings_split_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
gpu_mapping_addr,
new_size);
if (status != NV_OK)
goto merge;
uvm_processor_mask_set(gpu_split_mask, id);
}
if (new_size == UVM_CHUNK_SIZE_64K)
status = block_split_cpu_chunk_to_64k(block, nid);
else
status = block_split_cpu_chunk_to_4k(block, page_index, nid);
if (status != NV_OK) {
merge:
for_each_gpu_id_in_mask(id, gpu_split_mask) {
gpu = uvm_gpu_get(id);
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
gpu_mapping_addr,
chunk_size);
}
}
uvm_processor_mask_cache_free(gpu_split_mask);
return status;
}
@ -10109,8 +10063,8 @@ static NV_STATUS block_prealloc_cpu_chunk_storage(uvm_va_block_t *existing, uvm_
UVM_ASSERT(uvm_cpu_storage_get_type(node_state) == UVM_CPU_CHUNK_STORAGE_MIXED);
existing_mixed = uvm_cpu_storage_get_ptr(node_state);
// Pre-allocate chunk storage for the new block. By definition, the new block
// will contain either 64K and/or 4K chunks.
// Pre-allocate chunk storage for the new block. By definition, the new
// block will contain 64K and/or 4K chunks.
//
// We do this here so there are no failures in block_split_cpu().
new_mixed = uvm_kvmalloc_zero(sizeof(*new_mixed));
@ -10182,8 +10136,8 @@ static NV_STATUS block_presplit_cpu_chunks(uvm_va_block_t *existing, uvm_va_bloc
for_each_possible_uvm_node(nid) {
splitting_chunk = uvm_cpu_chunk_get_chunk_for_page(existing, nid, page_index);
// If the page covering the split point has not been populated, there is no
// need to split.
// If the page covering the split point has not been populated, there is
// no need to split.
if (!splitting_chunk)
continue;
@ -10247,7 +10201,6 @@ static void block_merge_cpu_chunks_to_2m(uvm_va_block_t *block, uvm_page_index_t
static void block_merge_cpu_chunks_one(uvm_va_block_t *block, uvm_page_index_t page_index, int nid)
{
uvm_cpu_chunk_t *chunk = uvm_cpu_chunk_get_chunk_for_page(block, nid, page_index);
uvm_gpu_id_t id;
if (!chunk)
return;
@ -10259,25 +10212,6 @@ static void block_merge_cpu_chunks_one(uvm_va_block_t *block, uvm_page_index_t p
UVM_ASSERT(uvm_cpu_chunk_get_size(chunk) == UVM_CHUNK_SIZE_64K);
block_merge_cpu_chunks_to_2m(block, page_index, nid);
}
chunk = uvm_cpu_chunk_get_chunk_for_page(block, nid, page_index);
for_each_gpu_id(id) {
NvU64 gpu_mapping_addr;
uvm_gpu_t *gpu;
if (!uvm_va_block_gpu_state_get(block, id))
continue;
gpu = uvm_gpu_get(id);
gpu_mapping_addr = uvm_cpu_chunk_get_gpu_phys_addr(chunk, gpu);
if (gpu_mapping_addr == 0)
continue;
uvm_pmm_sysmem_mappings_merge_gpu_mappings(&gpu->pmm_reverse_sysmem_mappings,
gpu_mapping_addr,
uvm_cpu_chunk_get_size(chunk));
}
}
static void block_merge_cpu_chunks(uvm_va_block_t *existing, uvm_va_block_t *new)
@ -10695,9 +10629,6 @@ static void block_split_gpu(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_g
size_t new_pages = uvm_va_block_num_cpu_pages(new);
size_t existing_pages, existing_pages_4k, existing_pages_big, new_pages_big;
uvm_pte_bits_gpu_t pte_bit;
uvm_cpu_chunk_t *cpu_chunk;
uvm_page_index_t page_index;
int nid;
if (!existing_gpu_state)
return;
@ -10711,14 +10642,6 @@ static void block_split_gpu(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_g
UVM_ASSERT(PAGE_ALIGNED(existing->start));
existing_pages = (new->start - existing->start) / PAGE_SIZE;
for_each_possible_uvm_node(nid) {
for_each_cpu_chunk_in_block(cpu_chunk, page_index, new, nid) {
uvm_pmm_sysmem_mappings_reparent_gpu_mapping(&gpu->pmm_reverse_sysmem_mappings,
uvm_cpu_chunk_get_gpu_phys_addr(cpu_chunk, gpu),
new);
}
}
block_copy_split_gpu_chunks(existing, new, gpu);
block_split_page_mask(&existing_gpu_state->resident,
@ -10727,8 +10650,10 @@ static void block_split_gpu(uvm_va_block_t *existing, uvm_va_block_t *new, uvm_g
new_pages);
for (pte_bit = 0; pte_bit < UVM_PTE_BITS_GPU_MAX; pte_bit++) {
block_split_page_mask(&existing_gpu_state->pte_bits[pte_bit], existing_pages,
&new_gpu_state->pte_bits[pte_bit], new_pages);
block_split_page_mask(&existing_gpu_state->pte_bits[pte_bit],
existing_pages,
&new_gpu_state->pte_bits[pte_bit],
new_pages);
}
// Adjust page table ranges.
@ -11113,7 +11038,8 @@ static NV_STATUS do_block_add_mappings_after_migration(uvm_va_block_t *va_block,
bool map_processor_has_enabled_system_wide_atomics =
uvm_processor_mask_test(&va_space->system_wide_atomics_enabled_processors, map_processor_id);
// Write mappings from processors with disabled system-wide atomics are treated like atomics
// Write mappings from processors with disabled system-wide atomics are
// treated like atomics
if (new_map_prot == UVM_PROT_READ_WRITE && !map_processor_has_enabled_system_wide_atomics)
final_map_prot = UVM_PROT_READ_WRITE_ATOMIC;
else
@ -11346,14 +11272,17 @@ uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block
block_page_authorized_processors(va_block, page_index, UVM_PROT_READ_WRITE_ATOMIC, atomic_mappings);
// Exclude processors with system-wide atomics disabled from atomic_mappings
// Exclude processors with system-wide atomics disabled from
// atomic_mappings
uvm_processor_mask_and(atomic_mappings, atomic_mappings, &va_space->system_wide_atomics_enabled_processors);
// Exclude the processor for which the mapping protections are being computed
// Exclude the processor for which the mapping protections are being
// computed
uvm_processor_mask_clear(atomic_mappings, processor_id);
// If there is any processor with atomic mapping, check if it has native atomics to the processor
// with the resident copy. If it does not, we can only map READ ONLY
// If there is any processor with atomic mapping, check if it has native
// atomics to the processor with the resident copy. If it does not, we
// can only map READ ONLY
atomic_id = uvm_processor_mask_find_first_id(atomic_mappings);
if (UVM_ID_IS_VALID(atomic_id) &&
!uvm_processor_mask_test(&va_space->has_native_atomics[uvm_id_value(residency)], atomic_id)) {
@ -11364,7 +11293,8 @@ uvm_prot_t uvm_va_block_page_compute_highest_permission(uvm_va_block_t *va_block
block_page_authorized_processors(va_block, page_index, UVM_PROT_READ_WRITE, write_mappings);
// Exclude the processor for which the mapping protections are being computed
// Exclude the processor for which the mapping protections are being
// computed
uvm_processor_mask_clear(write_mappings, processor_id);
// At this point, any processor with atomic mappings either has native
@ -11639,31 +11569,32 @@ static uvm_processor_id_t block_select_processor_residency(uvm_va_block_t *va_bl
uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(preferred_location)], processor_id))
return preferred_location;
// Check if we should map the closest resident processor remotely on remote CPU fault
// Check if we should map the closest resident processor remotely on remote
// CPU fault
//
// When faulting on CPU, there's a linux process on behalf of it, which is associated
// with a unique VM pointed by current->mm. A block of memory residing on GPU is also
// associated with VM, pointed by va_block_context->mm. If they match, it's a regular
// (local) fault, and we may want to migrate a page from GPU to CPU.
// If it's a 'remote' fault, i.e. linux process differs from one associated with block
// VM, we might preserve residence.
// When faulting on the CPU, the fault is handled on behalf of a Linux
// process associated with a unique VM pointed to by current->mm. A block of
// memory residing on a GPU is also associated with a VM, pointed to by
// va_block_context->mm. If they match, it's a regular (local) fault, and we
// may want to migrate a page from the GPU to the CPU. If it's a 'remote'
// fault, i.e., the Linux process differs from the one associated with the
// block's VM, we might preserve residency.
//
// Establishing a remote fault without access counters means the memory could stay in
// the wrong spot for a long time, which is why we prefer to avoid creating remote
// mappings. However when NIC accesses a memory residing on GPU, it's worth to keep it
// in place for NIC accesses.
// Establishing a remote fault without access counters means the memory
// could stay in the wrong spot for a long time, which is why we prefer to
// avoid creating remote mappings. However, when a NIC accesses memory
// residing on a GPU, it's worth keeping it in place for NIC accesses.
//
// The logic that's used to detect remote faulting also keeps memory in place for
// ptrace accesses. We would prefer to control those policies separately, but the
// NIC case takes priority.
// If the accessing processor is CPU, we're either handling a fault
// from other than owning process, or we're handling an MOMC
// notification. Only prevent migration for the former.
// The logic that's used to detect remote faulting also keeps memory in
// place for ptrace accesses. We would prefer to control those policies
// separately, but the NIC case takes priority. If the accessing processor
// is the CPU and we're handling a fault from a process other than the
// owning one, we want to prevent a migration.
if (UVM_ID_IS_CPU(processor_id) &&
operation != UVM_SERVICE_OPERATION_ACCESS_COUNTERS &&
uvm_processor_mask_test(&va_space->accessible_from[uvm_id_value(closest_resident_processor)], processor_id) &&
va_block_context->mm != current->mm) {
UVM_ASSERT(va_block_context->mm != NULL);
UVM_ASSERT(operation != UVM_SERVICE_OPERATION_ACCESS_COUNTERS);
return closest_resident_processor;
}
@ -11693,7 +11624,8 @@ static int block_select_node_residency(uvm_va_block_t *va_block,
// For HMM allocations UVM doesn't always control allocation of the
// destination page as the kernel may have already allocated one. Therefore
// we can't respect the preferred node ID for HMM pages.
// TODO: Bug 4453874: [UVM-HMM] Respect the preferred CPU NUMA Node ID when making a HMM page resident
// TODO: Bug 4453874: [UVM-HMM] Respect the preferred CPU NUMA Node ID when
// making a HMM page resident
if (uvm_va_block_is_hmm(va_block))
return NUMA_NO_NODE;
@ -11867,9 +11799,12 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
break;
case UVM_SERVICE_OPERATION_ACCESS_COUNTERS:
cause = UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER;
service_context->block_context->make_resident.access_counters_buffer_index =
service_context->access_counters_buffer_index;
break;
default:
UVM_ASSERT_MSG(false, "Invalid operation value %d\n", service_context->operation);
// Set cause to silence compiler warning that it may be unused.
cause = UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER;
break;
@ -11955,16 +11890,21 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
if (status != NV_OK)
return status;
// TODO: Bug 5069427: [uvm] Fix the migration STO error checks.
// Same as above for nvlink errors. Check the source GPU as well
// as all its peers.
uvm_spin_lock(&gpu->peer_info.peer_gpu_lock);
for_each_gpu_in_mask(peer_gpu, &gpu->peer_info.peer_gpu_mask) {
status = uvm_gpu_check_nvlink_error_no_rm(peer_gpu);
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
uvm_processor_mask_set(&service_context->gpus_to_check_for_nvlink_errors, peer_gpu->id);
if (status != NV_OK)
if (status != NV_OK) {
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
return status;
}
}
uvm_spin_unlock(&gpu->peer_info.peer_gpu_lock);
status = uvm_gpu_check_nvlink_error_no_rm(gpu);
if (status == NV_WARN_MORE_PROCESSING_REQUIRED)
@ -13542,7 +13482,7 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
}
else {
params->resident_physical_address[count] =
block_phys_page_address(block, block_page, uvm_gpu_get(id)).address;
block_phys_page_address(block, block_page, uvm_gpu_get(id), REMOTE_EGM_ALLOWED).address;
}
++count;
@ -13572,7 +13512,7 @@ NV_STATUS uvm_test_va_residency_info(UVM_TEST_VA_RESIDENCY_INFO_PARAMS *params,
block_page = block_phys_page(processor_to_map, nid, page_index);
if (!UVM_ID_IS_CPU(id)) {
uvm_gpu_t *gpu = uvm_gpu_get(id);
uvm_gpu_phys_address_t gpu_phys_addr = block_phys_page_address(block, block_page, gpu);
uvm_gpu_phys_address_t gpu_phys_addr = block_phys_page_address(block, block_page, gpu, REMOTE_EGM_ALLOWED);
NvU64 phys_addr = gpu_phys_addr.address;
if (UVM_ID_IS_CPU(block_page.processor)) {

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -205,12 +205,12 @@ typedef struct
//
// The indices represent the corresponding big PTEs in the block's interior.
// For example, a block with alignment and size of one 4k page on either
// side of a big page will only use bit 0. Use uvm_va_block_big_page_index to look
// the big_ptes index of a page.
// side of a big page will only use bit 0. Use uvm_va_block_big_page_index
// to look up the big_ptes index of a page.
//
// The block might not be able to fit any big PTEs, in which case this
// bitmap is always zero. Use uvm_va_block_gpu_num_big_pages to find the number of
// valid bits in this mask.
// bitmap is always zero. Use uvm_va_block_gpu_num_big_pages to find the
// number of valid bits in this mask.
DECLARE_BITMAP(big_ptes, MAX_BIG_PAGES_PER_UVM_VA_BLOCK);
// See the comments for uvm_va_block_mmap_t::cpu.pte_bits.
@ -565,8 +565,8 @@ struct uvm_va_block_wrapper_struct
// testing only.
bool inject_eviction_error;
// Force the next successful chunk allocation to then fail. Used for testing
// only to simulate driver metadata allocation failure.
// Force the next successful chunk allocation to then fail. Used for
// testing only to simulate driver metadata allocation failure.
bool inject_populate_error;
// Force the next split on this block to fail.
@ -1250,8 +1250,8 @@ NV_STATUS uvm_va_block_cpu_fault(uvm_va_block_t *va_block,
// context.
//
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context->prefetch_hint is set by this function.
//
@ -1282,8 +1282,8 @@ NV_STATUS uvm_va_block_service_locked(uvm_processor_id_t processor_id,
// pages to new_residency.
//
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context->prefetch_hint should be set before calling this function.
//
@ -1311,8 +1311,8 @@ NV_STATUS uvm_va_block_service_copy(uvm_processor_id_t processor_id,
// to the new residency (which may be remote).
//
// service_context must not be NULL and policy for service_context->region must
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// match. See the comments for uvm_va_block_check_policy_is_valid(). If
// va_block is a HMM block, va_block_context->hmm.vma must be valid. See the
// comments for uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
// service_context must be initialized by calling uvm_va_block_service_copy()
// before calling this function.
@ -1499,8 +1499,8 @@ uvm_gpu_chunk_t *uvm_va_block_lookup_gpu_chunk(uvm_va_block_t *va_block, uvm_gpu
//
// service_context and service_context->block_context must not be NULL and
// policy for the region must match. See the comments for
// uvm_va_block_check_policy_is_valid(). If va_block is a HMM block,
// service->block_context->hmm.vma must be valid. See the comments for
// uvm_va_block_check_policy_is_valid(). If va_block is a HMM block,
// service->block_context->hmm.vma must be valid. See the comments for
// uvm_hmm_check_context_vma_is_valid() in uvm_hmm.h.
//
// LOCKING: The caller must hold the va_block lock. If
@ -1550,7 +1550,8 @@ void uvm_va_block_retry_init(uvm_va_block_retry_t *uvm_va_block_retry);
// Frees all the remaining free chunks and unpins all the used chunks.
void uvm_va_block_retry_deinit(uvm_va_block_retry_t *uvm_va_block_retry, uvm_va_block_t *va_block);
// Evict all chunks from the block that are subchunks of the passed in root_chunk.
// Evict all chunks from the block that are subchunks of the passed in
// root_chunk.
//
// Add all the work tracking the eviction to the tracker.
//
@ -2139,16 +2140,12 @@ struct page *uvm_cpu_chunk_get_cpu_page(uvm_va_block_t *va_block, uvm_cpu_chunk_
struct page *uvm_va_block_get_cpu_page(uvm_va_block_t *va_block, uvm_page_index_t page_index);
// Physically map a CPU chunk so it is DMA'able from all registered GPUs.
// nid cannot be NUMA_NO_NODE.
// Locking: The va_block lock must be held.
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
uvm_cpu_chunk_t *chunk,
uvm_page_index_t page_index);
NV_STATUS uvm_va_block_map_cpu_chunk_on_gpus(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk);
// Physically unmap a CPU chunk from all registered GPUs.
// Locking: The va_block lock must be held.
void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block,
uvm_cpu_chunk_t *chunk);
void uvm_va_block_unmap_cpu_chunk_on_gpus(uvm_va_block_t *va_block, uvm_cpu_chunk_t *chunk);
// Remove any CPU chunks in the given region.
// Locking: The va_block lock must be held.
@ -2163,8 +2160,7 @@ NvU64 uvm_va_block_get_physical_size(uvm_va_block_t *block,
uvm_page_index_t page_index);
// Get CPU page size or 0 if it is not mapped
NvU64 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block,
uvm_page_index_t page_index);
NvU64 uvm_va_block_page_size_cpu(uvm_va_block_t *va_block, uvm_page_index_t page_index);
// Get GPU page size or 0 if it is not mapped on the given GPU
NvU64 uvm_va_block_page_size_gpu(uvm_va_block_t *va_block, uvm_gpu_id_t gpu_id, uvm_page_index_t page_index);
@ -2262,8 +2258,8 @@ NV_STATUS uvm_va_block_populate_page_cpu(uvm_va_block_t *va_block,
// otherwise it will be initialized and deinitialized by the macro.
//
// The macro also locks and unlocks the block's lock internally as it's expected
// that the block's lock has been unlocked and relocked whenever the function call
// returns NV_ERR_MORE_PROCESSING_REQUIRED and this makes it clear that the
// that the block's lock has been unlocked and relocked whenever the function
// call returns NV_ERR_MORE_PROCESSING_REQUIRED and this makes it clear that the
// block's state is not locked across these calls.
#define UVM_VA_BLOCK_LOCK_RETRY(va_block, block_retry, call) ({ \
NV_STATUS __status; \

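A hypothetical usage sketch of the retry macro documented above; my_resident_op() stands in for a real retryable operation and is not an existing UVM function.

uvm_va_block_retry_t block_retry;
NV_STATUS status;

// The macro relocks the block around each attempt and retries while the
// operation returns NV_ERR_MORE_PROCESSING_REQUIRED.
status = UVM_VA_BLOCK_LOCK_RETRY(va_block, &block_retry,
                                 my_resident_op(va_block, &block_retry));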
View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2023 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -235,6 +235,10 @@ typedef struct
// Event that triggered the call
uvm_make_resident_cause_t cause;
// Access counters notification buffer index. Only valid when cause is
// UVM_MAKE_RESIDENT_CAUSE_ACCESS_COUNTER.
NvU32 access_counters_buffer_index;
} make_resident;
// State used by the mapping APIs (unmap, map, revoke). This could be used

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -558,7 +558,7 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
nv_kthread_q_flush(&gpu->parent->isr.kill_channel_q);
if (gpu->parent->access_counters_supported)
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
uvm_gpu_access_counters_disable(gpu, va_space);
}
@ -576,7 +576,7 @@ void uvm_va_space_destroy(uvm_va_space_t *va_space)
uvm_deferred_free_object_list(&deferred_free_list);
// Normally we'd expect this to happen as part of uvm_mm_release()
// Normally we'd expect this to happen as part of uvm_release_mm()
// but if userspace never initialized uvm_mm_fd that won't happen.
// We don't have to take the va_space_mm spinlock and update state
// here because we know no other thread can be in or subsequently
@ -760,7 +760,7 @@ NV_STATUS uvm_va_space_register_gpu(uvm_va_space_t *va_space,
bool gpu_can_access_sysmem = true;
uvm_processor_mask_t *peers_to_release = NULL;
status = uvm_gpu_retain_by_uuid(gpu_uuid, user_rm_device, &gpu);
status = uvm_gpu_retain_by_uuid(gpu_uuid, user_rm_device, &va_space->test.parent_gpu_error, &gpu);
if (status != NV_OK)
return status;
@ -936,7 +936,7 @@ done:
// registered GPU: the enablement step would have failed before even
// discovering that the GPU is already registered.
if (uvm_parent_gpu_access_counters_required(gpu->parent))
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
uvm_gpu_access_counters_disable(gpu, va_space);
uvm_gpu_release(gpu);
}
@ -1011,7 +1011,7 @@ NV_STATUS uvm_va_space_unregister_gpu(uvm_va_space_t *va_space, const NvProcesso
// acquires the VA space lock after the unregistration does. Both outcomes
// result on valid states.
if (disable_access_counters)
uvm_parent_gpu_access_counters_disable(gpu->parent, va_space);
uvm_gpu_access_counters_disable(gpu, va_space);
// mmap_lock is needed to establish CPU mappings to any pages evicted from
// the GPU if accessed by CPU is set for them.
@ -2207,6 +2207,17 @@ NV_STATUS uvm_test_va_space_inject_error(UVM_TEST_VA_SPACE_INJECT_ERROR_PARAMS *
atomic_set(&va_space->test.migrate_vma_allocation_fail_nth, params->migrate_vma_allocation_fail_nth);
atomic_set(&va_space->test.va_block_allocation_fail_nth, params->va_block_allocation_fail_nth);
va_space->test.parent_gpu_error.access_counters_alloc_buffer = params->gpu_access_counters_alloc_buffer;
va_space->test.parent_gpu_error.access_counters_alloc_block_context =
params->gpu_access_counters_alloc_block_context;
va_space->test.parent_gpu_error.access_counters_batch_context_notifications =
params->access_counters_batch_context_notifications;
va_space->test.parent_gpu_error.access_counters_batch_context_notification_cache =
params->access_counters_batch_context_notification_cache;
va_space->test.parent_gpu_error.isr_access_counters_alloc = params->gpu_isr_access_counters_alloc;
va_space->test.parent_gpu_error.isr_access_counters_alloc_stats_cpu =
params->gpu_isr_access_counters_alloc_stats_cpu;
return NV_OK;
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2015-2024 NVIDIA Corporation
Copyright (c) 2015-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -424,6 +424,8 @@ struct uvm_va_space_struct
bool force_cpu_to_cpu_copy_with_ce;
bool allow_allocation_from_movable;
uvm_test_parent_gpu_inject_error_t parent_gpu_error;
} test;
// Queue item for deferred f_ops->release() handling

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -37,10 +37,10 @@ void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->utlb_per_gpc_count = uvm_volta_get_utlbs_per_gpc(parent_gpu);
parent_gpu->fault_buffer_info.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
parent_gpu->fault_buffer.replayable.utlb_count = parent_gpu->rm_info.gpcCount * parent_gpu->utlb_per_gpc_count;
{
uvm_fault_buffer_entry_t *dummy;
UVM_ASSERT(parent_gpu->fault_buffer_info.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
UVM_ASSERT(parent_gpu->fault_buffer.replayable.utlb_count <= (1 << (sizeof(dummy->fault_source.utlb_id) * 8)));
}
// A single top level PDE on Volta covers 128 TB and that's the minimum
@ -82,9 +82,9 @@ void uvm_hal_volta_arch_init_properties(uvm_parent_gpu_t *parent_gpu)
parent_gpu->non_replayable_faults_supported = true;
parent_gpu->access_counters_supported = true;
parent_gpu->access_counters_can_use_physical_addresses = true;
// Although access counters are supported in HW, the HW only reports memory
// accesses using physical addresses, which is not supported in SW.
parent_gpu->access_counters_supported = false;
parent_gpu->fault_cancel_va_supported = true;

View File

@ -1,228 +0,0 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*******************************************************************************/
#include "uvm_linux.h"
#include "uvm_global.h"
#include "uvm_gpu.h"
#include "uvm_hal.h"
#include "clc365.h"
#include "uvm_volta_fault_buffer.h"
typedef struct {
NvU8 bufferEntry[NVC365_NOTIFY_BUF_SIZE];
} access_counter_buffer_entry_c365_t;
void uvm_hal_volta_enable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu)
{
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->access_counter_buffer_info.rm_info.pHubIntrEnSet;
mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
void uvm_hal_volta_disable_access_counter_notifications(uvm_parent_gpu_t *parent_gpu)
{
volatile NvU32 *reg;
NvU32 mask;
reg = parent_gpu->access_counter_buffer_info.rm_info.pHubIntrEnClear;
mask = parent_gpu->access_counter_buffer_info.rm_info.accessCounterMask;
UVM_GPU_WRITE_ONCE(*reg, mask);
}
void uvm_hal_volta_clear_access_counter_notifications(uvm_parent_gpu_t *parent_gpu, NvU32 get)
{
// No-op, this function is only used by pulse-based interrupt GPUs.
}
NvU32 uvm_hal_volta_access_counter_buffer_entry_size(uvm_parent_gpu_t *parent_gpu)
{
return NVC365_NOTIFY_BUF_SIZE;
}
static uvm_aperture_t get_access_counter_inst_aperture(NvU32 *access_counter_entry)
{
NvU32 hw_aperture_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_APERTURE);
switch (hw_aperture_value) {
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_VID_MEM:
return UVM_APERTURE_VID;
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_COHERENT:
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_NONCOHERENT:
return UVM_APERTURE_SYS;
}
UVM_ASSERT_MSG(false, "Invalid inst aperture value: %d\n", hw_aperture_value);
return UVM_APERTURE_MAX;
}
static uvm_aperture_t get_access_counter_aperture(NvU32 *access_counter_entry)
{
NvU32 hw_aperture_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, APERTURE);
NvU32 peer_id = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, PEER_ID);
switch (hw_aperture_value) {
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_VID_MEM:
return UVM_APERTURE_VID;
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_PEER_MEM:
return UVM_APERTURE_PEER(peer_id);
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_COHERENT:
case NVC365_NOTIFY_BUF_ENTRY_APERTURE_SYS_MEM_NONCOHERENT:
return UVM_APERTURE_SYS;
}
UVM_ASSERT_MSG(false, "Invalid aperture value: %d\n", hw_aperture_value);
return UVM_APERTURE_MAX;
}
static uvm_gpu_address_t get_address(uvm_parent_gpu_t *parent_gpu, NvU32 *access_counter_entry)
{
NvU64 address;
bool is_virtual;
NvU64 addr_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_HI);
NvU64 addr_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_LO);
NvU32 addr_type_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, ADDR_TYPE);
address = addr_lo + (addr_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, ADDR_LO));
is_virtual = (addr_type_value == NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GVA);
if (is_virtual) {
address = uvm_parent_gpu_canonical_address(parent_gpu, address);
return uvm_gpu_address_virtual(address);
}
else {
uvm_aperture_t aperture = get_access_counter_aperture(access_counter_entry);
UVM_ASSERT(parent_gpu->access_counters_can_use_physical_addresses);
UVM_ASSERT_MSG(addr_type_value == NVC365_NOTIFY_BUF_ENTRY_ADDR_TYPE_GPA,
"Invalid address type%u\n", addr_type_value);
return uvm_gpu_address_physical(aperture, address);
}
}
static uvm_access_counter_type_t get_access_counter_type(NvU32 *access_counter_entry)
{
NvU32 type_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, TYPE);
if (type_value == NVC365_NOTIFY_BUF_ENTRY_TYPE_CPU)
return UVM_ACCESS_COUNTER_TYPE_MOMC;
else
return UVM_ACCESS_COUNTER_TYPE_MIMC;
}
static NvU32 *get_access_counter_buffer_entry(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
access_counter_buffer_entry_c365_t *buffer_start;
NvU32 *access_counter_entry;
UVM_ASSERT(index < parent_gpu->access_counter_buffer_info.max_notifications);
buffer_start = (access_counter_buffer_entry_c365_t *)parent_gpu->access_counter_buffer_info.rm_info.bufferAddress;
access_counter_entry = (NvU32 *)&buffer_start[index];
return access_counter_entry;
}
bool uvm_hal_volta_access_counter_buffer_entry_is_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
NvU32 *access_counter_entry;
bool is_valid;
access_counter_entry = get_access_counter_buffer_entry(parent_gpu, index);
is_valid = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID);
return is_valid;
}
void uvm_hal_volta_access_counter_buffer_entry_clear_valid(uvm_parent_gpu_t *parent_gpu, NvU32 index)
{
NvU32 *access_counter_entry;
access_counter_entry = get_access_counter_buffer_entry(parent_gpu, index);
WRITE_HWCONST_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, VALID, FALSE);
}
void uvm_hal_volta_access_counter_buffer_parse_entry(uvm_parent_gpu_t *parent_gpu,
NvU32 index,
uvm_access_counter_buffer_entry_t *buffer_entry)
{
NvU32 *access_counter_entry;
// Valid bit must be set before this function is called
UVM_ASSERT(uvm_hal_volta_access_counter_buffer_entry_is_valid(parent_gpu, index));
access_counter_entry = get_access_counter_buffer_entry(parent_gpu, index);
buffer_entry->counter_type = get_access_counter_type(access_counter_entry);
buffer_entry->address = get_address(parent_gpu, access_counter_entry);
if (buffer_entry->address.is_virtual) {
NvU64 inst_hi, inst_lo;
inst_hi = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_HI);
inst_lo = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, INST_LO);
buffer_entry->virtual_info.instance_ptr.address =
inst_lo + (inst_hi << HWSIZE_MW(C365, NOTIFY_BUF_ENTRY, INST_LO));
// HW value contains the 4K page number. Shift to build the full address
buffer_entry->virtual_info.instance_ptr.address <<= 12;
buffer_entry->virtual_info.instance_ptr.aperture = get_access_counter_inst_aperture(access_counter_entry);
buffer_entry->virtual_info.mmu_engine_id =
READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, MMU_ENGINE_ID);
buffer_entry->virtual_info.mmu_engine_type = UVM_MMU_ENGINE_TYPE_GRAPHICS;
// MMU engine id aligns with the fault buffer packets. Therefore, we
// reuse the helper to compute the VE ID from the fault buffer class.
buffer_entry->virtual_info.ve_id =
parent_gpu->fault_buffer_hal->get_ve_id(buffer_entry->virtual_info.mmu_engine_id,
buffer_entry->virtual_info.mmu_engine_type);
}
else if (buffer_entry->counter_type == UVM_ACCESS_COUNTER_TYPE_MIMC) {
// Ignore any set bit beyond 47 since it is the maximum physical address
// supported by the GPU. See the definition of
// uvm_gpu_t::dma_addressable_start for why higher bits might be set.
const NvU64 mask_46_0 = (0x1UL << 47) - 1;
buffer_entry->address.address &= mask_46_0;
}
buffer_entry->counter_value = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, COUNTER_VAL);
buffer_entry->sub_granularity = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, SUB_GRANULARITY);
buffer_entry->bank = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, BANK);
buffer_entry->tag = READ_HWVALUE_MW(access_counter_entry, C365, NOTIFY_BUF_ENTRY, NOTIFY_TAG);
// Automatically clear valid bit for the entry in the access counter buffer
uvm_hal_volta_access_counter_buffer_entry_clear_valid(parent_gpu, index);
}
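A usage sketch of the valid-bit handshake this (now removed) Volta HAL implemented; the wrapper below is illustrative and only strings together the two entry points shown above.
static bool example_consume_notification(uvm_parent_gpu_t *parent_gpu,
                                         NvU32 index,
                                         uvm_access_counter_buffer_entry_t *out)
{
    /* A slot may only be parsed while HW has set its VALID bit. */
    if (!uvm_hal_volta_access_counter_buffer_entry_is_valid(parent_gpu, index))
        return false;
    /* Copies the decoded fields into *out and clears VALID so HW can reuse the slot. */
    uvm_hal_volta_access_counter_buffer_parse_entry(parent_gpu, index, out);
    return true;
}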

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2024 NVIDIA Corporation
Copyright (c) 2016-2025 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -38,7 +38,7 @@ typedef struct {
NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu)
{
NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferPut);
NvU32 put = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferPut);
NvU32 index = READ_HWVALUE(put, _PFB_PRI_MMU, FAULT_BUFFER_PUT, PTR);
UVM_ASSERT(READ_HWVALUE(put, _PFB_PRI_MMU, FAULT_BUFFER_PUT, GETPTR_CORRUPTED) ==
NV_PFB_PRI_MMU_FAULT_BUFFER_PUT_GETPTR_CORRUPTED_NO);
@ -48,8 +48,8 @@ NvU32 uvm_hal_volta_fault_buffer_read_put(uvm_parent_gpu_t *parent_gpu)
NvU32 uvm_hal_volta_fault_buffer_read_get(uvm_parent_gpu_t *parent_gpu)
{
NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet);
UVM_ASSERT(get < parent_gpu->fault_buffer_info.replayable.max_faults);
NvU32 get = UVM_GPU_READ_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet);
UVM_ASSERT(get < parent_gpu->fault_buffer.replayable.max_faults);
return READ_HWVALUE(get, _PFB_PRI_MMU, FAULT_BUFFER_GET, PTR);
}
@ -58,7 +58,7 @@ void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 in
{
NvU32 get = HWVALUE(_PFB_PRI_MMU, FAULT_BUFFER_GET, PTR, index);
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
// If HW has detected an overflow condition (PUT == GET - 1 and a fault has
// arrived, which is dropped due to no more space in the fault buffer), it
@ -70,7 +70,7 @@ void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 in
// resulting in the overflow condition being instantly reasserted. However,
// if the index is updated first and then the OVERFLOW bit is cleared such
// a collision will not cause a reassertion of the overflow condition.
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet, get);
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet, get);
// Clearing GETPTR_CORRUPTED and OVERFLOW is not needed when GSP-RM owns
// the HW replayable fault buffer, because UVM does not write to the actual
@ -82,7 +82,7 @@ void uvm_hal_volta_fault_buffer_write_get(uvm_parent_gpu_t *parent_gpu, NvU32 in
// Clear the GETPTR_CORRUPTED and OVERFLOW bits.
get |= HWCONST(_PFB_PRI_MMU, FAULT_BUFFER_GET, GETPTR_CORRUPTED, CLEAR) |
HWCONST(_PFB_PRI_MMU, FAULT_BUFFER_GET, OVERFLOW, CLEAR);
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer_info.rm_info.replayable.pFaultBufferGet, get);
UVM_GPU_WRITE_ONCE(*parent_gpu->fault_buffer.rm_info.replayable.pFaultBufferGet, get);
}
// TODO: Bug 1835884: [uvm] Query the maximum number of subcontexts from RM
@ -234,9 +234,9 @@ static NvU32 *get_fault_buffer_entry(uvm_parent_gpu_t *parent_gpu, NvU32 index)
fault_buffer_entry_c369_t *buffer_start;
NvU32 *fault_entry;
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
buffer_start = (fault_buffer_entry_c369_t *)parent_gpu->fault_buffer_info.rm_info.replayable.bufferAddress;
buffer_start = (fault_buffer_entry_c369_t *)parent_gpu->fault_buffer.rm_info.replayable.bufferAddress;
fault_entry = (NvU32 *)&buffer_start[index];
return fault_entry;
@ -247,10 +247,10 @@ static UvmFaultMetadataPacket *get_fault_buffer_entry_metadata(uvm_parent_gpu_t
{
UvmFaultMetadataPacket *fault_entry_metadata;
UVM_ASSERT(index < parent_gpu->fault_buffer_info.replayable.max_faults);
UVM_ASSERT(index < parent_gpu->fault_buffer.replayable.max_faults);
UVM_ASSERT(g_uvm_global.conf_computing_enabled);
fault_entry_metadata = parent_gpu->fault_buffer_info.rm_info.replayable.bufferMetadata;
fault_entry_metadata = parent_gpu->fault_buffer.rm_info.replayable.bufferMetadata;
UVM_ASSERT(fault_entry_metadata != NULL);
return fault_entry_metadata + index;
@ -359,7 +359,7 @@ static void parse_fault_entry_common(uvm_parent_gpu_t *parent_gpu,
UVM_ASSERT(gpc_utlb_id < parent_gpu->utlb_per_gpc_count);
utlb_id = buffer_entry->fault_source.gpc_id * parent_gpu->utlb_per_gpc_count + gpc_utlb_id;
UVM_ASSERT(utlb_id < parent_gpu->fault_buffer_info.replayable.utlb_count);
UVM_ASSERT(utlb_id < parent_gpu->fault_buffer.replayable.utlb_count);
buffer_entry->fault_source.utlb_id = utlb_id;
}

View File

@ -1,5 +1,5 @@
/*******************************************************************************
Copyright (c) 2016-2021 NVIDIA Corporation
Copyright (c) 2016-2024 NVIDIA Corporation
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
@ -136,64 +136,6 @@ void uvm_hal_volta_host_clear_faulted_channel_method(uvm_push_t *push,
clear_type_value);
}
void uvm_hal_volta_access_counter_clear_all(uvm_push_t *push)
{
NV_PUSH_4U(C36F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, 0,
MEM_OP_D, HWCONST(C36F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
HWCONST(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, ALL));
}
static NvU32 get_access_counter_type_value(uvm_access_counter_type_t type)
{
if (type == UVM_ACCESS_COUNTER_TYPE_MIMC)
return NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MIMC;
else if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
return NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TYPE_MOMC;
else
UVM_ASSERT_MSG(false, "Invalid access counter type %u\n", type);
return 0;
}
static NvU32 get_access_counter_targeted_type_value(uvm_access_counter_type_t type)
{
if (type == UVM_ACCESS_COUNTER_TYPE_MIMC)
return NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MIMC;
else if (type == UVM_ACCESS_COUNTER_TYPE_MOMC)
return NVC36F_MEM_OP_D_ACCESS_COUNTER_CLR_TARGETED_TYPE_MOMC;
else
UVM_ASSERT_MSG(false, "Invalid access counter type %u\n", type);
return 0;
}
void uvm_hal_volta_access_counter_clear_type(uvm_push_t *push, uvm_access_counter_type_t type)
{
NvU32 type_value = get_access_counter_type_value(type);
NV_PUSH_4U(C36F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, 0,
MEM_OP_D, HWCONST(C36F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
HWVALUE(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, type_value));
}
void uvm_hal_volta_access_counter_clear_targeted(uvm_push_t *push,
const uvm_access_counter_buffer_entry_t *buffer_entry)
{
NvU32 targeted_type_value = get_access_counter_targeted_type_value(buffer_entry->counter_type);
NV_PUSH_4U(C36F, MEM_OP_A, 0,
MEM_OP_B, 0,
MEM_OP_C, HWVALUE(C36F, MEM_OP_C, ACCESS_COUNTER_CLR_TARGETED_NOTIFY_TAG, buffer_entry->tag),
MEM_OP_D, HWCONST(C36F, MEM_OP_D, OPERATION, ACCESS_COUNTER_CLR) |
HWCONST(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TYPE, TARGETED) |
HWVALUE(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_TYPE, targeted_type_value) |
HWVALUE(C36F, MEM_OP_D, ACCESS_COUNTER_CLR_TARGETED_BANK, buffer_entry->bank));
}
void uvm_hal_volta_host_tlb_invalidate_va(uvm_push_t *push,
uvm_gpu_phys_address_t pdb,
NvU32 depth,

View File

@ -143,6 +143,11 @@ nvidia_vma_access(
return -EINVAL;
}
if (write && !(mmap_context->prot & NV_PROTECT_WRITEABLE))
{
return -EACCES;
}
if (nv->flags & NV_FLAG_CONTROL)
{
at = NV_VMA_PRIVATE(vma);

View File

@ -217,7 +217,7 @@ NV_STATUS nvGpuOpsOwnAccessCntrIntr(struct gpuSession *session,
NV_STATUS nvGpuOpsEnableAccessCntr(struct gpuDevice *device,
gpuAccessCntrInfo *pAccessCntrInfo,
gpuAccessCntrConfig *pAccessCntrConfig);
const gpuAccessCntrConfig *pAccessCntrConfig);
NV_STATUS nvGpuOpsDisableAccessCntr(struct gpuDevice *device, gpuAccessCntrInfo *pAccessCntrInfo);

View File

@ -931,7 +931,7 @@ EXPORT_SYMBOL(nvUvmInterfaceInitAccessCntrInfo);
NV_STATUS nvUvmInterfaceEnableAccessCntr(uvmGpuDeviceHandle device,
UvmGpuAccessCntrInfo *pAccessCntrInfo,
UvmGpuAccessCntrConfig *pAccessCntrConfig)
const UvmGpuAccessCntrConfig *pAccessCntrConfig)
{
nvidia_stack_t *sp = NULL;
NV_STATUS status;

View File

@ -159,6 +159,7 @@ NV_CONFTEST_FUNCTION_COMPILE_TESTS += cc_attr_guest_sev_snp
NV_CONFTEST_FUNCTION_COMPILE_TESTS += hv_get_isolation_type
NV_CONFTEST_FUNCTION_COMPILE_TESTS += seq_read_iter
NV_CONFTEST_FUNCTION_COMPILE_TESTS += follow_pfn
NV_CONFTEST_FUNCTION_COMPILE_TESTS += ptep_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_get
NV_CONFTEST_FUNCTION_COMPILE_TESTS += drm_gem_object_put_unlocked
NV_CONFTEST_FUNCTION_COMPILE_TESTS += add_memory_driver_managed
@ -229,6 +230,8 @@ NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_memory_block_size_b
NV_CONFTEST_SYMBOL_COMPILE_TESTS += crypto
NV_CONFTEST_SYMBOL_COMPILE_TESTS += crypto_akcipher_verify
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_follow_pte
NV_CONFTEST_SYMBOL_COMPILE_TESTS += follow_pte_arg_vma
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_present_follow_pfnmap_start
NV_CONFTEST_SYMBOL_COMPILE_TESTS += is_export_symbol_gpl_pci_ats_supported
NV_CONFTEST_SYMBOL_COMPILE_TESTS += ecc_digits_from_bytes

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 1999-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 1999-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -32,14 +32,27 @@
#define NV_NUM_PIN_PAGES_PER_ITERATION 0x80000
#endif
static inline int nv_follow_pfn(struct vm_area_struct *vma,
unsigned long address,
unsigned long *pfn)
static inline int nv_follow_flavors(struct vm_area_struct *vma,
unsigned long address,
unsigned long *pfn)
{
#if defined(NV_FOLLOW_PFN_PRESENT)
return follow_pfn(vma, address, pfn);
#else
#if NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte
#if NV_IS_EXPORT_SYMBOL_PRESENT_follow_pfnmap_start
struct follow_pfnmap_args args = {};
int rc;
args.address = address;
args.vma = vma;
rc = follow_pfnmap_start(&args);
if (rc)
return rc;
*pfn = args.pfn;
follow_pfnmap_end(&args);
return 0;
#elif NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte
int status = 0;
spinlock_t *ptl;
pte_t *ptep;
@ -47,17 +60,40 @@ static inline int nv_follow_pfn(struct vm_area_struct *vma,
if (!(vma->vm_flags & (VM_IO | VM_PFNMAP)))
return status;
//
// The first argument of follow_pte() was changed from
// mm_struct to vm_area_struct in kernel 6.10.
//
#if defined(NV_FOLLOW_PTE_ARG1_VMA)
status = follow_pte(vma, address, &ptep, &ptl);
#else
status = follow_pte(vma->vm_mm, address, &ptep, &ptl);
#endif
if (status)
return status;
#if defined(NV_PTEP_GET_PRESENT)
*pfn = pte_pfn(ptep_get(ptep));
#else
*pfn = pte_pfn(READ_ONCE(*ptep));
#endif
// The lock is acquired inside follow_pte()
pte_unmap_unlock(ptep, ptl);
return 0;
#else // NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte
#else
return -1;
#endif // NV_IS_EXPORT_SYMBOL_PRESENT_follow_pte
#endif // NV_IS_EXPORT_SYMBOL_PRESENT_follow_pfnmap_start
}
static inline int nv_follow_pfn(struct vm_area_struct *vma,
unsigned long address,
unsigned long *pfn)
{
#if defined(NV_FOLLOW_PFN_PRESENT)
return follow_pfn(vma, address, pfn);
#else
return nv_follow_flavors(vma, address, pfn);
#endif
}
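A hypothetical caller sketch (not part of this change) showing how nv_follow_pfn() is meant to be used: all of the follow_* flavors above expect the mmap lock to be held and only make sense for VM_IO/VM_PFNMAP mappings.
static int example_resolve_pfn(struct mm_struct *mm,
                               unsigned long addr,
                               unsigned long *pfn)
{
    struct vm_area_struct *vma;
    int rc = -EINVAL;
    mmap_read_lock(mm);
    vma = find_vma(mm, addr);
    /* find_vma() only guarantees vm_end > addr, so check vm_start too. */
    if ((vma != NULL) && (addr >= vma->vm_start) &&
        (vma->vm_flags & (VM_IO | VM_PFNMAP)))
        rc = nv_follow_pfn(vma, addr, pfn);
    mmap_read_unlock(mm);
    return rc;
}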

View File

@ -315,6 +315,12 @@ namespace DisplayPort
//
bool bNoFallbackInPostLQA;
//
// Set to true when we do not want DSC to be limited
// to 16 BPP for multitile on Blackwell++
//
bool bDisableDscMaxBppLimit;
bool bReportDeviceLostBeforeNew;
bool bDisableSSC;
bool bEnableFastLT;
@ -335,6 +341,8 @@ namespace DisplayPort
bool bForceHeadShutdownPerMonitor;
bool bEnableLowerBppCheckForDsc;
//
// Dual SST Partner connector object pointer
ConnectorImpl *pCoupledConnector;

View File

@ -168,6 +168,7 @@ namespace DisplayPort
bool bIgnoreDscCap; // Ignore DSC even if sink reports DSC capability
bool bDisableDownspread;
bool bForceHeadShutdown;
bool bDisableDscMaxBppLimit;
bool bSkipCableIdCheck;
bool bAllocateManualTimeslots;
}_WARFlags;

View File

@ -101,7 +101,10 @@
// Bug 5088957 : Force head shutdown in DpLib
#define NV_DP_REGKEY_FORCE_HEAD_SHUTDOWN "DP_WAR_5088957"
//
// Bug 5041041 : Enable Lower BPP check for DSC
#define NV_DP_REGKEY_ENABLE_LOWER_BPP_CHECK_FOR_DSC "DP_ENABLE_LOWER_BPP_CHECK"
// Data Base used to store all the regkey values.
// The actual data base is declared statically in dp_evoadapter.cpp.
// All entries set to 0 before initialized by the first EvoMainLink constructor.
@ -141,6 +144,7 @@ struct DP_REGKEY_DATABASE
bool bSkipZeroOuiCache;
bool bDisable5019537Fix;
bool bForceHeadShutdown;
bool bEnableLowerBppCheckForDsc;
};
extern struct DP_REGKEY_DATABASE dpRegkeyDatabase;

View File

@ -158,7 +158,8 @@ void DPCDHALImpl2x::parseAndReadCaps()
_ANSI_128B_132B, _YES,
buffer[0]);
if (caps2x.bDP2xChannelCodingSupported == true)
// Read this unconditionally when the connection is tunneled
if (caps2x.bDP2xChannelCodingSupported == true || caps.dpInTunnelingCaps.bIsSupported)
{
// 0x2215
if (AuxRetry::ack == bus.read(NV_DPCD20_128B_132B_SUPPORTED_LINK_RATES, &buffer[0], 1))
@ -264,7 +265,7 @@ void DPCDHALImpl2x::parseAndReadCaps()
if (caps2x.dpInTunnelingCaps.bDP2xChannelCodingSupported)
{
if (AuxRetry::ack ==
bus.read(NV_DPCD20_DP_TUNNELING_MAIN_LINK_CHANNEL_CODING, &byte, sizeof byte))
bus.read(NV_DPCD20_DP_TUNNELING_128B132B_LINK_RATES, &byte, sizeof byte))
{
caps2x.dpInTunnelingCaps.bUHBR_10GSupported =
FLD_TEST_DRF(_DPCD20, _DP_TUNNELING_128B132B_LINK_RATES, _10_0_GPBS_SUPPORTED, _YES, byte);
@ -342,12 +343,18 @@ AuxRetry::status DPCDHALImpl2x::notifySDPErrDetectionCapability()
bool DPCDHALImpl2x::isDp2xChannelCodingCapable()
{
// return false if the device does not support 128b/132b.
if (!caps2x.bDP2xChannelCodingSupported)
return false;
// return false if DP-IN Tunneling is supported but not support 128b/132b.
if (caps.dpInTunnelingCaps.bIsSupported && !caps2x.dpInTunnelingCaps.bDP2xChannelCodingSupported)
return false;
// However, when dpTunneling is enabled, read the tunneling cap instead.
if (caps.dpInTunnelingCaps.bIsSupported)
{
// return false if DP-IN Tunneling is supported but does not support 128b/132b.
if (!caps2x.dpInTunnelingCaps.bDP2xChannelCodingSupported)
return false;
}
else
{
if (!caps2x.bDP2xChannelCodingSupported)
return false;
}
// return true if there is no LTTPR.
if (!bLttprSupported || (caps.phyRepeaterCount == 0))
@ -410,6 +417,14 @@ NvU32 DPCDHALImpl2x::getUHBRSupported()
bool bUHBR_13_5GSupported = caps2x.bUHBR_13_5GSupported;
bool bUHBR_20GSupported = caps2x.bUHBR_20GSupported;
// When tunneling is supported and bw allocation is enabled, override the caps with the tunneling caps
if (caps.dpInTunnelingCaps.bIsSupported && bIsDpTunnelBwAllocationEnabled)
{
bUHBR_10GSupported = caps2x.dpInTunnelingCaps.bUHBR_10GSupported;
bUHBR_13_5GSupported = caps2x.dpInTunnelingCaps.bUHBR_13_5GSupported;
bUHBR_20GSupported = caps2x.dpInTunnelingCaps.bUHBR_20GSupported;
}
if (!bIgnoreCableIdCaps)
{
bUHBR_10GSupported = bUHBR_10GSupported && caps2x.cableCaps.bUHBR_10GSupported;
@ -424,13 +439,6 @@ NvU32 DPCDHALImpl2x::getUHBRSupported()
bUHBR_20GSupported = bUHBR_20GSupported && caps2x.repeaterCaps.bUHBR_20GSupported;
}
if (caps.dpInTunnelingCaps.bIsSupported && bIsDpTunnelBwAllocationEnabled)
{
bUHBR_10GSupported = bUHBR_10GSupported && caps2x.dpInTunnelingCaps.bUHBR_10GSupported;
bUHBR_13_5GSupported = bUHBR_13_5GSupported && caps2x.dpInTunnelingCaps.bUHBR_13_5GSupported;
bUHBR_20GSupported = bUHBR_20GSupported && caps2x.dpInTunnelingCaps.bUHBR_20GSupported;
}
if (bUHBR_10GSupported)
{
uhbrCaps = FLD_SET_DRF(0073_CTRL_CMD_DP, _GET_CAPS_UHBR_SUPPORTED, _UHBR10_0, _YES, uhbrCaps);

View File

@ -185,6 +185,7 @@ void ConnectorImpl::applyRegkeyOverrides(const DP_REGKEY_DATABASE& dpRegkeyDatab
this->bSkipZeroOuiCache = dpRegkeyDatabase.bSkipZeroOuiCache;
this->bDisable5019537Fix = dpRegkeyDatabase.bDisable5019537Fix;
this->bForceHeadShutdownFromRegkey = dpRegkeyDatabase.bForceHeadShutdown;
this->bEnableLowerBppCheckForDsc = dpRegkeyDatabase.bEnableLowerBppCheckForDsc;
}
void ConnectorImpl::setPolicyModesetOrderMitigation(bool enabled)
@ -1367,12 +1368,38 @@ bool ConnectorImpl::compoundQueryAttachMST(Group * target,
if (compoundQueryAttachMSTIsDscPossible(target, modesetParams, pDscParams))
{
unsigned int forceDscBitsPerPixelX16 = pDscParams->bitsPerPixelX16;
result = compoundQueryAttachMSTDsc(target, modesetParams, &localInfo,
pDscParams, pErrorCode);
if (!result)
{
return false;
}
compoundQueryResult = compoundQueryAttachMSTGeneric(target, modesetParams, &localInfo,
pDscParams, pErrorCode);
//
// compoundQueryAttachMSTGeneric might fail due to insufficient bandwidth.
// compoundQueryAttachMSTDsc only checks whether the bpp fits in the available bandwidth of the trained link config.
// There are cases where the default 10 bpp fits that trained link config, yet the bandwidth at the actual
// bottleneck link between source and sink is insufficient to drive the mode, causing compoundQueryAttachMSTGeneric to fail.
// In case of a compoundQueryAttachMSTGeneric failure, instead of returning false, check whether the mode can be
// supported with the maximum DSC compression bpp and return true if it can.
if (!compoundQueryResult && forceDscBitsPerPixelX16 == 0U && this->bEnableLowerBppCheckForDsc)
{
pDscParams->bitsPerPixelX16 = MAX_DSC_COMPRESSION_BPPX16;
result = compoundQueryAttachMSTDsc(target, modesetParams, &localInfo,
pDscParams, pErrorCode);
if (!result)
{
return false;
}
return compoundQueryAttachMSTGeneric(target, modesetParams, &localInfo,
pDscParams, pErrorCode);
}
return compoundQueryResult;
}
return compoundQueryAttachMSTGeneric(target, modesetParams, &localInfo,
@ -1564,6 +1591,7 @@ bool ConnectorImpl::compoundQueryAttachMSTDsc(Group * target,
warData.dpData.dpMode = DSC_DP_MST;
warData.dpData.hBlank = modesetParams.modesetInfo.rasterWidth - modesetParams.modesetInfo.surfaceWidth;
warData.connectorType = DSC_DP;
warData.dpData.bDisableDscMaxBppLimit = bDisableDscMaxBppLimit;
//
// Dplib needs to pass sliceCountMask to clients
@ -1636,7 +1664,9 @@ bool ConnectorImpl::compoundQueryAttachMSTDsc(Group * target,
localInfo->localModesetInfo.bEnableDsc = true;
localInfo->localModesetInfo.depth = bitsPerPixelX16;
if (modesetParams.colorFormat == dpColorFormat_YCbCr422 &&
dev->dscCaps.dscDecoderColorFormatCaps.bYCbCrNative422)
dev->dscCaps.dscDecoderColorFormatCaps.bYCbCrNative422 &&
(dscInfo.gpuCaps.encoderColorFormatMask & DSC_ENCODER_COLOR_FORMAT_Y_CB_CR_NATIVE_422) &&
(dscInfo.sinkCaps.decoderColorFormatMask & DSC_DECODER_COLOR_FORMAT_Y_CB_CR_NATIVE_422))
{
localInfo->localModesetInfo.colorFormat = dpColorFormat_YCbCr422_Native;
}
@ -1790,12 +1820,24 @@ bool ConnectorImpl::compoundQueryAttachMSTGeneric(Group * target,
if ( tail->bandwidth.compound_query_state.timeslots_used_by_query > tail->bandwidth.compound_query_state.totalTimeSlots)
{
compoundQueryResult = false;
SET_DP_IMP_ERROR(pErrorCode, DP_IMP_ERROR_INSUFFICIENT_BANDWIDTH)
if(this->bEnableLowerBppCheckForDsc)
{
tail->bandwidth.compound_query_state.timeslots_used_by_query -= linkConfig->slotsForPBN(base_pbn);
tail->bandwidth.compound_query_state.bandwidthAllocatedForIndex &= ~(1 << compoundQueryCount);
}
SET_DP_IMP_ERROR(pErrorCode, DP_IMP_ERROR_INSUFFICIENT_BANDWIDTH);
}
}
tail = (DeviceImpl*)tail->getParent();
}
}
// If the compoundQueryResult is false, we need to reset the compoundQueryLocalLinkPBN
if (!compoundQueryResult && this->bEnableLowerBppCheckForDsc)
{
compoundQueryLocalLinkPBN -= slots_pbn;
}
return compoundQueryResult;
}
bool ConnectorImpl::compoundQueryAttachSST(Group * target,
@ -1938,6 +1980,8 @@ bool ConnectorImpl::compoundQueryAttachSST(Group * target,
warData.dpData.hBlank = modesetParams.modesetInfo.rasterWidth - modesetParams.modesetInfo.surfaceWidth;
warData.dpData.dpMode = DSC_DP_SST;
warData.connectorType = DSC_DP;
warData.dpData.bDisableDscMaxBppLimit = bDisableDscMaxBppLimit;
if (main->isEDP())
{
warData.dpData.bIsEdp = true;
@ -6067,7 +6111,6 @@ void ConnectorImpl::flushTimeslotsToHardware()
void ConnectorImpl::beforeDeleteStream(GroupImpl * group, bool forFlushMode)
{
//
// During flush entry, if the link is not trained, retrain
// the link so that ACT can be ack'd by the sink.
@ -6079,11 +6122,18 @@ void ConnectorImpl::beforeDeleteStream(GroupImpl * group, bool forFlushMode)
// head is not actively driving pixels and this needs to be handled
// differently.
//
if(forFlushMode && linkUseMultistream())
if (forFlushMode && linkUseMultistream())
{
if(isLinkLost())
{
train(activeLinkConfig, false);
if(!this->bDisable5019537Fix)
{
train(highestAssessedLC, false);
}
else
{
train(activeLinkConfig, false);
}
}
}
@ -7307,8 +7357,11 @@ void ConnectorImpl::notifyShortPulse()
{
return;
}
//save the previous highest assessed LC
// Save the previous highest assessed LC
LinkConfiguration previousAssessedLC = highestAssessedLC;
// Save original active link configuration.
LinkConfiguration originalActiveLinkConfig = activeLinkConfig;
if (main->isConnectorUSBTypeC() &&
activeLinkConfig.bIs128b132bChannelCoding &&
@ -7316,11 +7369,27 @@ void ConnectorImpl::notifyShortPulse()
{
if (activeLinkConfig.isValid() && enableFlush())
{
train(activeLinkConfig, true);
if (!this->bDisable5019537Fix)
{
train(originalActiveLinkConfig, true);
}
else
{
train(activeLinkConfig, true);
}
disableFlush();
}
main->invalidateLinkRatesInFallbackTable(activeLinkConfig.peakRate);
hal->overrideCableIdCap(activeLinkConfig.peakRate, false);
if (!this->bDisable5019537Fix)
{
main->invalidateLinkRatesInFallbackTable(originalActiveLinkConfig.peakRate);
hal->overrideCableIdCap(originalActiveLinkConfig.peakRate, false);
}
else
{
main->invalidateLinkRatesInFallbackTable(activeLinkConfig.peakRate);
hal->overrideCableIdCap(activeLinkConfig.peakRate, false);
}
highestAssessedLC = getMaxLinkConfig();
@ -7334,8 +7403,16 @@ void ConnectorImpl::notifyShortPulse()
if (activeLinkConfig.isValid() && enableFlush())
{
LinkConfiguration originalActiveLinkConfig = activeLinkConfig;
if (!train(activeLinkConfig, false))
bool bTrainSuccess = false;
if (!this->bDisable5019537Fix)
{
bTrainSuccess = train(originalActiveLinkConfig, false);
}
else
{
bTrainSuccess = train(activeLinkConfig, false);
}
if (!bTrainSuccess)
{
//
// If original link config could not be restored force
@ -8210,6 +8287,7 @@ void ConnectorImpl::configInit()
allocatedDpTunnelBwShadow = 0;
bDP2XPreferNonDSCForLowPClk = false;
bForceHeadShutdownPerMonitor = false;
bDisableDscMaxBppLimit = false;
}
bool ConnectorImpl::dpUpdateDscStream(Group *target, NvU32 dscBpp)

View File

@ -1713,5 +1713,10 @@ void ConnectorImpl2x::handleEdidWARs(Edid & edid, DiscoveryManager::Device & dev
bForceHeadShutdownPerMonitor = true;
}
}
if (edid.WARFlags.bDisableDscMaxBppLimit)
{
bDisableDscMaxBppLimit = true;
}
}

View File

@ -104,7 +104,8 @@ const struct
{NV_DP_REGKEY_DISABLE_DOWNSPREAD, &dpRegkeyDatabase.bDownspreadDisabled, DP_REG_VAL_BOOL},
{NV_DP_REGKEY_SKIP_ZERO_OUI_CACHE, &dpRegkeyDatabase.bSkipZeroOuiCache, DP_REG_VAL_BOOL},
{NV_DP_REGKEY_DISABLE_FIX_FOR_5019537, &dpRegkeyDatabase.bDisable5019537Fix, DP_REG_VAL_BOOL},
{NV_DP_REGKEY_FORCE_HEAD_SHUTDOWN, &dpRegkeyDatabase.bForceHeadShutdown, DP_REG_VAL_BOOL}
{NV_DP_REGKEY_FORCE_HEAD_SHUTDOWN, &dpRegkeyDatabase.bForceHeadShutdown, DP_REG_VAL_BOOL},
{NV_DP_REGKEY_ENABLE_LOWER_BPP_CHECK_FOR_DSC, &dpRegkeyDatabase.bEnableLowerBppCheckForDsc, DP_REG_VAL_BOOL}
};
EvoMainLink::EvoMainLink(EvoInterface * provider, Timer * timer) :

View File

@ -596,6 +596,11 @@ void Edid::applyEdidWorkArounds(NvU32 warFlag, const DpMonitorDenylistData *pDen
DP_PRINTF(DP_NOTICE, "DP-WAR> Panel incorrectly exposing DSC capability. Ignoring it.");
DP_PRINTF(DP_NOTICE, "DP-WAR> Bug 3543158");
}
else if (ProductID == 0x5B9A)
{
this->WARFlags.bDisableDscMaxBppLimit = true;
DP_PRINTF(DP_NOTICE, "DP-WAR> Disable DSC max BPP limit of 16 for DSC.");
}
break;
case 0xB306:
if (ProductID == 0x3228)

View File

@ -36,25 +36,25 @@
// and then checked back in. You cannot make changes to these sections without
// corresponding changes to the buildmeister script
#ifndef NV_BUILD_BRANCH
#define NV_BUILD_BRANCH r572_46
#define NV_BUILD_BRANCH r572_77
#endif
#ifndef NV_PUBLIC_BRANCH
#define NV_PUBLIC_BRANCH r572_46
#define NV_PUBLIC_BRANCH r572_77
#endif
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r570/r572_46-344"
#define NV_BUILD_CHANGELIST_NUM (35599303)
#define NV_BUILD_BRANCH_VERSION "rel/gpu_drv/r570/r572_77-376"
#define NV_BUILD_CHANGELIST_NUM (35688848)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "rel/gpu_drv/r570/r572_46-344"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35599303)
#define NV_BUILD_NAME "rel/gpu_drv/r570/r572_77-376"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35688848)
#else /* Windows builds */
#define NV_BUILD_BRANCH_VERSION "r572_46-7"
#define NV_BUILD_CHANGELIST_NUM (35597621)
#define NV_BUILD_BRANCH_VERSION "r572_77-2"
#define NV_BUILD_CHANGELIST_NUM (35681611)
#define NV_BUILD_TYPE "Official"
#define NV_BUILD_NAME "572.61"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35597621)
#define NV_BUILD_NAME "572.80"
#define NV_LAST_OFFICIAL_CHANGELIST_NUM (35681611)
#define NV_BUILD_BRANCH_BASE_VERSION R570
#endif
// End buildmeister python edited section

View File

@ -4,7 +4,7 @@
#if defined(NV_LINUX) || defined(NV_BSD) || defined(NV_SUNOS) || defined(NV_VMWARE) || defined(NV_QNX) || defined(NV_INTEGRITY) || \
(defined(RMCFG_FEATURE_PLATFORM_GSP) && RMCFG_FEATURE_PLATFORM_GSP == 1)
#define NV_VERSION_STRING "570.124.06"
#define NV_VERSION_STRING "570.133.07"
#else

View File

@ -2347,6 +2347,7 @@ DSC_GeneratePPS
in->bits_per_component = pModesetInfo->bitsPerComponent;
in->linebuf_depth = MIN((pDscInfo->sinkCaps.lineBufferBitDepth), (pDscInfo->gpuCaps.lineBufferBitDepth));
in->block_pred_enable = pDscInfo->sinkCaps.bBlockPrediction;
in->multi_tile = (pDscInfo->gpuCaps.maxNumHztSlices > 4U) ? 1 : 0;
switch (pModesetInfo->colorFormat)
{
@ -2526,8 +2527,9 @@ DSC_GeneratePPS
// because of architectural limitation we can't use bits_per_pixel
// more than 16.
//
if ((pModesetInfo->bDualMode || (pDscInfo->gpuCaps.maxNumHztSlices > 4U)) &&
(in->bits_per_pixel > 256 /*bits_per_pixel = 16*/))
if ((pModesetInfo->bDualMode ||
(in->multi_tile && (!pWARData || (pWARData && !pWARData->dpData.bDisableDscMaxBppLimit))))
&& (in->bits_per_pixel > 256 /*bits_per_pixel = 16*/))
{
ret = NVT_STATUS_INVALID_BPP;
goto done;
@ -2547,8 +2549,9 @@ DSC_GeneratePPS
// because of architectural limitation we can't use bits_per_pixel more
// than 16. So forcing it to 16.
//
if ((pModesetInfo->bDualMode || (pDscInfo->gpuCaps.maxNumHztSlices > 4U)) &&
(in->bits_per_pixel > 256 /*bits_per_pixel = 16*/))
if ((pModesetInfo->bDualMode ||
(in->multi_tile && (!pWARData || (pWARData && !pWARData->dpData.bDisableDscMaxBppLimit))))
&& (in->bits_per_pixel > 256 /*bits_per_pixel = 16*/))
{
// ERROR - DSC Dual Mode, because of architectural limitation we can't use bits_per_pixel more than 16.
// ERROR - Forcing it to 16.
@ -2590,7 +2593,6 @@ DSC_GeneratePPS
in->pixel_clkMHz = (NvU32)(pModesetInfo->pixelClockHz / 1000000L);
in->dual_mode = pModesetInfo->bDualMode;
in->drop_mode = pModesetInfo->bDropMode;
in->multi_tile = (pDscInfo->gpuCaps.maxNumHztSlices > 4U) ? 1 : 0;
in->slice_count_mask = pDscInfo->sinkCaps.sliceCountSupportedMask;
in->peak_throughput_mode0 = pDscInfo->sinkCaps.peakThroughputMode0;
in->peak_throughput_mode1 = pDscInfo->sinkCaps.peakThroughputMode1;
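The 256 in the checks above comes from the x16 fixed-point convention used for DSC bits_per_pixel in this path; a small illustration follows (the macro name is hypothetical).
/* DSC bpp is carried as bpp * 16, so 16.0 bpp is represented as 256. */
#define EXAMPLE_DSC_BPP_X16(whole, sixteenths)  ((whole) * 16 + (sixteenths))
/*
 * EXAMPLE_DSC_BPP_X16(16, 0) == 256 -> at the architectural limit
 * EXAMPLE_DSC_BPP_X16(16, 8) == 264 -> rejected or clamped (depending on the
 * path above) for dual-mode / multi-tile, unless bDisableDscMaxBppLimit is set.
 */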

View File

@ -254,6 +254,7 @@ typedef struct
DSC_DP_MODE dpMode;
NvU32 hBlank;
NvBool bIsEdp;
NvBool bDisableDscMaxBppLimit;
NvBool bIs128b132bChannelCoding;
}dpData;
} WAR_DATA;

View File

@ -8230,6 +8230,12 @@ nvswitch_initialize_interrupt_tree_ls10
// NVLIPT
_nvswitch_initialize_nvlipt_interrupts_ls10(device);
// Disable non-fatal and legacy interrupts in TNVL mode
if (nvswitch_is_tnvl_mode_enabled(device))
{
nvswitch_tnvl_disable_interrupts(device);
}
}
//

View File

@ -1250,6 +1250,14 @@ nvswitch_tnvl_disable_interrupts_ls10
nvswitch_device *device
)
{
if (!nvswitch_is_tnvl_mode_enabled(device))
{
NVSWITCH_PRINT_SXID(device, NVSWITCH_ERR_HW_HOST_TNVL_ERROR,
"Failed to disable non-fatal/legacy interrupts. TNVL mode is not enabled\n");
return;
}
//
// In TNVL locked disable non-fatal NVLW, NPG, and legacy interrupt,
// disable additional non-fatals on those partitions.

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2020-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2020-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -3437,6 +3437,18 @@ typedef struct NV2080_CTRL_INTERNAL_FIFO_GET_NUM_SECURE_CHANNELS_PARAMS {
NvU32 maxCeSecureChannels;
} NV2080_CTRL_INTERNAL_FIFO_GET_NUM_SECURE_CHANNELS_PARAMS;
/*!
* NV2080_CTRL_CMD_INTERNAL_PERF_PFM_REQ_HNDLR_PRH_DEPENDENCY_CHECK
*
* This command checks whether all the modules that PRH depends on have been initialized.
*
* Possible status values returned are:
* NV_OK
* NV_ERR_INVALID_STATE
* NV_ERR_NOT_SUPPORTED
*/
#define NV2080_CTRL_CMD_INTERNAL_PERF_PFM_REQ_HNDLR_PRH_DEPENDENCY_CHECK (0x20800a18) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_INTERNAL_INTERFACE_ID << 8) | 0x18" */
/*
* NV2080_CTRL_CMD_INTERNAL_BIF_DISABLE_SYSTEM_MEMORY_ACCESS
*
@ -3454,7 +3466,7 @@ typedef struct NV2080_CTRL_INTERNAL_FIFO_GET_NUM_SECURE_CHANNELS_PARAMS {
* NV_ERR_INVALID_ARGUMENT
* NV_ERR_NOT_SUPPORTED
*/
#define NV2080_CTRL_CMD_INTERNAL_BIF_DISABLE_SYSTEM_MEMORY_ACCESS (0x20800adb) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_INTERNAL_INTERFACE_ID << 8) | NV2080_CTRL_INTERNAL_BIF_DISABLE_SYSTEM_MEMORY_ACCESS_PARAMS_MESSAGE_ID" */
#define NV2080_CTRL_CMD_INTERNAL_BIF_DISABLE_SYSTEM_MEMORY_ACCESS (0x20800adb) /* finn: Evaluated from "(FINN_NV20_SUBDEVICE_0_INTERNAL_INTERFACE_ID << 8) | NV2080_CTRL_INTERNAL_BIF_DISABLE_SYSTEM_MEMORY_ACCESS_PARAMS_MESSAGE_ID" */
#define NV2080_CTRL_INTERNAL_BIF_DISABLE_SYSTEM_MEMORY_ACCESS_PARAMS_MESSAGE_ID (0xDBU)
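A hypothetical sketch (the handles, call site, and error handling are assumptions, not taken from this change) of how the new parameterless NV2080_CTRL_CMD_INTERNAL_PERF_PFM_REQ_HNDLR_PRH_DEPENDENCY_CHECK control introduced above could be issued from kernel RM.
static NV_STATUS example_check_prh_dependencies(OBJGPU *pGpu)
{
    RM_API *pRmApi = GPU_GET_PHYSICAL_RMAPI(pGpu);
    /* No parameter struct is defined for this command, so pass NULL/0. */
    return pRmApi->Control(pRmApi,
                           pGpu->hInternalClient,
                           pGpu->hInternalSubdevice,
                           NV2080_CTRL_CMD_INTERNAL_PERF_PFM_REQ_HNDLR_PRH_DEPENDENCY_CHECK,
                           NULL,
                           0);
}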

View File

@ -149,8 +149,11 @@
#define RESERVED8_ERROR (153)
#define GPU_RECOVERY_ACTION_CHANGED (154)
#define NVLINK_SW_DEFINED_ERROR (155)
#define ROBUST_CHANNEL_LAST_ERROR (157)
#define RESOURCE_RETIREMENT_EVENT (156)
#define RESOURCE_RETIREMENT_FAILURE (157)
#define CHANNEL_RETIREMENT_EVENT (160)
#define CHANNEL_RETIREMENT_FAILURE (161)
#define ROBUST_CHANNEL_LAST_ERROR (161)
// Indexed CE reference
#define ROBUST_CHANNEL_CE_ERROR(x) \

View File

@ -159,6 +159,7 @@ NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_NOT_READY, 0x00000081, "Nvlink Fabri
NV_STATUS_CODE(NV_ERR_NVLINK_FABRIC_FAILURE, 0x00000082, "Nvlink Fabric Probe failed")
NV_STATUS_CODE(NV_ERR_GPU_MEMORY_ONLINING_FAILURE, 0x00000083, "GPU Memory Onlining failed")
NV_STATUS_CODE(NV_ERR_REDUCTION_MANAGER_NOT_AVAILABLE, 0x00000084, "Reduction Manager is not available")
NV_STATUS_CODE(NV_ERR_RESOURCE_RETIREMENT_ERROR, 0x00000086, "An error occurred while trying to retire a resource")
// Warnings:
NV_STATUS_CODE(NV_WARN_HOT_SWITCH, 0x00010001, "WARNING Hot switch")

View File

@ -615,6 +615,25 @@ ENTRY(0x2238, 0x16B7, 0x10de, "NVIDIA A10M-5C"),
ENTRY(0x2238, 0x16B8, 0x10de, "NVIDIA A10M-10C"),
ENTRY(0x2238, 0x16B9, 0x10de, "NVIDIA A10M-20C"),
ENTRY(0x2238, 0x16E6, 0x10de, "NVIDIA A10M-1"),
ENTRY(0x230E, 0x20F5, 0x10de, "NVIDIA H20L-1-15CME"),
ENTRY(0x230E, 0x20F6, 0x10de, "NVIDIA H20L-1-15C"),
ENTRY(0x230E, 0x20F7, 0x10de, "NVIDIA H20L-1-30C"),
ENTRY(0x230E, 0x20F8, 0x10de, "NVIDIA H20L-2-30C"),
ENTRY(0x230E, 0x20F9, 0x10de, "NVIDIA H20L-3-60C"),
ENTRY(0x230E, 0x20FA, 0x10de, "NVIDIA H20L-4-60C"),
ENTRY(0x230E, 0x20FB, 0x10de, "NVIDIA H20L-7-120C"),
ENTRY(0x230E, 0x20FC, 0x10de, "NVIDIA H20L-4C"),
ENTRY(0x230E, 0x20FD, 0x10de, "NVIDIA H20L-5C"),
ENTRY(0x230E, 0x20FE, 0x10de, "NVIDIA H20L-6C"),
ENTRY(0x230E, 0x20FF, 0x10de, "NVIDIA H20L-8C"),
ENTRY(0x230E, 0x2100, 0x10de, "NVIDIA H20L-10C"),
ENTRY(0x230E, 0x2101, 0x10de, "NVIDIA H20L-12C"),
ENTRY(0x230E, 0x2102, 0x10de, "NVIDIA H20L-15C"),
ENTRY(0x230E, 0x2103, 0x10de, "NVIDIA H20L-20C"),
ENTRY(0x230E, 0x2104, 0x10de, "NVIDIA H20L-30C"),
ENTRY(0x230E, 0x2105, 0x10de, "NVIDIA H20L-40C"),
ENTRY(0x230E, 0x2106, 0x10de, "NVIDIA H20L-60C"),
ENTRY(0x230E, 0x2107, 0x10de, "NVIDIA H20L-120C"),
ENTRY(0x2321, 0x1853, 0x10de, "NVIDIA H100L-1-12CME"),
ENTRY(0x2321, 0x1854, 0x10de, "NVIDIA H100L-1-12C"),
ENTRY(0x2321, 0x1855, 0x10de, "NVIDIA H100L-1-24C"),

View File

@ -17,6 +17,7 @@ static inline void _get_chip_id_for_alias_pgpu(NvU32 *dev_id, NvU32 *subdev_id)
{ 0x20B7, 0x1804, 0x20B7, 0x1532 },
{ 0x20B9, 0x157F, 0x20B7, 0x1532 },
{ 0x20FD, 0x17F8, 0x20F5, 0x0 },
{ 0x230E, 0x20DF, 0x230E, 0x20DF },
{ 0x2324, 0x17A8, 0x2324, 0x17A6 },
{ 0x2329, 0x198C, 0x2329, 0x198B },
{ 0x232C, 0x2064, 0x232C, 0x2063 },
@ -119,6 +120,13 @@ static const struct {
{0x20F610DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1094}, // GRID A800-4-20C
{0x20F610DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1095}, // GRID A800-7-40C
{0x20F610DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1091}, // GRID A800-1-10C
{0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1461}, // NVIDIA H20L-1-15CME
{0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1462}, // NVIDIA H20L-1-15C
{0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1463}, // NVIDIA H20L-1-30C
{0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_QUARTER_GPU , 1464}, // NVIDIA H20L-2-30C
{0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_HALF_GPU , 1465}, // NVIDIA H20L-3-60C
{0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_HALF_GPU , 1466}, // NVIDIA H20L-4-60C
{0x230E10DE, NV2080_CTRL_GPU_PARTITION_FLAG_FULL_GPU , 1467}, // NVIDIA H20L-7-120C
{0x232110DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU | DRF_DEF(2080, _CTRL_GPU_PARTITION_FLAG, _REQ_DEC_JPG_OFA, _ENABLE), 1061}, // NVIDIA H100L-1-12CME
{0x232110DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_EIGHTHED_GPU , 1062}, // NVIDIA H100L-1-12C
{0x232110DE, NV2080_CTRL_GPU_PARTITION_FLAG_ONE_MINI_QUARTER_GPU , 1063}, // NVIDIA H100L-1-24C

View File

@ -690,6 +690,10 @@ void nvEvo1SendHdmiInfoFrame(const NVDispEvoRec *pDispEvo,
nvkms_memcpy(&infoframe[1], &((const NvU8*) pInfoFrameHeader)[1],
headerSize - 1);
/* copy the payload, starting after the 3-byte header and checksum */
nvkms_memcpy(&infoframe[headerSize + (needChecksum ? sizeof(checksum) : 0)],
pPayload, infoframeSize - headerSize /* payload size */);
/*
* XXX Redundant since needsChecksum implies
* _HDMI_PKT_TRANSMIT_CTRL_CHKSUM_HW_EN via
@ -705,10 +709,6 @@ void nvEvo1SendHdmiInfoFrame(const NVDispEvoRec *pDispEvo,
infoframe[headerSize] = ~checksum + 1;
}
/* copy the payload, starting after the 3-byte header and checksum */
nvkms_memcpy(&infoframe[headerSize + (needChecksum ? sizeof(checksum) : 0)],
pPayload, infoframeSize - headerSize /* payload size */);
ret = NvHdmiPkt_PacketWrite(pDevEvo->hdmiLib.handle,
pDispEvo->displayOwner,
pHeadState->activeRmId,
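Both this hunk and the one in the next file move the payload copy ahead of the checksum computation so the checksum covers the payload bytes. For reference, a standalone sketch of the InfoFrame checksum rule being applied (header, checksum byte, and payload must sum to zero modulo 256); the helper name is illustrative.
static NvU8 example_infoframe_checksum(const NvU8 *header, NvU32 headerSize,
                                       const NvU8 *payload, NvU32 payloadSize)
{
    NvU8 sum = 0;
    NvU32 i;
    for (i = 0; i < headerSize; i++)  sum += header[i];
    for (i = 0; i < payloadSize; i++) sum += payload[i];
    /* Two's complement: adding this byte makes the total wrap to zero. */
    return (NvU8)(~sum + 1);
}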

View File

@ -2521,7 +2521,8 @@ static NvBool ConstructAdvancedInfoFramePacket(
* XXX If required, add support for the large infoframe with
* multiple infoframes grouped together.
*/
nvAssert((infoframeSize + (needChecksum ? 1 : 0)) <= packetLen);
nvAssert((infoframeSize + 1 /* + HB3 */ + (needChecksum ? 1 : 0)) <=
packetLen);
pPacket[0] = hdmiPacketType; /* HB0 */
@ -2554,10 +2555,8 @@ static NvBool ConstructAdvancedInfoFramePacket(
if (needChecksum) {
pPacket[4] = 0; /* PB0: checksum */
/*
* XXX Redundant since we always call with swChecksum=FALSE and
* _HDMI_PKT_TRANSMIT_CTRL_CHKSUM_HW_EN
*/
nvkms_memcpy(&pPacket[5], pPayload, payloadLen); /* PB1~ */
if (swChecksum) {
NvU8 checksum = 0;
@ -2566,8 +2565,6 @@ static NvBool ConstructAdvancedInfoFramePacket(
}
pPacket[4] = ~checksum + 1;
}
nvkms_memcpy(&pPacket[5], pPayload, payloadLen); /* PB1~ */
} else {
nvAssert(!swChecksum);
nvkms_memcpy(&pPacket[4], pPayload, payloadLen); /* PB0~ */
@ -2587,6 +2584,7 @@ static void SendHdmiInfoFrameCA(const NVDispEvoRec *pDispEvo,
NVHDMIPKT_TYPE hdmiLibType;
NVHDMIPKT_RESULT ret;
ADVANCED_INFOFRAME advancedInfoFrame = { };
NvBool swChecksum;
/*
* These structures are weird. The NVT_VIDEO_INFOFRAME,
* NVT_VENDOR_SPECIFIC_INFOFRAME,
@ -2616,10 +2614,21 @@ static void SendHdmiInfoFrameCA(const NVDispEvoRec *pDispEvo,
advancedInfoFrame.location = INFOFRAME_CTRL_LOC_VBLANK;
advancedInfoFrame.hwChecksum = needChecksum;
// Large infoframes are incompatible with hwChecksum
nvAssert(!(advancedInfoFrame.isLargeInfoframe &&
advancedInfoFrame.hwChecksum));
// XXX WAR bug 5124145 by always computing checksum in software if needed.
swChecksum = needChecksum;
// If we need a checksum: hwChecksum, swChecksum, or both must be enabled.
nvAssert(!needChecksum ||
(advancedInfoFrame.hwChecksum || swChecksum));
if (!ConstructAdvancedInfoFramePacket(pInfoFrameHeader,
infoFrameSize,
needChecksum,
FALSE /* swChecksum */,
swChecksum,
packet,
sizeof(packet))) {
return;

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2022-2024 NVIDIA CORPORATION & AFFILIATES
* SPDX-FileCopyrightText: Copyright (c) 2022-2025 NVIDIA CORPORATION & AFFILIATES
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -33,6 +33,7 @@
// management partition and CPU-RM/other uprocs.
//
#define NVDM_TYPE_RESET 0x4
#define NVDM_TYPE_HULK 0x11
#define NVDM_TYPE_FIRMWARE_UPDATE 0x12
#define NVDM_TYPE_PRC 0x13

View File

@ -1,5 +1,5 @@
/*
* SPDX-FileCopyrightText: Copyright (c) 2000-2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-FileCopyrightText: Copyright (c) 2000-2025 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* SPDX-License-Identifier: MIT
*
* Permission is hereby granted, free of charge, to any person obtaining a
@ -211,21 +211,18 @@
// to any specific hardware.
//
//
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0 0x000000C8
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_ID 7:0
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_NEXT 15:8
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_LENGTH 23:16
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_SIG_LO 31:24
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1 0x000000CC
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_SIG_HI 15:0
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_VERSION 18:16
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_PEER_CLIQUE_ID 22:19
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RELAXED_ORDERING 23:23
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RELAXED_ORDERING_DEFAULT 0x00000000
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RELAXED_ORDERING_DISABLE 0x00000001
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RSVD 31:24
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0 0x000000C8
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_ID 7:0
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_NEXT 15:8
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_LENGTH 23:16
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_0_SIG_LO 31:24
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1 0x000000CC
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_SIG_HI 15:0
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_VERSION 18:16
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_PEER_CLIQUE_ID 22:19
#define NV_PCI_VIRTUAL_P2P_APPROVAL_CAP_1_RSVD 31:23
#define NV_PCI_VIRTUAL_P2P_APPROVAL_SIGNATURE 0x00503250
#define NV_PCI_VIRTUAL_P2P_APPROVAL_SIGNATURE 0x00503250
// Chipset-specific definitions.
// Intel SantaRosa definitions

View File

@ -498,6 +498,9 @@ typedef struct nv_state_t
NvU32 dispIsoStreamId;
NvU32 dispNisoStreamId;
} iommus;
/* Console is managed by drm drivers or NVKMS */
NvBool client_managed_console;
} nv_state_t;
#define NVFP_TYPE_NONE 0x0
@ -542,9 +545,9 @@ typedef struct UvmGpuNvlinkInfo_tag *nvgpuNvlinkInfo_t;
typedef struct UvmGpuEccInfo_tag *nvgpuEccInfo_t;
typedef struct UvmGpuFaultInfo_tag *nvgpuFaultInfo_t;
typedef struct UvmGpuAccessCntrInfo_tag *nvgpuAccessCntrInfo_t;
typedef struct UvmGpuAccessCntrConfig_tag *nvgpuAccessCntrConfig_t;
typedef struct UvmGpuInfo_tag nvgpuInfo_t;
typedef struct UvmGpuClientInfo_tag nvgpuClientInfo_t;
typedef struct UvmGpuAccessCntrConfig_tag nvgpuAccessCntrConfig_t;
typedef struct UvmGpuInfo_tag nvgpuInfo_t;
typedef struct UvmGpuClientInfo_tag nvgpuClientInfo_t;
typedef struct UvmPmaAllocationOptions_tag *nvgpuPmaAllocationOptions_t;
typedef struct UvmPmaStatistics_tag *nvgpuPmaStatistics_t;
typedef struct UvmGpuMemoryInfo_tag *nvgpuMemoryInfo_t;

View File

@ -2398,7 +2398,7 @@ NV_STATUS NV_API_CALL rm_power_management(
// For GPU driving console, disable console access here, to ensure no console
// writes through BAR1 can interfere with physical RM's setup of BAR1
//
if (rm_get_uefi_console_status(pNv))
if (pNv->client_managed_console)
{
os_disable_console_access();
bConsoleDisabled = NV_TRUE;

View File

@ -5555,3 +5555,21 @@ void osAllocatedRmClient(void *pOsInfo)
if (nvfp != NULL)
nvfp->bCleanupRmapi = NV_TRUE;
}
/*!
* @brief Update state to indicate that the console is managed by a drm driver.
*
* @param[in] pGpu OBJGPU object pointer
*
* @returns void
*/
void
osDisableConsoleManagement
(
OBJGPU *pGpu
)
{
nv_state_t *nv = NV_GET_NV_STATE(pGpu);
nv->client_managed_console = NV_TRUE;
}

View File

@ -913,7 +913,6 @@ static void
RmDeterminePrimaryDevice(OBJGPU *pGpu)
{
nv_state_t *nv = NV_GET_NV_STATE(pGpu);
NvBool bFrameBufferConsoleDevice = NV_FALSE;
// Skip updating nv->primary_vga while RM is recovering after GPU reset
if (nv->flags & NV_FLAG_IN_RECOVERY)
@ -946,15 +945,15 @@ RmDeterminePrimaryDevice(OBJGPU *pGpu)
//
// If the GPU is driving any frame buffer console (vesafb, efifb, etc.),
// mark the GPU as Primary.
// mark the console as client-managed and the GPU as Primary.
//
bFrameBufferConsoleDevice = rm_get_uefi_console_status(nv);
nv->client_managed_console = rm_get_uefi_console_status(nv);
NV_DEV_PRINTF(NV_DBG_SETUP, nv, " is %s UEFI console device\n",
bFrameBufferConsoleDevice ? "primary" : "not primary");
nv->client_managed_console ? "primary" : "not primary");
pGpu->setProperty(pGpu, PDB_PROP_GPU_PRIMARY_DEVICE,
(bFrameBufferConsoleDevice || !!nv->primary_vga));
(nv->client_managed_console || !!nv->primary_vga));
}
static void
@ -1839,7 +1838,7 @@ NvBool RmInitAdapter(
// For GPU driving console, disable console access here, to ensure no console
// writes through BAR1 can interfere with physical RM's setup of BAR1
//
if (rm_get_uefi_console_status(nv))
if (nv->client_managed_console)
{
os_disable_console_access();
consoleDisabled = NV_TRUE;

View File

@ -87,7 +87,7 @@ RmSaveDisplayState
NV2080_CTRL_CMD_INTERNAL_DISPLAY_PRE_UNIX_CONSOLE_PARAMS preUnixConsoleParams = {0};
NV2080_CTRL_CMD_INTERNAL_DISPLAY_POST_UNIX_CONSOLE_PARAMS postUnixConsoleParams = {0};
if (IS_VIRTUAL(pGpu) || pKernelDisplay == NULL)
if (IS_VIRTUAL(pGpu) || (pKernelDisplay == NULL) || nv->client_managed_console)
{
return;
}
@ -157,20 +157,12 @@ static void RmRestoreDisplayState
NV2080_CTRL_CMD_INTERNAL_DISPLAY_PRE_UNIX_CONSOLE_PARAMS preUnixConsoleParams = {0};
NV2080_CTRL_CMD_INTERNAL_DISPLAY_POST_UNIX_CONSOLE_PARAMS postUnixConsoleParams = {0};
NV_ASSERT_OR_RETURN_VOID(pKernelDisplay != NULL);
//
// vGPU:
// Since vGPU does all real hardware management in the host,
// there is nothing to do at this point in the guest OS.
//
// Since vGPU does all real hardware management in the
// host, there is nothing to do at this point in the
// guest OS (where IS_VIRTUAL(pGpu) is true).
//
if (IS_VIRTUAL(pGpu))
if (IS_VIRTUAL(pGpu) || (pKernelDisplay == NULL) || nv->client_managed_console)
{
// we don't have VGA state that's needing to be restored.
NV_PRINTF(LEVEL_INFO, "skipping RestoreDisplayState on VGPU (0x%x)\n",
pGpu->gpuId);
return;
}

Some files were not shown because too many files have changed in this diff.