From 76f3c5f23160ae1ec575be46eb5689d345aa58bb Mon Sep 17 00:00:00 2001 From: Weicheng Date: Wed, 18 Sep 2024 19:25:12 +0800 Subject: [PATCH] List important device properties. --- programming/cuda.md | 54 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) diff --git a/programming/cuda.md b/programming/cuda.md index b1c3ee0..7f3139b 100644 --- a/programming/cuda.md +++ b/programming/cuda.md @@ -54,6 +54,60 @@ struct dim3 { }; ``` +## Device Properties + +```c +#include <cuda_runtime.h> + +int main() { + // ... + int deviceCount = 0; + cudaGetDeviceCount(&deviceCount); + + int dev = 0; + cudaSetDevice(dev); + + int driverVersion = 0; + cudaDriverGetVersion(&driverVersion); + + int runtimeVersion = 0; + cudaRuntimeGetVersion(&runtimeVersion); + + cudaDeviceProp deviceProp; + CHECK(cudaGetDeviceProperties(&deviceProp, dev)); + + // ... +} +``` + +其中 [`cudaDeviceProp`](https://docs.nvidia.com/cuda/cuda-runtime-api/structcudaDeviceProp.html) 型变量的主要成员包括: + +| 类型 | 成员名 | 含义 | 典型值 (A100 80GB PCIe) | +| :--: | :----: | :--: | :--: | +| `int` | `clockRate` | (deprecated) clock frequency in kHz | 1.41 GHz | +| `int` | `l2CacheSize` | size of L2 cache in bytes | 41943040 | +| `int` | `major`, `minor` | major and minor compute capability | 8.0 | +| `int` | `maxThreadsPerMultiProcessor` | maximum resident threads per multiprocessor | 2048 | +| `int` | `maxThreadsPerBlock` | maximum number of threads per block | 1024 | +| `int` | `memoryClockRate` | (deprecated) peak memory clock frequency in kHz | 1512 MHz | +| `int` | `memoryBusWidth` | global memory bus width in bits | 5120-bit | +| `int` | `multiProcessorCount` | number of multiprocessors on device | 108 | +| `char[256]` | `name` | ASCII string identifying device | `"NVIDIA A100 80GB PCIe"` | +| `int` | `regsPerBlock` | 32-bit registers available per block | 65536 | +| `size_t` | `sharedMemPerBlock` | shared memory available per block in bytes | 49152 | +| `size_t` | `totalGlobalMem` | global memory available on device 
in bytes | 79.15 GBytes | +| `int` | `warpSize` | warp size in threads | 32 | + +也可以用 `nvidia-smi` 命令获取 GPU 信息: + +```shell +# 显示每个已安装 GPU 的 ID +nvidia-smi -L + +# 显示 MEMORY | UTILIZATION | CLOCK 相关信息 +nvidia-smi -q -i 0 -d [ MEMORY | UTILIZATION | CLOCK ] +``` + ## *Hello, world* in CUDA [`hello.cu`](./cuda/hello.cu) 关键行: