-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy path01-nbody.cu
151 lines (119 loc) · 3.99 KB
/
01-nbody.cu
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include "timer.h"
#include "check.h"
#define SOFTENING 1e-9f
/*
* Each body contains x, y, and z coordinate positions,
* as well as velocities in the x, y, and z directions.
*/
typedef struct { float x, y, z, vx, vy, vz; } Body;
/*
* Do not modify this function. A constraint of this exercise is
* that it remain a host function.
*/
void randomizeBodies(float *data, int n) {
for (int i = 0; i < n; i++) {
data[i] = 2.0f * (rand() / (float)RAND_MAX) - 1.0f;
}
}
/*
* This function calculates the gravitational impact of all bodies in the system
* on all others, but does not update their positions.
*/
__global__
void bodyForce(Body *p, float dt, int n) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
int stride = blockDim.x * gridDim.x;
for (int i = index; i < n; i += stride) {
float Fx = 0.0f; float Fy = 0.0f; float Fz = 0.0f;
for (int j = 0; j < n; j++) {
float dx = p[j].x - p[i].x;
float dy = p[j].y - p[i].y;
float dz = p[j].z - p[i].z;
float distSqr = dx*dx + dy*dy + dz*dz + SOFTENING;
float invDist = rsqrtf(distSqr);
float invDist3 = invDist * invDist * invDist;
Fx += dx * invDist3; Fy += dy * invDist3; Fz += dz * invDist3;
}
p[i].vx += dt*Fx;
p[i].vy += dt*Fy;
p[i].vz += dt*Fz;
}
}
__global__ void add(Body*p, float dt,int n) {
int index = threadIdx.x + blockIdx.x * blockDim.x;
int stride = blockDim.x * gridDim.x;
for (int i = index; i < n; i += stride) {
p[i].x += p[i].vx*dt;
p[i].y += p[i].vy*dt;
p[i].z += p[i].vz*dt;
}
}
int main(const int argc, const char** argv) {
int deviceId;
int numberOfSMs;
cudaGetDevice(&deviceId);
cudaDeviceGetAttribute(&numberOfSMs, cudaDevAttrMultiProcessorCount, deviceId);
/*
* Do not change the value for `nBodies` here. If you would like to modify it,
* pass values into the command line.
*/
int nBodies = 2<<11;
int salt = 0;
if (argc > 1) nBodies = 2<<atoi(argv[1]);
/*
* This salt is for assessment reasons. Tampering with it will result in automatic failure.
*/
if (argc > 2) salt = atoi(argv[2]);
const float dt = 0.01f; // time step
const int nIters = 10; // simulation iterations
int bytes = nBodies * sizeof(Body);
float *buf;
cudaMallocManaged(&buf, bytes);
//cudaMemPrefetchAsync(buf, bytes, deviceId);
Body *p = (Body*)buf;
/*
* As a constraint of this exercise, `randomizeBodies` must remain a host function.
*/
randomizeBodies(buf, 6 * nBodies); // Init pos / vel data
size_t threadsPerBlock = 256;
size_t numberOfBlocks = 32 * numberOfSMs;
double totalTime = 0.0;
/*
* This simulation will run for 10 cycles of time, calculating gravitational
* interaction amongst bodies, and adjusting their positions to reflect.
*/
/*******************************************************************/
// Do not modify these 2 lines of code.
for (int iter = 0; iter < nIters; iter++) {
StartTimer();
/*******************************************************************/
/*
* You will likely wish to refactor the work being done in `bodyForce`,
* as well as the work to integrate the positions.
*/
bodyForce<<< numberOfBlocks, threadsPerBlock >>>(p, dt, nBodies); // compute interbody forces
cudaDeviceSynchronize();
add<<< numberOfBlocks, threadsPerBlock >>>(p, dt, nBodies);
// Do not modify the code in this section.
const double tElapsed = GetTimer() / 1000.0;
totalTime += tElapsed;
}
cudaDeviceSynchronize();
double avgTime = totalTime / (double)(nIters);
float billionsOfOpsPerSecond = 1e-9 * nBodies * nBodies / avgTime;
#ifdef ASSESS
checkPerformance(buf, billionsOfOpsPerSecond, salt);
#else
checkAccuracy(buf, nBodies);
printf("%d Bodies: average %0.3f Billion Interactions / second\n", nBodies, billionsOfOpsPerSecond);
salt += 1;
#endif
/*******************************************************************/
/*
* Feel free to modify code below.
*/
cudaFree(buf);
}