// main.cpp - host driver that runs, times and verifies the GPU bitonic sort.
#include "bitonic_sort.cuh"
#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>
// Reports whether the first `size` ints of `arr` are in non-decreasing order.
// An array with fewer than two elements is reported as sorted.
bool isSorted(int *arr, int size) {
    int idx = 1;
    while (idx < size) {
        const int prev = arr[idx - 1];
        const int curr = arr[idx];
        // A strictly smaller successor breaks the ordering; equal neighbours are fine.
        if (prev > curr) {
            return false;
        }
        ++idx;
    }
    return true;
}
int main() {
const int SIZE = 1048576; // Must be a multiple of 32 for this example
// Allocate and initialize host array
int *h_arr = new int[SIZE];
for (int i = 0; i < SIZE; i++) {
h_arr[i] = rand() % 1000; // Random integers between 0 and 999
}
// Allocate device array
int *d_arr;
cudaMalloc(&d_arr, SIZE * sizeof(int));
// Copy host array to device
cudaMemcpy(d_arr, h_arr, SIZE * sizeof(int), cudaMemcpyHostToDevice);
// Create CUDA events for timing
cudaEvent_t start, stop;
cudaEventCreate(&start);
cudaEventCreate(&stop);
// Record the start event
cudaEventRecord(start, nullptr);
// Launch kernel
launchBitonicSort(d_arr, SIZE);
// Record the stop event
cudaEventRecord(stop, nullptr);
cudaEventSynchronize(stop);
// Calculate elapsed time
float milliseconds = 0;
cudaEventElapsedTime(&milliseconds, start, stop);
// Copy result back to host
cudaMemcpy(h_arr, d_arr, SIZE * sizeof(int), cudaMemcpyDeviceToHost);
// Check if sorted
bool sorted = isSorted(h_arr, SIZE);
printf("Array is %s\n", sorted ? "sorted" : "not sorted");
// Print first few elements to verify
printf("First 32 elements: ");
for (int i = 0; i < 32; i++) {
printf("%d ", h_arr[i]);
}
printf("\n");
// Print timing information
printf("Kernel execution time: %f milliseconds\n", milliseconds);
// Clean up
delete[] h_arr;
cudaFree(d_arr);
cudaEventDestroy(start);
cudaEventDestroy(stop);
return 0;
}