-
Notifications
You must be signed in to change notification settings - Fork 41
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
242 additions
and
157 deletions.
There are no files selected for viewing
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -30,4 +30,4 @@ jobs: | |
|
||
- name: aplusb | ||
working-directory: ${{github.workspace}} | ||
run: ./build/enumDevices | ||
run: ./build/aplusb |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,2 @@ | ||
add_subdirectory(clew) | ||
add_subdirectory(utils) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
#ifdef __CLION_IDE__ | ||
// Этот include виден только для CLion парсера, это позволяет IDE "знать" ключевые слова вроде __kernel, __global | ||
// а также уметь подсказывать OpenCL методы, описанные в данном инклюде (такие как get_global_id(...) и get_local_id(...)) | ||
#include "clion_defines.cl" | ||
#endif | ||
|
||
#line 8// Седьмая строчка теперь восьмая (при ошибках компиляции в логе компиляции будут указаны корректные строчки благодаря этой директиве) | ||
|
||
// Element-wise sum of two float arrays: c[i] = a[i] + b[i] for every i in [0, n).
// - A kernel differs from an ordinary function by the __kernel qualifier and by always returning void.
// - a, b are the input arrays and c is the output array; the __global qualifier marks them
//   as pointers into the device's global memory (video memory).
// - n is the number of elements in each array; its type (unsigned int) must agree with the type
//   passed to the corresponding clSetKernelArg call on the host side.
__kernel void aplusb(__global const float* a,
                     __global const float* b,
                     __global float* c,
                     unsigned int n)
{
    // get_global_id tells which work-item is executed by this thread, see
    // https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/
    // OpenCL Compiler -> Built-in Functions -> Work-Item Functions
    const size_t index = get_global_id(0);

    // The element count is not necessarily a multiple of the work-group size, in which case the
    // global work size is rounded up to such a multiple. Work-items past the end of the data
    // must return before touching memory: an unchecked access by index would be undefined
    // behaviour (up to hanging the OS).
    if (index >= n) {
        return;
    }

    c[index] = a[index] + b[index];
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
#ifndef clion_defines_cl // pragma once | ||
#define clion_defines_cl | ||
|
||
#ifdef __CLION_IDE__ | ||
|
||
#define __kernel | ||
#define __global | ||
#define __local | ||
#define __constant | ||
#define __private | ||
|
||
#define half float | ||
|
||
// IDE-only stand-ins for the OpenCL float vector types (this section is compiled only under
// __CLION_IDE__); each struct exposes one float member per vector component.
struct float2 { float x, y; };
struct float3 { float x, y, z; };
struct float4 { float x, y, z, w; };
|
||
// https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/commonFunctions.html | ||
#define gentype float | ||
gentype clamp (gentype x, float minval, float maxval); | ||
gentype degrees (gentype radians); | ||
gentype max (gentype x, gentype y); | ||
gentype min (gentype x, gentype y); | ||
gentype mix (gentype x, gentype y, gentype a); | ||
gentype radians (gentype degrees); | ||
gentype sign (gentype x); | ||
gentype smoothstep (gentype edge0, gentype edge1, gentype x); | ||
gentype step (gentype edge, gentype x); | ||
#undef gentype | ||
|
||
// https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/barrier.html | ||
enum cl_mem_fence_flags | ||
{ | ||
CLK_LOCAL_MEM_FENCE, | ||
CLK_GLOBAL_MEM_FENCE | ||
}; | ||
void barrier(cl_mem_fence_flags flags); | ||
|
||
// https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/vectorDataLoadandStoreFunctions.html | ||
#define gentype float | ||
#define gentypen float4 | ||
gentypen vload4 (size_t offset, const gentype *p); | ||
void vstore4 (gentypen data, size_t offset, gentype *p); | ||
void vstore4 (gentypen data, size_t offset, gentype *p); | ||
#undef gentypen | ||
#undef gentype | ||
float vload_half (size_t offset, const half *p); | ||
float4 vload_half4 (size_t offset, const half *p); | ||
void vstore_half (float data, size_t offset, half *p); | ||
void vstore_half4 (float4 data, size_t offset, half *p); | ||
float4 vloada_half4 (size_t offset, const half *p); | ||
void vstorea_half4 (float4 data, size_t offset, half *p); | ||
|
||
// https://www.khronos.org/registry/OpenCL/sdk/1.2/docs/man/xhtml/workItemFunctions.html | ||
uint get_work_dim (); | ||
size_t get_global_size (uint dimindx); | ||
size_t get_global_id (uint dimindx); | ||
size_t get_local_size (uint dimindx); | ||
size_t get_local_id (uint dimindx); | ||
size_t get_num_groups (uint dimindx); | ||
size_t get_group_id (uint dimindx); | ||
size_t get_global_offset (uint dimindx); | ||
|
||
#ifndef STATIC_KEYWORD | ||
#define STATIC_KEYWORD static | ||
#endif | ||
|
||
#endif | ||
|
||
// 64 for AMD, 32 for NVidia, 8 for intel GPUs, 1 for CPU | ||
#define WARP_SIZE 64 | ||
|
||
#endif // pragma once |
Oops, something went wrong.