Commit d5327ddc authored by Jeffrey Wilcke's avatar Jeffrey Wilcke

Merge pull request #1869 from Gustav-Simonsson/gpu_miner

all: Add GPU mining, disabled by default
parents b7477540 ec6a548e
......@@ -16,8 +16,8 @@
},
{
"ImportPath": "github.com/ethereum/ethash",
"Comment": "v23.1-234-g062e40a",
"Rev": "062e40a1a1671f5a5102862b56e4c56f68a732f5"
"Comment": "v23.1-235-gb39e007",
"Rev": "b39e007d393ab5945b4c0748a7415b7e31c5db04"
},
{
"ImportPath": "github.com/fatih/color",
......
/*
Package cl provides a binding to the OpenCL api. It's mostly a low-level
wrapper that avoids adding functionality while still making the interface
a little more friendly and easy to use.
Resource life-cycle management:
For any CL object that gets created (buffer, queue, kernel, etc..) you should
call object.Release() when finished with it to free the CL resources. This
explicitely calls the related clXXXRelease method for the type. However,
as a fallback there is a finalizer set for every resource item that takes
care of it (eventually) if Release isn't called. In this way you can have
better control over the life cycle of resources while having a fall back
to avoid leaks. This is similar to how file handles and such are handled
in the Go standard packages.
*/
package cl
// #include "headers/1.2/opencl.h"
// #cgo CFLAGS: -Iheaders/1.2
// #cgo darwin LDFLAGS: -framework OpenCL
// #cgo linux LDFLAGS: -lOpenCL
import "C"
import "errors"
var ErrUnsupported = errors.New("cl: unsupported")
package cl
import (
"math/rand"
"reflect"
"strings"
"testing"
)
var kernelSource = `
__kernel void square(
__global float* input,
__global float* output,
const unsigned int count)
{
int i = get_global_id(0);
if(i < count)
output[i] = input[i] * input[i];
}
`
func getObjectStrings(object interface{}) map[string]string {
v := reflect.ValueOf(object)
t := reflect.TypeOf(object)
strs := make(map[string]string)
numMethods := t.NumMethod()
for i := 0; i < numMethods; i++ {
method := t.Method(i)
if method.Type.NumIn() == 1 && method.Type.NumOut() == 1 && method.Type.Out(0).Kind() == reflect.String {
// this is a string-returning method with (presumably) only a pointer receiver parameter
// call it
outs := v.Method(i).Call([]reflect.Value{})
// put the result in our map
strs[method.Name] = (outs[0].Interface()).(string)
}
}
return strs
}
func TestPlatformStringsContainNoNULs(t *testing.T) {
platforms, err := GetPlatforms()
if err != nil {
t.Fatalf("Failed to get platforms: %+v", err)
}
for _, p := range platforms {
for key, value := range getObjectStrings(p) {
if strings.Contains(value, "\x00") {
t.Fatalf("platform string %q = %+q contains NUL", key, value)
}
}
}
}
func TestDeviceStringsContainNoNULs(t *testing.T) {
platforms, err := GetPlatforms()
if err != nil {
t.Fatalf("Failed to get platforms: %+v", err)
}
for _, p := range platforms {
devs, err := p.GetDevices(DeviceTypeAll)
if err != nil {
t.Fatalf("Failed to get devices for platform %q: %+v", p.Name(), err)
}
for _, d := range devs {
for key, value := range getObjectStrings(d) {
if strings.Contains(value, "\x00") {
t.Fatalf("device string %q = %+q contains NUL", key, value)
}
}
}
}
}
func TestHello(t *testing.T) {
var data [1024]float32
for i := 0; i < len(data); i++ {
data[i] = rand.Float32()
}
platforms, err := GetPlatforms()
if err != nil {
t.Fatalf("Failed to get platforms: %+v", err)
}
for i, p := range platforms {
t.Logf("Platform %d:", i)
t.Logf(" Name: %s", p.Name())
t.Logf(" Vendor: %s", p.Vendor())
t.Logf(" Profile: %s", p.Profile())
t.Logf(" Version: %s", p.Version())
t.Logf(" Extensions: %s", p.Extensions())
}
platform := platforms[0]
devices, err := platform.GetDevices(DeviceTypeAll)
if err != nil {
t.Fatalf("Failed to get devices: %+v", err)
}
if len(devices) == 0 {
t.Fatalf("GetDevices returned no devices")
}
deviceIndex := -1
for i, d := range devices {
if deviceIndex < 0 && d.Type() == DeviceTypeGPU {
deviceIndex = i
}
t.Logf("Device %d (%s): %s", i, d.Type(), d.Name())
t.Logf(" Address Bits: %d", d.AddressBits())
t.Logf(" Available: %+v", d.Available())
// t.Logf(" Built-In Kernels: %s", d.BuiltInKernels())
t.Logf(" Compiler Available: %+v", d.CompilerAvailable())
t.Logf(" Double FP Config: %s", d.DoubleFPConfig())
t.Logf(" Driver Version: %s", d.DriverVersion())
t.Logf(" Error Correction Supported: %+v", d.ErrorCorrectionSupport())
t.Logf(" Execution Capabilities: %s", d.ExecutionCapabilities())
t.Logf(" Extensions: %s", d.Extensions())
t.Logf(" Global Memory Cache Type: %s", d.GlobalMemCacheType())
t.Logf(" Global Memory Cacheline Size: %d KB", d.GlobalMemCachelineSize()/1024)
t.Logf(" Global Memory Size: %d MB", d.GlobalMemSize()/(1024*1024))
t.Logf(" Half FP Config: %s", d.HalfFPConfig())
t.Logf(" Host Unified Memory: %+v", d.HostUnifiedMemory())
t.Logf(" Image Support: %+v", d.ImageSupport())
t.Logf(" Image2D Max Dimensions: %d x %d", d.Image2DMaxWidth(), d.Image2DMaxHeight())
t.Logf(" Image3D Max Dimenionns: %d x %d x %d", d.Image3DMaxWidth(), d.Image3DMaxHeight(), d.Image3DMaxDepth())
// t.Logf(" Image Max Buffer Size: %d", d.ImageMaxBufferSize())
// t.Logf(" Image Max Array Size: %d", d.ImageMaxArraySize())
// t.Logf(" Linker Available: %+v", d.LinkerAvailable())
t.Logf(" Little Endian: %+v", d.EndianLittle())
t.Logf(" Local Mem Size Size: %d KB", d.LocalMemSize()/1024)
t.Logf(" Local Mem Type: %s", d.LocalMemType())
t.Logf(" Max Clock Frequency: %d", d.MaxClockFrequency())
t.Logf(" Max Compute Units: %d", d.MaxComputeUnits())
t.Logf(" Max Constant Args: %d", d.MaxConstantArgs())
t.Logf(" Max Constant Buffer Size: %d KB", d.MaxConstantBufferSize()/1024)
t.Logf(" Max Mem Alloc Size: %d KB", d.MaxMemAllocSize()/1024)
t.Logf(" Max Parameter Size: %d", d.MaxParameterSize())
t.Logf(" Max Read-Image Args: %d", d.MaxReadImageArgs())
t.Logf(" Max Samplers: %d", d.MaxSamplers())
t.Logf(" Max Work Group Size: %d", d.MaxWorkGroupSize())
t.Logf(" Max Work Item Dimensions: %d", d.MaxWorkItemDimensions())
t.Logf(" Max Work Item Sizes: %d", d.MaxWorkItemSizes())
t.Logf(" Max Write-Image Args: %d", d.MaxWriteImageArgs())
t.Logf(" Memory Base Address Alignment: %d", d.MemBaseAddrAlign())
t.Logf(" Native Vector Width Char: %d", d.NativeVectorWidthChar())
t.Logf(" Native Vector Width Short: %d", d.NativeVectorWidthShort())
t.Logf(" Native Vector Width Int: %d", d.NativeVectorWidthInt())
t.Logf(" Native Vector Width Long: %d", d.NativeVectorWidthLong())
t.Logf(" Native Vector Width Float: %d", d.NativeVectorWidthFloat())
t.Logf(" Native Vector Width Double: %d", d.NativeVectorWidthDouble())
t.Logf(" Native Vector Width Half: %d", d.NativeVectorWidthHalf())
t.Logf(" OpenCL C Version: %s", d.OpenCLCVersion())
// t.Logf(" Parent Device: %+v", d.ParentDevice())
t.Logf(" Profile: %s", d.Profile())
t.Logf(" Profiling Timer Resolution: %d", d.ProfilingTimerResolution())
t.Logf(" Vendor: %s", d.Vendor())
t.Logf(" Version: %s", d.Version())
}
if deviceIndex < 0 {
deviceIndex = 0
}
device := devices[deviceIndex]
t.Logf("Using device %d", deviceIndex)
context, err := CreateContext([]*Device{device})
if err != nil {
t.Fatalf("CreateContext failed: %+v", err)
}
// imageFormats, err := context.GetSupportedImageFormats(0, MemObjectTypeImage2D)
// if err != nil {
// t.Fatalf("GetSupportedImageFormats failed: %+v", err)
// }
// t.Logf("Supported image formats: %+v", imageFormats)
queue, err := context.CreateCommandQueue(device, 0)
if err != nil {
t.Fatalf("CreateCommandQueue failed: %+v", err)
}
program, err := context.CreateProgramWithSource([]string{kernelSource})
if err != nil {
t.Fatalf("CreateProgramWithSource failed: %+v", err)
}
if err := program.BuildProgram(nil, ""); err != nil {
t.Fatalf("BuildProgram failed: %+v", err)
}
kernel, err := program.CreateKernel("square")
if err != nil {
t.Fatalf("CreateKernel failed: %+v", err)
}
for i := 0; i < 3; i++ {
name, err := kernel.ArgName(i)
if err == ErrUnsupported {
break
} else if err != nil {
t.Errorf("GetKernelArgInfo for name failed: %+v", err)
break
} else {
t.Logf("Kernel arg %d: %s", i, name)
}
}
input, err := context.CreateEmptyBuffer(MemReadOnly, 4*len(data))
if err != nil {
t.Fatalf("CreateBuffer failed for input: %+v", err)
}
output, err := context.CreateEmptyBuffer(MemReadOnly, 4*len(data))
if err != nil {
t.Fatalf("CreateBuffer failed for output: %+v", err)
}
if _, err := queue.EnqueueWriteBufferFloat32(input, true, 0, data[:], nil); err != nil {
t.Fatalf("EnqueueWriteBufferFloat32 failed: %+v", err)
}
if err := kernel.SetArgs(input, output, uint32(len(data))); err != nil {
t.Fatalf("SetKernelArgs failed: %+v", err)
}
local, err := kernel.WorkGroupSize(device)
if err != nil {
t.Fatalf("WorkGroupSize failed: %+v", err)
}
t.Logf("Work group size: %d", local)
size, _ := kernel.PreferredWorkGroupSizeMultiple(nil)
t.Logf("Preferred Work Group Size Multiple: %d", size)
global := len(data)
d := len(data) % local
if d != 0 {
global += local - d
}
if _, err := queue.EnqueueNDRangeKernel(kernel, nil, []int{global}, []int{local}, nil); err != nil {
t.Fatalf("EnqueueNDRangeKernel failed: %+v", err)
}
if err := queue.Finish(); err != nil {
t.Fatalf("Finish failed: %+v", err)
}
results := make([]float32, len(data))
if _, err := queue.EnqueueReadBufferFloat32(output, true, 0, results, nil); err != nil {
t.Fatalf("EnqueueReadBufferFloat32 failed: %+v", err)
}
correct := 0
for i, v := range data {
if results[i] == v*v {
correct++
}
}
if correct != len(data) {
t.Fatalf("%d/%d correct values", correct, len(data))
}
}
package cl
// #include <stdlib.h>
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import (
"runtime"
"unsafe"
)
const maxImageFormats = 256
type Context struct {
clContext C.cl_context
devices []*Device
}
type MemObject struct {
clMem C.cl_mem
size int
}
func releaseContext(c *Context) {
if c.clContext != nil {
C.clReleaseContext(c.clContext)
c.clContext = nil
}
}
func releaseMemObject(b *MemObject) {
if b.clMem != nil {
C.clReleaseMemObject(b.clMem)
b.clMem = nil
}
}
func newMemObject(mo C.cl_mem, size int) *MemObject {
memObject := &MemObject{clMem: mo, size: size}
runtime.SetFinalizer(memObject, releaseMemObject)
return memObject
}
func (b *MemObject) Release() {
releaseMemObject(b)
}
// TODO: properties
func CreateContext(devices []*Device) (*Context, error) {
deviceIds := buildDeviceIdList(devices)
var err C.cl_int
clContext := C.clCreateContext(nil, C.cl_uint(len(devices)), &deviceIds[0], nil, nil, &err)
if err != C.CL_SUCCESS {
return nil, toError(err)
}
if clContext == nil {
return nil, ErrUnknown
}
context := &Context{clContext: clContext, devices: devices}
runtime.SetFinalizer(context, releaseContext)
return context, nil
}
func (ctx *Context) GetSupportedImageFormats(flags MemFlag, imageType MemObjectType) ([]ImageFormat, error) {
var formats [maxImageFormats]C.cl_image_format
var nFormats C.cl_uint
if err := C.clGetSupportedImageFormats(ctx.clContext, C.cl_mem_flags(flags), C.cl_mem_object_type(imageType), maxImageFormats, &formats[0], &nFormats); err != C.CL_SUCCESS {
return nil, toError(err)
}
fmts := make([]ImageFormat, nFormats)
for i, f := range formats[:nFormats] {
fmts[i] = ImageFormat{
ChannelOrder: ChannelOrder(f.image_channel_order),
ChannelDataType: ChannelDataType(f.image_channel_data_type),
}
}
return fmts, nil
}
func (ctx *Context) CreateCommandQueue(device *Device, properties CommandQueueProperty) (*CommandQueue, error) {
var err C.cl_int
clQueue := C.clCreateCommandQueue(ctx.clContext, device.id, C.cl_command_queue_properties(properties), &err)
if err != C.CL_SUCCESS {
return nil, toError(err)
}
if clQueue == nil {
return nil, ErrUnknown
}
commandQueue := &CommandQueue{clQueue: clQueue, device: device}
runtime.SetFinalizer(commandQueue, releaseCommandQueue)
return commandQueue, nil
}
func (ctx *Context) CreateProgramWithSource(sources []string) (*Program, error) {
cSources := make([]*C.char, len(sources))
for i, s := range sources {
cs := C.CString(s)
cSources[i] = cs
defer C.free(unsafe.Pointer(cs))
}
var err C.cl_int
clProgram := C.clCreateProgramWithSource(ctx.clContext, C.cl_uint(len(sources)), &cSources[0], nil, &err)
if err != C.CL_SUCCESS {
return nil, toError(err)
}
if clProgram == nil {
return nil, ErrUnknown
}
program := &Program{clProgram: clProgram, devices: ctx.devices}
runtime.SetFinalizer(program, releaseProgram)
return program, nil
}
func (ctx *Context) CreateBufferUnsafe(flags MemFlag, size int, dataPtr unsafe.Pointer) (*MemObject, error) {
var err C.cl_int
clBuffer := C.clCreateBuffer(ctx.clContext, C.cl_mem_flags(flags), C.size_t(size), dataPtr, &err)
if err != C.CL_SUCCESS {
return nil, toError(err)
}
if clBuffer == nil {
return nil, ErrUnknown
}
return newMemObject(clBuffer, size), nil
}
func (ctx *Context) CreateEmptyBuffer(flags MemFlag, size int) (*MemObject, error) {
return ctx.CreateBufferUnsafe(flags, size, nil)
}
func (ctx *Context) CreateEmptyBufferFloat32(flags MemFlag, size int) (*MemObject, error) {
return ctx.CreateBufferUnsafe(flags, 4*size, nil)
}
func (ctx *Context) CreateBuffer(flags MemFlag, data []byte) (*MemObject, error) {
return ctx.CreateBufferUnsafe(flags, len(data), unsafe.Pointer(&data[0]))
}
//float64
func (ctx *Context) CreateBufferFloat32(flags MemFlag, data []float32) (*MemObject, error) {
return ctx.CreateBufferUnsafe(flags, 4*len(data), unsafe.Pointer(&data[0]))
}
func (ctx *Context) CreateUserEvent() (*Event, error) {
var err C.cl_int
clEvent := C.clCreateUserEvent(ctx.clContext, &err)
if err != C.CL_SUCCESS {
return nil, toError(err)
}
return newEvent(clEvent), nil
}
func (ctx *Context) Release() {
releaseContext(ctx)
}
// http://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/clCreateSubBuffer.html
// func (memObject *MemObject) CreateSubBuffer(flags MemFlag, bufferCreateType BufferCreateType, )
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #include "cl_ext.h"
// #endif
import "C"
import (
"strings"
"unsafe"
)
const maxDeviceCount = 64
type DeviceType uint
const (
DeviceTypeCPU DeviceType = C.CL_DEVICE_TYPE_CPU
DeviceTypeGPU DeviceType = C.CL_DEVICE_TYPE_GPU
DeviceTypeAccelerator DeviceType = C.CL_DEVICE_TYPE_ACCELERATOR
DeviceTypeDefault DeviceType = C.CL_DEVICE_TYPE_DEFAULT
DeviceTypeAll DeviceType = C.CL_DEVICE_TYPE_ALL
)
type FPConfig int
const (
FPConfigDenorm FPConfig = C.CL_FP_DENORM // denorms are supported
FPConfigInfNaN FPConfig = C.CL_FP_INF_NAN // INF and NaNs are supported
FPConfigRoundToNearest FPConfig = C.CL_FP_ROUND_TO_NEAREST // round to nearest even rounding mode supported
FPConfigRoundToZero FPConfig = C.CL_FP_ROUND_TO_ZERO // round to zero rounding mode supported
FPConfigRoundToInf FPConfig = C.CL_FP_ROUND_TO_INF // round to positive and negative infinity rounding modes supported
FPConfigFMA FPConfig = C.CL_FP_FMA // IEEE754-2008 fused multiply-add is supported
FPConfigSoftFloat FPConfig = C.CL_FP_SOFT_FLOAT // Basic floating-point operations (such as addition, subtraction, multiplication) are implemented in software
)
var fpConfigNameMap = map[FPConfig]string{
FPConfigDenorm: "Denorm",
FPConfigInfNaN: "InfNaN",
FPConfigRoundToNearest: "RoundToNearest",
FPConfigRoundToZero: "RoundToZero",
FPConfigRoundToInf: "RoundToInf",
FPConfigFMA: "FMA",
FPConfigSoftFloat: "SoftFloat",
}
func (c FPConfig) String() string {
var parts []string
for bit, name := range fpConfigNameMap {
if c&bit != 0 {
parts = append(parts, name)
}
}
if parts == nil {
return ""
}
return strings.Join(parts, "|")
}
func (dt DeviceType) String() string {
var parts []string
if dt&DeviceTypeCPU != 0 {
parts = append(parts, "CPU")
}
if dt&DeviceTypeGPU != 0 {
parts = append(parts, "GPU")
}
if dt&DeviceTypeAccelerator != 0 {
parts = append(parts, "Accelerator")
}
if dt&DeviceTypeDefault != 0 {
parts = append(parts, "Default")
}
if parts == nil {
parts = append(parts, "None")
}
return strings.Join(parts, "|")
}
type Device struct {
id C.cl_device_id
}
func buildDeviceIdList(devices []*Device) []C.cl_device_id {
deviceIds := make([]C.cl_device_id, len(devices))
for i, d := range devices {
deviceIds[i] = d.id
}
return deviceIds
}
// Obtain the list of devices available on a platform. 'platform' refers
// to the platform returned by GetPlatforms or can be nil. If platform
// is nil, the behavior is implementation-defined.
func GetDevices(platform *Platform, deviceType DeviceType) ([]*Device, error) {
var deviceIds [maxDeviceCount]C.cl_device_id
var numDevices C.cl_uint
var platformId C.cl_platform_id
if platform != nil {
platformId = platform.id
}
if err := C.clGetDeviceIDs(platformId, C.cl_device_type(deviceType), C.cl_uint(maxDeviceCount), &deviceIds[0], &numDevices); err != C.CL_SUCCESS {
return nil, toError(err)
}
if numDevices > maxDeviceCount {
numDevices = maxDeviceCount
}
devices := make([]*Device, numDevices)
for i := 0; i < int(numDevices); i++ {
devices[i] = &Device{id: deviceIds[i]}
}
return devices, nil
}
func (d *Device) nullableId() C.cl_device_id {
if d == nil {
return nil
}
return d.id
}
func (d *Device) GetInfoString(param C.cl_device_info, panicOnError bool) (string, error) {
var strC [1024]C.char
var strN C.size_t
if err := C.clGetDeviceInfo(d.id, param, 1024, unsafe.Pointer(&strC), &strN); err != C.CL_SUCCESS {
if panicOnError {
panic("Should never fail")
}
return "", toError(err)
}
// OpenCL strings are NUL-terminated, and the terminator is included in strN
// Go strings aren't NUL-terminated, so subtract 1 from the length
return C.GoStringN((*C.char)(unsafe.Pointer(&strC)), C.int(strN-1)), nil
}
func (d *Device) getInfoUint(param C.cl_device_info, panicOnError bool) (uint, error) {
var val C.cl_uint
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
if panicOnError {
panic("Should never fail")
}
return 0, toError(err)
}
return uint(val), nil
}
func (d *Device) getInfoSize(param C.cl_device_info, panicOnError bool) (int, error) {
var val C.size_t
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
if panicOnError {
panic("Should never fail")
}
return 0, toError(err)
}
return int(val), nil
}
func (d *Device) getInfoUlong(param C.cl_device_info, panicOnError bool) (int64, error) {
var val C.cl_ulong
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
if panicOnError {
panic("Should never fail")
}
return 0, toError(err)
}
return int64(val), nil
}
func (d *Device) getInfoBool(param C.cl_device_info, panicOnError bool) (bool, error) {
var val C.cl_bool
if err := C.clGetDeviceInfo(d.id, param, C.size_t(unsafe.Sizeof(val)), unsafe.Pointer(&val), nil); err != C.CL_SUCCESS {
if panicOnError {
panic("Should never fail")
}
return false, toError(err)
}
return val == C.CL_TRUE, nil
}
func (d *Device) Name() string {
str, _ := d.GetInfoString(C.CL_DEVICE_NAME, true)
return str
}
func (d *Device) Vendor() string {
str, _ := d.GetInfoString(C.CL_DEVICE_VENDOR, true)
return str
}
func (d *Device) Extensions() string {
str, _ := d.GetInfoString(C.CL_DEVICE_EXTENSIONS, true)
return str
}
func (d *Device) OpenCLCVersion() string {
str, _ := d.GetInfoString(C.CL_DEVICE_OPENCL_C_VERSION, true)
return str
}
func (d *Device) Profile() string {
str, _ := d.GetInfoString(C.CL_DEVICE_PROFILE, true)
return str
}
func (d *Device) Version() string {
str, _ := d.GetInfoString(C.CL_DEVICE_VERSION, true)
return str
}
func (d *Device) DriverVersion() string {
str, _ := d.GetInfoString(C.CL_DRIVER_VERSION, true)
return str
}
// The default compute device address space size specified as an
// unsigned integer value in bits. Currently supported values are 32 or 64 bits.
func (d *Device) AddressBits() int {
val, _ := d.getInfoUint(C.CL_DEVICE_ADDRESS_BITS, true)
return int(val)
}
// Size of global memory cache line in bytes.
func (d *Device) GlobalMemCachelineSize() int {
val, _ := d.getInfoUint(C.CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, true)
return int(val)
}
// Maximum configured clock frequency of the device in MHz.
func (d *Device) MaxClockFrequency() int {
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_CLOCK_FREQUENCY, true)
return int(val)
}
// The number of parallel compute units on the OpenCL device.
// A work-group executes on a single compute unit. The minimum value is 1.
func (d *Device) MaxComputeUnits() int {
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_COMPUTE_UNITS, true)
return int(val)
}
// Max number of arguments declared with the __constant qualifier in a kernel.
// The minimum value is 8 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MaxConstantArgs() int {
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_CONSTANT_ARGS, true)
return int(val)
}
// Max number of simultaneous image objects that can be read by a kernel.
// The minimum value is 128 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) MaxReadImageArgs() int {
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_READ_IMAGE_ARGS, true)
return int(val)
}
// Maximum number of samplers that can be used in a kernel. The minimum
// value is 16 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE. (Also see sampler_t.)
func (d *Device) MaxSamplers() int {
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_SAMPLERS, true)
return int(val)
}
// Maximum dimensions that specify the global and local work-item IDs used
// by the data parallel execution model. (Refer to clEnqueueNDRangeKernel).
// The minimum value is 3 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MaxWorkItemDimensions() int {
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, true)
return int(val)
}
// Max number of simultaneous image objects that can be written to by a
// kernel. The minimum value is 8 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) MaxWriteImageArgs() int {
val, _ := d.getInfoUint(C.CL_DEVICE_MAX_WRITE_IMAGE_ARGS, true)
return int(val)
}
// The minimum value is the size (in bits) of the largest OpenCL built-in
// data type supported by the device (long16 in FULL profile, long16 or
// int16 in EMBEDDED profile) for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MemBaseAddrAlign() int {
val, _ := d.getInfoUint(C.CL_DEVICE_MEM_BASE_ADDR_ALIGN, true)
return int(val)
}
func (d *Device) NativeVectorWidthChar() int {
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, true)
return int(val)
}
func (d *Device) NativeVectorWidthShort() int {
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, true)
return int(val)
}
func (d *Device) NativeVectorWidthInt() int {
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, true)
return int(val)
}
func (d *Device) NativeVectorWidthLong() int {
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, true)
return int(val)
}
func (d *Device) NativeVectorWidthFloat() int {
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, true)
return int(val)
}
func (d *Device) NativeVectorWidthDouble() int {
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, true)
return int(val)
}
func (d *Device) NativeVectorWidthHalf() int {
val, _ := d.getInfoUint(C.CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, true)
return int(val)
}
// Max height of 2D image in pixels. The minimum value is 8192
// if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) Image2DMaxHeight() int {
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE2D_MAX_HEIGHT, true)
return int(val)
}
// Max width of 2D image or 1D image not created from a buffer object in
// pixels. The minimum value is 8192 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) Image2DMaxWidth() int {
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE2D_MAX_WIDTH, true)
return int(val)
}
// Max depth of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) Image3DMaxDepth() int {
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_DEPTH, true)
return int(val)
}
// Max height of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) Image3DMaxHeight() int {
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_HEIGHT, true)
return int(val)
}
// Max width of 3D image in pixels. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) Image3DMaxWidth() int {
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE3D_MAX_WIDTH, true)
return int(val)
}
// Max size in bytes of the arguments that can be passed to a kernel. The
// minimum value is 1024 for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
// For this minimum value, only a maximum of 128 arguments can be passed to a kernel.
func (d *Device) MaxParameterSize() int {
val, _ := d.getInfoSize(C.CL_DEVICE_MAX_PARAMETER_SIZE, true)
return int(val)
}
// Maximum number of work-items in a work-group executing a kernel on a
// single compute unit, using the data parallel execution model. (Refer
// to clEnqueueNDRangeKernel). The minimum value is 1.
func (d *Device) MaxWorkGroupSize() int {
val, _ := d.getInfoSize(C.CL_DEVICE_MAX_WORK_GROUP_SIZE, true)
return int(val)
}
// Describes the resolution of device timer. This is measured in nanoseconds.
func (d *Device) ProfilingTimerResolution() int {
val, _ := d.getInfoSize(C.CL_DEVICE_PROFILING_TIMER_RESOLUTION, true)
return int(val)
}
// Size of local memory arena in bytes. The minimum value is 32 KB for
// devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) LocalMemSize() int64 {
val, _ := d.getInfoUlong(C.CL_DEVICE_LOCAL_MEM_SIZE, true)
return val
}
// Max size in bytes of a constant buffer allocation. The minimum value is
// 64 KB for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MaxConstantBufferSize() int64 {
val, _ := d.getInfoUlong(C.CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, true)
return val
}
// Max size of memory object allocation in bytes. The minimum value is max
// (1/4th of CL_DEVICE_GLOBAL_MEM_SIZE, 128*1024*1024) for devices that are
// not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MaxMemAllocSize() int64 {
val, _ := d.getInfoUlong(C.CL_DEVICE_MAX_MEM_ALLOC_SIZE, true)
return val
}
// Size of global device memory in bytes.
func (d *Device) GlobalMemSize() int64 {
val, _ := d.getInfoUlong(C.CL_DEVICE_GLOBAL_MEM_SIZE, true)
return val
}
func (d *Device) Available() bool {
val, _ := d.getInfoBool(C.CL_DEVICE_AVAILABLE, true)
return val
}
func (d *Device) CompilerAvailable() bool {
val, _ := d.getInfoBool(C.CL_DEVICE_COMPILER_AVAILABLE, true)
return val
}
func (d *Device) EndianLittle() bool {
val, _ := d.getInfoBool(C.CL_DEVICE_ENDIAN_LITTLE, true)
return val
}
// Is CL_TRUE if the device implements error correction for all
// accesses to compute device memory (global and constant). Is
// CL_FALSE if the device does not implement such error correction.
func (d *Device) ErrorCorrectionSupport() bool {
val, _ := d.getInfoBool(C.CL_DEVICE_ERROR_CORRECTION_SUPPORT, true)
return val
}
func (d *Device) HostUnifiedMemory() bool {
val, _ := d.getInfoBool(C.CL_DEVICE_HOST_UNIFIED_MEMORY, true)
return val
}
func (d *Device) ImageSupport() bool {
val, _ := d.getInfoBool(C.CL_DEVICE_IMAGE_SUPPORT, true)
return val
}
func (d *Device) Type() DeviceType {
var deviceType C.cl_device_type
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_TYPE, C.size_t(unsafe.Sizeof(deviceType)), unsafe.Pointer(&deviceType), nil); err != C.CL_SUCCESS {
panic("Failed to get device type")
}
return DeviceType(deviceType)
}
// Describes double precision floating-point capability of the OpenCL device
func (d *Device) DoubleFPConfig() FPConfig {
var fpConfig C.cl_device_fp_config
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_DOUBLE_FP_CONFIG, C.size_t(unsafe.Sizeof(fpConfig)), unsafe.Pointer(&fpConfig), nil); err != C.CL_SUCCESS {
panic("Failed to get double FP config")
}
return FPConfig(fpConfig)
}
// Describes the OPTIONAL half precision floating-point capability of the OpenCL device
func (d *Device) HalfFPConfig() FPConfig {
var fpConfig C.cl_device_fp_config
err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_HALF_FP_CONFIG, C.size_t(unsafe.Sizeof(fpConfig)), unsafe.Pointer(&fpConfig), nil)
if err != C.CL_SUCCESS {
return FPConfig(0)
}
return FPConfig(fpConfig)
}
// Type of local memory supported. This can be set to CL_LOCAL implying dedicated
// local memory storage such as SRAM, or CL_GLOBAL. For custom devices, CL_NONE
// can also be returned indicating no local memory support.
func (d *Device) LocalMemType() LocalMemType {
var memType C.cl_device_local_mem_type
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_LOCAL_MEM_TYPE, C.size_t(unsafe.Sizeof(memType)), unsafe.Pointer(&memType), nil); err != C.CL_SUCCESS {
return LocalMemType(C.CL_NONE)
}
return LocalMemType(memType)
}
// Describes the execution capabilities of the device. The mandated minimum capability is CL_EXEC_KERNEL.
func (d *Device) ExecutionCapabilities() ExecCapability {
var execCap C.cl_device_exec_capabilities
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_EXECUTION_CAPABILITIES, C.size_t(unsafe.Sizeof(execCap)), unsafe.Pointer(&execCap), nil); err != C.CL_SUCCESS {
panic("Failed to get execution capabilities")
}
return ExecCapability(execCap)
}
func (d *Device) GlobalMemCacheType() MemCacheType {
var memType C.cl_device_mem_cache_type
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, C.size_t(unsafe.Sizeof(memType)), unsafe.Pointer(&memType), nil); err != C.CL_SUCCESS {
return MemCacheType(C.CL_NONE)
}
return MemCacheType(memType)
}
// Maximum number of work-items that can be specified in each dimension of the work-group to clEnqueueNDRangeKernel.
//
// Returns n size_t entries, where n is the value returned by the query for CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS.
//
// The minimum value is (1, 1, 1) for devices that are not of type CL_DEVICE_TYPE_CUSTOM.
func (d *Device) MaxWorkItemSizes() []int {
dims := d.MaxWorkItemDimensions()
sizes := make([]C.size_t, dims)
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_MAX_WORK_ITEM_SIZES, C.size_t(int(unsafe.Sizeof(sizes[0]))*dims), unsafe.Pointer(&sizes[0]), nil); err != C.CL_SUCCESS {
panic("Failed to get max work item sizes")
}
intSizes := make([]int, dims)
for i, s := range sizes {
intSizes[i] = int(s)
}
return intSizes
}
// +build cl12
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import "unsafe"
const FPConfigCorrectlyRoundedDivideSqrt FPConfig = C.CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT
func init() {
fpConfigNameMap[FPConfigCorrectlyRoundedDivideSqrt] = "CorrectlyRoundedDivideSqrt"
}
func (d *Device) BuiltInKernels() string {
str, _ := d.getInfoString(C.CL_DEVICE_BUILT_IN_KERNELS, true)
return str
}
// Is CL_FALSE if the implementation does not have a linker available. Is CL_TRUE if the linker is available. This can be CL_FALSE for the embedded platform profile only. This must be CL_TRUE if CL_DEVICE_COMPILER_AVAILABLE is CL_TRUE
func (d *Device) LinkerAvailable() bool {
val, _ := d.getInfoBool(C.CL_DEVICE_LINKER_AVAILABLE, true)
return val
}
func (d *Device) ParentDevice() *Device {
var deviceId C.cl_device_id
if err := C.clGetDeviceInfo(d.id, C.CL_DEVICE_PARENT_DEVICE, C.size_t(unsafe.Sizeof(deviceId)), unsafe.Pointer(&deviceId), nil); err != C.CL_SUCCESS {
panic("ParentDevice failed")
}
if deviceId == nil {
return nil
}
return &Device{id: deviceId}
}
// Max number of pixels for a 1D image created from a buffer object. The minimum value is 65536 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE.
func (d *Device) ImageMaxBufferSize() int {
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE_MAX_BUFFER_SIZE, true)
return int(val)
}
// Max number of images in a 1D or 2D image array. The minimum value is 2048 if CL_DEVICE_IMAGE_SUPPORT is CL_TRUE
func (d *Device) ImageMaxArraySize() int {
val, _ := d.getInfoSize(C.CL_DEVICE_IMAGE_MAX_ARRAY_SIZE, true)
return int(val)
}
/*******************************************************************************
* Copyright (c) 2008 - 2012 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
#ifndef __OPENCL_CL_H
#define __OPENCL_CL_H
#include <cl_platform.h>
#ifdef __cplusplus
extern "C" {
#endif
/******************************************************************************/
typedef struct _cl_platform_id * cl_platform_id;
typedef struct _cl_device_id * cl_device_id;
typedef struct _cl_context * cl_context;
typedef struct _cl_command_queue * cl_command_queue;
typedef struct _cl_mem * cl_mem;
typedef struct _cl_program * cl_program;
typedef struct _cl_kernel * cl_kernel;
typedef struct _cl_event * cl_event;
typedef struct _cl_sampler * cl_sampler;
typedef cl_uint cl_bool; /* WARNING! Unlike cl_ types in cl_platform.h, cl_bool is not guaranteed to be the same size as the bool in kernels. */
typedef cl_ulong cl_bitfield;
typedef cl_bitfield cl_device_type;
typedef cl_uint cl_platform_info;
typedef cl_uint cl_device_info;
typedef cl_bitfield cl_device_fp_config;
typedef cl_uint cl_device_mem_cache_type;
typedef cl_uint cl_device_local_mem_type;
typedef cl_bitfield cl_device_exec_capabilities;
typedef cl_bitfield cl_command_queue_properties;
typedef intptr_t cl_device_partition_property;
typedef cl_bitfield cl_device_affinity_domain;
typedef intptr_t cl_context_properties;
typedef cl_uint cl_context_info;
typedef cl_uint cl_command_queue_info;
typedef cl_uint cl_channel_order;
typedef cl_uint cl_channel_type;
typedef cl_bitfield cl_mem_flags;
typedef cl_uint cl_mem_object_type;
typedef cl_uint cl_mem_info;
typedef cl_bitfield cl_mem_migration_flags;
typedef cl_uint cl_image_info;
typedef cl_uint cl_buffer_create_type;
typedef cl_uint cl_addressing_mode;
typedef cl_uint cl_filter_mode;
typedef cl_uint cl_sampler_info;
typedef cl_bitfield cl_map_flags;
typedef cl_uint cl_program_info;
typedef cl_uint cl_program_build_info;
typedef cl_uint cl_program_binary_type;
typedef cl_int cl_build_status;
typedef cl_uint cl_kernel_info;
typedef cl_uint cl_kernel_arg_info;
typedef cl_uint cl_kernel_arg_address_qualifier;
typedef cl_uint cl_kernel_arg_access_qualifier;
typedef cl_bitfield cl_kernel_arg_type_qualifier;
typedef cl_uint cl_kernel_work_group_info;
typedef cl_uint cl_event_info;
typedef cl_uint cl_command_type;
typedef cl_uint cl_profiling_info;
typedef struct _cl_image_format {
cl_channel_order image_channel_order;
cl_channel_type image_channel_data_type;
} cl_image_format;
typedef struct _cl_image_desc {
cl_mem_object_type image_type;
size_t image_width;
size_t image_height;
size_t image_depth;
size_t image_array_size;
size_t image_row_pitch;
size_t image_slice_pitch;
cl_uint num_mip_levels;
cl_uint num_samples;
cl_mem buffer;
} cl_image_desc;
typedef struct _cl_buffer_region {
size_t origin;
size_t size;
} cl_buffer_region;
/******************************************************************************/
/* Error Codes */
#define CL_SUCCESS 0
#define CL_DEVICE_NOT_FOUND -1
#define CL_DEVICE_NOT_AVAILABLE -2
#define CL_COMPILER_NOT_AVAILABLE -3
#define CL_MEM_OBJECT_ALLOCATION_FAILURE -4
#define CL_OUT_OF_RESOURCES -5
#define CL_OUT_OF_HOST_MEMORY -6
#define CL_PROFILING_INFO_NOT_AVAILABLE -7
#define CL_MEM_COPY_OVERLAP -8
#define CL_IMAGE_FORMAT_MISMATCH -9
#define CL_IMAGE_FORMAT_NOT_SUPPORTED -10
#define CL_BUILD_PROGRAM_FAILURE -11
#define CL_MAP_FAILURE -12
#define CL_MISALIGNED_SUB_BUFFER_OFFSET -13
#define CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST -14
#define CL_COMPILE_PROGRAM_FAILURE -15
#define CL_LINKER_NOT_AVAILABLE -16
#define CL_LINK_PROGRAM_FAILURE -17
#define CL_DEVICE_PARTITION_FAILED -18
#define CL_KERNEL_ARG_INFO_NOT_AVAILABLE -19
#define CL_INVALID_VALUE -30
#define CL_INVALID_DEVICE_TYPE -31
#define CL_INVALID_PLATFORM -32
#define CL_INVALID_DEVICE -33
#define CL_INVALID_CONTEXT -34
#define CL_INVALID_QUEUE_PROPERTIES -35
#define CL_INVALID_COMMAND_QUEUE -36
#define CL_INVALID_HOST_PTR -37
#define CL_INVALID_MEM_OBJECT -38
#define CL_INVALID_IMAGE_FORMAT_DESCRIPTOR -39
#define CL_INVALID_IMAGE_SIZE -40
#define CL_INVALID_SAMPLER -41
#define CL_INVALID_BINARY -42
#define CL_INVALID_BUILD_OPTIONS -43
#define CL_INVALID_PROGRAM -44
#define CL_INVALID_PROGRAM_EXECUTABLE -45
#define CL_INVALID_KERNEL_NAME -46
#define CL_INVALID_KERNEL_DEFINITION -47
#define CL_INVALID_KERNEL -48
#define CL_INVALID_ARG_INDEX -49
#define CL_INVALID_ARG_VALUE -50
#define CL_INVALID_ARG_SIZE -51
#define CL_INVALID_KERNEL_ARGS -52
#define CL_INVALID_WORK_DIMENSION -53
#define CL_INVALID_WORK_GROUP_SIZE -54
#define CL_INVALID_WORK_ITEM_SIZE -55
#define CL_INVALID_GLOBAL_OFFSET -56
#define CL_INVALID_EVENT_WAIT_LIST -57
#define CL_INVALID_EVENT -58
#define CL_INVALID_OPERATION -59
#define CL_INVALID_GL_OBJECT -60
#define CL_INVALID_BUFFER_SIZE -61
#define CL_INVALID_MIP_LEVEL -62
#define CL_INVALID_GLOBAL_WORK_SIZE -63
#define CL_INVALID_PROPERTY -64
#define CL_INVALID_IMAGE_DESCRIPTOR -65
#define CL_INVALID_COMPILER_OPTIONS -66
#define CL_INVALID_LINKER_OPTIONS -67
#define CL_INVALID_DEVICE_PARTITION_COUNT -68
/* OpenCL Version */
#define CL_VERSION_1_0 1
#define CL_VERSION_1_1 1
#define CL_VERSION_1_2 1
/* cl_bool */
#define CL_FALSE 0
#define CL_TRUE 1
#define CL_BLOCKING CL_TRUE
#define CL_NON_BLOCKING CL_FALSE
/* cl_platform_info */
#define CL_PLATFORM_PROFILE 0x0900
#define CL_PLATFORM_VERSION 0x0901
#define CL_PLATFORM_NAME 0x0902
#define CL_PLATFORM_VENDOR 0x0903
#define CL_PLATFORM_EXTENSIONS 0x0904
/* cl_device_type - bitfield */
#define CL_DEVICE_TYPE_DEFAULT (1 << 0)
#define CL_DEVICE_TYPE_CPU (1 << 1)
#define CL_DEVICE_TYPE_GPU (1 << 2)
#define CL_DEVICE_TYPE_ACCELERATOR (1 << 3)
#define CL_DEVICE_TYPE_CUSTOM (1 << 4)
#define CL_DEVICE_TYPE_ALL 0xFFFFFFFF
/* cl_device_info */
#define CL_DEVICE_TYPE 0x1000
#define CL_DEVICE_VENDOR_ID 0x1001
#define CL_DEVICE_MAX_COMPUTE_UNITS 0x1002
#define CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS 0x1003
#define CL_DEVICE_MAX_WORK_GROUP_SIZE 0x1004
#define CL_DEVICE_MAX_WORK_ITEM_SIZES 0x1005
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR 0x1006
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT 0x1007
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT 0x1008
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG 0x1009
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT 0x100A
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE 0x100B
#define CL_DEVICE_MAX_CLOCK_FREQUENCY 0x100C
#define CL_DEVICE_ADDRESS_BITS 0x100D
#define CL_DEVICE_MAX_READ_IMAGE_ARGS 0x100E
#define CL_DEVICE_MAX_WRITE_IMAGE_ARGS 0x100F
#define CL_DEVICE_MAX_MEM_ALLOC_SIZE 0x1010
#define CL_DEVICE_IMAGE2D_MAX_WIDTH 0x1011
#define CL_DEVICE_IMAGE2D_MAX_HEIGHT 0x1012
#define CL_DEVICE_IMAGE3D_MAX_WIDTH 0x1013
#define CL_DEVICE_IMAGE3D_MAX_HEIGHT 0x1014
#define CL_DEVICE_IMAGE3D_MAX_DEPTH 0x1015
#define CL_DEVICE_IMAGE_SUPPORT 0x1016
#define CL_DEVICE_MAX_PARAMETER_SIZE 0x1017
#define CL_DEVICE_MAX_SAMPLERS 0x1018
#define CL_DEVICE_MEM_BASE_ADDR_ALIGN 0x1019
#define CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE 0x101A
#define CL_DEVICE_SINGLE_FP_CONFIG 0x101B
#define CL_DEVICE_GLOBAL_MEM_CACHE_TYPE 0x101C
#define CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE 0x101D
#define CL_DEVICE_GLOBAL_MEM_CACHE_SIZE 0x101E
#define CL_DEVICE_GLOBAL_MEM_SIZE 0x101F
#define CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE 0x1020
#define CL_DEVICE_MAX_CONSTANT_ARGS 0x1021
#define CL_DEVICE_LOCAL_MEM_TYPE 0x1022
#define CL_DEVICE_LOCAL_MEM_SIZE 0x1023
#define CL_DEVICE_ERROR_CORRECTION_SUPPORT 0x1024
#define CL_DEVICE_PROFILING_TIMER_RESOLUTION 0x1025
#define CL_DEVICE_ENDIAN_LITTLE 0x1026
#define CL_DEVICE_AVAILABLE 0x1027
#define CL_DEVICE_COMPILER_AVAILABLE 0x1028
#define CL_DEVICE_EXECUTION_CAPABILITIES 0x1029
#define CL_DEVICE_QUEUE_PROPERTIES 0x102A
#define CL_DEVICE_NAME 0x102B
#define CL_DEVICE_VENDOR 0x102C
#define CL_DRIVER_VERSION 0x102D
#define CL_DEVICE_PROFILE 0x102E
#define CL_DEVICE_VERSION 0x102F
#define CL_DEVICE_EXTENSIONS 0x1030
#define CL_DEVICE_PLATFORM 0x1031
#define CL_DEVICE_DOUBLE_FP_CONFIG 0x1032
/* 0x1033 reserved for CL_DEVICE_HALF_FP_CONFIG */
#define CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF 0x1034
#define CL_DEVICE_HOST_UNIFIED_MEMORY 0x1035
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR 0x1036
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT 0x1037
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_INT 0x1038
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG 0x1039
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT 0x103A
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE 0x103B
#define CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF 0x103C
#define CL_DEVICE_OPENCL_C_VERSION 0x103D
#define CL_DEVICE_LINKER_AVAILABLE 0x103E
#define CL_DEVICE_BUILT_IN_KERNELS 0x103F
#define CL_DEVICE_IMAGE_MAX_BUFFER_SIZE 0x1040
#define CL_DEVICE_IMAGE_MAX_ARRAY_SIZE 0x1041
#define CL_DEVICE_PARENT_DEVICE 0x1042
#define CL_DEVICE_PARTITION_MAX_SUB_DEVICES 0x1043
#define CL_DEVICE_PARTITION_PROPERTIES 0x1044
#define CL_DEVICE_PARTITION_AFFINITY_DOMAIN 0x1045
#define CL_DEVICE_PARTITION_TYPE 0x1046
#define CL_DEVICE_REFERENCE_COUNT 0x1047
#define CL_DEVICE_PREFERRED_INTEROP_USER_SYNC 0x1048
#define CL_DEVICE_PRINTF_BUFFER_SIZE 0x1049
#define CL_DEVICE_IMAGE_PITCH_ALIGNMENT 0x104A
#define CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT 0x104B
/* cl_device_fp_config - bitfield */
#define CL_FP_DENORM (1 << 0)
#define CL_FP_INF_NAN (1 << 1)
#define CL_FP_ROUND_TO_NEAREST (1 << 2)
#define CL_FP_ROUND_TO_ZERO (1 << 3)
#define CL_FP_ROUND_TO_INF (1 << 4)
#define CL_FP_FMA (1 << 5)
#define CL_FP_SOFT_FLOAT (1 << 6)
#define CL_FP_CORRECTLY_ROUNDED_DIVIDE_SQRT (1 << 7)
/* cl_device_mem_cache_type */
#define CL_NONE 0x0
#define CL_READ_ONLY_CACHE 0x1
#define CL_READ_WRITE_CACHE 0x2
/* cl_device_local_mem_type */
#define CL_LOCAL 0x1
#define CL_GLOBAL 0x2
/* cl_device_exec_capabilities - bitfield */
#define CL_EXEC_KERNEL (1 << 0)
#define CL_EXEC_NATIVE_KERNEL (1 << 1)
/* cl_command_queue_properties - bitfield */
#define CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE (1 << 0)
#define CL_QUEUE_PROFILING_ENABLE (1 << 1)
/* cl_context_info */
#define CL_CONTEXT_REFERENCE_COUNT 0x1080
#define CL_CONTEXT_DEVICES 0x1081
#define CL_CONTEXT_PROPERTIES 0x1082
#define CL_CONTEXT_NUM_DEVICES 0x1083
/* cl_context_properties */
#define CL_CONTEXT_PLATFORM 0x1084
#define CL_CONTEXT_INTEROP_USER_SYNC 0x1085
/* cl_device_partition_property */
#define CL_DEVICE_PARTITION_EQUALLY 0x1086
#define CL_DEVICE_PARTITION_BY_COUNTS 0x1087
#define CL_DEVICE_PARTITION_BY_COUNTS_LIST_END 0x0
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN 0x1088
/* cl_device_affinity_domain */
#define CL_DEVICE_AFFINITY_DOMAIN_NUMA (1 << 0)
#define CL_DEVICE_AFFINITY_DOMAIN_L4_CACHE (1 << 1)
#define CL_DEVICE_AFFINITY_DOMAIN_L3_CACHE (1 << 2)
#define CL_DEVICE_AFFINITY_DOMAIN_L2_CACHE (1 << 3)
#define CL_DEVICE_AFFINITY_DOMAIN_L1_CACHE (1 << 4)
#define CL_DEVICE_AFFINITY_DOMAIN_NEXT_PARTITIONABLE (1 << 5)
/* cl_command_queue_info */
#define CL_QUEUE_CONTEXT 0x1090
#define CL_QUEUE_DEVICE 0x1091
#define CL_QUEUE_REFERENCE_COUNT 0x1092
#define CL_QUEUE_PROPERTIES 0x1093
/* cl_mem_flags - bitfield */
#define CL_MEM_READ_WRITE (1 << 0)
#define CL_MEM_WRITE_ONLY (1 << 1)
#define CL_MEM_READ_ONLY (1 << 2)
#define CL_MEM_USE_HOST_PTR (1 << 3)
#define CL_MEM_ALLOC_HOST_PTR (1 << 4)
#define CL_MEM_COPY_HOST_PTR (1 << 5)
/* reserved (1 << 6) */
#define CL_MEM_HOST_WRITE_ONLY (1 << 7)
#define CL_MEM_HOST_READ_ONLY (1 << 8)
#define CL_MEM_HOST_NO_ACCESS (1 << 9)
/* cl_mem_migration_flags - bitfield */
#define CL_MIGRATE_MEM_OBJECT_HOST (1 << 0)
#define CL_MIGRATE_MEM_OBJECT_CONTENT_UNDEFINED (1 << 1)
/* cl_channel_order */
#define CL_R 0x10B0
#define CL_A 0x10B1
#define CL_RG 0x10B2
#define CL_RA 0x10B3
#define CL_RGB 0x10B4
#define CL_RGBA 0x10B5
#define CL_BGRA 0x10B6
#define CL_ARGB 0x10B7
#define CL_INTENSITY 0x10B8
#define CL_LUMINANCE 0x10B9
#define CL_Rx 0x10BA
#define CL_RGx 0x10BB
#define CL_RGBx 0x10BC
#define CL_DEPTH 0x10BD
#define CL_DEPTH_STENCIL 0x10BE
/* cl_channel_type */
#define CL_SNORM_INT8 0x10D0
#define CL_SNORM_INT16 0x10D1
#define CL_UNORM_INT8 0x10D2
#define CL_UNORM_INT16 0x10D3
#define CL_UNORM_SHORT_565 0x10D4
#define CL_UNORM_SHORT_555 0x10D5
#define CL_UNORM_INT_101010 0x10D6
#define CL_SIGNED_INT8 0x10D7
#define CL_SIGNED_INT16 0x10D8
#define CL_SIGNED_INT32 0x10D9
#define CL_UNSIGNED_INT8 0x10DA
#define CL_UNSIGNED_INT16 0x10DB
#define CL_UNSIGNED_INT32 0x10DC
#define CL_HALF_FLOAT 0x10DD
#define CL_FLOAT 0x10DE
#define CL_UNORM_INT24 0x10DF
/* cl_mem_object_type */
#define CL_MEM_OBJECT_BUFFER 0x10F0
#define CL_MEM_OBJECT_IMAGE2D 0x10F1
#define CL_MEM_OBJECT_IMAGE3D 0x10F2
#define CL_MEM_OBJECT_IMAGE2D_ARRAY 0x10F3
#define CL_MEM_OBJECT_IMAGE1D 0x10F4
#define CL_MEM_OBJECT_IMAGE1D_ARRAY 0x10F5
#define CL_MEM_OBJECT_IMAGE1D_BUFFER 0x10F6
/* cl_mem_info */
#define CL_MEM_TYPE 0x1100
#define CL_MEM_FLAGS 0x1101
#define CL_MEM_SIZE 0x1102
#define CL_MEM_HOST_PTR 0x1103
#define CL_MEM_MAP_COUNT 0x1104
#define CL_MEM_REFERENCE_COUNT 0x1105
#define CL_MEM_CONTEXT 0x1106
#define CL_MEM_ASSOCIATED_MEMOBJECT 0x1107
#define CL_MEM_OFFSET 0x1108
/* cl_image_info */
#define CL_IMAGE_FORMAT 0x1110
#define CL_IMAGE_ELEMENT_SIZE 0x1111
#define CL_IMAGE_ROW_PITCH 0x1112
#define CL_IMAGE_SLICE_PITCH 0x1113
#define CL_IMAGE_WIDTH 0x1114
#define CL_IMAGE_HEIGHT 0x1115
#define CL_IMAGE_DEPTH 0x1116
#define CL_IMAGE_ARRAY_SIZE 0x1117
#define CL_IMAGE_BUFFER 0x1118
#define CL_IMAGE_NUM_MIP_LEVELS 0x1119
#define CL_IMAGE_NUM_SAMPLES 0x111A
/* cl_addressing_mode */
#define CL_ADDRESS_NONE 0x1130
#define CL_ADDRESS_CLAMP_TO_EDGE 0x1131
#define CL_ADDRESS_CLAMP 0x1132
#define CL_ADDRESS_REPEAT 0x1133
#define CL_ADDRESS_MIRRORED_REPEAT 0x1134
/* cl_filter_mode */
#define CL_FILTER_NEAREST 0x1140
#define CL_FILTER_LINEAR 0x1141
/* cl_sampler_info */
#define CL_SAMPLER_REFERENCE_COUNT 0x1150
#define CL_SAMPLER_CONTEXT 0x1151
#define CL_SAMPLER_NORMALIZED_COORDS 0x1152
#define CL_SAMPLER_ADDRESSING_MODE 0x1153
#define CL_SAMPLER_FILTER_MODE 0x1154
/* cl_map_flags - bitfield */
#define CL_MAP_READ (1 << 0)
#define CL_MAP_WRITE (1 << 1)
#define CL_MAP_WRITE_INVALIDATE_REGION (1 << 2)
/* cl_program_info */
#define CL_PROGRAM_REFERENCE_COUNT 0x1160
#define CL_PROGRAM_CONTEXT 0x1161
#define CL_PROGRAM_NUM_DEVICES 0x1162
#define CL_PROGRAM_DEVICES 0x1163
#define CL_PROGRAM_SOURCE 0x1164
#define CL_PROGRAM_BINARY_SIZES 0x1165
#define CL_PROGRAM_BINARIES 0x1166
#define CL_PROGRAM_NUM_KERNELS 0x1167
#define CL_PROGRAM_KERNEL_NAMES 0x1168
/* cl_program_build_info */
#define CL_PROGRAM_BUILD_STATUS 0x1181
#define CL_PROGRAM_BUILD_OPTIONS 0x1182
#define CL_PROGRAM_BUILD_LOG 0x1183
#define CL_PROGRAM_BINARY_TYPE 0x1184
/* cl_program_binary_type */
#define CL_PROGRAM_BINARY_TYPE_NONE 0x0
#define CL_PROGRAM_BINARY_TYPE_COMPILED_OBJECT 0x1
#define CL_PROGRAM_BINARY_TYPE_LIBRARY 0x2
#define CL_PROGRAM_BINARY_TYPE_EXECUTABLE 0x4
/* cl_build_status */
#define CL_BUILD_SUCCESS 0
#define CL_BUILD_NONE -1
#define CL_BUILD_ERROR -2
#define CL_BUILD_IN_PROGRESS -3
/* cl_kernel_info */
#define CL_KERNEL_FUNCTION_NAME 0x1190
#define CL_KERNEL_NUM_ARGS 0x1191
#define CL_KERNEL_REFERENCE_COUNT 0x1192
#define CL_KERNEL_CONTEXT 0x1193
#define CL_KERNEL_PROGRAM 0x1194
#define CL_KERNEL_ATTRIBUTES 0x1195
/* cl_kernel_arg_info */
#define CL_KERNEL_ARG_ADDRESS_QUALIFIER 0x1196
#define CL_KERNEL_ARG_ACCESS_QUALIFIER 0x1197
#define CL_KERNEL_ARG_TYPE_NAME 0x1198
#define CL_KERNEL_ARG_TYPE_QUALIFIER 0x1199
#define CL_KERNEL_ARG_NAME 0x119A
/* cl_kernel_arg_address_qualifier */
#define CL_KERNEL_ARG_ADDRESS_GLOBAL 0x119B
#define CL_KERNEL_ARG_ADDRESS_LOCAL 0x119C
#define CL_KERNEL_ARG_ADDRESS_CONSTANT 0x119D
#define CL_KERNEL_ARG_ADDRESS_PRIVATE 0x119E
/* cl_kernel_arg_access_qualifier */
#define CL_KERNEL_ARG_ACCESS_READ_ONLY 0x11A0
#define CL_KERNEL_ARG_ACCESS_WRITE_ONLY 0x11A1
#define CL_KERNEL_ARG_ACCESS_READ_WRITE 0x11A2
#define CL_KERNEL_ARG_ACCESS_NONE 0x11A3
/* cl_kernel_arg_type_qualifer */
#define CL_KERNEL_ARG_TYPE_NONE 0
#define CL_KERNEL_ARG_TYPE_CONST (1 << 0)
#define CL_KERNEL_ARG_TYPE_RESTRICT (1 << 1)
#define CL_KERNEL_ARG_TYPE_VOLATILE (1 << 2)
/* cl_kernel_work_group_info */
#define CL_KERNEL_WORK_GROUP_SIZE 0x11B0
#define CL_KERNEL_COMPILE_WORK_GROUP_SIZE 0x11B1
#define CL_KERNEL_LOCAL_MEM_SIZE 0x11B2
#define CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE 0x11B3
#define CL_KERNEL_PRIVATE_MEM_SIZE 0x11B4
#define CL_KERNEL_GLOBAL_WORK_SIZE 0x11B5
/* cl_event_info */
#define CL_EVENT_COMMAND_QUEUE 0x11D0
#define CL_EVENT_COMMAND_TYPE 0x11D1
#define CL_EVENT_REFERENCE_COUNT 0x11D2
#define CL_EVENT_COMMAND_EXECUTION_STATUS 0x11D3
#define CL_EVENT_CONTEXT 0x11D4
/* cl_command_type */
#define CL_COMMAND_NDRANGE_KERNEL 0x11F0
#define CL_COMMAND_TASK 0x11F1
#define CL_COMMAND_NATIVE_KERNEL 0x11F2
#define CL_COMMAND_READ_BUFFER 0x11F3
#define CL_COMMAND_WRITE_BUFFER 0x11F4
#define CL_COMMAND_COPY_BUFFER 0x11F5
#define CL_COMMAND_READ_IMAGE 0x11F6
#define CL_COMMAND_WRITE_IMAGE 0x11F7
#define CL_COMMAND_COPY_IMAGE 0x11F8
#define CL_COMMAND_COPY_IMAGE_TO_BUFFER 0x11F9
#define CL_COMMAND_COPY_BUFFER_TO_IMAGE 0x11FA
#define CL_COMMAND_MAP_BUFFER 0x11FB
#define CL_COMMAND_MAP_IMAGE 0x11FC
#define CL_COMMAND_UNMAP_MEM_OBJECT 0x11FD
#define CL_COMMAND_MARKER 0x11FE
#define CL_COMMAND_ACQUIRE_GL_OBJECTS 0x11FF
#define CL_COMMAND_RELEASE_GL_OBJECTS 0x1200
#define CL_COMMAND_READ_BUFFER_RECT 0x1201
#define CL_COMMAND_WRITE_BUFFER_RECT 0x1202
#define CL_COMMAND_COPY_BUFFER_RECT 0x1203
#define CL_COMMAND_USER 0x1204
#define CL_COMMAND_BARRIER 0x1205
#define CL_COMMAND_MIGRATE_MEM_OBJECTS 0x1206
#define CL_COMMAND_FILL_BUFFER 0x1207
#define CL_COMMAND_FILL_IMAGE 0x1208
/* command execution status */
#define CL_COMPLETE 0x0
#define CL_RUNNING 0x1
#define CL_SUBMITTED 0x2
#define CL_QUEUED 0x3
/* cl_buffer_create_type */
#define CL_BUFFER_CREATE_TYPE_REGION 0x1220
/* cl_profiling_info */
#define CL_PROFILING_COMMAND_QUEUED 0x1280
#define CL_PROFILING_COMMAND_SUBMIT 0x1281
#define CL_PROFILING_COMMAND_START 0x1282
#define CL_PROFILING_COMMAND_END 0x1283
/********************************************************************************************************/
/* Platform API */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetPlatformIDs(cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetPlatformInfo(cl_platform_id /* platform */,
cl_platform_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
/* Device APIs */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceIDs(cl_platform_id /* platform */,
cl_device_type /* device_type */,
cl_uint /* num_entries */,
cl_device_id * /* devices */,
cl_uint * /* num_devices */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceInfo(cl_device_id /* device */,
cl_device_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clCreateSubDevices(cl_device_id /* in_device */,
const cl_device_partition_property * /* properties */,
cl_uint /* num_devices */,
cl_device_id * /* out_devices */,
cl_uint * /* num_devices_ret */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseDevice(cl_device_id /* device */) CL_API_SUFFIX__VERSION_1_2;
/* Context APIs */
extern CL_API_ENTRY cl_context CL_API_CALL
clCreateContext(const cl_context_properties * /* properties */,
cl_uint /* num_devices */,
const cl_device_id * /* devices */,
void (CL_CALLBACK * /* pfn_notify */)(const char *, const void *, size_t, void *),
void * /* user_data */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_context CL_API_CALL
clCreateContextFromType(const cl_context_properties * /* properties */,
cl_device_type /* device_type */,
void (CL_CALLBACK * /* pfn_notify*/ )(const char *, const void *, size_t, void *),
void * /* user_data */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseContext(cl_context /* context */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetContextInfo(cl_context /* context */,
cl_context_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
/* Command Queue APIs */
extern CL_API_ENTRY cl_command_queue CL_API_CALL
clCreateCommandQueue(cl_context /* context */,
cl_device_id /* device */,
cl_command_queue_properties /* properties */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseCommandQueue(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetCommandQueueInfo(cl_command_queue /* command_queue */,
cl_command_queue_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
/* Memory Object APIs */
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateBuffer(cl_context /* context */,
cl_mem_flags /* flags */,
size_t /* size */,
void * /* host_ptr */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateSubBuffer(cl_mem /* buffer */,
cl_mem_flags /* flags */,
cl_buffer_create_type /* buffer_create_type */,
const void * /* buffer_create_info */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateImage(cl_context /* context */,
cl_mem_flags /* flags */,
const cl_image_format * /* image_format */,
const cl_image_desc * /* image_desc */,
void * /* host_ptr */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseMemObject(cl_mem /* memobj */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetSupportedImageFormats(cl_context /* context */,
cl_mem_flags /* flags */,
cl_mem_object_type /* image_type */,
cl_uint /* num_entries */,
cl_image_format * /* image_formats */,
cl_uint * /* num_image_formats */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetMemObjectInfo(cl_mem /* memobj */,
cl_mem_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetImageInfo(cl_mem /* image */,
cl_image_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetMemObjectDestructorCallback( cl_mem /* memobj */,
void (CL_CALLBACK * /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
void * /*user_data */ ) CL_API_SUFFIX__VERSION_1_1;
/* Sampler APIs */
extern CL_API_ENTRY cl_sampler CL_API_CALL
clCreateSampler(cl_context /* context */,
cl_bool /* normalized_coords */,
cl_addressing_mode /* addressing_mode */,
cl_filter_mode /* filter_mode */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseSampler(cl_sampler /* sampler */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetSamplerInfo(cl_sampler /* sampler */,
cl_sampler_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
/* Program Object APIs */
extern CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithSource(cl_context /* context */,
cl_uint /* count */,
const char ** /* strings */,
const size_t * /* lengths */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithBinary(cl_context /* context */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const size_t * /* lengths */,
const unsigned char ** /* binaries */,
cl_int * /* binary_status */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_program CL_API_CALL
clCreateProgramWithBuiltInKernels(cl_context /* context */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const char * /* kernel_names */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseProgram(cl_program /* program */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clBuildProgram(cl_program /* program */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const char * /* options */,
void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
void * /* user_data */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clCompileProgram(cl_program /* program */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const char * /* options */,
cl_uint /* num_input_headers */,
const cl_program * /* input_headers */,
const char ** /* header_include_names */,
void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
void * /* user_data */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_program CL_API_CALL
clLinkProgram(cl_context /* context */,
cl_uint /* num_devices */,
const cl_device_id * /* device_list */,
const char * /* options */,
cl_uint /* num_input_programs */,
const cl_program * /* input_programs */,
void (CL_CALLBACK * /* pfn_notify */)(cl_program /* program */, void * /* user_data */),
void * /* user_data */,
cl_int * /* errcode_ret */ ) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clUnloadPlatformCompiler(cl_platform_id /* platform */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetProgramInfo(cl_program /* program */,
cl_program_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetProgramBuildInfo(cl_program /* program */,
cl_device_id /* device */,
cl_program_build_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
/* Kernel Object APIs */
extern CL_API_ENTRY cl_kernel CL_API_CALL
clCreateKernel(cl_program /* program */,
const char * /* kernel_name */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clCreateKernelsInProgram(cl_program /* program */,
cl_uint /* num_kernels */,
cl_kernel * /* kernels */,
cl_uint * /* num_kernels_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseKernel(cl_kernel /* kernel */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetKernelArg(cl_kernel /* kernel */,
cl_uint /* arg_index */,
size_t /* arg_size */,
const void * /* arg_value */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetKernelInfo(cl_kernel /* kernel */,
cl_kernel_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetKernelArgInfo(cl_kernel /* kernel */,
cl_uint /* arg_indx */,
cl_kernel_arg_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetKernelWorkGroupInfo(cl_kernel /* kernel */,
cl_device_id /* device */,
cl_kernel_work_group_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
/* Event Object APIs */
extern CL_API_ENTRY cl_int CL_API_CALL
clWaitForEvents(cl_uint /* num_events */,
const cl_event * /* event_list */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetEventInfo(cl_event /* event */,
cl_event_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateUserEvent(cl_context /* context */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseEvent(cl_event /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetUserEventStatus(cl_event /* event */,
cl_int /* execution_status */) CL_API_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clSetEventCallback( cl_event /* event */,
cl_int /* command_exec_callback_type */,
void (CL_CALLBACK * /* pfn_notify */)(cl_event, cl_int, void *),
void * /* user_data */) CL_API_SUFFIX__VERSION_1_1;
/* Profiling APIs */
extern CL_API_ENTRY cl_int CL_API_CALL
clGetEventProfilingInfo(cl_event /* event */,
cl_profiling_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
/* Flush and Finish APIs */
extern CL_API_ENTRY cl_int CL_API_CALL
clFlush(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clFinish(cl_command_queue /* command_queue */) CL_API_SUFFIX__VERSION_1_0;
/* Enqueued Commands APIs */
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReadBuffer(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_read */,
size_t /* offset */,
size_t /* size */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReadBufferRect(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_read */,
const size_t * /* buffer_offset */,
const size_t * /* host_offset */,
const size_t * /* region */,
size_t /* buffer_row_pitch */,
size_t /* buffer_slice_pitch */,
size_t /* host_row_pitch */,
size_t /* host_slice_pitch */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueWriteBuffer(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_write */,
size_t /* offset */,
size_t /* size */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueWriteBufferRect(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_write */,
const size_t * /* buffer_offset */,
const size_t * /* host_offset */,
const size_t * /* region */,
size_t /* buffer_row_pitch */,
size_t /* buffer_slice_pitch */,
size_t /* host_row_pitch */,
size_t /* host_slice_pitch */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueFillBuffer(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
const void * /* pattern */,
size_t /* pattern_size */,
size_t /* offset */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyBuffer(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_buffer */,
size_t /* src_offset */,
size_t /* dst_offset */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyBufferRect(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_buffer */,
const size_t * /* src_origin */,
const size_t * /* dst_origin */,
const size_t * /* region */,
size_t /* src_row_pitch */,
size_t /* src_slice_pitch */,
size_t /* dst_row_pitch */,
size_t /* dst_slice_pitch */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReadImage(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_read */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t /* row_pitch */,
size_t /* slice_pitch */,
void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueWriteImage(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_write */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t /* input_row_pitch */,
size_t /* input_slice_pitch */,
const void * /* ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueFillImage(cl_command_queue /* command_queue */,
cl_mem /* image */,
const void * /* fill_color */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyImage(cl_command_queue /* command_queue */,
cl_mem /* src_image */,
cl_mem /* dst_image */,
const size_t * /* src_origin[3] */,
const size_t * /* dst_origin[3] */,
const size_t * /* region[3] */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyImageToBuffer(cl_command_queue /* command_queue */,
cl_mem /* src_image */,
cl_mem /* dst_buffer */,
const size_t * /* src_origin[3] */,
const size_t * /* region[3] */,
size_t /* dst_offset */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueCopyBufferToImage(cl_command_queue /* command_queue */,
cl_mem /* src_buffer */,
cl_mem /* dst_image */,
size_t /* src_offset */,
const size_t * /* dst_origin[3] */,
const size_t * /* region[3] */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY void * CL_API_CALL
clEnqueueMapBuffer(cl_command_queue /* command_queue */,
cl_mem /* buffer */,
cl_bool /* blocking_map */,
cl_map_flags /* map_flags */,
size_t /* offset */,
size_t /* size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY void * CL_API_CALL
clEnqueueMapImage(cl_command_queue /* command_queue */,
cl_mem /* image */,
cl_bool /* blocking_map */,
cl_map_flags /* map_flags */,
const size_t * /* origin[3] */,
const size_t * /* region[3] */,
size_t * /* image_row_pitch */,
size_t * /* image_slice_pitch */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueUnmapMemObject(cl_command_queue /* command_queue */,
cl_mem /* memobj */,
void * /* mapped_ptr */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueMigrateMemObjects(cl_command_queue /* command_queue */,
cl_uint /* num_mem_objects */,
const cl_mem * /* mem_objects */,
cl_mem_migration_flags /* flags */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueNDRangeKernel(cl_command_queue /* command_queue */,
cl_kernel /* kernel */,
cl_uint /* work_dim */,
const size_t * /* global_work_offset */,
const size_t * /* global_work_size */,
const size_t * /* local_work_size */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueTask(cl_command_queue /* command_queue */,
cl_kernel /* kernel */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueNativeKernel(cl_command_queue /* command_queue */,
void (CL_CALLBACK * /*user_func*/)(void *),
void * /* args */,
size_t /* cb_args */,
cl_uint /* num_mem_objects */,
const cl_mem * /* mem_list */,
const void ** /* args_mem_loc */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueMarkerWithWaitList(cl_command_queue /* command_queue */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueBarrierWithWaitList(cl_command_queue /* command_queue */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_2;
/* Extension function access
*
* Returns the extension function address for the given function name,
* or NULL if a valid function can not be found. The client must
* check to make sure the address is not NULL, before using or
* calling the returned function address.
*/
extern CL_API_ENTRY void * CL_API_CALL
clGetExtensionFunctionAddressForPlatform(cl_platform_id /* platform */,
const char * /* func_name */) CL_API_SUFFIX__VERSION_1_2;
/* Deprecated OpenCL 1.1 APIs */
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateImage2D(cl_context /* context */,
cl_mem_flags /* flags */,
const cl_image_format * /* image_format */,
size_t /* image_width */,
size_t /* image_height */,
size_t /* image_row_pitch */,
void * /* host_ptr */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateImage3D(cl_context /* context */,
cl_mem_flags /* flags */,
const cl_image_format * /* image_format */,
size_t /* image_width */,
size_t /* image_height */,
size_t /* image_depth */,
size_t /* image_row_pitch */,
size_t /* image_slice_pitch */,
void * /* host_ptr */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
clEnqueueMarker(cl_command_queue /* command_queue */,
cl_event * /* event */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
clEnqueueWaitForEvents(cl_command_queue /* command_queue */,
cl_uint /* num_events */,
const cl_event * /* event_list */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
clEnqueueBarrier(cl_command_queue /* command_queue */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int CL_API_CALL
clUnloadCompiler(void) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED void * CL_API_CALL
clGetExtensionFunctionAddress(const char * /* func_name */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_H */
/*******************************************************************************
* Copyright (c) 2008-2013 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/* $Revision: 11928 $ on $Date: 2010-07-13 09:04:56 -0700 (Tue, 13 Jul 2010) $ */
/* cl_ext.h contains OpenCL extensions which don't have external */
/* (OpenGL, D3D) dependencies. */
#ifndef __CL_EXT_H
#define __CL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#ifdef __APPLE__
#include <AvailabilityMacros.h>
#endif
#include <cl.h>
/* cl_khr_fp16 extension - no extension #define since it has no functions */
#define CL_DEVICE_HALF_FP_CONFIG 0x1033
/* Memory object destruction
*
* Apple extension for use to manage externally allocated buffers used with cl_mem objects with CL_MEM_USE_HOST_PTR
*
* Registers a user callback function that will be called when the memory object is deleted and its resources
* freed. Each call to clSetMemObjectCallbackFn registers the specified user callback function on a callback
* stack associated with memobj. The registered user callback functions are called in the reverse order in
* which they were registered. The user callback functions are called and then the memory object is deleted
* and its resources freed. This provides a mechanism for the application (and libraries) using memobj to be
* notified when the memory referenced by host_ptr, specified when the memory object is created and used as
* the storage bits for the memory object, can be reused or freed.
*
* The application may not call CL api's with the cl_mem object passed to the pfn_notify.
*
* Please check for the "cl_APPLE_SetMemObjectDestructor" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*/
#define cl_APPLE_SetMemObjectDestructor 1
cl_int CL_API_ENTRY clSetMemObjectDestructorAPPLE( cl_mem /* memobj */,
void (* /*pfn_notify*/)( cl_mem /* memobj */, void* /*user_data*/),
void * /*user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/* Context Logging Functions
*
* The next three convenience functions are intended to be used as the pfn_notify parameter to clCreateContext().
* Please check for the "cl_APPLE_ContextLoggingFunctions" extension using clGetDeviceInfo(CL_DEVICE_EXTENSIONS)
* before using.
*
* clLogMessagesToSystemLog fowards on all log messages to the Apple System Logger
*/
#define cl_APPLE_ContextLoggingFunctions 1
extern void CL_API_ENTRY clLogMessagesToSystemLogAPPLE( const char * /* errstr */,
const void * /* private_info */,
size_t /* cb */,
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/* clLogMessagesToStdout sends all log messages to the file descriptor stdout */
extern void CL_API_ENTRY clLogMessagesToStdoutAPPLE( const char * /* errstr */,
const void * /* private_info */,
size_t /* cb */,
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/* clLogMessagesToStderr sends all log messages to the file descriptor stderr */
extern void CL_API_ENTRY clLogMessagesToStderrAPPLE( const char * /* errstr */,
const void * /* private_info */,
size_t /* cb */,
void * /* user_data */ ) CL_EXT_SUFFIX__VERSION_1_0;
/************************
* cl_khr_icd extension *
************************/
#define cl_khr_icd 1
/* cl_platform_info */
#define CL_PLATFORM_ICD_SUFFIX_KHR 0x0920
/* Additional Error Codes */
#define CL_PLATFORM_NOT_FOUND_KHR -1001
extern CL_API_ENTRY cl_int CL_API_CALL
clIcdGetPlatformIDsKHR(cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */);
typedef CL_API_ENTRY cl_int (CL_API_CALL *clIcdGetPlatformIDsKHR_fn)(
cl_uint /* num_entries */,
cl_platform_id * /* platforms */,
cl_uint * /* num_platforms */);
/* Extension: cl_khr_image2D_buffer
*
* This extension allows a 2D image to be created from a cl_mem buffer without a copy.
* The type associated with a 2D image created from a buffer in an OpenCL program is image2d_t.
* Both the sampler and sampler-less read_image built-in functions are supported for 2D images
* and 2D images created from a buffer. Similarly, the write_image built-ins are also supported
* for 2D images created from a buffer.
*
* When the 2D image from buffer is created, the client must specify the width,
* height, image format (i.e. channel order and channel data type) and optionally the row pitch
*
* The pitch specified must be a multiple of CL_DEVICE_IMAGE_PITCH_ALIGNMENT pixels.
* The base address of the buffer must be aligned to CL_DEVICE_IMAGE_BASE_ADDRESS_ALIGNMENT pixels.
*/
/*************************************
* cl_khr_initalize_memory extension *
*************************************/
#define CL_CONTEXT_MEMORY_INITIALIZE_KHR 0x200E
/**************************************
* cl_khr_terminate_context extension *
**************************************/
#define CL_DEVICE_TERMINATE_CAPABILITY_KHR 0x200F
#define CL_CONTEXT_TERMINATE_KHR 0x2010
#define cl_khr_terminate_context 1
extern CL_API_ENTRY cl_int CL_API_CALL clTerminateContextKHR(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clTerminateContextKHR_fn)(cl_context /* context */) CL_EXT_SUFFIX__VERSION_1_2;
/*
* Extension: cl_khr_spir
*
* This extension adds support to create an OpenCL program object from a
* Standard Portable Intermediate Representation (SPIR) instance
*/
#define CL_DEVICE_SPIR_VERSIONS 0x40E0
#define CL_PROGRAM_BINARY_TYPE_INTERMEDIATE 0x40E1
/******************************************
* cl_nv_device_attribute_query extension *
******************************************/
/* cl_nv_device_attribute_query extension - no extension #define since it has no functions */
#define CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV 0x4000
#define CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV 0x4001
#define CL_DEVICE_REGISTERS_PER_BLOCK_NV 0x4002
#define CL_DEVICE_WARP_SIZE_NV 0x4003
#define CL_DEVICE_GPU_OVERLAP_NV 0x4004
#define CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV 0x4005
#define CL_DEVICE_INTEGRATED_MEMORY_NV 0x4006
/*********************************
* cl_amd_device_attribute_query *
*********************************/
#define CL_DEVICE_PROFILING_TIMER_OFFSET_AMD 0x4036
/*********************************
* cl_arm_printf extension
*********************************/
#define CL_PRINTF_CALLBACK_ARM 0x40B0
#define CL_PRINTF_BUFFERSIZE_ARM 0x40B1
#ifdef CL_VERSION_1_1
/***********************************
* cl_ext_device_fission extension *
***********************************/
#define cl_ext_device_fission 1
extern CL_API_ENTRY cl_int CL_API_CALL
clReleaseDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clReleaseDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
extern CL_API_ENTRY cl_int CL_API_CALL
clRetainDeviceEXT( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
(CL_API_CALL *clRetainDeviceEXT_fn)( cl_device_id /*device*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef cl_ulong cl_device_partition_property_ext;
extern CL_API_ENTRY cl_int CL_API_CALL
clCreateSubDevicesEXT( cl_device_id /*in_device*/,
const cl_device_partition_property_ext * /* properties */,
cl_uint /*num_entries*/,
cl_device_id * /*out_devices*/,
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
typedef CL_API_ENTRY cl_int
( CL_API_CALL * clCreateSubDevicesEXT_fn)( cl_device_id /*in_device*/,
const cl_device_partition_property_ext * /* properties */,
cl_uint /*num_entries*/,
cl_device_id * /*out_devices*/,
cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
/* cl_device_partition_property_ext */
#define CL_DEVICE_PARTITION_EQUALLY_EXT 0x4050
#define CL_DEVICE_PARTITION_BY_COUNTS_EXT 0x4051
#define CL_DEVICE_PARTITION_BY_NAMES_EXT 0x4052
#define CL_DEVICE_PARTITION_BY_AFFINITY_DOMAIN_EXT 0x4053
/* clDeviceGetInfo selectors */
#define CL_DEVICE_PARENT_DEVICE_EXT 0x4054
#define CL_DEVICE_PARTITION_TYPES_EXT 0x4055
#define CL_DEVICE_AFFINITY_DOMAINS_EXT 0x4056
#define CL_DEVICE_REFERENCE_COUNT_EXT 0x4057
#define CL_DEVICE_PARTITION_STYLE_EXT 0x4058
/* error codes */
#define CL_DEVICE_PARTITION_FAILED_EXT -1057
#define CL_INVALID_PARTITION_COUNT_EXT -1058
#define CL_INVALID_PARTITION_NAME_EXT -1059
/* CL_AFFINITY_DOMAINs */
#define CL_AFFINITY_DOMAIN_L1_CACHE_EXT 0x1
#define CL_AFFINITY_DOMAIN_L2_CACHE_EXT 0x2
#define CL_AFFINITY_DOMAIN_L3_CACHE_EXT 0x3
#define CL_AFFINITY_DOMAIN_L4_CACHE_EXT 0x4
#define CL_AFFINITY_DOMAIN_NUMA_EXT 0x10
#define CL_AFFINITY_DOMAIN_NEXT_FISSIONABLE_EXT 0x100
/* cl_device_partition_property_ext list terminators */
#define CL_PROPERTIES_LIST_END_EXT ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_COUNTS_LIST_END_EXT ((cl_device_partition_property_ext) 0)
#define CL_PARTITION_BY_NAMES_LIST_END_EXT ((cl_device_partition_property_ext) 0 - 1)
/*********************************
* cl_qcom_ext_host_ptr extension
*********************************/
#define CL_MEM_EXT_HOST_PTR_QCOM (1 << 29)
#define CL_DEVICE_EXT_MEM_PADDING_IN_BYTES_QCOM 0x40A0
#define CL_DEVICE_PAGE_SIZE_QCOM 0x40A1
#define CL_IMAGE_ROW_ALIGNMENT_QCOM 0x40A2
#define CL_IMAGE_SLICE_ALIGNMENT_QCOM 0x40A3
#define CL_MEM_HOST_UNCACHED_QCOM 0x40A4
#define CL_MEM_HOST_WRITEBACK_QCOM 0x40A5
#define CL_MEM_HOST_WRITETHROUGH_QCOM 0x40A6
#define CL_MEM_HOST_WRITE_COMBINING_QCOM 0x40A7
typedef cl_uint cl_image_pitch_info_qcom;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetDeviceImageInfoQCOM(cl_device_id device,
size_t image_width,
size_t image_height,
const cl_image_format *image_format,
cl_image_pitch_info_qcom param_name,
size_t param_value_size,
void *param_value,
size_t *param_value_size_ret);
typedef struct _cl_mem_ext_host_ptr
{
/* Type of external memory allocation. */
/* Legal values will be defined in layered extensions. */
cl_uint allocation_type;
/* Host cache policy for this external memory allocation. */
cl_uint host_cache_policy;
} cl_mem_ext_host_ptr;
/*********************************
* cl_qcom_ion_host_ptr extension
*********************************/
#define CL_MEM_ION_HOST_PTR_QCOM 0x40A8
typedef struct _cl_mem_ion_host_ptr
{
/* Type of external memory allocation. */
/* Must be CL_MEM_ION_HOST_PTR_QCOM for ION allocations. */
cl_mem_ext_host_ptr ext_host_ptr;
/* ION file descriptor */
int ion_filedesc;
/* Host pointer to the ION allocated memory */
void* ion_hostptr;
} cl_mem_ion_host_ptr;
#endif /* CL_VERSION_1_1 */
#ifdef __cplusplus
}
#endif
#endif /* __CL_EXT_H */
/**********************************************************************************
* Copyright (c) 2008 - 2012 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
#ifndef __OPENCL_CL_GL_H
#define __OPENCL_CL_GL_H
#include <cl.h>
#ifdef __cplusplus
extern "C" {
#endif
typedef cl_uint cl_gl_object_type;
typedef cl_uint cl_gl_texture_info;
typedef cl_uint cl_gl_platform_info;
typedef struct __GLsync *cl_GLsync;
/* cl_gl_object_type = 0x2000 - 0x200F enum values are currently taken */
#define CL_GL_OBJECT_BUFFER 0x2000
#define CL_GL_OBJECT_TEXTURE2D 0x2001
#define CL_GL_OBJECT_TEXTURE3D 0x2002
#define CL_GL_OBJECT_RENDERBUFFER 0x2003
#define CL_GL_OBJECT_TEXTURE2D_ARRAY 0x200E
#define CL_GL_OBJECT_TEXTURE1D 0x200F
#define CL_GL_OBJECT_TEXTURE1D_ARRAY 0x2010
#define CL_GL_OBJECT_TEXTURE_BUFFER 0x2011
/* cl_gl_texture_info */
#define CL_GL_TEXTURE_TARGET 0x2004
#define CL_GL_MIPMAP_LEVEL 0x2005
#define CL_GL_NUM_SAMPLES 0x2012
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLBuffer(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLuint /* bufobj */,
int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLTexture(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_2;
extern CL_API_ENTRY cl_mem CL_API_CALL
clCreateFromGLRenderbuffer(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLuint /* renderbuffer */,
cl_int * /* errcode_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLObjectInfo(cl_mem /* memobj */,
cl_gl_object_type * /* gl_object_type */,
cl_GLuint * /* gl_object_name */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLTextureInfo(cl_mem /* memobj */,
cl_gl_texture_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueAcquireGLObjects(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
extern CL_API_ENTRY cl_int CL_API_CALL
clEnqueueReleaseGLObjects(cl_command_queue /* command_queue */,
cl_uint /* num_objects */,
const cl_mem * /* mem_objects */,
cl_uint /* num_events_in_wait_list */,
const cl_event * /* event_wait_list */,
cl_event * /* event */) CL_API_SUFFIX__VERSION_1_0;
/* Deprecated OpenCL 1.1 APIs */
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture2D(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
extern CL_API_ENTRY CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_mem CL_API_CALL
clCreateFromGLTexture3D(cl_context /* context */,
cl_mem_flags /* flags */,
cl_GLenum /* target */,
cl_GLint /* miplevel */,
cl_GLuint /* texture */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
/* cl_khr_gl_sharing extension */
#define cl_khr_gl_sharing 1
typedef cl_uint cl_gl_context_info;
/* Additional Error Codes */
#define CL_INVALID_GL_SHAREGROUP_REFERENCE_KHR -1000
/* cl_gl_context_info */
#define CL_CURRENT_DEVICE_FOR_GL_CONTEXT_KHR 0x2006
#define CL_DEVICES_FOR_GL_CONTEXT_KHR 0x2007
/* Additional cl_context_properties */
#define CL_GL_CONTEXT_KHR 0x2008
#define CL_EGL_DISPLAY_KHR 0x2009
#define CL_GLX_DISPLAY_KHR 0x200A
#define CL_WGL_HDC_KHR 0x200B
#define CL_CGL_SHAREGROUP_KHR 0x200C
extern CL_API_ENTRY cl_int CL_API_CALL
clGetGLContextInfoKHR(const cl_context_properties * /* properties */,
cl_gl_context_info /* param_name */,
size_t /* param_value_size */,
void * /* param_value */,
size_t * /* param_value_size_ret */) CL_API_SUFFIX__VERSION_1_0;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clGetGLContextInfoKHR_fn)(
const cl_context_properties * properties,
cl_gl_context_info param_name,
size_t param_value_size,
void * param_value,
size_t * param_value_size_ret);
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_H */
/**********************************************************************************
* Copyright (c) 2008-2012 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
/* cl_gl_ext.h contains vendor (non-KHR) OpenCL extensions which have */
/* OpenGL dependencies. */
#ifndef __OPENCL_CL_GL_EXT_H
#define __OPENCL_CL_GL_EXT_H
#ifdef __cplusplus
extern "C" {
#endif
#include <cl_gl.h>
/*
* For each extension, follow this template
* cl_VEN_extname extension */
/* #define cl_VEN_extname 1
* ... define new types, if any
* ... define new tokens, if any
* ... define new APIs, if any
*
* If you need GLtypes here, mirror them with a cl_GLtype, rather than including a GL header
* This allows us to avoid having to decide whether to include GL headers or GLES here.
*/
/*
* cl_khr_gl_event extension
* See section 9.9 in the OpenCL 1.1 spec for more information
*/
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
extern CL_API_ENTRY cl_event CL_API_CALL
clCreateEventFromGLsyncKHR(cl_context /* context */,
cl_GLsync /* cl_GLsync */,
cl_int * /* errcode_ret */) CL_EXT_SUFFIX__VERSION_1_1;
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_CL_GL_EXT_H */
/**********************************************************************************
* Copyright (c) 2008-2012 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
**********************************************************************************/
/* $Revision: 11803 $ on $Date: 2010-06-25 10:02:12 -0700 (Fri, 25 Jun 2010) $ */
#ifndef __CL_PLATFORM_H
#define __CL_PLATFORM_H
#ifdef __APPLE__
/* Contains #defines for AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER below */
#include <AvailabilityMacros.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
#if defined(_WIN32)
#define CL_API_ENTRY
#define CL_API_CALL __stdcall
#define CL_CALLBACK __stdcall
#else
#define CL_API_ENTRY
#define CL_API_CALL
#define CL_CALLBACK
#endif
#ifdef __APPLE__
#define CL_EXTENSION_WEAK_LINK __attribute__((weak_import))
#define CL_API_SUFFIX__VERSION_1_0 AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
#define CL_EXT_SUFFIX__VERSION_1_0 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER
#define CL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
#define GCL_API_SUFFIX__VERSION_1_1 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
#define CL_EXT_SUFFIX__VERSION_1_1 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_6_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_7
#ifdef AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
#define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
#define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
#define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_8_AND_LATER
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER_BUT_DEPRECATED_IN_MAC_OS_X_VERSION_10_8
#else
#warning This path should never happen outside of internal operating system development. AvailabilityMacros do not function correctly here!
#define CL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
#define GCL_API_SUFFIX__VERSION_1_2 AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
#define CL_EXT_SUFFIX__VERSION_1_2 CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED CL_EXTENSION_WEAK_LINK AVAILABLE_MAC_OS_X_VERSION_10_7_AND_LATER
#endif
#else
#define CL_EXTENSION_WEAK_LINK
#define CL_API_SUFFIX__VERSION_1_0
#define CL_EXT_SUFFIX__VERSION_1_0
#define CL_API_SUFFIX__VERSION_1_1
#define CL_EXT_SUFFIX__VERSION_1_1
#define CL_API_SUFFIX__VERSION_1_2
#define CL_EXT_SUFFIX__VERSION_1_2
#ifdef __GNUC__
#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
#else
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED __attribute__((deprecated))
#define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
#endif
#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
#else
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED __attribute__((deprecated))
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
#endif
#elif _WIN32
#ifdef CL_USE_DEPRECATED_OPENCL_1_0_APIS
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
#else
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED __declspec(deprecated)
#endif
#ifdef CL_USE_DEPRECATED_OPENCL_1_1_APIS
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
#else
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED __declspec(deprecated)
#endif
#else
#define CL_EXT_SUFFIX__VERSION_1_0_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_0_DEPRECATED
#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
#endif
#endif
#if (defined (_WIN32) && defined(_MSC_VER))
/* scalar types */
typedef signed __int8 cl_char;
typedef unsigned __int8 cl_uchar;
typedef signed __int16 cl_short;
typedef unsigned __int16 cl_ushort;
typedef signed __int32 cl_int;
typedef unsigned __int32 cl_uint;
typedef signed __int64 cl_long;
typedef unsigned __int64 cl_ulong;
typedef unsigned __int16 cl_half;
typedef float cl_float;
typedef double cl_double;
/* Macro names and corresponding values defined by OpenCL */
#define CL_CHAR_BIT 8
#define CL_SCHAR_MAX 127
#define CL_SCHAR_MIN (-127-1)
#define CL_CHAR_MAX CL_SCHAR_MAX
#define CL_CHAR_MIN CL_SCHAR_MIN
#define CL_UCHAR_MAX 255
#define CL_SHRT_MAX 32767
#define CL_SHRT_MIN (-32767-1)
#define CL_USHRT_MAX 65535
#define CL_INT_MAX 2147483647
#define CL_INT_MIN (-2147483647-1)
#define CL_UINT_MAX 0xffffffffU
#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
#define CL_FLT_DIG 6
#define CL_FLT_MANT_DIG 24
#define CL_FLT_MAX_10_EXP +38
#define CL_FLT_MAX_EXP +128
#define CL_FLT_MIN_10_EXP -37
#define CL_FLT_MIN_EXP -125
#define CL_FLT_RADIX 2
#define CL_FLT_MAX 340282346638528859811704183484516925440.0f
#define CL_FLT_MIN 1.175494350822287507969e-38f
#define CL_FLT_EPSILON 0x1.0p-23f
#define CL_DBL_DIG 15
#define CL_DBL_MANT_DIG 53
#define CL_DBL_MAX_10_EXP +308
#define CL_DBL_MAX_EXP +1024
#define CL_DBL_MIN_10_EXP -307
#define CL_DBL_MIN_EXP -1021
#define CL_DBL_RADIX 2
#define CL_DBL_MAX 179769313486231570814527423731704356798070567525844996598917476803157260780028538760589558632766878171540458953514382464234321326889464182768467546703537516986049910576551282076245490090389328944075868508455133942304583236903222948165808559332123348274797826204144723168738177180919299881250404026184124858368.0
#define CL_DBL_MIN 2.225073858507201383090e-308
#define CL_DBL_EPSILON 2.220446049250313080847e-16
#define CL_M_E 2.718281828459045090796
#define CL_M_LOG2E 1.442695040888963387005
#define CL_M_LOG10E 0.434294481903251816668
#define CL_M_LN2 0.693147180559945286227
#define CL_M_LN10 2.302585092994045901094
#define CL_M_PI 3.141592653589793115998
#define CL_M_PI_2 1.570796326794896557999
#define CL_M_PI_4 0.785398163397448278999
#define CL_M_1_PI 0.318309886183790691216
#define CL_M_2_PI 0.636619772367581382433
#define CL_M_2_SQRTPI 1.128379167095512558561
#define CL_M_SQRT2 1.414213562373095145475
#define CL_M_SQRT1_2 0.707106781186547572737
#define CL_M_E_F 2.71828174591064f
#define CL_M_LOG2E_F 1.44269502162933f
#define CL_M_LOG10E_F 0.43429449200630f
#define CL_M_LN2_F 0.69314718246460f
#define CL_M_LN10_F 2.30258512496948f
#define CL_M_PI_F 3.14159274101257f
#define CL_M_PI_2_F 1.57079637050629f
#define CL_M_PI_4_F 0.78539818525314f
#define CL_M_1_PI_F 0.31830987334251f
#define CL_M_2_PI_F 0.63661974668503f
#define CL_M_2_SQRTPI_F 1.12837922573090f
#define CL_M_SQRT2_F 1.41421353816986f
#define CL_M_SQRT1_2_F 0.70710676908493f
#define CL_NAN (CL_INFINITY - CL_INFINITY)
#define CL_HUGE_VALF ((cl_float) 1e50)
#define CL_HUGE_VAL ((cl_double) 1e500)
#define CL_MAXFLOAT CL_FLT_MAX
#define CL_INFINITY CL_HUGE_VALF
#else
#include <stdint.h>
/* scalar types */
typedef int8_t cl_char;
typedef uint8_t cl_uchar;
typedef int16_t cl_short __attribute__((aligned(2)));
typedef uint16_t cl_ushort __attribute__((aligned(2)));
typedef int32_t cl_int __attribute__((aligned(4)));
typedef uint32_t cl_uint __attribute__((aligned(4)));
typedef int64_t cl_long __attribute__((aligned(8)));
typedef uint64_t cl_ulong __attribute__((aligned(8)));
typedef uint16_t cl_half __attribute__((aligned(2)));
typedef float cl_float __attribute__((aligned(4)));
typedef double cl_double __attribute__((aligned(8)));
/* Macro names and corresponding values defined by OpenCL */
#define CL_CHAR_BIT 8
#define CL_SCHAR_MAX 127
#define CL_SCHAR_MIN (-127-1)
#define CL_CHAR_MAX CL_SCHAR_MAX
#define CL_CHAR_MIN CL_SCHAR_MIN
#define CL_UCHAR_MAX 255
#define CL_SHRT_MAX 32767
#define CL_SHRT_MIN (-32767-1)
#define CL_USHRT_MAX 65535
#define CL_INT_MAX 2147483647
#define CL_INT_MIN (-2147483647-1)
#define CL_UINT_MAX 0xffffffffU
#define CL_LONG_MAX ((cl_long) 0x7FFFFFFFFFFFFFFFLL)
#define CL_LONG_MIN ((cl_long) -0x7FFFFFFFFFFFFFFFLL - 1LL)
#define CL_ULONG_MAX ((cl_ulong) 0xFFFFFFFFFFFFFFFFULL)
#define CL_FLT_DIG 6
#define CL_FLT_MANT_DIG 24
#define CL_FLT_MAX_10_EXP +38
#define CL_FLT_MAX_EXP +128
#define CL_FLT_MIN_10_EXP -37
#define CL_FLT_MIN_EXP -125
#define CL_FLT_RADIX 2
#define CL_FLT_MAX 0x1.fffffep127f
#define CL_FLT_MIN 0x1.0p-126f
#define CL_FLT_EPSILON 0x1.0p-23f
#define CL_DBL_DIG 15
#define CL_DBL_MANT_DIG 53
#define CL_DBL_MAX_10_EXP +308
#define CL_DBL_MAX_EXP +1024
#define CL_DBL_MIN_10_EXP -307
#define CL_DBL_MIN_EXP -1021
#define CL_DBL_RADIX 2
#define CL_DBL_MAX 0x1.fffffffffffffp1023
#define CL_DBL_MIN 0x1.0p-1022
#define CL_DBL_EPSILON 0x1.0p-52
#define CL_M_E 2.718281828459045090796
#define CL_M_LOG2E 1.442695040888963387005
#define CL_M_LOG10E 0.434294481903251816668
#define CL_M_LN2 0.693147180559945286227
#define CL_M_LN10 2.302585092994045901094
#define CL_M_PI 3.141592653589793115998
#define CL_M_PI_2 1.570796326794896557999
#define CL_M_PI_4 0.785398163397448278999
#define CL_M_1_PI 0.318309886183790691216
#define CL_M_2_PI 0.636619772367581382433
#define CL_M_2_SQRTPI 1.128379167095512558561
#define CL_M_SQRT2 1.414213562373095145475
#define CL_M_SQRT1_2 0.707106781186547572737
#define CL_M_E_F 2.71828174591064f
#define CL_M_LOG2E_F 1.44269502162933f
#define CL_M_LOG10E_F 0.43429449200630f
#define CL_M_LN2_F 0.69314718246460f
#define CL_M_LN10_F 2.30258512496948f
#define CL_M_PI_F 3.14159274101257f
#define CL_M_PI_2_F 1.57079637050629f
#define CL_M_PI_4_F 0.78539818525314f
#define CL_M_1_PI_F 0.31830987334251f
#define CL_M_2_PI_F 0.63661974668503f
#define CL_M_2_SQRTPI_F 1.12837922573090f
#define CL_M_SQRT2_F 1.41421353816986f
#define CL_M_SQRT1_2_F 0.70710676908493f
#if defined( __GNUC__ )
#define CL_HUGE_VALF __builtin_huge_valf()
#define CL_HUGE_VAL __builtin_huge_val()
#define CL_NAN __builtin_nanf( "" )
#else
#define CL_HUGE_VALF ((cl_float) 1e50)
#define CL_HUGE_VAL ((cl_double) 1e500)
float nanf( const char * );
#define CL_NAN nanf( "" )
#endif
#define CL_MAXFLOAT CL_FLT_MAX
#define CL_INFINITY CL_HUGE_VALF
#endif
#include <stddef.h>
/* Mirror types to GL types. Mirror types allow us to avoid deciding which 87s to load based on whether we are using GL or GLES here. */
typedef unsigned int cl_GLuint;
typedef int cl_GLint;
typedef unsigned int cl_GLenum;
/*
* Vector types
*
* Note: OpenCL requires that all types be naturally aligned.
* This means that vector types must be naturally aligned.
* For example, a vector of four floats must be aligned to
* a 16 byte boundary (calculated as 4 * the natural 4-byte
* alignment of the float). The alignment qualifiers here
* will only function properly if your compiler supports them
* and if you don't actively work to defeat them. For example,
* in order for a cl_float4 to be 16 byte aligned in a struct,
* the start of the struct must itself be 16-byte aligned.
*
* Maintaining proper alignment is the user's responsibility.
*/
/* Define basic vector types */
#if defined( __VEC__ )
#include <altivec.h> /* may be omitted depending on compiler. AltiVec spec provides no way to detect whether the header is required. */
typedef vector unsigned char __cl_uchar16;
typedef vector signed char __cl_char16;
typedef vector unsigned short __cl_ushort8;
typedef vector signed short __cl_short8;
typedef vector unsigned int __cl_uint4;
typedef vector signed int __cl_int4;
typedef vector float __cl_float4;
#define __CL_UCHAR16__ 1
#define __CL_CHAR16__ 1
#define __CL_USHORT8__ 1
#define __CL_SHORT8__ 1
#define __CL_UINT4__ 1
#define __CL_INT4__ 1
#define __CL_FLOAT4__ 1
#endif
#if defined( __SSE__ )
#if defined( __MINGW64__ )
#include <intrin.h>
#else
#include <xmmintrin.h>
#endif
#if defined( __GNUC__ )
typedef float __cl_float4 __attribute__((vector_size(16)));
#else
typedef __m128 __cl_float4;
#endif
#define __CL_FLOAT4__ 1
#endif
#if defined( __SSE2__ )
#if defined( __MINGW64__ )
#include <intrin.h>
#else
#include <emmintrin.h>
#endif
#if defined( __GNUC__ )
typedef cl_uchar __cl_uchar16 __attribute__((vector_size(16)));
typedef cl_char __cl_char16 __attribute__((vector_size(16)));
typedef cl_ushort __cl_ushort8 __attribute__((vector_size(16)));
typedef cl_short __cl_short8 __attribute__((vector_size(16)));
typedef cl_uint __cl_uint4 __attribute__((vector_size(16)));
typedef cl_int __cl_int4 __attribute__((vector_size(16)));
typedef cl_ulong __cl_ulong2 __attribute__((vector_size(16)));
typedef cl_long __cl_long2 __attribute__((vector_size(16)));
typedef cl_double __cl_double2 __attribute__((vector_size(16)));
#else
typedef __m128i __cl_uchar16;
typedef __m128i __cl_char16;
typedef __m128i __cl_ushort8;
typedef __m128i __cl_short8;
typedef __m128i __cl_uint4;
typedef __m128i __cl_int4;
typedef __m128i __cl_ulong2;
typedef __m128i __cl_long2;
typedef __m128d __cl_double2;
#endif
#define __CL_UCHAR16__ 1
#define __CL_CHAR16__ 1
#define __CL_USHORT8__ 1
#define __CL_SHORT8__ 1
#define __CL_INT4__ 1
#define __CL_UINT4__ 1
#define __CL_ULONG2__ 1
#define __CL_LONG2__ 1
#define __CL_DOUBLE2__ 1
#endif
#if defined( __MMX__ )
#include <mmintrin.h>
#if defined( __GNUC__ )
typedef cl_uchar __cl_uchar8 __attribute__((vector_size(8)));
typedef cl_char __cl_char8 __attribute__((vector_size(8)));
typedef cl_ushort __cl_ushort4 __attribute__((vector_size(8)));
typedef cl_short __cl_short4 __attribute__((vector_size(8)));
typedef cl_uint __cl_uint2 __attribute__((vector_size(8)));
typedef cl_int __cl_int2 __attribute__((vector_size(8)));
typedef cl_ulong __cl_ulong1 __attribute__((vector_size(8)));
typedef cl_long __cl_long1 __attribute__((vector_size(8)));
typedef cl_float __cl_float2 __attribute__((vector_size(8)));
#else
typedef __m64 __cl_uchar8;
typedef __m64 __cl_char8;
typedef __m64 __cl_ushort4;
typedef __m64 __cl_short4;
typedef __m64 __cl_uint2;
typedef __m64 __cl_int2;
typedef __m64 __cl_ulong1;
typedef __m64 __cl_long1;
typedef __m64 __cl_float2;
#endif
#define __CL_UCHAR8__ 1
#define __CL_CHAR8__ 1
#define __CL_USHORT4__ 1
#define __CL_SHORT4__ 1
#define __CL_INT2__ 1
#define __CL_UINT2__ 1
#define __CL_ULONG1__ 1
#define __CL_LONG1__ 1
#define __CL_FLOAT2__ 1
#endif
#if defined( __AVX__ )
#if defined( __MINGW64__ )
#include <intrin.h>
#else
#include <immintrin.h>
#endif
#if defined( __GNUC__ )
typedef cl_float __cl_float8 __attribute__((vector_size(32)));
typedef cl_double __cl_double4 __attribute__((vector_size(32)));
#else
typedef __m256 __cl_float8;
typedef __m256d __cl_double4;
#endif
#define __CL_FLOAT8__ 1
#define __CL_DOUBLE4__ 1
#endif
/* Define capabilities for anonymous struct members. */
#if defined( __GNUC__) && ! defined( __STRICT_ANSI__ )
#define __CL_HAS_ANON_STRUCT__ 1
#define __CL_ANON_STRUCT__ __extension__
#elif defined( _WIN32) && (_MSC_VER >= 1500)
/* Microsoft Developer Studio 2008 supports anonymous structs, but
* complains by default. */
#define __CL_HAS_ANON_STRUCT__ 1
#define __CL_ANON_STRUCT__
/* Disable warning C4201: nonstandard extension used : nameless
* struct/union */
#pragma warning( push )
#pragma warning( disable : 4201 )
#else
#define __CL_HAS_ANON_STRUCT__ 0
#define __CL_ANON_STRUCT__
#endif
/* Define alignment keys */
#if defined( __GNUC__ )
#define CL_ALIGNED(_x) __attribute__ ((aligned(_x)))
#elif defined( _WIN32) && (_MSC_VER)
/* Alignment keys neutered on windows because MSVC can't swallow function arguments with alignment requirements */
/* http://msdn.microsoft.com/en-us/library/373ak2y1%28VS.71%29.aspx */
/* #include <crtdefs.h> */
/* #define CL_ALIGNED(_x) _CRT_ALIGN(_x) */
#define CL_ALIGNED(_x)
#else
#warning Need to implement some method to align data here
#define CL_ALIGNED(_x)
#endif
/* Indicate whether .xyzw, .s0123 and .hi.lo are supported */
#if __CL_HAS_ANON_STRUCT__
/* .xyzw and .s0123...{f|F} are supported */
#define CL_HAS_NAMED_VECTOR_FIELDS 1
/* .hi and .lo are supported */
#define CL_HAS_HI_LO_VECTOR_FIELDS 1
#endif
/* Define cl_vector types */
/* ---- cl_charn ---- */
typedef union
{
cl_char CL_ALIGNED(2) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_char x, y; };
__CL_ANON_STRUCT__ struct{ cl_char s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_char lo, hi; };
#endif
#if defined( __CL_CHAR2__)
__cl_char2 v2;
#endif
}cl_char2;
typedef union
{
cl_char CL_ALIGNED(4) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_char2 lo, hi; };
#endif
#if defined( __CL_CHAR2__)
__cl_char2 v2[2];
#endif
#if defined( __CL_CHAR4__)
__cl_char4 v4;
#endif
}cl_char4;
/* cl_char3 is identical in size, alignment and behavior to cl_char4. See section 6.1.5. */
typedef cl_char4 cl_char3;
typedef union
{
cl_char CL_ALIGNED(8) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_char x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_char4 lo, hi; };
#endif
#if defined( __CL_CHAR2__)
__cl_char2 v2[4];
#endif
#if defined( __CL_CHAR4__)
__cl_char4 v4[2];
#endif
#if defined( __CL_CHAR8__ )
__cl_char8 v8;
#endif
}cl_char8;
typedef union
{
cl_char CL_ALIGNED(16) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_char x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_char s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_char8 lo, hi; };
#endif
#if defined( __CL_CHAR2__)
__cl_char2 v2[8];
#endif
#if defined( __CL_CHAR4__)
__cl_char4 v4[4];
#endif
#if defined( __CL_CHAR8__ )
__cl_char8 v8[2];
#endif
#if defined( __CL_CHAR16__ )
__cl_char16 v16;
#endif
}cl_char16;
/* ---- cl_ucharn ---- */
typedef union
{
cl_uchar CL_ALIGNED(2) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_uchar x, y; };
__CL_ANON_STRUCT__ struct{ cl_uchar s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_uchar lo, hi; };
#endif
#if defined( __cl_uchar2__)
__cl_uchar2 v2;
#endif
}cl_uchar2;
typedef union
{
cl_uchar CL_ALIGNED(4) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_uchar2 lo, hi; };
#endif
#if defined( __CL_UCHAR2__)
__cl_uchar2 v2[2];
#endif
#if defined( __CL_UCHAR4__)
__cl_uchar4 v4;
#endif
}cl_uchar4;
/* cl_uchar3 is identical in size, alignment and behavior to cl_uchar4. See section 6.1.5. */
typedef cl_uchar4 cl_uchar3;
typedef union
{
cl_uchar CL_ALIGNED(8) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_uchar4 lo, hi; };
#endif
#if defined( __CL_UCHAR2__)
__cl_uchar2 v2[4];
#endif
#if defined( __CL_UCHAR4__)
__cl_uchar4 v4[2];
#endif
#if defined( __CL_UCHAR8__ )
__cl_uchar8 v8;
#endif
}cl_uchar8;
typedef union
{
cl_uchar CL_ALIGNED(16) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_uchar x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_uchar s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_uchar8 lo, hi; };
#endif
#if defined( __CL_UCHAR2__)
__cl_uchar2 v2[8];
#endif
#if defined( __CL_UCHAR4__)
__cl_uchar4 v4[4];
#endif
#if defined( __CL_UCHAR8__ )
__cl_uchar8 v8[2];
#endif
#if defined( __CL_UCHAR16__ )
__cl_uchar16 v16;
#endif
}cl_uchar16;
/* ---- cl_shortn ---- */
typedef union
{
cl_short CL_ALIGNED(4) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_short x, y; };
__CL_ANON_STRUCT__ struct{ cl_short s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_short lo, hi; };
#endif
#if defined( __CL_SHORT2__)
__cl_short2 v2;
#endif
}cl_short2;
typedef union
{
cl_short CL_ALIGNED(8) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_short2 lo, hi; };
#endif
#if defined( __CL_SHORT2__)
__cl_short2 v2[2];
#endif
#if defined( __CL_SHORT4__)
__cl_short4 v4;
#endif
}cl_short4;
/* cl_short3 is identical in size, alignment and behavior to cl_short4. See section 6.1.5. */
typedef cl_short4 cl_short3;
typedef union
{
cl_short CL_ALIGNED(16) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_short x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_short4 lo, hi; };
#endif
#if defined( __CL_SHORT2__)
__cl_short2 v2[4];
#endif
#if defined( __CL_SHORT4__)
__cl_short4 v4[2];
#endif
#if defined( __CL_SHORT8__ )
__cl_short8 v8;
#endif
}cl_short8;
typedef union
{
cl_short CL_ALIGNED(32) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_short x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_short s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_short8 lo, hi; };
#endif
#if defined( __CL_SHORT2__)
__cl_short2 v2[8];
#endif
#if defined( __CL_SHORT4__)
__cl_short4 v4[4];
#endif
#if defined( __CL_SHORT8__ )
__cl_short8 v8[2];
#endif
#if defined( __CL_SHORT16__ )
__cl_short16 v16;
#endif
}cl_short16;
/* ---- cl_ushortn ---- */
typedef union
{
cl_ushort CL_ALIGNED(4) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_ushort x, y; };
__CL_ANON_STRUCT__ struct{ cl_ushort s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_ushort lo, hi; };
#endif
#if defined( __CL_USHORT2__)
__cl_ushort2 v2;
#endif
}cl_ushort2;
typedef union
{
cl_ushort CL_ALIGNED(8) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_ushort2 lo, hi; };
#endif
#if defined( __CL_USHORT2__)
__cl_ushort2 v2[2];
#endif
#if defined( __CL_USHORT4__)
__cl_ushort4 v4;
#endif
}cl_ushort4;
/* cl_ushort3 is identical in size, alignment and behavior to cl_ushort4. See section 6.1.5. */
typedef cl_ushort4 cl_ushort3;
typedef union
{
cl_ushort CL_ALIGNED(16) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_ushort4 lo, hi; };
#endif
#if defined( __CL_USHORT2__)
__cl_ushort2 v2[4];
#endif
#if defined( __CL_USHORT4__)
__cl_ushort4 v4[2];
#endif
#if defined( __CL_USHORT8__ )
__cl_ushort8 v8;
#endif
}cl_ushort8;
typedef union
{
cl_ushort CL_ALIGNED(32) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_ushort x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_ushort s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_ushort8 lo, hi; };
#endif
#if defined( __CL_USHORT2__)
__cl_ushort2 v2[8];
#endif
#if defined( __CL_USHORT4__)
__cl_ushort4 v4[4];
#endif
#if defined( __CL_USHORT8__ )
__cl_ushort8 v8[2];
#endif
#if defined( __CL_USHORT16__ )
__cl_ushort16 v16;
#endif
}cl_ushort16;
/* ---- cl_intn ---- */
typedef union
{
cl_int CL_ALIGNED(8) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_int x, y; };
__CL_ANON_STRUCT__ struct{ cl_int s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_int lo, hi; };
#endif
#if defined( __CL_INT2__)
__cl_int2 v2;
#endif
}cl_int2;
typedef union
{
cl_int CL_ALIGNED(16) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_int2 lo, hi; };
#endif
#if defined( __CL_INT2__)
__cl_int2 v2[2];
#endif
#if defined( __CL_INT4__)
__cl_int4 v4;
#endif
}cl_int4;
/* cl_int3 is identical in size, alignment and behavior to cl_int4. See section 6.1.5. */
typedef cl_int4 cl_int3;
typedef union
{
cl_int CL_ALIGNED(32) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_int x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_int4 lo, hi; };
#endif
#if defined( __CL_INT2__)
__cl_int2 v2[4];
#endif
#if defined( __CL_INT4__)
__cl_int4 v4[2];
#endif
#if defined( __CL_INT8__ )
__cl_int8 v8;
#endif
}cl_int8;
typedef union
{
cl_int CL_ALIGNED(64) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_int x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_int s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_int8 lo, hi; };
#endif
#if defined( __CL_INT2__)
__cl_int2 v2[8];
#endif
#if defined( __CL_INT4__)
__cl_int4 v4[4];
#endif
#if defined( __CL_INT8__ )
__cl_int8 v8[2];
#endif
#if defined( __CL_INT16__ )
__cl_int16 v16;
#endif
}cl_int16;
/* ---- cl_uintn ---- */
typedef union
{
cl_uint CL_ALIGNED(8) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_uint x, y; };
__CL_ANON_STRUCT__ struct{ cl_uint s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_uint lo, hi; };
#endif
#if defined( __CL_UINT2__)
__cl_uint2 v2;
#endif
}cl_uint2;
typedef union
{
cl_uint CL_ALIGNED(16) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_uint2 lo, hi; };
#endif
#if defined( __CL_UINT2__)
__cl_uint2 v2[2];
#endif
#if defined( __CL_UINT4__)
__cl_uint4 v4;
#endif
}cl_uint4;
/* cl_uint3 is identical in size, alignment and behavior to cl_uint4. See section 6.1.5. */
typedef cl_uint4 cl_uint3;
typedef union
{
cl_uint CL_ALIGNED(32) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_uint4 lo, hi; };
#endif
#if defined( __CL_UINT2__)
__cl_uint2 v2[4];
#endif
#if defined( __CL_UINT4__)
__cl_uint4 v4[2];
#endif
#if defined( __CL_UINT8__ )
__cl_uint8 v8;
#endif
}cl_uint8;
typedef union
{
cl_uint CL_ALIGNED(64) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_uint x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_uint s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_uint8 lo, hi; };
#endif
#if defined( __CL_UINT2__)
__cl_uint2 v2[8];
#endif
#if defined( __CL_UINT4__)
__cl_uint4 v4[4];
#endif
#if defined( __CL_UINT8__ )
__cl_uint8 v8[2];
#endif
#if defined( __CL_UINT16__ )
__cl_uint16 v16;
#endif
}cl_uint16;
/* ---- cl_longn ---- */
typedef union
{
cl_long CL_ALIGNED(16) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_long x, y; };
__CL_ANON_STRUCT__ struct{ cl_long s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_long lo, hi; };
#endif
#if defined( __CL_LONG2__)
__cl_long2 v2;
#endif
}cl_long2;
typedef union
{
cl_long CL_ALIGNED(32) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_long2 lo, hi; };
#endif
#if defined( __CL_LONG2__)
__cl_long2 v2[2];
#endif
#if defined( __CL_LONG4__)
__cl_long4 v4;
#endif
}cl_long4;
/* cl_long3 is identical in size, alignment and behavior to cl_long4. See section 6.1.5. */
typedef cl_long4 cl_long3;
typedef union
{
cl_long CL_ALIGNED(64) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_long x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_long4 lo, hi; };
#endif
#if defined( __CL_LONG2__)
__cl_long2 v2[4];
#endif
#if defined( __CL_LONG4__)
__cl_long4 v4[2];
#endif
#if defined( __CL_LONG8__ )
__cl_long8 v8;
#endif
}cl_long8;
typedef union
{
cl_long CL_ALIGNED(128) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_long x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_long s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_long8 lo, hi; };
#endif
#if defined( __CL_LONG2__)
__cl_long2 v2[8];
#endif
#if defined( __CL_LONG4__)
__cl_long4 v4[4];
#endif
#if defined( __CL_LONG8__ )
__cl_long8 v8[2];
#endif
#if defined( __CL_LONG16__ )
__cl_long16 v16;
#endif
}cl_long16;
/* ---- cl_ulongn ---- */
typedef union
{
cl_ulong CL_ALIGNED(16) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_ulong x, y; };
__CL_ANON_STRUCT__ struct{ cl_ulong s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_ulong lo, hi; };
#endif
#if defined( __CL_ULONG2__)
__cl_ulong2 v2;
#endif
}cl_ulong2;
typedef union
{
cl_ulong CL_ALIGNED(32) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_ulong2 lo, hi; };
#endif
#if defined( __CL_ULONG2__)
__cl_ulong2 v2[2];
#endif
#if defined( __CL_ULONG4__)
__cl_ulong4 v4;
#endif
}cl_ulong4;
/* cl_ulong3 is identical in size, alignment and behavior to cl_ulong4. See section 6.1.5. */
typedef cl_ulong4 cl_ulong3;
typedef union
{
cl_ulong CL_ALIGNED(64) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_ulong4 lo, hi; };
#endif
#if defined( __CL_ULONG2__)
__cl_ulong2 v2[4];
#endif
#if defined( __CL_ULONG4__)
__cl_ulong4 v4[2];
#endif
#if defined( __CL_ULONG8__ )
__cl_ulong8 v8;
#endif
}cl_ulong8;
typedef union
{
cl_ulong CL_ALIGNED(128) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_ulong x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_ulong s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_ulong8 lo, hi; };
#endif
#if defined( __CL_ULONG2__)
__cl_ulong2 v2[8];
#endif
#if defined( __CL_ULONG4__)
__cl_ulong4 v4[4];
#endif
#if defined( __CL_ULONG8__ )
__cl_ulong8 v8[2];
#endif
#if defined( __CL_ULONG16__ )
__cl_ulong16 v16;
#endif
}cl_ulong16;
/* --- cl_floatn ---- */
typedef union
{
cl_float CL_ALIGNED(8) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_float x, y; };
__CL_ANON_STRUCT__ struct{ cl_float s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_float lo, hi; };
#endif
#if defined( __CL_FLOAT2__)
__cl_float2 v2;
#endif
}cl_float2;
typedef union
{
cl_float CL_ALIGNED(16) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_float2 lo, hi; };
#endif
#if defined( __CL_FLOAT2__)
__cl_float2 v2[2];
#endif
#if defined( __CL_FLOAT4__)
__cl_float4 v4;
#endif
}cl_float4;
/* cl_float3 is identical in size, alignment and behavior to cl_float4. See section 6.1.5. */
typedef cl_float4 cl_float3;
typedef union
{
cl_float CL_ALIGNED(32) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_float x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_float4 lo, hi; };
#endif
#if defined( __CL_FLOAT2__)
__cl_float2 v2[4];
#endif
#if defined( __CL_FLOAT4__)
__cl_float4 v4[2];
#endif
#if defined( __CL_FLOAT8__ )
__cl_float8 v8;
#endif
}cl_float8;
typedef union
{
cl_float CL_ALIGNED(64) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_float x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_float s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_float8 lo, hi; };
#endif
#if defined( __CL_FLOAT2__)
__cl_float2 v2[8];
#endif
#if defined( __CL_FLOAT4__)
__cl_float4 v4[4];
#endif
#if defined( __CL_FLOAT8__ )
__cl_float8 v8[2];
#endif
#if defined( __CL_FLOAT16__ )
__cl_float16 v16;
#endif
}cl_float16;
/* --- cl_doublen ---- */
typedef union
{
cl_double CL_ALIGNED(16) s[2];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_double x, y; };
__CL_ANON_STRUCT__ struct{ cl_double s0, s1; };
__CL_ANON_STRUCT__ struct{ cl_double lo, hi; };
#endif
#if defined( __CL_DOUBLE2__)
__cl_double2 v2;
#endif
}cl_double2;
typedef union
{
cl_double CL_ALIGNED(32) s[4];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3; };
__CL_ANON_STRUCT__ struct{ cl_double2 lo, hi; };
#endif
#if defined( __CL_DOUBLE2__)
__cl_double2 v2[2];
#endif
#if defined( __CL_DOUBLE4__)
__cl_double4 v4;
#endif
}cl_double4;
/* cl_double3 is identical in size, alignment and behavior to cl_double4. See section 6.1.5. */
typedef cl_double4 cl_double3;
typedef union
{
cl_double CL_ALIGNED(64) s[8];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_double x, y, z, w; };
__CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7; };
__CL_ANON_STRUCT__ struct{ cl_double4 lo, hi; };
#endif
#if defined( __CL_DOUBLE2__)
__cl_double2 v2[4];
#endif
#if defined( __CL_DOUBLE4__)
__cl_double4 v4[2];
#endif
#if defined( __CL_DOUBLE8__ )
__cl_double8 v8;
#endif
}cl_double8;
typedef union
{
cl_double CL_ALIGNED(128) s[16];
#if __CL_HAS_ANON_STRUCT__
__CL_ANON_STRUCT__ struct{ cl_double x, y, z, w, __spacer4, __spacer5, __spacer6, __spacer7, __spacer8, __spacer9, sa, sb, sc, sd, se, sf; };
__CL_ANON_STRUCT__ struct{ cl_double s0, s1, s2, s3, s4, s5, s6, s7, s8, s9, sA, sB, sC, sD, sE, sF; };
__CL_ANON_STRUCT__ struct{ cl_double8 lo, hi; };
#endif
#if defined( __CL_DOUBLE2__)
__cl_double2 v2[8];
#endif
#if defined( __CL_DOUBLE4__)
__cl_double4 v4[4];
#endif
#if defined( __CL_DOUBLE8__ )
__cl_double8 v8[2];
#endif
#if defined( __CL_DOUBLE16__ )
__cl_double16 v16;
#endif
}cl_double16;
/* Macro to facilitate debugging
* Usage:
* Place CL_PROGRAM_STRING_DEBUG_INFO on the line before the first line of your source.
* The first line ends with: CL_PROGRAM_STRING_DEBUG_INFO \"
* Each line thereafter of OpenCL C source must end with: \n\
* The last line ends in ";
*
* Example:
*
* const char *my_program = CL_PROGRAM_STRING_DEBUG_INFO "\
* kernel void foo( int a, float * b ) \n\
* { \n\
* // my comment \n\
* *b[ get_global_id(0)] = a; \n\
* } \n\
* ";
*
* This should correctly set up the line, (column) and file information for your source
* string so you can do source level debugging.
*/
#define __CL_STRINGIFY( _x ) # _x
#define _CL_STRINGIFY( _x ) __CL_STRINGIFY( _x )
#define CL_PROGRAM_STRING_DEBUG_INFO "#line " _CL_STRINGIFY(__LINE__) " \"" __FILE__ "\" \n\n"
#ifdef __cplusplus
}
#endif
#undef __CL_HAS_ANON_STRUCT__
#undef __CL_ANON_STRUCT__
#if defined( _WIN32) && (_MSC_VER >= 1500)
#pragma warning( pop )
#endif
#endif /* __CL_PLATFORM_H */
/*******************************************************************************
* Copyright (c) 2008-2012 The Khronos Group Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and/or associated documentation files (the
* "Materials"), to deal in the Materials without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Materials, and to
* permit persons to whom the Materials are furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be included
* in all copies or substantial portions of the Materials.
*
* THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
* MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
******************************************************************************/
/* $Revision: 11708 $ on $Date: 2010-06-13 23:36:24 -0700 (Sun, 13 Jun 2010) $ */
#ifndef __OPENCL_H
#define __OPENCL_H
#ifdef __cplusplus
extern "C" {
#endif
#include <cl.h>
#include <cl_gl.h>
#include <cl_gl_ext.h>
#include <cl_ext.h>
#ifdef __cplusplus
}
#endif
#endif /* __OPENCL_H */
// +build cl12
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import (
"image"
"unsafe"
)
func (ctx *Context) CreateImage(flags MemFlag, imageFormat ImageFormat, imageDesc ImageDescription, data []byte) (*MemObject, error) {
format := imageFormat.toCl()
desc := imageDesc.toCl()
var dataPtr unsafe.Pointer
if data != nil {
dataPtr = unsafe.Pointer(&data[0])
}
var err C.cl_int
clBuffer := C.clCreateImage(ctx.clContext, C.cl_mem_flags(flags), &format, &desc, dataPtr, &err)
if err != C.CL_SUCCESS {
return nil, toError(err)
}
if clBuffer == nil {
return nil, ErrUnknown
}
return newMemObject(clBuffer, len(data)), nil
}
func (ctx *Context) CreateImageSimple(flags MemFlag, width, height int, channelOrder ChannelOrder, channelDataType ChannelDataType, data []byte) (*MemObject, error) {
format := ImageFormat{channelOrder, channelDataType}
desc := ImageDescription{
Type: MemObjectTypeImage2D,
Width: width,
Height: height,
}
return ctx.CreateImage(flags, format, desc, data)
}
func (ctx *Context) CreateImageFromImage(flags MemFlag, img image.Image) (*MemObject, error) {
switch m := img.(type) {
case *image.Gray:
format := ImageFormat{ChannelOrderIntensity, ChannelDataTypeUNormInt8}
desc := ImageDescription{
Type: MemObjectTypeImage2D,
Width: m.Bounds().Dx(),
Height: m.Bounds().Dy(),
RowPitch: m.Stride,
}
return ctx.CreateImage(flags, format, desc, m.Pix)
case *image.RGBA:
format := ImageFormat{ChannelOrderRGBA, ChannelDataTypeUNormInt8}
desc := ImageDescription{
Type: MemObjectTypeImage2D,
Width: m.Bounds().Dx(),
Height: m.Bounds().Dy(),
RowPitch: m.Stride,
}
return ctx.CreateImage(flags, format, desc, m.Pix)
}
b := img.Bounds()
w := b.Dx()
h := b.Dy()
data := make([]byte, w*h*4)
dataOffset := 0
for y := 0; y < h; y++ {
for x := 0; x < w; x++ {
c := img.At(x+b.Min.X, y+b.Min.Y)
r, g, b, a := c.RGBA()
data[dataOffset] = uint8(r >> 8)
data[dataOffset+1] = uint8(g >> 8)
data[dataOffset+2] = uint8(b >> 8)
data[dataOffset+3] = uint8(a >> 8)
dataOffset += 4
}
}
return ctx.CreateImageSimple(flags, w, h, ChannelOrderRGBA, ChannelDataTypeUNormInt8, data)
}
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import (
"fmt"
"unsafe"
)
type ErrUnsupportedArgumentType struct {
Index int
Value interface{}
}
func (e ErrUnsupportedArgumentType) Error() string {
return fmt.Sprintf("cl: unsupported argument type for index %d: %+v", e.Index, e.Value)
}
type Kernel struct {
clKernel C.cl_kernel
name string
}
type LocalBuffer int
func releaseKernel(k *Kernel) {
if k.clKernel != nil {
C.clReleaseKernel(k.clKernel)
k.clKernel = nil
}
}
func (k *Kernel) Release() {
releaseKernel(k)
}
func (k *Kernel) SetArgs(args ...interface{}) error {
for index, arg := range args {
if err := k.SetArg(index, arg); err != nil {
return err
}
}
return nil
}
func (k *Kernel) SetArg(index int, arg interface{}) error {
switch val := arg.(type) {
case uint8:
return k.SetArgUint8(index, val)
case int8:
return k.SetArgInt8(index, val)
case uint32:
return k.SetArgUint32(index, val)
case uint64:
return k.SetArgUint64(index, val)
case int32:
return k.SetArgInt32(index, val)
case float32:
return k.SetArgFloat32(index, val)
case *MemObject:
return k.SetArgBuffer(index, val)
case LocalBuffer:
return k.SetArgLocal(index, int(val))
default:
return ErrUnsupportedArgumentType{Index: index, Value: arg}
}
}
func (k *Kernel) SetArgBuffer(index int, buffer *MemObject) error {
return k.SetArgUnsafe(index, int(unsafe.Sizeof(buffer.clMem)), unsafe.Pointer(&buffer.clMem))
}
func (k *Kernel) SetArgFloat32(index int, val float32) error {
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
}
func (k *Kernel) SetArgInt8(index int, val int8) error {
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
}
func (k *Kernel) SetArgUint8(index int, val uint8) error {
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
}
func (k *Kernel) SetArgInt32(index int, val int32) error {
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
}
func (k *Kernel) SetArgUint32(index int, val uint32) error {
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
}
func (k *Kernel) SetArgUint64(index int, val uint64) error {
return k.SetArgUnsafe(index, int(unsafe.Sizeof(val)), unsafe.Pointer(&val))
}
func (k *Kernel) SetArgLocal(index int, size int) error {
return k.SetArgUnsafe(index, size, nil)
}
func (k *Kernel) SetArgUnsafe(index, argSize int, arg unsafe.Pointer) error {
//fmt.Println("FUNKY: ", index, argSize)
return toError(C.clSetKernelArg(k.clKernel, C.cl_uint(index), C.size_t(argSize), arg))
}
func (k *Kernel) PreferredWorkGroupSizeMultiple(device *Device) (int, error) {
var size C.size_t
err := C.clGetKernelWorkGroupInfo(k.clKernel, device.nullableId(), C.CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, C.size_t(unsafe.Sizeof(size)), unsafe.Pointer(&size), nil)
return int(size), toError(err)
}
func (k *Kernel) WorkGroupSize(device *Device) (int, error) {
var size C.size_t
err := C.clGetKernelWorkGroupInfo(k.clKernel, device.nullableId(), C.CL_KERNEL_WORK_GROUP_SIZE, C.size_t(unsafe.Sizeof(size)), unsafe.Pointer(&size), nil)
return int(size), toError(err)
}
func (k *Kernel) NumArgs() (int, error) {
var num C.cl_uint
err := C.clGetKernelInfo(k.clKernel, C.CL_KERNEL_NUM_ARGS, C.size_t(unsafe.Sizeof(num)), unsafe.Pointer(&num), nil)
return int(num), toError(err)
}
// +build !cl12
package cl
func (k *Kernel) ArgName(index int) (string, error) {
return "", ErrUnsupported
}
// +build cl12
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import "unsafe"
func (k *Kernel) ArgName(index int) (string, error) {
var strC [1024]byte
var strN C.size_t
if err := C.clGetKernelArgInfo(k.clKernel, C.cl_uint(index), C.CL_KERNEL_ARG_NAME, 1024, unsafe.Pointer(&strC[0]), &strN); err != C.CL_SUCCESS {
return "", toError(err)
}
return string(strC[:strN]), nil
}
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import "unsafe"
const maxPlatforms = 32
type Platform struct {
id C.cl_platform_id
}
// Obtain the list of platforms available.
func GetPlatforms() ([]*Platform, error) {
var platformIds [maxPlatforms]C.cl_platform_id
var nPlatforms C.cl_uint
if err := C.clGetPlatformIDs(C.cl_uint(maxPlatforms), &platformIds[0], &nPlatforms); err != C.CL_SUCCESS {
return nil, toError(err)
}
platforms := make([]*Platform, nPlatforms)
for i := 0; i < int(nPlatforms); i++ {
platforms[i] = &Platform{id: platformIds[i]}
}
return platforms, nil
}
func (p *Platform) GetDevices(deviceType DeviceType) ([]*Device, error) {
return GetDevices(p, deviceType)
}
func (p *Platform) getInfoString(param C.cl_platform_info) (string, error) {
var strC [2048]byte
var strN C.size_t
if err := C.clGetPlatformInfo(p.id, param, 2048, unsafe.Pointer(&strC[0]), &strN); err != C.CL_SUCCESS {
return "", toError(err)
}
return string(strC[:(strN - 1)]), nil
}
func (p *Platform) Name() string {
if str, err := p.getInfoString(C.CL_PLATFORM_NAME); err != nil {
panic("Platform.Name() should never fail")
} else {
return str
}
}
func (p *Platform) Vendor() string {
if str, err := p.getInfoString(C.CL_PLATFORM_VENDOR); err != nil {
panic("Platform.Vendor() should never fail")
} else {
return str
}
}
func (p *Platform) Profile() string {
if str, err := p.getInfoString(C.CL_PLATFORM_PROFILE); err != nil {
panic("Platform.Profile() should never fail")
} else {
return str
}
}
func (p *Platform) Version() string {
if str, err := p.getInfoString(C.CL_PLATFORM_VERSION); err != nil {
panic("Platform.Version() should never fail")
} else {
return str
}
}
func (p *Platform) Extensions() string {
if str, err := p.getInfoString(C.CL_PLATFORM_EXTENSIONS); err != nil {
panic("Platform.Extensions() should never fail")
} else {
return str
}
}
package cl
// #include <stdlib.h>
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import (
"fmt"
"runtime"
"unsafe"
)
type BuildError struct {
Message string
Device *Device
}
func (e BuildError) Error() string {
if e.Device != nil {
return fmt.Sprintf("cl: build error on %q: %s", e.Device.Name(), e.Message)
} else {
return fmt.Sprintf("cl: build error: %s", e.Message)
}
}
type Program struct {
clProgram C.cl_program
devices []*Device
}
func releaseProgram(p *Program) {
if p.clProgram != nil {
C.clReleaseProgram(p.clProgram)
p.clProgram = nil
}
}
func (p *Program) Release() {
releaseProgram(p)
}
func (p *Program) BuildProgram(devices []*Device, options string) error {
var cOptions *C.char
if options != "" {
cOptions = C.CString(options)
defer C.free(unsafe.Pointer(cOptions))
}
var deviceList []C.cl_device_id
var deviceListPtr *C.cl_device_id
numDevices := C.cl_uint(len(devices))
if devices != nil && len(devices) > 0 {
deviceList = buildDeviceIdList(devices)
deviceListPtr = &deviceList[0]
}
if err := C.clBuildProgram(p.clProgram, numDevices, deviceListPtr, cOptions, nil, nil); err != C.CL_SUCCESS {
buffer := make([]byte, 4096)
var bLen C.size_t
var err C.cl_int
for _, dev := range p.devices {
for i := 2; i >= 0; i-- {
err = C.clGetProgramBuildInfo(p.clProgram, dev.id, C.CL_PROGRAM_BUILD_LOG, C.size_t(len(buffer)), unsafe.Pointer(&buffer[0]), &bLen)
if err == C.CL_INVALID_VALUE && i > 0 && bLen < 1024*1024 {
// INVALID_VALUE probably means our buffer isn't large enough
buffer = make([]byte, bLen)
} else {
break
}
}
if err != C.CL_SUCCESS {
return toError(err)
}
if bLen > 1 {
return BuildError{
Device: dev,
Message: string(buffer[:bLen-1]),
}
}
}
return BuildError{
Device: nil,
Message: "build failed and produced no log entries",
}
}
return nil
}
func (p *Program) CreateKernel(name string) (*Kernel, error) {
cName := C.CString(name)
defer C.free(unsafe.Pointer(cName))
var err C.cl_int
clKernel := C.clCreateKernel(p.clProgram, cName, &err)
if err != C.CL_SUCCESS {
return nil, toError(err)
}
kernel := &Kernel{clKernel: clKernel, name: name}
runtime.SetFinalizer(kernel, releaseKernel)
return kernel, nil
}
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import "unsafe"
type CommandQueueProperty int
const (
CommandQueueOutOfOrderExecModeEnable CommandQueueProperty = C.CL_QUEUE_OUT_OF_ORDER_EXEC_MODE_ENABLE
CommandQueueProfilingEnable CommandQueueProperty = C.CL_QUEUE_PROFILING_ENABLE
)
type CommandQueue struct {
clQueue C.cl_command_queue
device *Device
}
func releaseCommandQueue(q *CommandQueue) {
if q.clQueue != nil {
C.clReleaseCommandQueue(q.clQueue)
q.clQueue = nil
}
}
// Call clReleaseCommandQueue on the CommandQueue. Using the CommandQueue after Release will cause a panick.
func (q *CommandQueue) Release() {
releaseCommandQueue(q)
}
// Blocks until all previously queued OpenCL commands in a command-queue are issued to the associated device and have completed.
func (q *CommandQueue) Finish() error {
return toError(C.clFinish(q.clQueue))
}
// Issues all previously queued OpenCL commands in a command-queue to the device associated with the command-queue.
func (q *CommandQueue) Flush() error {
return toError(C.clFlush(q.clQueue))
}
// Enqueues a command to map a region of the buffer object given by buffer into the host address space and returns a pointer to this mapped region.
func (q *CommandQueue) EnqueueMapBuffer(buffer *MemObject, blocking bool, flags MapFlag, offset, size int, eventWaitList []*Event) (*MappedMemObject, *Event, error) {
var event C.cl_event
var err C.cl_int
ptr := C.clEnqueueMapBuffer(q.clQueue, buffer.clMem, clBool(blocking), flags.toCl(), C.size_t(offset), C.size_t(size), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event, &err)
if err != C.CL_SUCCESS {
return nil, nil, toError(err)
}
ev := newEvent(event)
if ptr == nil {
return nil, ev, ErrUnknown
}
return &MappedMemObject{ptr: ptr, size: size}, ev, nil
}
// Enqueues a command to map a region of an image object into the host address space and returns a pointer to this mapped region.
func (q *CommandQueue) EnqueueMapImage(buffer *MemObject, blocking bool, flags MapFlag, origin, region [3]int, eventWaitList []*Event) (*MappedMemObject, *Event, error) {
cOrigin := sizeT3(origin)
cRegion := sizeT3(region)
var event C.cl_event
var err C.cl_int
var rowPitch, slicePitch C.size_t
ptr := C.clEnqueueMapImage(q.clQueue, buffer.clMem, clBool(blocking), flags.toCl(), &cOrigin[0], &cRegion[0], &rowPitch, &slicePitch, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event, &err)
if err != C.CL_SUCCESS {
return nil, nil, toError(err)
}
ev := newEvent(event)
if ptr == nil {
return nil, ev, ErrUnknown
}
size := 0 // TODO: could calculate this
return &MappedMemObject{ptr: ptr, size: size, rowPitch: int(rowPitch), slicePitch: int(slicePitch)}, ev, nil
}
// Enqueues a command to unmap a previously mapped region of a memory object.
func (q *CommandQueue) EnqueueUnmapMemObject(buffer *MemObject, mappedObj *MappedMemObject, eventWaitList []*Event) (*Event, error) {
var event C.cl_event
if err := C.clEnqueueUnmapMemObject(q.clQueue, buffer.clMem, mappedObj.ptr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event); err != C.CL_SUCCESS {
return nil, toError(err)
}
return newEvent(event), nil
}
// Enqueues a command to copy a buffer object to another buffer object.
func (q *CommandQueue) EnqueueCopyBuffer(srcBuffer, dstBuffer *MemObject, srcOffset, dstOffset, byteCount int, eventWaitList []*Event) (*Event, error) {
var event C.cl_event
err := toError(C.clEnqueueCopyBuffer(q.clQueue, srcBuffer.clMem, dstBuffer.clMem, C.size_t(srcOffset), C.size_t(dstOffset), C.size_t(byteCount), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
// Enqueue commands to write to a buffer object from host memory.
func (q *CommandQueue) EnqueueWriteBuffer(buffer *MemObject, blocking bool, offset, dataSize int, dataPtr unsafe.Pointer, eventWaitList []*Event) (*Event, error) {
var event C.cl_event
err := toError(C.clEnqueueWriteBuffer(q.clQueue, buffer.clMem, clBool(blocking), C.size_t(offset), C.size_t(dataSize), dataPtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
func (q *CommandQueue) EnqueueWriteBufferFloat32(buffer *MemObject, blocking bool, offset int, data []float32, eventWaitList []*Event) (*Event, error) {
dataPtr := unsafe.Pointer(&data[0])
dataSize := int(unsafe.Sizeof(data[0])) * len(data)
return q.EnqueueWriteBuffer(buffer, blocking, offset, dataSize, dataPtr, eventWaitList)
}
// Enqueue commands to read from a buffer object to host memory.
func (q *CommandQueue) EnqueueReadBuffer(buffer *MemObject, blocking bool, offset, dataSize int, dataPtr unsafe.Pointer, eventWaitList []*Event) (*Event, error) {
var event C.cl_event
err := toError(C.clEnqueueReadBuffer(q.clQueue, buffer.clMem, clBool(blocking), C.size_t(offset), C.size_t(dataSize), dataPtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
func (q *CommandQueue) EnqueueReadBufferFloat32(buffer *MemObject, blocking bool, offset int, data []float32, eventWaitList []*Event) (*Event, error) {
dataPtr := unsafe.Pointer(&data[0])
dataSize := int(unsafe.Sizeof(data[0])) * len(data)
return q.EnqueueReadBuffer(buffer, blocking, offset, dataSize, dataPtr, eventWaitList)
}
// Enqueues a command to execute a kernel on a device.
func (q *CommandQueue) EnqueueNDRangeKernel(kernel *Kernel, globalWorkOffset, globalWorkSize, localWorkSize []int, eventWaitList []*Event) (*Event, error) {
workDim := len(globalWorkSize)
var globalWorkOffsetList []C.size_t
var globalWorkOffsetPtr *C.size_t
if globalWorkOffset != nil {
globalWorkOffsetList = make([]C.size_t, len(globalWorkOffset))
for i, off := range globalWorkOffset {
globalWorkOffsetList[i] = C.size_t(off)
}
globalWorkOffsetPtr = &globalWorkOffsetList[0]
}
var globalWorkSizeList []C.size_t
var globalWorkSizePtr *C.size_t
if globalWorkSize != nil {
globalWorkSizeList = make([]C.size_t, len(globalWorkSize))
for i, off := range globalWorkSize {
globalWorkSizeList[i] = C.size_t(off)
}
globalWorkSizePtr = &globalWorkSizeList[0]
}
var localWorkSizeList []C.size_t
var localWorkSizePtr *C.size_t
if localWorkSize != nil {
localWorkSizeList = make([]C.size_t, len(localWorkSize))
for i, off := range localWorkSize {
localWorkSizeList[i] = C.size_t(off)
}
localWorkSizePtr = &localWorkSizeList[0]
}
var event C.cl_event
err := toError(C.clEnqueueNDRangeKernel(q.clQueue, kernel.clKernel, C.cl_uint(workDim), globalWorkOffsetPtr, globalWorkSizePtr, localWorkSizePtr, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
// Enqueues a command to read from a 2D or 3D image object to host memory.
func (q *CommandQueue) EnqueueReadImage(image *MemObject, blocking bool, origin, region [3]int, rowPitch, slicePitch int, data []byte, eventWaitList []*Event) (*Event, error) {
cOrigin := sizeT3(origin)
cRegion := sizeT3(region)
var event C.cl_event
err := toError(C.clEnqueueReadImage(q.clQueue, image.clMem, clBool(blocking), &cOrigin[0], &cRegion[0], C.size_t(rowPitch), C.size_t(slicePitch), unsafe.Pointer(&data[0]), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
// Enqueues a command to write from a 2D or 3D image object to host memory.
func (q *CommandQueue) EnqueueWriteImage(image *MemObject, blocking bool, origin, region [3]int, rowPitch, slicePitch int, data []byte, eventWaitList []*Event) (*Event, error) {
cOrigin := sizeT3(origin)
cRegion := sizeT3(region)
var event C.cl_event
err := toError(C.clEnqueueWriteImage(q.clQueue, image.clMem, clBool(blocking), &cOrigin[0], &cRegion[0], C.size_t(rowPitch), C.size_t(slicePitch), unsafe.Pointer(&data[0]), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
func (q *CommandQueue) EnqueueFillBuffer(buffer *MemObject, pattern unsafe.Pointer, patternSize, offset, size int, eventWaitList []*Event) (*Event, error) {
var event C.cl_event
err := toError(C.clEnqueueFillBuffer(q.clQueue, buffer.clMem, pattern, C.size_t(patternSize), C.size_t(offset), C.size_t(size), C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
// A synchronization point that enqueues a barrier operation.
func (q *CommandQueue) EnqueueBarrierWithWaitList(eventWaitList []*Event) (*Event, error) {
var event C.cl_event
err := toError(C.clEnqueueBarrierWithWaitList(q.clQueue, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
// Enqueues a marker command which waits for either a list of events to complete, or all previously enqueued commands to complete.
func (q *CommandQueue) EnqueueMarkerWithWaitList(eventWaitList []*Event) (*Event, error) {
var event C.cl_event
err := toError(C.clEnqueueMarkerWithWaitList(q.clQueue, C.cl_uint(len(eventWaitList)), eventListPtr(eventWaitList), &event))
return newEvent(event), err
}
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
import (
"errors"
"fmt"
"reflect"
"runtime"
"strings"
"unsafe"
)
var (
ErrUnknown = errors.New("cl: unknown error") // Generally an unexpected result from an OpenCL function (e.g. CL_SUCCESS but null pointer)
)
type ErrOther int
func (e ErrOther) Error() string {
return fmt.Sprintf("cl: error %d", int(e))
}
var (
ErrDeviceNotFound = errors.New("cl: Device Not Found")
ErrDeviceNotAvailable = errors.New("cl: Device Not Available")
ErrCompilerNotAvailable = errors.New("cl: Compiler Not Available")
ErrMemObjectAllocationFailure = errors.New("cl: Mem Object Allocation Failure")
ErrOutOfResources = errors.New("cl: Out Of Resources")
ErrOutOfHostMemory = errors.New("cl: Out Of Host Memory")
ErrProfilingInfoNotAvailable = errors.New("cl: Profiling Info Not Available")
ErrMemCopyOverlap = errors.New("cl: Mem Copy Overlap")
ErrImageFormatMismatch = errors.New("cl: Image Format Mismatch")
ErrImageFormatNotSupported = errors.New("cl: Image Format Not Supported")
ErrBuildProgramFailure = errors.New("cl: Build Program Failure")
ErrMapFailure = errors.New("cl: Map Failure")
ErrMisalignedSubBufferOffset = errors.New("cl: Misaligned Sub Buffer Offset")
ErrExecStatusErrorForEventsInWaitList = errors.New("cl: Exec Status Error For Events In Wait List")
ErrCompileProgramFailure = errors.New("cl: Compile Program Failure")
ErrLinkerNotAvailable = errors.New("cl: Linker Not Available")
ErrLinkProgramFailure = errors.New("cl: Link Program Failure")
ErrDevicePartitionFailed = errors.New("cl: Device Partition Failed")
ErrKernelArgInfoNotAvailable = errors.New("cl: Kernel Arg Info Not Available")
ErrInvalidValue = errors.New("cl: Invalid Value")
ErrInvalidDeviceType = errors.New("cl: Invalid Device Type")
ErrInvalidPlatform = errors.New("cl: Invalid Platform")
ErrInvalidDevice = errors.New("cl: Invalid Device")
ErrInvalidContext = errors.New("cl: Invalid Context")
ErrInvalidQueueProperties = errors.New("cl: Invalid Queue Properties")
ErrInvalidCommandQueue = errors.New("cl: Invalid Command Queue")
ErrInvalidHostPtr = errors.New("cl: Invalid Host Ptr")
ErrInvalidMemObject = errors.New("cl: Invalid Mem Object")
ErrInvalidImageFormatDescriptor = errors.New("cl: Invalid Image Format Descriptor")
ErrInvalidImageSize = errors.New("cl: Invalid Image Size")
ErrInvalidSampler = errors.New("cl: Invalid Sampler")
ErrInvalidBinary = errors.New("cl: Invalid Binary")
ErrInvalidBuildOptions = errors.New("cl: Invalid Build Options")
ErrInvalidProgram = errors.New("cl: Invalid Program")
ErrInvalidProgramExecutable = errors.New("cl: Invalid Program Executable")
ErrInvalidKernelName = errors.New("cl: Invalid Kernel Name")
ErrInvalidKernelDefinition = errors.New("cl: Invalid Kernel Definition")
ErrInvalidKernel = errors.New("cl: Invalid Kernel")
ErrInvalidArgIndex = errors.New("cl: Invalid Arg Index")
ErrInvalidArgValue = errors.New("cl: Invalid Arg Value")
ErrInvalidArgSize = errors.New("cl: Invalid Arg Size")
ErrInvalidKernelArgs = errors.New("cl: Invalid Kernel Args")
ErrInvalidWorkDimension = errors.New("cl: Invalid Work Dimension")
ErrInvalidWorkGroupSize = errors.New("cl: Invalid Work Group Size")
ErrInvalidWorkItemSize = errors.New("cl: Invalid Work Item Size")
ErrInvalidGlobalOffset = errors.New("cl: Invalid Global Offset")
ErrInvalidEventWaitList = errors.New("cl: Invalid Event Wait List")
ErrInvalidEvent = errors.New("cl: Invalid Event")
ErrInvalidOperation = errors.New("cl: Invalid Operation")
ErrInvalidGlObject = errors.New("cl: Invalid Gl Object")
ErrInvalidBufferSize = errors.New("cl: Invalid Buffer Size")
ErrInvalidMipLevel = errors.New("cl: Invalid Mip Level")
ErrInvalidGlobalWorkSize = errors.New("cl: Invalid Global Work Size")
ErrInvalidProperty = errors.New("cl: Invalid Property")
ErrInvalidImageDescriptor = errors.New("cl: Invalid Image Descriptor")
ErrInvalidCompilerOptions = errors.New("cl: Invalid Compiler Options")
ErrInvalidLinkerOptions = errors.New("cl: Invalid Linker Options")
ErrInvalidDevicePartitionCount = errors.New("cl: Invalid Device Partition Count")
)
var errorMap = map[C.cl_int]error{
C.CL_SUCCESS: nil,
C.CL_DEVICE_NOT_FOUND: ErrDeviceNotFound,
C.CL_DEVICE_NOT_AVAILABLE: ErrDeviceNotAvailable,
C.CL_COMPILER_NOT_AVAILABLE: ErrCompilerNotAvailable,
C.CL_MEM_OBJECT_ALLOCATION_FAILURE: ErrMemObjectAllocationFailure,
C.CL_OUT_OF_RESOURCES: ErrOutOfResources,
C.CL_OUT_OF_HOST_MEMORY: ErrOutOfHostMemory,
C.CL_PROFILING_INFO_NOT_AVAILABLE: ErrProfilingInfoNotAvailable,
C.CL_MEM_COPY_OVERLAP: ErrMemCopyOverlap,
C.CL_IMAGE_FORMAT_MISMATCH: ErrImageFormatMismatch,
C.CL_IMAGE_FORMAT_NOT_SUPPORTED: ErrImageFormatNotSupported,
C.CL_BUILD_PROGRAM_FAILURE: ErrBuildProgramFailure,
C.CL_MAP_FAILURE: ErrMapFailure,
C.CL_MISALIGNED_SUB_BUFFER_OFFSET: ErrMisalignedSubBufferOffset,
C.CL_EXEC_STATUS_ERROR_FOR_EVENTS_IN_WAIT_LIST: ErrExecStatusErrorForEventsInWaitList,
C.CL_INVALID_VALUE: ErrInvalidValue,
C.CL_INVALID_DEVICE_TYPE: ErrInvalidDeviceType,
C.CL_INVALID_PLATFORM: ErrInvalidPlatform,
C.CL_INVALID_DEVICE: ErrInvalidDevice,
C.CL_INVALID_CONTEXT: ErrInvalidContext,
C.CL_INVALID_QUEUE_PROPERTIES: ErrInvalidQueueProperties,
C.CL_INVALID_COMMAND_QUEUE: ErrInvalidCommandQueue,
C.CL_INVALID_HOST_PTR: ErrInvalidHostPtr,
C.CL_INVALID_MEM_OBJECT: ErrInvalidMemObject,
C.CL_INVALID_IMAGE_FORMAT_DESCRIPTOR: ErrInvalidImageFormatDescriptor,
C.CL_INVALID_IMAGE_SIZE: ErrInvalidImageSize,
C.CL_INVALID_SAMPLER: ErrInvalidSampler,
C.CL_INVALID_BINARY: ErrInvalidBinary,
C.CL_INVALID_BUILD_OPTIONS: ErrInvalidBuildOptions,
C.CL_INVALID_PROGRAM: ErrInvalidProgram,
C.CL_INVALID_PROGRAM_EXECUTABLE: ErrInvalidProgramExecutable,
C.CL_INVALID_KERNEL_NAME: ErrInvalidKernelName,
C.CL_INVALID_KERNEL_DEFINITION: ErrInvalidKernelDefinition,
C.CL_INVALID_KERNEL: ErrInvalidKernel,
C.CL_INVALID_ARG_INDEX: ErrInvalidArgIndex,
C.CL_INVALID_ARG_VALUE: ErrInvalidArgValue,
C.CL_INVALID_ARG_SIZE: ErrInvalidArgSize,
C.CL_INVALID_KERNEL_ARGS: ErrInvalidKernelArgs,
C.CL_INVALID_WORK_DIMENSION: ErrInvalidWorkDimension,
C.CL_INVALID_WORK_GROUP_SIZE: ErrInvalidWorkGroupSize,
C.CL_INVALID_WORK_ITEM_SIZE: ErrInvalidWorkItemSize,
C.CL_INVALID_GLOBAL_OFFSET: ErrInvalidGlobalOffset,
C.CL_INVALID_EVENT_WAIT_LIST: ErrInvalidEventWaitList,
C.CL_INVALID_EVENT: ErrInvalidEvent,
C.CL_INVALID_OPERATION: ErrInvalidOperation,
C.CL_INVALID_GL_OBJECT: ErrInvalidGlObject,
C.CL_INVALID_BUFFER_SIZE: ErrInvalidBufferSize,
C.CL_INVALID_MIP_LEVEL: ErrInvalidMipLevel,
C.CL_INVALID_GLOBAL_WORK_SIZE: ErrInvalidGlobalWorkSize,
C.CL_INVALID_PROPERTY: ErrInvalidProperty,
}
func toError(code C.cl_int) error {
if err, ok := errorMap[code]; ok {
return err
}
return ErrOther(code)
}
type LocalMemType int
const (
LocalMemTypeNone LocalMemType = C.CL_NONE
LocalMemTypeGlobal LocalMemType = C.CL_GLOBAL
LocalMemTypeLocal LocalMemType = C.CL_LOCAL
)
var localMemTypeMap = map[LocalMemType]string{
LocalMemTypeNone: "None",
LocalMemTypeGlobal: "Global",
LocalMemTypeLocal: "Local",
}
func (t LocalMemType) String() string {
name := localMemTypeMap[t]
if name == "" {
name = "Unknown"
}
return name
}
type ExecCapability int
const (
ExecCapabilityKernel ExecCapability = C.CL_EXEC_KERNEL // The OpenCL device can execute OpenCL kernels.
ExecCapabilityNativeKernel ExecCapability = C.CL_EXEC_NATIVE_KERNEL // The OpenCL device can execute native kernels.
)
func (ec ExecCapability) String() string {
var parts []string
if ec&ExecCapabilityKernel != 0 {
parts = append(parts, "Kernel")
}
if ec&ExecCapabilityNativeKernel != 0 {
parts = append(parts, "NativeKernel")
}
if parts == nil {
return ""
}
return strings.Join(parts, "|")
}
type MemCacheType int
const (
MemCacheTypeNone MemCacheType = C.CL_NONE
MemCacheTypeReadOnlyCache MemCacheType = C.CL_READ_ONLY_CACHE
MemCacheTypeReadWriteCache MemCacheType = C.CL_READ_WRITE_CACHE
)
func (ct MemCacheType) String() string {
switch ct {
case MemCacheTypeNone:
return "None"
case MemCacheTypeReadOnlyCache:
return "ReadOnly"
case MemCacheTypeReadWriteCache:
return "ReadWrite"
}
return fmt.Sprintf("Unknown(%x)", int(ct))
}
type MemFlag int
const (
MemReadWrite MemFlag = C.CL_MEM_READ_WRITE
MemWriteOnly MemFlag = C.CL_MEM_WRITE_ONLY
MemReadOnly MemFlag = C.CL_MEM_READ_ONLY
MemUseHostPtr MemFlag = C.CL_MEM_USE_HOST_PTR
MemAllocHostPtr MemFlag = C.CL_MEM_ALLOC_HOST_PTR
MemCopyHostPtr MemFlag = C.CL_MEM_COPY_HOST_PTR
MemWriteOnlyHost MemFlag = C.CL_MEM_HOST_WRITE_ONLY
MemReadOnlyHost MemFlag = C.CL_MEM_HOST_READ_ONLY
MemNoAccessHost MemFlag = C.CL_MEM_HOST_NO_ACCESS
)
type MemObjectType int
const (
MemObjectTypeBuffer MemObjectType = C.CL_MEM_OBJECT_BUFFER
MemObjectTypeImage2D MemObjectType = C.CL_MEM_OBJECT_IMAGE2D
MemObjectTypeImage3D MemObjectType = C.CL_MEM_OBJECT_IMAGE3D
)
type MapFlag int
const (
// This flag specifies that the region being mapped in the memory object is being mapped for reading.
MapFlagRead MapFlag = C.CL_MAP_READ
MapFlagWrite MapFlag = C.CL_MAP_WRITE
MapFlagWriteInvalidateRegion MapFlag = C.CL_MAP_WRITE_INVALIDATE_REGION
)
func (mf MapFlag) toCl() C.cl_map_flags {
return C.cl_map_flags(mf)
}
type ChannelOrder int
const (
ChannelOrderR ChannelOrder = C.CL_R
ChannelOrderA ChannelOrder = C.CL_A
ChannelOrderRG ChannelOrder = C.CL_RG
ChannelOrderRA ChannelOrder = C.CL_RA
ChannelOrderRGB ChannelOrder = C.CL_RGB
ChannelOrderRGBA ChannelOrder = C.CL_RGBA
ChannelOrderBGRA ChannelOrder = C.CL_BGRA
ChannelOrderARGB ChannelOrder = C.CL_ARGB
ChannelOrderIntensity ChannelOrder = C.CL_INTENSITY
ChannelOrderLuminance ChannelOrder = C.CL_LUMINANCE
ChannelOrderRx ChannelOrder = C.CL_Rx
ChannelOrderRGx ChannelOrder = C.CL_RGx
ChannelOrderRGBx ChannelOrder = C.CL_RGBx
)
var channelOrderNameMap = map[ChannelOrder]string{
ChannelOrderR: "R",
ChannelOrderA: "A",
ChannelOrderRG: "RG",
ChannelOrderRA: "RA",
ChannelOrderRGB: "RGB",
ChannelOrderRGBA: "RGBA",
ChannelOrderBGRA: "BGRA",
ChannelOrderARGB: "ARGB",
ChannelOrderIntensity: "Intensity",
ChannelOrderLuminance: "Luminance",
ChannelOrderRx: "Rx",
ChannelOrderRGx: "RGx",
ChannelOrderRGBx: "RGBx",
}
func (co ChannelOrder) String() string {
name := channelOrderNameMap[co]
if name == "" {
name = fmt.Sprintf("Unknown(%x)", int(co))
}
return name
}
type ChannelDataType int
const (
ChannelDataTypeSNormInt8 ChannelDataType = C.CL_SNORM_INT8
ChannelDataTypeSNormInt16 ChannelDataType = C.CL_SNORM_INT16
ChannelDataTypeUNormInt8 ChannelDataType = C.CL_UNORM_INT8
ChannelDataTypeUNormInt16 ChannelDataType = C.CL_UNORM_INT16
ChannelDataTypeUNormShort565 ChannelDataType = C.CL_UNORM_SHORT_565
ChannelDataTypeUNormShort555 ChannelDataType = C.CL_UNORM_SHORT_555
ChannelDataTypeUNormInt101010 ChannelDataType = C.CL_UNORM_INT_101010
ChannelDataTypeSignedInt8 ChannelDataType = C.CL_SIGNED_INT8
ChannelDataTypeSignedInt16 ChannelDataType = C.CL_SIGNED_INT16
ChannelDataTypeSignedInt32 ChannelDataType = C.CL_SIGNED_INT32
ChannelDataTypeUnsignedInt8 ChannelDataType = C.CL_UNSIGNED_INT8
ChannelDataTypeUnsignedInt16 ChannelDataType = C.CL_UNSIGNED_INT16
ChannelDataTypeUnsignedInt32 ChannelDataType = C.CL_UNSIGNED_INT32
ChannelDataTypeHalfFloat ChannelDataType = C.CL_HALF_FLOAT
ChannelDataTypeFloat ChannelDataType = C.CL_FLOAT
)
var channelDataTypeNameMap = map[ChannelDataType]string{
ChannelDataTypeSNormInt8: "SNormInt8",
ChannelDataTypeSNormInt16: "SNormInt16",
ChannelDataTypeUNormInt8: "UNormInt8",
ChannelDataTypeUNormInt16: "UNormInt16",
ChannelDataTypeUNormShort565: "UNormShort565",
ChannelDataTypeUNormShort555: "UNormShort555",
ChannelDataTypeUNormInt101010: "UNormInt101010",
ChannelDataTypeSignedInt8: "SignedInt8",
ChannelDataTypeSignedInt16: "SignedInt16",
ChannelDataTypeSignedInt32: "SignedInt32",
ChannelDataTypeUnsignedInt8: "UnsignedInt8",
ChannelDataTypeUnsignedInt16: "UnsignedInt16",
ChannelDataTypeUnsignedInt32: "UnsignedInt32",
ChannelDataTypeHalfFloat: "HalfFloat",
ChannelDataTypeFloat: "Float",
}
func (ct ChannelDataType) String() string {
name := channelDataTypeNameMap[ct]
if name == "" {
name = fmt.Sprintf("Unknown(%x)", int(ct))
}
return name
}
type ImageFormat struct {
ChannelOrder ChannelOrder
ChannelDataType ChannelDataType
}
func (f ImageFormat) toCl() C.cl_image_format {
var format C.cl_image_format
format.image_channel_order = C.cl_channel_order(f.ChannelOrder)
format.image_channel_data_type = C.cl_channel_type(f.ChannelDataType)
return format
}
type ProfilingInfo int
const (
// A 64-bit value that describes the current device time counter in
// nanoseconds when the command identified by event is enqueued in
// a command-queue by the host.
ProfilingInfoCommandQueued ProfilingInfo = C.CL_PROFILING_COMMAND_QUEUED
// A 64-bit value that describes the current device time counter in
// nanoseconds when the command identified by event that has been
// enqueued is submitted by the host to the device associated with the command-queue.
ProfilingInfoCommandSubmit ProfilingInfo = C.CL_PROFILING_COMMAND_SUBMIT
// A 64-bit value that describes the current device time counter in
// nanoseconds when the command identified by event starts execution on the device.
ProfilingInfoCommandStart ProfilingInfo = C.CL_PROFILING_COMMAND_START
// A 64-bit value that describes the current device time counter in
// nanoseconds when the command identified by event has finished
// execution on the device.
ProfilingInfoCommandEnd ProfilingInfo = C.CL_PROFILING_COMMAND_END
)
type CommmandExecStatus int
const (
CommmandExecStatusComplete CommmandExecStatus = C.CL_COMPLETE
CommmandExecStatusRunning CommmandExecStatus = C.CL_RUNNING
CommmandExecStatusSubmitted CommmandExecStatus = C.CL_SUBMITTED
CommmandExecStatusQueued CommmandExecStatus = C.CL_QUEUED
)
type Event struct {
clEvent C.cl_event
}
func releaseEvent(ev *Event) {
if ev.clEvent != nil {
C.clReleaseEvent(ev.clEvent)
ev.clEvent = nil
}
}
func (e *Event) Release() {
releaseEvent(e)
}
func (e *Event) GetEventProfilingInfo(paramName ProfilingInfo) (int64, error) {
var paramValue C.cl_ulong
if err := C.clGetEventProfilingInfo(e.clEvent, C.cl_profiling_info(paramName), C.size_t(unsafe.Sizeof(paramValue)), unsafe.Pointer(&paramValue), nil); err != C.CL_SUCCESS {
return 0, toError(err)
}
return int64(paramValue), nil
}
// Sets the execution status of a user event object.
//
// `status` specifies the new execution status to be set and
// can be CL_COMPLETE or a negative integer value to indicate
// an error. A negative integer value causes all enqueued commands
// that wait on this user event to be terminated. clSetUserEventStatus
// can only be called once to change the execution status of event.
func (e *Event) SetUserEventStatus(status int) error {
return toError(C.clSetUserEventStatus(e.clEvent, C.cl_int(status)))
}
// Waits on the host thread for commands identified by event objects in
// events to complete. A command is considered complete if its execution
// status is CL_COMPLETE or a negative value. The events specified in
// event_list act as synchronization points.
//
// If the cl_khr_gl_event extension is enabled, event objects can also be
// used to reflect the status of an OpenGL sync object. The sync object
// in turn refers to a fence command executing in an OpenGL command
// stream. This provides another method of coordinating sharing of buffers
// and images between OpenGL and OpenCL.
func WaitForEvents(events []*Event) error {
return toError(C.clWaitForEvents(C.cl_uint(len(events)), eventListPtr(events)))
}
func newEvent(clEvent C.cl_event) *Event {
ev := &Event{clEvent: clEvent}
runtime.SetFinalizer(ev, releaseEvent)
return ev
}
func eventListPtr(el []*Event) *C.cl_event {
if el == nil {
return nil
}
elist := make([]C.cl_event, len(el))
for i, e := range el {
elist[i] = e.clEvent
}
return (*C.cl_event)(&elist[0])
}
func clBool(b bool) C.cl_bool {
if b {
return C.CL_TRUE
}
return C.CL_FALSE
}
func sizeT3(i3 [3]int) [3]C.size_t {
var val [3]C.size_t
val[0] = C.size_t(i3[0])
val[1] = C.size_t(i3[1])
val[2] = C.size_t(i3[2])
return val
}
type MappedMemObject struct {
ptr unsafe.Pointer
size int
rowPitch int
slicePitch int
}
func (mb *MappedMemObject) ByteSlice() []byte {
var byteSlice []byte
sliceHeader := (*reflect.SliceHeader)(unsafe.Pointer(&byteSlice))
sliceHeader.Cap = mb.size
sliceHeader.Len = mb.size
sliceHeader.Data = uintptr(mb.ptr)
return byteSlice
}
func (mb *MappedMemObject) Ptr() unsafe.Pointer {
return mb.ptr
}
func (mb *MappedMemObject) Size() int {
return mb.size
}
func (mb *MappedMemObject) RowPitch() int {
return mb.rowPitch
}
func (mb *MappedMemObject) SlicePitch() int {
return mb.slicePitch
}
// +build cl12
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
const (
ChannelDataTypeUNormInt24 ChannelDataType = C.CL_UNORM_INT24
ChannelOrderDepth ChannelOrder = C.CL_DEPTH
ChannelOrderDepthStencil ChannelOrder = C.CL_DEPTH_STENCIL
MemHostNoAccess MemFlag = C.CL_MEM_HOST_NO_ACCESS // OpenCL 1.2
MemHostReadOnly MemFlag = C.CL_MEM_HOST_READ_ONLY // OpenCL 1.2
MemHostWriteOnly MemFlag = C.CL_MEM_HOST_WRITE_ONLY // OpenCL 1.2
MemObjectTypeImage1D MemObjectType = C.CL_MEM_OBJECT_IMAGE1D
MemObjectTypeImage1DArray MemObjectType = C.CL_MEM_OBJECT_IMAGE1D_ARRAY
MemObjectTypeImage1DBuffer MemObjectType = C.CL_MEM_OBJECT_IMAGE1D_BUFFER
MemObjectTypeImage2DArray MemObjectType = C.CL_MEM_OBJECT_IMAGE2D_ARRAY
// This flag specifies that the region being mapped in the memory object is being mapped for writing.
//
// The contents of the region being mapped are to be discarded. This is typically the case when the
// region being mapped is overwritten by the host. This flag allows the implementation to no longer
// guarantee that the pointer returned by clEnqueueMapBuffer or clEnqueueMapImage contains the
// latest bits in the region being mapped which can be a significant performance enhancement.
MapFlagWriteInvalidateRegion MapFlag = C.CL_MAP_WRITE_INVALIDATE_REGION
)
func init() {
errorMap[C.CL_COMPILE_PROGRAM_FAILURE] = ErrCompileProgramFailure
errorMap[C.CL_DEVICE_PARTITION_FAILED] = ErrDevicePartitionFailed
errorMap[C.CL_INVALID_COMPILER_OPTIONS] = ErrInvalidCompilerOptions
errorMap[C.CL_INVALID_DEVICE_PARTITION_COUNT] = ErrInvalidDevicePartitionCount
errorMap[C.CL_INVALID_IMAGE_DESCRIPTOR] = ErrInvalidImageDescriptor
errorMap[C.CL_INVALID_LINKER_OPTIONS] = ErrInvalidLinkerOptions
errorMap[C.CL_KERNEL_ARG_INFO_NOT_AVAILABLE] = ErrKernelArgInfoNotAvailable
errorMap[C.CL_LINK_PROGRAM_FAILURE] = ErrLinkProgramFailure
errorMap[C.CL_LINKER_NOT_AVAILABLE] = ErrLinkerNotAvailable
channelOrderNameMap[ChannelOrderDepth] = "Depth"
channelOrderNameMap[ChannelOrderDepthStencil] = "DepthStencil"
channelDataTypeNameMap[ChannelDataTypeUNormInt24] = "UNormInt24"
}
type ImageDescription struct {
Type MemObjectType
Width, Height, Depth int
ArraySize, RowPitch, SlicePitch int
NumMipLevels, NumSamples int
Buffer *MemObject
}
func (d ImageDescription) toCl() C.cl_image_desc {
var desc C.cl_image_desc
desc.image_type = C.cl_mem_object_type(d.Type)
desc.image_width = C.size_t(d.Width)
desc.image_height = C.size_t(d.Height)
desc.image_depth = C.size_t(d.Depth)
desc.image_array_size = C.size_t(d.ArraySize)
desc.image_row_pitch = C.size_t(d.RowPitch)
desc.image_slice_pitch = C.size_t(d.SlicePitch)
desc.num_mip_levels = C.cl_uint(d.NumMipLevels)
desc.num_samples = C.cl_uint(d.NumSamples)
desc.buffer = nil
if d.Buffer != nil {
desc.buffer = d.Buffer.clMem
}
return desc
}
package cl
// #ifdef __APPLE__
// #include "OpenCL/opencl.h"
// #else
// #include "cl.h"
// #endif
import "C"
// Extension: cl_APPLE_fixed_alpha_channel_orders
//
// These selectors may be passed to clCreateImage2D() in the cl_image_format.image_channel_order field.
// They are like CL_BGRA and CL_ARGB except that the alpha channel to be ignored. On calls to read_imagef,
// the alpha will be 0xff (1.0f) if the sample falls in the image and 0 if it does not fall in the image.
// On calls to write_imagef, the alpha value is ignored and 0xff (1.0f) is written. These formats are
// currently only available for the CL_UNORM_INT8 cl_channel_type. They are intended to support legacy
// image formats.
const (
ChannelOrder1RGBApple ChannelOrder = C.CL_1RGB_APPLE // Introduced in MacOS X.7.
ChannelOrderBGR1Apple ChannelOrder = C.CL_BGR1_APPLE // Introduced in MacOS X.7.
)
// Extension: cl_APPLE_biased_fixed_point_image_formats
//
// This selector may be passed to clCreateImage2D() in the cl_image_format.image_channel_data_type field.
// It defines a biased signed 1.14 fixed point storage format, with range [-1, 3). The conversion from
// float to this fixed point format is defined as follows:
//
// ushort float_to_sfixed14( float x ){
// int i = convert_int_sat_rte( x * 0x1.0p14f ); // scale [-1, 3.0) to [-16384, 3*16384), round to nearest integer
// i = add_sat( i, 0x4000 ); // apply bias, to convert to [0, 65535) range
// return convert_ushort_sat(i); // clamp to destination size
// }
//
// The inverse conversion is the reverse process. The formats are currently only available on the CPU with
// the CL_RGBA channel layout.
const (
ChannelDataTypeSFixed14Apple ChannelDataType = C.CL_SFIXED14_APPLE // Introduced in MacOS X.7.
)
func init() {
channelOrderNameMap[ChannelOrder1RGBApple] = "1RGBApple"
channelOrderNameMap[ChannelOrderBGR1Apple] = "RGB1Apple"
channelDataTypeNameMap[ChannelDataTypeSFixed14Apple] = "SFixed14Apple"
}
......@@ -30,8 +30,8 @@ import (
)
var (
minDifficulty = new(big.Int).Exp(big.NewInt(2), big.NewInt(256), big.NewInt(0))
sharedLight = new(Light)
maxUint256 = new(big.Int).Exp(big.NewInt(2), big.NewInt(256), big.NewInt(0))
sharedLight = new(Light)
)
const (
......@@ -140,7 +140,7 @@ func (l *Light) Verify(block pow.Block) bool {
// the finalizer before the call completes.
_ = cache
// The actual check.
target := new(big.Int).Div(minDifficulty, difficulty)
target := new(big.Int).Div(maxUint256, difficulty)
return h256ToHash(ret.result).Big().Cmp(target) <= 0
}
......@@ -199,7 +199,7 @@ func (d *dag) generate() {
if d.dir == "" {
d.dir = DefaultDir
}
glog.V(logger.Info).Infof("Generating DAG for epoch %d (%x)", d.epoch, seedHash)
glog.V(logger.Info).Infof("Generating DAG for epoch %d (size %d) (%x)", d.epoch, dagSize, seedHash)
// Generate a temporary cache.
// TODO: this could share the cache with Light
cache := C.ethash_light_new_internal(cacheSize, (*C.ethash_h256_t)(unsafe.Pointer(&seedHash[0])))
......@@ -220,14 +220,18 @@ func (d *dag) generate() {
})
}
func freeDAG(h *dag) {
C.ethash_full_delete(h.ptr)
h.ptr = nil
func freeDAG(d *dag) {
C.ethash_full_delete(d.ptr)
d.ptr = nil
}
func (d *dag) Ptr() unsafe.Pointer {
return unsafe.Pointer(d.ptr.data)
}
//export ethashGoCallback
func ethashGoCallback(percent C.unsigned) C.int {
glog.V(logger.Info).Infof("Still generating DAG: %d%%", percent)
glog.V(logger.Info).Infof("Generating DAG: %d%%", percent)
return 0
}
......@@ -273,7 +277,7 @@ func (pow *Full) getDAG(blockNum uint64) (d *dag) {
return d
}
func (pow *Full) Search(block pow.Block, stop <-chan struct{}) (nonce uint64, mixDigest []byte) {
func (pow *Full) Search(block pow.Block, stop <-chan struct{}, index int) (nonce uint64, mixDigest []byte) {
dag := pow.getDAG(block.NumberU64())
r := rand.New(rand.NewSource(time.Now().UnixNano()))
......@@ -286,7 +290,7 @@ func (pow *Full) Search(block pow.Block, stop <-chan struct{}) (nonce uint64, mi
nonce = uint64(r.Int63())
hash := hashToH256(block.HashNoNonce())
target := new(big.Int).Div(minDifficulty, diff)
target := new(big.Int).Div(maxUint256, diff)
for {
select {
case <-stop:
......
// Copyright 2014 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// +build opencl
package ethash
//#cgo LDFLAGS: -w
//#include <stdint.h>
//#include <string.h>
//#include "src/libethash/internal.h"
import "C"
import (
crand "crypto/rand"
"encoding/binary"
"fmt"
"math"
"math/big"
mrand "math/rand"
"strconv"
"strings"
"sync"
"sync/atomic"
"time"
"unsafe"
"github.com/Gustav-Simonsson/go-opencl/cl"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/pow"
)
/*
This code have two main entry points:
1. The initCL(...) function configures one or more OpenCL device
(for now only GPU) and loads the Ethash DAG onto device memory
2. The Search(...) function loads a Ethash nonce into device(s) memory and
executes the Ethash OpenCL kernel.
Throughout the code, we refer to "host memory" and "device memory".
For most systems (e.g. regular PC GPU miner) the host memory is RAM and
device memory is the GPU global memory (e.g. GDDR5).
References mentioned in code comments:
1. https://github.com/ethereum/wiki/wiki/Ethash
2. https://github.com/ethereum/cpp-ethereum/blob/develop/libethash-cl/ethash_cl_miner.cpp
3. https://www.khronos.org/registry/cl/sdk/1.2/docs/man/xhtml/
4. http://amd-dev.wpengine.netdna-cdn.com/wordpress/media/2013/12/AMD_OpenCL_Programming_User_Guide.pdf
*/
type OpenCLDevice struct {
deviceId int
device *cl.Device
openCL11 bool // OpenCL version 1.1 and 1.2 are handled a bit different
openCL12 bool
dagBuf *cl.MemObject // Ethash full DAG in device mem
headerBuf *cl.MemObject // Hash of block-to-mine in device mem
searchBuffers []*cl.MemObject
searchKernel *cl.Kernel
hashKernel *cl.Kernel
queue *cl.CommandQueue
ctx *cl.Context
workGroupSize int
nonceRand *mrand.Rand // seeded by crypto/rand, see comments where it's initialised
result common.Hash
}
type OpenCLMiner struct {
mu sync.Mutex
ethash *Ethash // Ethash full DAG & cache in host mem
deviceIds []int
devices []*OpenCLDevice
dagSize uint64
hashRate int32 // Go atomics & uint64 have some issues; int32 is supported on all platforms
}
type pendingSearch struct {
bufIndex uint32
startNonce uint64
}
const (
SIZEOF_UINT32 = 4
// See [1]
ethashMixBytesLen = 128
ethashAccesses = 64
// See [4]
workGroupSize = 32 // must be multiple of 8
maxSearchResults = 63
searchBufSize = 2
globalWorkSize = 1024 * 256
)
func NewCL(deviceIds []int) *OpenCLMiner {
ids := make([]int, len(deviceIds))
copy(ids, deviceIds)
return &OpenCLMiner{
ethash: New(),
dagSize: 0, // to see if we need to update DAG.
deviceIds: ids,
}
}
func PrintDevices() {
fmt.Println("=============================================")
fmt.Println("============ OpenCL Device Info =============")
fmt.Println("=============================================")
var found []*cl.Device
platforms, err := cl.GetPlatforms()
if err != nil {
fmt.Println("Plaform error (check your OpenCL installation): %v", err)
return
}
for i, p := range platforms {
fmt.Println("Platform id ", i)
fmt.Println("Platform Name ", p.Name())
fmt.Println("Platform Vendor ", p.Vendor())
fmt.Println("Platform Version ", p.Version())
fmt.Println("Platform Extensions ", p.Extensions())
fmt.Println("Platform Profile ", p.Profile())
fmt.Println("")
devices, err := cl.GetDevices(p, cl.DeviceTypeGPU)
if err != nil {
fmt.Println("Device error (check your GPU drivers) :", err)
return
}
for _, d := range devices {
fmt.Println("Device OpenCL id ", i)
fmt.Println("Device id for mining ", len(found))
fmt.Println("Device Name ", d.Name())
fmt.Println("Vendor ", d.Vendor())
fmt.Println("Version ", d.Version())
fmt.Println("Driver version ", d.DriverVersion())
fmt.Println("Address bits ", d.AddressBits())
fmt.Println("Max clock freq ", d.MaxClockFrequency())
fmt.Println("Global mem size ", d.GlobalMemSize())
fmt.Println("Max constant buffer size", d.MaxConstantBufferSize())
fmt.Println("Max mem alloc size ", d.MaxMemAllocSize())
fmt.Println("Max compute units ", d.MaxComputeUnits())
fmt.Println("Max work group size ", d.MaxWorkGroupSize())
fmt.Println("Max work item sizes ", d.MaxWorkItemSizes())
fmt.Println("=============================================")
found = append(found, d)
}
}
if len(found) == 0 {
fmt.Println("Found no GPU(s). Check that your OS can see the GPU(s)")
} else {
var idsFormat string
for i := 0; i < len(found); i++ {
idsFormat += strconv.Itoa(i)
if i != len(found)-1 {
idsFormat += ","
}
}
fmt.Printf("Found %v devices. Benchmark first GPU: geth gpubench 0\n", len(found))
fmt.Printf("Mine using all GPUs: geth --minegpu %v\n", idsFormat)
}
}
// See [2]. We basically do the same here, but the Go OpenCL bindings
// are at a slightly higher abtraction level.
func InitCL(blockNum uint64, c *OpenCLMiner) error {
platforms, err := cl.GetPlatforms()
if err != nil {
return fmt.Errorf("Plaform error: %v\nCheck your OpenCL installation and then run geth gpuinfo", err)
}
var devices []*cl.Device
for _, p := range platforms {
ds, err := cl.GetDevices(p, cl.DeviceTypeGPU)
if err != nil {
return fmt.Errorf("Devices error: %v\nCheck your GPU drivers and then run geth gpuinfo", err)
}
for _, d := range ds {
devices = append(devices, d)
}
}
pow := New()
_ = pow.getDAG(blockNum) // generates DAG if we don't have it
pow.Light.getCache(blockNum) // and cache
c.ethash = pow
dagSize := uint64(C.ethash_get_datasize(C.uint64_t(blockNum)))
c.dagSize = dagSize
for _, id := range c.deviceIds {
if id > len(devices)-1 {
return fmt.Errorf("Device id not found. See available device ids with: geth gpuinfo")
} else {
err := initCLDevice(id, devices[id], c)
if err != nil {
return err
}
}
}
if len(c.devices) == 0 {
return fmt.Errorf("No GPU devices found")
}
return nil
}
func initCLDevice(deviceId int, device *cl.Device, c *OpenCLMiner) error {
devMaxAlloc := uint64(device.MaxMemAllocSize())
devGlobalMem := uint64(device.GlobalMemSize())
// TODO: more fine grained version logic
if device.Version() == "OpenCL 1.0" {
fmt.Println("Device OpenCL version not supported: ", device.Version())
return fmt.Errorf("opencl version not supported")
}
var cl11, cl12 bool
if device.Version() == "OpenCL 1.1" {
cl11 = true
}
if device.Version() == "OpenCL 1.2" {
cl12 = true
}
// log warnings but carry on; some device drivers report inaccurate values
if c.dagSize > devGlobalMem {
fmt.Printf("WARNING: device memory may be insufficient: %v. DAG size: %v.\n", devGlobalMem, c.dagSize)
}
if c.dagSize > devMaxAlloc {
fmt.Printf("WARNING: DAG size (%v) larger than device max memory allocation size (%v).\n", c.dagSize, devMaxAlloc)
fmt.Printf("You probably have to export GPU_MAX_ALLOC_PERCENT=95\n")
}
fmt.Printf("Initialising device %v: %v\n", deviceId, device.Name())
context, err := cl.CreateContext([]*cl.Device{device})
if err != nil {
return fmt.Errorf("failed creating context:", err)
}
// TODO: test running with CL_QUEUE_PROFILING_ENABLE for profiling?
queue, err := context.CreateCommandQueue(device, 0)
if err != nil {
return fmt.Errorf("command queue err:", err)
}
// See [4] section 3.2 and [3] "clBuildProgram".
// The OpenCL kernel code is compiled at run-time.
kvs := make(map[string]string, 4)
kvs["GROUP_SIZE"] = strconv.FormatUint(workGroupSize, 10)
kvs["DAG_SIZE"] = strconv.FormatUint(c.dagSize/ethashMixBytesLen, 10)
kvs["ACCESSES"] = strconv.FormatUint(ethashAccesses, 10)
kvs["MAX_OUTPUTS"] = strconv.FormatUint(maxSearchResults, 10)
kernelCode := replaceWords(kernel, kvs)
program, err := context.CreateProgramWithSource([]string{kernelCode})
if err != nil {
return fmt.Errorf("program err:", err)
}
/* if using AMD OpenCL impl, you can set this to debug on x86 CPU device.
see AMD OpenCL programming guide section 4.2
export in shell before running:
export AMD_OCL_BUILD_OPTIONS_APPEND="-g -O0"
export CPU_MAX_COMPUTE_UNITS=1
buildOpts := "-g -cl-opt-disable"
*/
buildOpts := ""
err = program.BuildProgram([]*cl.Device{device}, buildOpts)
if err != nil {
return fmt.Errorf("program build err:", err)
}
var searchKernelName, hashKernelName string
searchKernelName = "ethash_search"
hashKernelName = "ethash_hash"
searchKernel, err := program.CreateKernel(searchKernelName)
hashKernel, err := program.CreateKernel(hashKernelName)
if err != nil {
return fmt.Errorf("kernel err:", err)
}
// TODO: when this DAG size appears, patch the Go bindings
// (context.go) to work with uint64 as size_t
if c.dagSize > math.MaxInt32 {
fmt.Println("DAG too large for allocation.")
return fmt.Errorf("DAG too large for alloc")
}
// TODO: patch up Go bindings to work with size_t, will overflow if > maxint32
// TODO: fuck. shit's gonna overflow around 2017-06-09 12:17:02
dagBuf := *(new(*cl.MemObject))
dagBuf, err = context.CreateEmptyBuffer(cl.MemReadOnly, int(c.dagSize))
if err != nil {
return fmt.Errorf("allocating dag buf failed: ", err)
}
// write DAG to device mem
dagPtr := unsafe.Pointer(c.ethash.Full.current.ptr.data)
_, err = queue.EnqueueWriteBuffer(dagBuf, true, 0, int(c.dagSize), dagPtr, nil)
if err != nil {
return fmt.Errorf("writing to dag buf failed: ", err)
}
searchBuffers := make([]*cl.MemObject, searchBufSize)
for i := 0; i < searchBufSize; i++ {
searchBuff, err := context.CreateEmptyBuffer(cl.MemWriteOnly, (1+maxSearchResults)*SIZEOF_UINT32)
if err != nil {
return fmt.Errorf("search buffer err:", err)
}
searchBuffers[i] = searchBuff
}
headerBuf, err := context.CreateEmptyBuffer(cl.MemReadOnly, 32)
if err != nil {
return fmt.Errorf("header buffer err:", err)
}
// Unique, random nonces are crucial for mining efficieny.
// While we do not need cryptographically secure PRNG for nonces,
// we want to have uniform distribution and minimal repetition of nonces.
// We could guarantee strict uniqueness of nonces by generating unique ranges,
// but a int64 seed from crypto/rand should be good enough.
// we then use math/rand for speed and to avoid draining OS entropy pool
seed, err := crand.Int(crand.Reader, big.NewInt(math.MaxInt64))
if err != nil {
return err
}
nonceRand := mrand.New(mrand.NewSource(seed.Int64()))
deviceStruct := &OpenCLDevice{
deviceId: deviceId,
device: device,
openCL11: cl11,
openCL12: cl12,
dagBuf: dagBuf,
headerBuf: headerBuf,
searchBuffers: searchBuffers,
searchKernel: searchKernel,
hashKernel: hashKernel,
queue: queue,
ctx: context,
workGroupSize: workGroupSize,
nonceRand: nonceRand,
}
c.devices = append(c.devices, deviceStruct)
return nil
}
func (c *OpenCLMiner) Search(block pow.Block, stop <-chan struct{}, index int) (uint64, []byte) {
c.mu.Lock()
newDagSize := uint64(C.ethash_get_datasize(C.uint64_t(block.NumberU64())))
if newDagSize > c.dagSize {
// TODO: clean up buffers from previous DAG?
err := InitCL(block.NumberU64(), c)
if err != nil {
fmt.Println("OpenCL init error: ", err)
return 0, []byte{0}
}
}
defer c.mu.Unlock()
// Avoid unneeded OpenCL initialisation if we received stop while running InitCL
select {
case <-stop:
return 0, []byte{0}
default:
}
headerHash := block.HashNoNonce()
diff := block.Difficulty()
target256 := new(big.Int).Div(maxUint256, diff)
target64 := new(big.Int).Rsh(target256, 192).Uint64()
var zero uint32 = 0
d := c.devices[index]
_, err := d.queue.EnqueueWriteBuffer(d.headerBuf, false, 0, 32, unsafe.Pointer(&headerHash[0]), nil)
if err != nil {
fmt.Println("Error in Search clEnqueueWriterBuffer : ", err)
return 0, []byte{0}
}
for i := 0; i < searchBufSize; i++ {
_, err := d.queue.EnqueueWriteBuffer(d.searchBuffers[i], false, 0, 4, unsafe.Pointer(&zero), nil)
if err != nil {
fmt.Println("Error in Search clEnqueueWriterBuffer : ", err)
return 0, []byte{0}
}
}
// wait for all search buffers to complete
err = d.queue.Finish()
if err != nil {
fmt.Println("Error in Search clFinish : ", err)
return 0, []byte{0}
}
err = d.searchKernel.SetArg(1, d.headerBuf)
if err != nil {
fmt.Println("Error in Search clSetKernelArg : ", err)
return 0, []byte{0}
}
err = d.searchKernel.SetArg(2, d.dagBuf)
if err != nil {
fmt.Println("Error in Search clSetKernelArg : ", err)
return 0, []byte{0}
}
err = d.searchKernel.SetArg(4, target64)
if err != nil {
fmt.Println("Error in Search clSetKernelArg : ", err)
return 0, []byte{0}
}
err = d.searchKernel.SetArg(5, uint32(math.MaxUint32))
if err != nil {
fmt.Println("Error in Search clSetKernelArg : ", err)
return 0, []byte{0}
}
// wait on this before returning
var preReturnEvent *cl.Event
if d.openCL12 {
preReturnEvent, err = d.ctx.CreateUserEvent()
if err != nil {
fmt.Println("Error in Search create CL user event : ", err)
return 0, []byte{0}
}
}
pending := make([]pendingSearch, 0, searchBufSize)
var p *pendingSearch
searchBufIndex := uint32(0)
var checkNonce uint64
loops := int64(0)
prevHashRate := int32(0)
start := time.Now().UnixNano()
// we grab a single random nonce and sets this as argument to the kernel search function
// the device will then add each local threads gid to the nonce, creating a unique nonce
// for each device computing unit executing in parallel
initNonce := uint64(d.nonceRand.Int63())
for nonce := initNonce; ; nonce += uint64(globalWorkSize) {
select {
case <-stop:
/*
if d.openCL12 {
err = cl.WaitForEvents([]*cl.Event{preReturnEvent})
if err != nil {
fmt.Println("Error in Search WaitForEvents: ", err)
}
}
*/
atomic.AddInt32(&c.hashRate, -prevHashRate)
return 0, []byte{0}
default:
}
if (loops % (1 << 7)) == 0 {
elapsed := time.Now().UnixNano() - start
// TODO: verify if this is correct hash rate calculation
hashes := (float64(1e9) / float64(elapsed)) * float64(loops*1024*256)
hashrateDiff := int32(hashes) - prevHashRate
prevHashRate = int32(hashes)
atomic.AddInt32(&c.hashRate, hashrateDiff)
}
loops++
err = d.searchKernel.SetArg(0, d.searchBuffers[searchBufIndex])
if err != nil {
fmt.Println("Error in Search clSetKernelArg : ", err)
return 0, []byte{0}
}
err = d.searchKernel.SetArg(3, nonce)
if err != nil {
fmt.Println("Error in Search clSetKernelArg : ", err)
return 0, []byte{0}
}
// execute kernel
_, err := d.queue.EnqueueNDRangeKernel(
d.searchKernel,
[]int{0},
[]int{globalWorkSize},
[]int{d.workGroupSize},
nil)
if err != nil {
fmt.Println("Error in Search clEnqueueNDRangeKernel : ", err)
return 0, []byte{0}
}
pending = append(pending, pendingSearch{bufIndex: searchBufIndex, startNonce: nonce})
searchBufIndex = (searchBufIndex + 1) % searchBufSize
if len(pending) == searchBufSize {
p = &(pending[searchBufIndex])
cres, _, err := d.queue.EnqueueMapBuffer(d.searchBuffers[p.bufIndex], true,
cl.MapFlagRead, 0, (1+maxSearchResults)*SIZEOF_UINT32,
nil)
if err != nil {
fmt.Println("Error in Search clEnqueueMapBuffer: ", err)
return 0, []byte{0}
}
results := cres.ByteSlice()
nfound := binary.LittleEndian.Uint32(results)
nfound = uint32(math.Min(float64(nfound), float64(maxSearchResults)))
// OpenCL returns the offsets from the start nonce
for i := uint32(0); i < nfound; i++ {
lo := (i + 1) * SIZEOF_UINT32
hi := (i + 2) * SIZEOF_UINT32
upperNonce := uint64(binary.LittleEndian.Uint32(results[lo:hi]))
checkNonce = p.startNonce + upperNonce
if checkNonce != 0 {
cn := C.uint64_t(checkNonce)
ds := C.uint64_t(c.dagSize)
// We verify that the nonce is indeed a solution by
// executing the Ethash verification function (on the CPU).
ret := C.ethash_light_compute_internal(c.ethash.Light.current.ptr, ds, hashToH256(headerHash), cn)
// TODO: return result first
if ret.success && h256ToHash(ret.result).Big().Cmp(target256) <= 0 {
_, err = d.queue.EnqueueUnmapMemObject(d.searchBuffers[p.bufIndex], cres, nil)
if err != nil {
fmt.Println("Error in Search clEnqueueUnmapMemObject: ", err)
}
if d.openCL12 {
err = cl.WaitForEvents([]*cl.Event{preReturnEvent})
if err != nil {
fmt.Println("Error in Search WaitForEvents: ", err)
}
}
return checkNonce, C.GoBytes(unsafe.Pointer(&ret.mix_hash), C.int(32))
}
_, err := d.queue.EnqueueWriteBuffer(d.searchBuffers[p.bufIndex], false, 0, 4, unsafe.Pointer(&zero), nil)
if err != nil {
fmt.Println("Error in Search cl: EnqueueWriteBuffer", err)
return 0, []byte{0}
}
}
}
_, err = d.queue.EnqueueUnmapMemObject(d.searchBuffers[p.bufIndex], cres, nil)
if err != nil {
fmt.Println("Error in Search clEnqueueUnMapMemObject: ", err)
return 0, []byte{0}
}
pending = append(pending[:searchBufIndex], pending[searchBufIndex+1:]...)
}
}
if d.openCL12 {
err := cl.WaitForEvents([]*cl.Event{preReturnEvent})
if err != nil {
fmt.Println("Error in Search clWaitForEvents: ", err)
return 0, []byte{0}
}
}
return 0, []byte{0}
}
func (c *OpenCLMiner) Verify(block pow.Block) bool {
return c.ethash.Light.Verify(block)
}
func (c *OpenCLMiner) GetHashrate() int64 {
return int64(atomic.LoadInt32(&c.hashRate))
}
func (c *OpenCLMiner) Turbo(on bool) {
// This is GPU mining. Always be turbo.
}
func replaceWords(text string, kvs map[string]string) string {
for k, v := range kvs {
text = strings.Replace(text, k, v, -1)
}
return text
}
func logErr(err error) {
if err != nil {
fmt.Println("Error in OpenCL call:", err)
}
}
func argErr(err error) error {
return fmt.Errorf("arg err: %v", err)
}
package ethash
/* DO NOT EDIT!!!
This code is version controlled at
https://github.com/ethereum/cpp-ethereum/blob/develop/libethash-cl/ethash_cl_miner_kernel.cl
If needed change it there first, then copy over here.
*/
const kernel = `
// author Tim Hughes <tim@twistedfury.com>
// Tested on Radeon HD 7850
// Hashrate: 15940347 hashes/s
// Bandwidth: 124533 MB/s
// search kernel should fit in <= 84 VGPRS (3 wavefronts)
#define THREADS_PER_HASH (128 / 16)
#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH)
#define FNV_PRIME 0x01000193
__constant uint2 const Keccak_f1600_RC[24] = {
(uint2)(0x00000001, 0x00000000),
(uint2)(0x00008082, 0x00000000),
(uint2)(0x0000808a, 0x80000000),
(uint2)(0x80008000, 0x80000000),
(uint2)(0x0000808b, 0x00000000),
(uint2)(0x80000001, 0x00000000),
(uint2)(0x80008081, 0x80000000),
(uint2)(0x00008009, 0x80000000),
(uint2)(0x0000008a, 0x00000000),
(uint2)(0x00000088, 0x00000000),
(uint2)(0x80008009, 0x00000000),
(uint2)(0x8000000a, 0x00000000),
(uint2)(0x8000808b, 0x00000000),
(uint2)(0x0000008b, 0x80000000),
(uint2)(0x00008089, 0x80000000),
(uint2)(0x00008003, 0x80000000),
(uint2)(0x00008002, 0x80000000),
(uint2)(0x00000080, 0x80000000),
(uint2)(0x0000800a, 0x00000000),
(uint2)(0x8000000a, 0x80000000),
(uint2)(0x80008081, 0x80000000),
(uint2)(0x00008080, 0x80000000),
(uint2)(0x80000001, 0x00000000),
(uint2)(0x80008008, 0x80000000),
};
void keccak_f1600_round(uint2* a, uint r, uint out_size)
{
#if !__ENDIAN_LITTLE__
for (uint i = 0; i != 25; ++i)
a[i] = a[i].yx;
#endif
uint2 b[25];
uint2 t;
// Theta
b[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20];
b[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21];
b[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22];
b[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23];
b[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24];
t = b[4] ^ (uint2)(b[1].x << 1 | b[1].y >> 31, b[1].y << 1 | b[1].x >> 31);
a[0] ^= t;
a[5] ^= t;
a[10] ^= t;
a[15] ^= t;
a[20] ^= t;
t = b[0] ^ (uint2)(b[2].x << 1 | b[2].y >> 31, b[2].y << 1 | b[2].x >> 31);
a[1] ^= t;
a[6] ^= t;
a[11] ^= t;
a[16] ^= t;
a[21] ^= t;
t = b[1] ^ (uint2)(b[3].x << 1 | b[3].y >> 31, b[3].y << 1 | b[3].x >> 31);
a[2] ^= t;
a[7] ^= t;
a[12] ^= t;
a[17] ^= t;
a[22] ^= t;
t = b[2] ^ (uint2)(b[4].x << 1 | b[4].y >> 31, b[4].y << 1 | b[4].x >> 31);
a[3] ^= t;
a[8] ^= t;
a[13] ^= t;
a[18] ^= t;
a[23] ^= t;
t = b[3] ^ (uint2)(b[0].x << 1 | b[0].y >> 31, b[0].y << 1 | b[0].x >> 31);
a[4] ^= t;
a[9] ^= t;
a[14] ^= t;
a[19] ^= t;
a[24] ^= t;
// Rho Pi
b[0] = a[0];
b[10] = (uint2)(a[1].x << 1 | a[1].y >> 31, a[1].y << 1 | a[1].x >> 31);
b[7] = (uint2)(a[10].x << 3 | a[10].y >> 29, a[10].y << 3 | a[10].x >> 29);
b[11] = (uint2)(a[7].x << 6 | a[7].y >> 26, a[7].y << 6 | a[7].x >> 26);
b[17] = (uint2)(a[11].x << 10 | a[11].y >> 22, a[11].y << 10 | a[11].x >> 22);
b[18] = (uint2)(a[17].x << 15 | a[17].y >> 17, a[17].y << 15 | a[17].x >> 17);
b[3] = (uint2)(a[18].x << 21 | a[18].y >> 11, a[18].y << 21 | a[18].x >> 11);
b[5] = (uint2)(a[3].x << 28 | a[3].y >> 4, a[3].y << 28 | a[3].x >> 4);
b[16] = (uint2)(a[5].y << 4 | a[5].x >> 28, a[5].x << 4 | a[5].y >> 28);
b[8] = (uint2)(a[16].y << 13 | a[16].x >> 19, a[16].x << 13 | a[16].y >> 19);
b[21] = (uint2)(a[8].y << 23 | a[8].x >> 9, a[8].x << 23 | a[8].y >> 9);
b[24] = (uint2)(a[21].x << 2 | a[21].y >> 30, a[21].y << 2 | a[21].x >> 30);
b[4] = (uint2)(a[24].x << 14 | a[24].y >> 18, a[24].y << 14 | a[24].x >> 18);
b[15] = (uint2)(a[4].x << 27 | a[4].y >> 5, a[4].y << 27 | a[4].x >> 5);
b[23] = (uint2)(a[15].y << 9 | a[15].x >> 23, a[15].x << 9 | a[15].y >> 23);
b[19] = (uint2)(a[23].y << 24 | a[23].x >> 8, a[23].x << 24 | a[23].y >> 8);
b[13] = (uint2)(a[19].x << 8 | a[19].y >> 24, a[19].y << 8 | a[19].x >> 24);
b[12] = (uint2)(a[13].x << 25 | a[13].y >> 7, a[13].y << 25 | a[13].x >> 7);
b[2] = (uint2)(a[12].y << 11 | a[12].x >> 21, a[12].x << 11 | a[12].y >> 21);
b[20] = (uint2)(a[2].y << 30 | a[2].x >> 2, a[2].x << 30 | a[2].y >> 2);
b[14] = (uint2)(a[20].x << 18 | a[20].y >> 14, a[20].y << 18 | a[20].x >> 14);
b[22] = (uint2)(a[14].y << 7 | a[14].x >> 25, a[14].x << 7 | a[14].y >> 25);
b[9] = (uint2)(a[22].y << 29 | a[22].x >> 3, a[22].x << 29 | a[22].y >> 3);
b[6] = (uint2)(a[9].x << 20 | a[9].y >> 12, a[9].y << 20 | a[9].x >> 12);
b[1] = (uint2)(a[6].y << 12 | a[6].x >> 20, a[6].x << 12 | a[6].y >> 20);
// Chi
a[0] = bitselect(b[0] ^ b[2], b[0], b[1]);
a[1] = bitselect(b[1] ^ b[3], b[1], b[2]);
a[2] = bitselect(b[2] ^ b[4], b[2], b[3]);
a[3] = bitselect(b[3] ^ b[0], b[3], b[4]);
if (out_size >= 4)
{
a[4] = bitselect(b[4] ^ b[1], b[4], b[0]);
a[5] = bitselect(b[5] ^ b[7], b[5], b[6]);
a[6] = bitselect(b[6] ^ b[8], b[6], b[7]);
a[7] = bitselect(b[7] ^ b[9], b[7], b[8]);
a[8] = bitselect(b[8] ^ b[5], b[8], b[9]);
if (out_size >= 8)
{
a[9] = bitselect(b[9] ^ b[6], b[9], b[5]);
a[10] = bitselect(b[10] ^ b[12], b[10], b[11]);
a[11] = bitselect(b[11] ^ b[13], b[11], b[12]);
a[12] = bitselect(b[12] ^ b[14], b[12], b[13]);
a[13] = bitselect(b[13] ^ b[10], b[13], b[14]);
a[14] = bitselect(b[14] ^ b[11], b[14], b[10]);
a[15] = bitselect(b[15] ^ b[17], b[15], b[16]);
a[16] = bitselect(b[16] ^ b[18], b[16], b[17]);
a[17] = bitselect(b[17] ^ b[19], b[17], b[18]);
a[18] = bitselect(b[18] ^ b[15], b[18], b[19]);
a[19] = bitselect(b[19] ^ b[16], b[19], b[15]);
a[20] = bitselect(b[20] ^ b[22], b[20], b[21]);
a[21] = bitselect(b[21] ^ b[23], b[21], b[22]);
a[22] = bitselect(b[22] ^ b[24], b[22], b[23]);
a[23] = bitselect(b[23] ^ b[20], b[23], b[24]);
a[24] = bitselect(b[24] ^ b[21], b[24], b[20]);
}
}
// Iota
a[0] ^= Keccak_f1600_RC[r];
#if !__ENDIAN_LITTLE__
for (uint i = 0; i != 25; ++i)
a[i] = a[i].yx;
#endif
}
void keccak_f1600_no_absorb(ulong* a, uint in_size, uint out_size, uint isolate)
{
for (uint i = in_size; i != 25; ++i)
{
a[i] = 0;
}
#if __ENDIAN_LITTLE__
a[in_size] ^= 0x0000000000000001;
a[24-out_size*2] ^= 0x8000000000000000;
#else
a[in_size] ^= 0x0100000000000000;
a[24-out_size*2] ^= 0x0000000000000080;
#endif
// Originally I unrolled the first and last rounds to interface
// better with surrounding code, however I haven't done this
// without causing the AMD compiler to blow up the VGPR usage.
uint r = 0;
do
{
// This dynamic branch stops the AMD compiler unrolling the loop
// and additionally saves about 33% of the VGPRs, enough to gain another
// wavefront. Ideally we'd get 4 in flight, but 3 is the best I can
// massage out of the compiler. It doesn't really seem to matter how
// much we try and help the compiler save VGPRs because it seems to throw
// that information away, hence the implementation of keccak here
// doesn't bother.
if (isolate)
{
keccak_f1600_round((uint2*)a, r++, 25);
}
}
while (r < 23);
// final round optimised for digest size
keccak_f1600_round((uint2*)a, r++, out_size);
}
#define copy(dst, src, count) for (uint i = 0; i != count; ++i) { (dst)[i] = (src)[i]; }
#define countof(x) (sizeof(x) / sizeof(x[0]))
uint fnv(uint x, uint y)
{
return x * FNV_PRIME ^ y;
}
uint4 fnv4(uint4 x, uint4 y)
{
return x * FNV_PRIME ^ y;
}
uint fnv_reduce(uint4 v)
{
return fnv(fnv(fnv(v.x, v.y), v.z), v.w);
}
typedef union
{
ulong ulongs[32 / sizeof(ulong)];
uint uints[32 / sizeof(uint)];
} hash32_t;
typedef union
{
ulong ulongs[64 / sizeof(ulong)];
uint4 uint4s[64 / sizeof(uint4)];
} hash64_t;
typedef union
{
uint uints[128 / sizeof(uint)];
uint4 uint4s[128 / sizeof(uint4)];
} hash128_t;
hash64_t init_hash(__constant hash32_t const* header, ulong nonce, uint isolate)
{
hash64_t init;
uint const init_size = countof(init.ulongs);
uint const hash_size = countof(header->ulongs);
// sha3_512(header .. nonce)
ulong state[25];
copy(state, header->ulongs, hash_size);
state[hash_size] = nonce;
keccak_f1600_no_absorb(state, hash_size + 1, init_size, isolate);
copy(init.ulongs, state, init_size);
return init;
}
uint inner_loop_chunks(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, __global hash128_t const* g_dag1, __global hash128_t const* g_dag2, __global hash128_t const* g_dag3, uint isolate)
{
uint4 mix = init;
// share init0
if (thread_id == 0)
*share = mix.x;
barrier(CLK_LOCAL_MEM_FENCE);
uint init0 = *share;
uint a = 0;
do
{
bool update_share = thread_id == (a/4) % THREADS_PER_HASH;
#pragma unroll
for (uint i = 0; i != 4; ++i)
{
if (update_share)
{
uint m[4] = { mix.x, mix.y, mix.z, mix.w };
*share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE;
}
barrier(CLK_LOCAL_MEM_FENCE);
mix = fnv4(mix, *share>=3 * DAG_SIZE / 4 ? g_dag3[*share - 3 * DAG_SIZE / 4].uint4s[thread_id] : *share>=DAG_SIZE / 2 ? g_dag2[*share - DAG_SIZE / 2].uint4s[thread_id] : *share>=DAG_SIZE / 4 ? g_dag1[*share - DAG_SIZE / 4].uint4s[thread_id]:g_dag[*share].uint4s[thread_id]);
}
} while ((a += 4) != (ACCESSES & isolate));
return fnv_reduce(mix);
}
uint inner_loop(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, uint isolate)
{
uint4 mix = init;
// share init0
if (thread_id == 0)
*share = mix.x;
barrier(CLK_LOCAL_MEM_FENCE);
uint init0 = *share;
uint a = 0;
do
{
bool update_share = thread_id == (a/4) % THREADS_PER_HASH;
#pragma unroll
for (uint i = 0; i != 4; ++i)
{
if (update_share)
{
uint m[4] = { mix.x, mix.y, mix.z, mix.w };
*share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE;
}
barrier(CLK_LOCAL_MEM_FENCE);
mix = fnv4(mix, g_dag[*share].uint4s[thread_id]);
}
}
while ((a += 4) != (ACCESSES & isolate));
return fnv_reduce(mix);
}
hash32_t final_hash(hash64_t const* init, hash32_t const* mix, uint isolate)
{
ulong state[25];
hash32_t hash;
uint const hash_size = countof(hash.ulongs);
uint const init_size = countof(init->ulongs);
uint const mix_size = countof(mix->ulongs);
// keccak_256(keccak_512(header..nonce) .. mix);
copy(state, init->ulongs, init_size);
copy(state + init_size, mix->ulongs, mix_size);
keccak_f1600_no_absorb(state, init_size+mix_size, hash_size, isolate);
// copy out
copy(hash.ulongs, state, hash_size);
return hash;
}
hash32_t compute_hash_simple(
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
ulong nonce,
uint isolate
)
{
hash64_t init = init_hash(g_header, nonce, isolate);
hash128_t mix;
for (uint i = 0; i != countof(mix.uint4s); ++i)
{
mix.uint4s[i] = init.uint4s[i % countof(init.uint4s)];
}
uint mix_val = mix.uints[0];
uint init0 = mix.uints[0];
uint a = 0;
do
{
uint pi = fnv(init0 ^ a, mix_val) % DAG_SIZE;
uint n = (a+1) % countof(mix.uints);
#pragma unroll
for (uint i = 0; i != countof(mix.uints); ++i)
{
mix.uints[i] = fnv(mix.uints[i], g_dag[pi].uints[i]);
mix_val = i == n ? mix.uints[i] : mix_val;
}
}
while (++a != (ACCESSES & isolate));
// reduce to output
hash32_t fnv_mix;
for (uint i = 0; i != countof(fnv_mix.uints); ++i)
{
fnv_mix.uints[i] = fnv_reduce(mix.uint4s[i]);
}
return final_hash(&init, &fnv_mix, isolate);
}
typedef union
{
struct
{
hash64_t init;
uint pad; // avoid lds bank conflicts
};
hash32_t mix;
} compute_hash_share;
hash32_t compute_hash(
__local compute_hash_share* share,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
ulong nonce,
uint isolate
)
{
uint const gid = get_global_id(0);
// Compute one init hash per work item.
hash64_t init = init_hash(g_header, nonce, isolate);
// Threads work together in this phase in groups of 8.
uint const thread_id = gid % THREADS_PER_HASH;
uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH;
hash32_t mix;
uint i = 0;
do
{
// share init with other threads
if (i == thread_id)
share[hash_id].init = init;
barrier(CLK_LOCAL_MEM_FENCE);
uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))];
barrier(CLK_LOCAL_MEM_FENCE);
uint thread_mix = inner_loop(thread_init, thread_id, share[hash_id].mix.uints, g_dag, isolate);
share[hash_id].mix.uints[thread_id] = thread_mix;
barrier(CLK_LOCAL_MEM_FENCE);
if (i == thread_id)
mix = share[hash_id].mix;
barrier(CLK_LOCAL_MEM_FENCE);
}
while (++i != (THREADS_PER_HASH & isolate));
return final_hash(&init, &mix, isolate);
}
hash32_t compute_hash_chunks(
__local compute_hash_share* share,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
__global hash128_t const* g_dag1,
__global hash128_t const* g_dag2,
__global hash128_t const* g_dag3,
ulong nonce,
uint isolate
)
{
uint const gid = get_global_id(0);
// Compute one init hash per work item.
hash64_t init = init_hash(g_header, nonce, isolate);
// Threads work together in this phase in groups of 8.
uint const thread_id = gid % THREADS_PER_HASH;
uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH;
hash32_t mix;
uint i = 0;
do
{
// share init with other threads
if (i == thread_id)
share[hash_id].init = init;
barrier(CLK_LOCAL_MEM_FENCE);
uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))];
barrier(CLK_LOCAL_MEM_FENCE);
uint thread_mix = inner_loop_chunks(thread_init, thread_id, share[hash_id].mix.uints, g_dag, g_dag1, g_dag2, g_dag3, isolate);
share[hash_id].mix.uints[thread_id] = thread_mix;
barrier(CLK_LOCAL_MEM_FENCE);
if (i == thread_id)
mix = share[hash_id].mix;
barrier(CLK_LOCAL_MEM_FENCE);
}
while (++i != (THREADS_PER_HASH & isolate));
return final_hash(&init, &mix, isolate);
}
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
__kernel void ethash_hash_simple(
__global hash32_t* g_hashes,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
ulong start_nonce,
uint isolate
)
{
uint const gid = get_global_id(0);
g_hashes[gid] = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
}
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
__kernel void ethash_search_simple(
__global volatile uint* restrict g_output,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
ulong start_nonce,
ulong target,
uint isolate
)
{
uint const gid = get_global_id(0);
hash32_t hash = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
if (hash.ulongs[countof(hash.ulongs)-1] < target)
{
uint slot = min(convert_uint(MAX_OUTPUTS), convert_uint(atomic_inc(&g_output[0]) + 1));
g_output[slot] = gid;
}
}
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
__kernel void ethash_hash(
__global hash32_t* g_hashes,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
ulong start_nonce,
uint isolate
)
{
__local compute_hash_share share[HASHES_PER_LOOP];
uint const gid = get_global_id(0);
g_hashes[gid] = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate);
}
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
__kernel void ethash_search(
__global volatile uint* restrict g_output,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
ulong start_nonce,
ulong target,
uint isolate
)
{
__local compute_hash_share share[HASHES_PER_LOOP];
uint const gid = get_global_id(0);
hash32_t hash = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate);
if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target)
{
uint slot = min((uint)MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1);
g_output[slot] = gid;
}
}
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
__kernel void ethash_hash_chunks(
__global hash32_t* g_hashes,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
__global hash128_t const* g_dag1,
__global hash128_t const* g_dag2,
__global hash128_t const* g_dag3,
ulong start_nonce,
uint isolate
)
{
__local compute_hash_share share[HASHES_PER_LOOP];
uint const gid = get_global_id(0);
g_hashes[gid] = compute_hash_chunks(share, g_header, g_dag, g_dag1, g_dag2, g_dag3,start_nonce + gid, isolate);
}
__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
__kernel void ethash_search_chunks(
__global volatile uint* restrict g_output,
__constant hash32_t const* g_header,
__global hash128_t const* g_dag,
__global hash128_t const* g_dag1,
__global hash128_t const* g_dag2,
__global hash128_t const* g_dag3,
ulong start_nonce,
ulong target,
uint isolate
)
{
__local compute_hash_share share[HASHES_PER_LOOP];
uint const gid = get_global_id(0);
hash32_t hash = compute_hash_chunks(share, g_header, g_dag, g_dag1, g_dag2, g_dag3, start_nonce + gid, isolate);
if (as_ulong(as_uchar8(hash.ulongs[0]).s76543210) < target)
{
uint slot = min(convert_uint(MAX_OUTPUTS), convert_uint(atomic_inc(&g_output[0]) + 1));
g_output[slot] = gid;
}
}
`
......@@ -92,7 +92,7 @@ func TestEthashConcurrentVerify(t *testing.T) {
defer os.RemoveAll(eth.Full.Dir)
block := &testBlock{difficulty: big.NewInt(10)}
nonce, md := eth.Search(block, nil)
nonce, md := eth.Search(block, nil, 0)
block.nonce = nonce
block.mixDigest = common.BytesToHash(md)
......@@ -135,7 +135,7 @@ func TestEthashConcurrentSearch(t *testing.T) {
// launch n searches concurrently.
for i := 0; i < nsearch; i++ {
go func() {
nonce, md := eth.Search(block, stop)
nonce, md := eth.Search(block, stop, 0)
select {
case found <- searchRes{n: nonce, md: md}:
case <-stop:
......@@ -167,7 +167,7 @@ func TestEthashSearchAcrossEpoch(t *testing.T) {
for i := epochLength - 40; i < epochLength+40; i++ {
block := &testBlock{number: i, difficulty: big.NewInt(90)}
rand.Read(block.hashNoNonce[:])
nonce, md := eth.Search(block, nil)
nonce, md := eth.Search(block, nil, 0)
block.nonce = nonce
block.mixDigest = common.BytesToHash(md)
if !eth.Verify(block) {
......
......@@ -6,7 +6,7 @@
GOBIN = build/bin
geth:
build/env.sh go install -v $(shell build/ldflags.sh) ./cmd/geth
build/env.sh go install -v $(shell build/flags.sh) ./cmd/geth
@echo "Done building."
@echo "Run \"$(GOBIN)/geth\" to launch geth."
......@@ -39,12 +39,12 @@ evm:
@echo "Done building."
@echo "Run \"$(GOBIN)/evm to start the evm."
mist:
build/env.sh go install -v $(shell build/ldflags.sh) ./cmd/mist
build/env.sh go install -v $(shell build/flags.sh) ./cmd/mist
@echo "Done building."
@echo "Run \"$(GOBIN)/mist --asset_path=cmd/mist/assets\" to launch mist."
all:
build/env.sh go install -v $(shell build/ldflags.sh) ./...
build/env.sh go install -v $(shell build/flags.sh) ./...
test: all
build/env.sh go test ./...
......
......@@ -16,3 +16,7 @@ sep=$(go version | awk '{ if ($3 >= "go1.5" || index($3, "devel")) print "="; el
if [ -f ".git/HEAD" ]; then
echo "-ldflags '-X main.gitCommit$sep$(git rev-parse HEAD)'"
fi
if [ ! -z "$GO_OPENCL" ]; then
echo "-tags opencl"
fi
......@@ -468,8 +468,7 @@ func processTxs(repl *testjethre, t *testing.T, expTxc int) bool {
t.Errorf("incorrect number of pending transactions, expected %v, got %v", expTxc, txc)
return false
}
err = repl.ethereum.StartMining(runtime.NumCPU())
err = repl.ethereum.StartMining(runtime.NumCPU(), "")
if err != nil {
t.Errorf("unexpected error mining: %v", err)
return false
......
......@@ -104,6 +104,22 @@ The makedag command generates an ethash DAG in /tmp/dag.
This command exists to support the system testing project.
Regular users do not need to execute it.
`,
},
{
Action: gpuinfo,
Name: "gpuinfo",
Usage: "gpuinfo",
Description: `
Prints OpenCL device info for all found GPUs.
`,
},
{
Action: gpubench,
Name: "gpubench",
Usage: "benchmark GPU",
Description: `
Runs quick benchmark on first GPU found.
`,
},
{
......@@ -298,6 +314,7 @@ JavaScript API. See https://github.com/ethereum/go-ethereum/wiki/Javascipt-Conso
utils.GasPriceFlag,
utils.MinerThreadsFlag,
utils.MiningEnabledFlag,
utils.MiningGPUFlag,
utils.AutoDAGFlag,
utils.NATFlag,
utils.NatspecEnabledFlag,
......@@ -586,7 +603,10 @@ func startEth(ctx *cli.Context, eth *eth.Ethereum) {
}
}
if ctx.GlobalBool(utils.MiningEnabledFlag.Name) {
if err := eth.StartMining(ctx.GlobalInt(utils.MinerThreadsFlag.Name)); err != nil {
err := eth.StartMining(
ctx.GlobalInt(utils.MinerThreadsFlag.Name),
ctx.GlobalString(utils.MiningGPUFlag.Name))
if err != nil {
utils.Fatalf("%v", err)
}
}
......@@ -740,6 +760,29 @@ func makedag(ctx *cli.Context) {
}
}
func gpuinfo(ctx *cli.Context) {
eth.PrintOpenCLDevices()
}
func gpubench(ctx *cli.Context) {
args := ctx.Args()
wrongArgs := func() {
utils.Fatalf(`Usage: geth gpubench <gpu number>`)
}
switch {
case len(args) == 1:
n, err := strconv.ParseUint(args[0], 0, 64)
if err != nil {
wrongArgs()
}
eth.GPUBench(n)
case len(args) == 0:
eth.GPUBench(0)
default:
wrongArgs()
}
}
func version(c *cli.Context) {
fmt.Println(ClientIdentifier)
fmt.Println("Version:", Version)
......
......@@ -155,6 +155,12 @@ var (
}
// miner settings
// TODO: refactor CPU vs GPU mining flags
MiningGPUFlag = cli.StringFlag{
Name: "minegpu",
Usage: "Mine with given GPUs. '--minegpu 0,1' will mine with the first two GPUs found.",
}
MinerThreadsFlag = cli.IntFlag{
Name: "minerthreads",
Usage: "Number of miner threads",
......
......@@ -306,7 +306,7 @@ func processTxs(repl *testFrontend, t *testing.T, expTxc int) bool {
return false
}
err = repl.ethereum.StartMining(runtime.NumCPU())
err = repl.ethereum.StartMining(runtime.NumCPU(), "")
if err != nil {
t.Errorf("unexpected error mining: %v", err)
return false
......
......@@ -32,7 +32,7 @@ import (
// It returns true from Verify for any block.
type FakePow struct{}
func (f FakePow) Search(block pow.Block, stop <-chan struct{}) (uint64, []byte) {
func (f FakePow) Search(block pow.Block, stop <-chan struct{}, index int) (uint64, []byte) {
return 0, nil
}
func (f FakePow) Verify(block pow.Block) bool { return true }
......
......@@ -34,7 +34,7 @@ type failPow struct {
failing uint64
}
func (pow failPow) Search(pow.Block, <-chan struct{}) (uint64, []byte) {
func (pow failPow) Search(pow.Block, <-chan struct{}, int) (uint64, []byte) {
return 0, nil
}
func (pow failPow) Verify(block pow.Block) bool { return block.NumberU64() != pow.failing }
......@@ -47,7 +47,7 @@ type delayedPow struct {
delay time.Duration
}
func (pow delayedPow) Search(pow.Block, <-chan struct{}) (uint64, []byte) {
func (pow delayedPow) Search(pow.Block, <-chan struct{}, int) (uint64, []byte) {
return 0, nil
}
func (pow delayedPow) Verify(block pow.Block) bool { time.Sleep(pow.delay); return true }
......
......@@ -498,18 +498,6 @@ func (s *Ethereum) ResetWithGenesisBlock(gb *types.Block) {
s.blockchain.ResetWithGenesisBlock(gb)
}
func (s *Ethereum) StartMining(threads int) error {
eb, err := s.Etherbase()
if err != nil {
err = fmt.Errorf("Cannot start mining without etherbase address: %v", err)
glog.V(logger.Error).Infoln(err)
return err
}
go s.miner.Start(eb, threads)
return nil
}
func (s *Ethereum) Etherbase() (eb common.Address, err error) {
eb = s.etherbase
if (eb == common.Address{}) {
......
// Copyright 2014 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// +build !opencl
package eth
import (
"errors"
"fmt"
"github.com/ethereum/go-ethereum/logger"
"github.com/ethereum/go-ethereum/logger/glog"
)
const disabledInfo = "Set GO_OPENCL and re-build to enable."
func (s *Ethereum) StartMining(threads int, gpus string) error {
eb, err := s.Etherbase()
if err != nil {
err = fmt.Errorf("Cannot start mining without etherbase address: %v", err)
glog.V(logger.Error).Infoln(err)
return err
}
if gpus != "" {
return errors.New("GPU mining disabled. " + disabledInfo)
}
// CPU mining
go s.miner.Start(eb, threads)
return nil
}
func GPUBench(gpuid uint64) {
fmt.Println("GPU mining disabled. " + disabledInfo)
}
func PrintOpenCLDevices() {
fmt.Println("OpenCL disabled. " + disabledInfo)
}
// Copyright 2014 The go-ethereum Authors
// This file is part of the go-ethereum library.
//
// The go-ethereum library is free software: you can redistribute it and/or modify
// it under the terms of the GNU Lesser General Public License as published by
// the Free Software Foundation, either version 3 of the License, or
// (at your option) any later version.
//
// The go-ethereum library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public License
// along with the go-ethereum library. If not, see <http://www.gnu.org/licenses/>.
// +build opencl
package eth
import (
"fmt"
"math/big"
"strconv"
"strings"
"time"
"github.com/ethereum/ethash"
"github.com/ethereum/go-ethereum/common"
"github.com/ethereum/go-ethereum/core/types"
"github.com/ethereum/go-ethereum/logger"
"github.com/ethereum/go-ethereum/logger/glog"
"github.com/ethereum/go-ethereum/miner"
)
func (s *Ethereum) StartMining(threads int, gpus string) error {
eb, err := s.Etherbase()
if err != nil {
err = fmt.Errorf("Cannot start mining without etherbase address: %v", err)
glog.V(logger.Error).Infoln(err)
return err
}
// GPU mining
if gpus != "" {
var ids []int
for _, s := range strings.Split(gpus, ",") {
i, err := strconv.Atoi(s)
if err != nil {
return fmt.Errorf("Invalid GPU id(s): %v", err)
}
if i < 0 {
return fmt.Errorf("Invalid GPU id: %v", i)
}
ids = append(ids, i)
}
// TODO: re-creating miner is a bit ugly
cl := ethash.NewCL(ids)
s.miner = miner.New(s, s.EventMux(), cl)
go s.miner.Start(eb, len(ids))
return nil
}
// CPU mining
go s.miner.Start(eb, threads)
return nil
}
func GPUBench(gpuid uint64) {
e := ethash.NewCL([]int{int(gpuid)})
var h common.Hash
bogoHeader := &types.Header{
ParentHash: h,
Number: big.NewInt(int64(42)),
Difficulty: big.NewInt(int64(999999999999999)),
}
bogoBlock := types.NewBlock(bogoHeader, nil, nil, nil)
err := ethash.InitCL(bogoBlock.NumberU64(), e)
if err != nil {
fmt.Println("OpenCL init error: ", err)
return
}
stopChan := make(chan struct{})
reportHashRate := func() {
for {
time.Sleep(3 * time.Second)
fmt.Printf("hashes/s : %v\n", e.GetHashrate())
}
}
fmt.Printf("Starting benchmark (%v seconds)\n", 60)
go reportHashRate()
go e.Search(bogoBlock, stopChan, 0)
time.Sleep(60 * time.Second)
fmt.Println("OK.")
}
func PrintOpenCLDevices() {
ethash.PrintDevices()
}
......@@ -118,7 +118,7 @@ func (self *CpuAgent) mine(work *Work, stop <-chan struct{}) {
glog.V(logger.Debug).Infof("(re)started agent[%d]. mining...\n", self.index)
// Mine
nonce, mixDigest := self.pow.Search(work.Block, stop)
nonce, mixDigest := self.pow.Search(work.Block, stop, self.index)
if nonce != 0 {
block := work.Block.WithMiningResult(nonce, common.BytesToHash(mixDigest))
self.returnCh <- &Result{work, block}
......
......@@ -48,7 +48,7 @@ func (pow *EasyPow) Turbo(on bool) {
pow.turbo = on
}
func (pow *EasyPow) Search(block pow.Block, stop <-chan struct{}) (uint64, []byte) {
func (pow *EasyPow) Search(block pow.Block, stop <-chan struct{}, index int) (uint64, []byte) {
r := rand.New(rand.NewSource(time.Now().UnixNano()))
hash := block.HashNoNonce()
diff := block.Difficulty()
......
......@@ -17,7 +17,7 @@
package pow
type PoW interface {
Search(block Block, stop <-chan struct{}) (uint64, []byte)
Search(block Block, stop <-chan struct{}, index int) (uint64, []byte)
Verify(block Block) bool
GetHashrate() int64
Turbo(bool)
......
......@@ -100,7 +100,7 @@ func (self *minerApi) StartMiner(req *shared.Request) (interface{}, error) {
}
self.ethereum.StartAutoDAG()
err := self.ethereum.StartMining(args.Threads)
err := self.ethereum.StartMining(args.Threads, "")
if err == nil {
return true, nil
}
......
......@@ -474,7 +474,7 @@ func (self *XEth) ClientVersion() string {
func (self *XEth) SetMining(shouldmine bool, threads int) bool {
ismining := self.backend.IsMining()
if shouldmine && !ismining {
err := self.backend.StartMining(threads)
err := self.backend.StartMining(threads, "")
return err == nil
}
if ismining && !shouldmine {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment