diff --git a/.gitignore b/.gitignore
index e2aa4de956..0cd8a23c45 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,6 +6,7 @@
 
 /tmp
 */**/*un~
+*/**/*.test
 *un~
 .DS_Store
 */**/.DS_Store
diff --git a/Godeps/Godeps.json b/Godeps/Godeps.json
index b66ea932fe..a1025c85d1 100644
--- a/Godeps/Godeps.json
+++ b/Godeps/Godeps.json
@@ -15,6 +15,11 @@
 			"Comment": "null-15",
 			"Rev": "12e4b4183793ac4b061921e7980845e750679fd0"
 		},
+		{
+			"ImportPath": "github.com/ethereum/ethash",
+			"Comment": "v17-23-g2561e13",
+			"Rev": "2561e1322a7e8e3d4a2cc903c44b1e96340bcb27"
+		},
 		{
 			"ImportPath": "github.com/ethereum/serpent-go",
 			"Rev": "5767a0dbd759d313df3f404dadb7f98d7ab51443"
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore b/Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore
new file mode 100644
index 0000000000..6bb36ed15c
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/.gitignore
@@ -0,0 +1,5 @@
+.idea/
+.DS_Store
+*/**/*un~
+.vagrant/
+cpp-build/
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt
new file mode 100644
index 0000000000..ac189457f1
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/CMakeLists.txt
@@ -0,0 +1,14 @@
+cmake_minimum_required(VERSION 2.8.2)
+project(ethash)
+
+set(CMAKE_MODULE_PATH ${CMAKE_MODULE_PATH} "${CMAKE_SOURCE_DIR}/cmake/Modules/")
+set(ETHHASH_LIBS ethash)
+
+if (WIN32 AND WANT_CRYPTOPP)
+	add_subdirectory(cryptopp)
+endif()
+
+add_subdirectory(libethash)
+add_subdirectory(libethash-cl EXCLUDE_FROM_ALL)
+add_subdirectory(benchmark EXCLUDE_FROM_ALL)
+add_subdirectory(test EXCLUDE_FROM_ALL)
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt
new file mode 100644
index 0000000000..e6ba857904
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/CMakeLists.txt
@@ -0,0 +1,53 @@
+include_directories(..)
+
+set(CMAKE_BUILD_TYPE Release)
+
+if (MSVC)
+	add_definitions("/openmp")
+endif()
+
+if (NOT MPI_FOUND)
+    find_package(MPI)
+endif()
+
+if (NOT CRYPTOPP_FOUND)
+	find_package(CryptoPP 5.6.2)
+endif()
+
+if (CRYPTOPP_FOUND)
+	add_definitions(-DWITH_CRYPTOPP)
+endif()
+
+if (NOT OpenCL_FOUND)
+	find_package(OpenCL)
+endif()
+if (OpenCL_FOUND)
+	add_definitions(-DWITH_OPENCL)
+	include_directories(${OpenCL_INCLUDE_DIRS})
+	list(APPEND FILES ethash_cl_miner.cpp ethash_cl_miner.h)
+endif()
+
+if (MPI_FOUND)
+    include_directories(${MPI_INCLUDE_PATH})
+    add_executable (Benchmark_MPI_FULL benchmark.cpp)
+    target_link_libraries (Benchmark_MPI_FULL ${ETHHASH_LIBS} ${MPI_LIBRARIES})
+    SET_TARGET_PROPERTIES(Benchmark_MPI_FULL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} ${MPI_COMPILE_FLAGS} -DFULL -DMPI")
+
+    add_executable (Benchmark_MPI_LIGHT benchmark.cpp)
+    target_link_libraries (Benchmark_MPI_LIGHT ${ETHHASH_LIBS} ${MPI_LIBRARIES})
+    SET_TARGET_PROPERTIES(Benchmark_MPI_LIGHT PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} ${MPI_COMPILE_FLAGS} -DMPI")
+endif()
+
+add_executable (Benchmark_FULL benchmark.cpp)
+target_link_libraries (Benchmark_FULL ${ETHHASH_LIBS})
+SET_TARGET_PROPERTIES(Benchmark_FULL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -DFULL")
+
+add_executable (Benchmark_LIGHT benchmark.cpp)
+target_link_libraries (Benchmark_LIGHT ${ETHHASH_LIBS})
+
+if (OpenCL_FOUND)
+	add_executable (Benchmark_CL benchmark.cpp)
+	target_link_libraries (Benchmark_CL ${ETHHASH_LIBS} ethash-cl)
+	SET_TARGET_PROPERTIES(Benchmark_CL PROPERTIES COMPILE_FLAGS "${COMPILE_FLAGS} -DOPENCL")
+endif()
+
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp
new file mode 100644
index 0000000000..4c8f700c53
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/benchmark/benchmark.cpp
@@ -0,0 +1,260 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file benchmark.cpp
+ * @author Tim Hughes <tim@twistedfury.com>
+ * @date 2015
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <libethash/ethash.h>
+#include <libethash/util.h>
+#ifdef OPENCL
+#include <libethash-cl/ethash_cl_miner.h>
+#endif
+#include <vector>
+#include <algorithm>
+
+#ifdef WITH_CRYPTOPP
+#include <libethash/SHA3_cryptopp.h>
+#include <string>
+
+#else
+#include "libethash/sha3.h"
+#endif // WITH_CRYPTOPP
+
+#undef min
+#undef max
+
+#if defined(OPENCL)
+const unsigned trials = 1024*1024*32;
+#elif defined(FULL)
+const unsigned trials = 1024*1024/8;
+#else
+const unsigned trials = 1024*1024/1024;
+#endif
+uint8_t g_hashes[1024*32];
+
+static char nibbleToChar(unsigned nibble)
+{
+	return (char) ((nibble >= 10 ? 'a'-10 : '0') + nibble);
+}
+
+static uint8_t charToNibble(char chr)
+{
+	if (chr >= '0' && chr <= '9')
+	{
+		return (uint8_t) (chr - '0');
+	}
+	if (chr >= 'a' && chr <= 'z')
+	{
+		return (uint8_t) (chr - 'a' + 10);
+	}
+	if (chr >= 'A' && chr <= 'Z')
+	{
+		return (uint8_t) (chr - 'A' + 10);
+	}
+	return 0;
+}
+
+static std::vector<uint8_t> hexStringToBytes(char const* str)
+{
+	std::vector<uint8_t> bytes(strlen(str) >> 1);
+	for (unsigned i = 0; i != bytes.size(); ++i)
+	{
+		bytes[i] = charToNibble(str[i*2 | 0]) << 4;
+		bytes[i] |= charToNibble(str[i*2 | 1]);
+	}
+	return bytes;
+}
+
+static std::string bytesToHexString(uint8_t const* bytes, unsigned size)
+{
+	std::string str;
+	for (unsigned i = 0; i != size; ++i)
+	{
+		str += nibbleToChar(bytes[i] >> 4);
+		str += nibbleToChar(bytes[i] & 0xf);
+	}
+	return str;
+}
+
+extern "C" int main(void)
+{
+	// params for ethash
+	ethash_params params;
+	ethash_params_init(&params, 0);
+	//params.full_size = 262147 * 4096;	// 1GBish;
+	//params.full_size = 32771 * 4096;	// 128MBish;
+	//params.full_size = 8209 * 4096;	// 8MBish;
+	//params.cache_size = 8209*4096;
+	//params.cache_size = 2053*4096;
+	uint8_t seed[32], previous_hash[32];
+
+	memcpy(seed, hexStringToBytes("9410b944535a83d9adf6bbdcc80e051f30676173c16ca0d32d6f1263fc246466").data(), 32);
+	memcpy(previous_hash, hexStringToBytes("c5d2460186f7233c927e7db2dcc703c0e500b653ca82273b7bfad8045d85a470").data(), 32);
+	
+	// allocate page aligned buffer for dataset
+#ifdef FULL
+	void* full_mem_buf = malloc(params.full_size + 4095);
+	void* full_mem = (void*)((uintptr_t(full_mem_buf) + 4095) & ~4095);
+#endif
+	void* cache_mem_buf = malloc(params.cache_size + 63);
+	void* cache_mem = (void*)((uintptr_t(cache_mem_buf) + 63) & ~63);
+
+	ethash_cache cache;
+	cache.mem = cache_mem;
+	
+	// compute cache or full data
+	{
+		clock_t startTime = clock();
+		ethash_mkcache(&cache, &params, seed);
+		clock_t time = clock() - startTime;
+
+		uint8_t cache_hash[32];
+		SHA3_256(cache_hash, (uint8_t const*)cache_mem, params.cache_size);
+		debugf("ethash_mkcache: %ums, sha3: %s\n", (unsigned)((time*1000)/CLOCKS_PER_SEC), bytesToHexString(cache_hash,sizeof(cache_hash)).data());
+
+		// print a couple of test hashes
+		{
+			const clock_t startTime = clock();
+			ethash_return_value hash;
+			ethash_light(&hash, &cache, &params, previous_hash, 0);
+			const clock_t time = clock() - startTime;
+			debugf("ethash_light test: %ums, %s\n", (unsigned)((time*1000)/CLOCKS_PER_SEC), bytesToHexString(hash.result, 32).data());
+		}
+
+		#ifdef FULL
+			startTime = clock();
+			ethash_compute_full_data(full_mem, &params, &cache);
+			time = clock() - startTime;
+			debugf("ethash_compute_full_data: %ums\n", (unsigned)((time*1000)/CLOCKS_PER_SEC));
+		#endif // FULL
+	}
+
+#ifdef OPENCL
+	ethash_cl_miner miner;
+	{
+		const clock_t startTime = clock();
+		if (!miner.init(params, seed))
+			exit(-1);
+		const clock_t time = clock() - startTime;
+        debugf("ethash_cl_miner init: %ums\n", (unsigned)((time*1000)/CLOCKS_PER_SEC));
+	}
+#endif
+
+
+#ifdef FULL
+    {
+        const clock_t startTime = clock();
+		ethash_return_value hash;
+        ethash_full(&hash, full_mem, &params, previous_hash, 0);
+        const clock_t time = clock() - startTime;
+        debugf("ethash_full test: %uns, %s\n", (unsigned)((time*1000000)/CLOCKS_PER_SEC), bytesToHexString(hash.result, 32).data());
+    }
+#endif
+
+#ifdef OPENCL
+	// validate 1024 hashes against CPU
+	miner.hash(g_hashes, previous_hash, 0, 1024);
+	for (unsigned i = 0; i != 1024; ++i)
+	{
+		ethash_return_value hash;
+		ethash_light(&hash, &cache, &params, previous_hash, i);
+		if (memcmp(hash.result, g_hashes + 32*i, 32) != 0)
+		{
+			debugf("nonce %u failed: %s %s\n", i, bytesToHexString(g_hashes + 32*i, 32).c_str(), bytesToHexString(hash.result, 32).c_str());
+			static unsigned c = 0;
+			if (++c == 16)
+			{
+				exit(-1);
+			}
+		}
+	}
+#endif
+
+
+	clock_t startTime = clock();
+	unsigned hash_count = trials;
+	
+	#ifdef OPENCL
+	{
+		struct search_hook : ethash_cl_miner::search_hook
+		{
+			unsigned hash_count;
+			std::vector<uint64_t> nonce_vec;
+
+			virtual bool found(uint64_t const* nonces, uint32_t count)
+			{
+				nonce_vec.assign(nonces, nonces + count);
+				return false;
+			}
+
+			virtual bool searched(uint64_t start_nonce, uint32_t count)
+			{
+				// do nothing
+				hash_count += count;
+				return hash_count >= trials;
+			}
+		};
+		search_hook hook;
+		hook.hash_count = 0;
+
+		miner.search(previous_hash, 0x000000ffffffffff, hook);
+
+		for (unsigned i = 0; i != hook.nonce_vec.size(); ++i)
+		{
+			uint64_t nonce = hook.nonce_vec[i];
+			ethash_return_value hash;
+			ethash_light(&hash, &cache, &params, previous_hash, nonce);
+			debugf("found: %.8x%.8x -> %s\n", unsigned(nonce>>32), unsigned(nonce), bytesToHexString(hash.result, 32).c_str());
+		}
+
+		hash_count = hook.hash_count;
+	}
+	#else
+	{
+		//#pragma omp parallel for
+		for (int nonce = 0; nonce < trials; ++nonce)
+		{
+			ethash_return_value hash;
+			#ifdef FULL
+				ethash_full(&hash, full_mem, &params, previous_hash, nonce);
+			#else
+				ethash_light(&hash, &cache, &params, previous_hash, nonce);
+			#endif // FULL
+		}
+	}
+	#endif
+	
+	clock_t time = std::max((clock_t)1u, clock() - startTime);
+	
+	unsigned read_size = ACCESSES * MIX_BYTES;
+	debugf(			
+		"hashrate: %8u, bw: %6u MB/s\n",
+		(unsigned)(((uint64_t)hash_count*CLOCKS_PER_SEC)/time),
+		(unsigned)((((uint64_t)hash_count*read_size*CLOCKS_PER_SEC)/time) / (1024*1024))
+		);
+
+	free(cache_mem_buf);
+#ifdef FULL
+	free(full_mem_buf);
+#endif
+
+	return 0;
+}
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindCryptoPP.cmake b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindCryptoPP.cmake
new file mode 100644
index 0000000000..5ca01e4468
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindCryptoPP.cmake
@@ -0,0 +1,108 @@
+# Module for locating the Crypto++ encryption library.
+#
+# Customizable variables:
+#   CRYPTOPP_ROOT_DIR
+#     This variable points to the CryptoPP root directory. On Windows the
+#     library location typically will have to be provided explicitly using the
+#     -D command-line option. The directory should include the include/cryptopp,
+#     lib and/or bin sub-directories.
+#
+# Read-only variables:
+#   CRYPTOPP_FOUND
+#     Indicates whether the library has been found.
+#
+#   CRYPTOPP_INCLUDE_DIRS
+#     Points to the CryptoPP include directory.
+#
+#   CRYPTOPP_LIBRARIES
+#     Points to the CryptoPP libraries that should be passed to
+#     target_link_libararies.
+#
+#
+# Copyright (c) 2012 Sergiu Dotenco
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to deal
+# in the Software without restriction, including without limitation the rights
+# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+# copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in all
+# copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+# SOFTWARE.
+
+INCLUDE (FindPackageHandleStandardArgs)
+
+FIND_PATH (CRYPTOPP_ROOT_DIR
+  NAMES cryptopp/cryptlib.h include/cryptopp/cryptlib.h
+  PATHS ENV CRYPTOPPROOT
+  DOC "CryptoPP root directory")
+
+# Re-use the previous path:
+FIND_PATH (CRYPTOPP_INCLUDE_DIR
+  NAMES cryptopp/cryptlib.h
+  HINTS ${CRYPTOPP_ROOT_DIR}
+  PATH_SUFFIXES include
+  DOC "CryptoPP include directory")
+
+FIND_LIBRARY (CRYPTOPP_LIBRARY_DEBUG
+  NAMES cryptlibd cryptoppd
+  HINTS ${CRYPTOPP_ROOT_DIR}
+  PATH_SUFFIXES lib
+  DOC "CryptoPP debug library")
+
+FIND_LIBRARY (CRYPTOPP_LIBRARY_RELEASE
+  NAMES cryptlib cryptopp
+  HINTS ${CRYPTOPP_ROOT_DIR}
+  PATH_SUFFIXES lib
+  DOC "CryptoPP release library")
+
+IF (CRYPTOPP_LIBRARY_DEBUG AND CRYPTOPP_LIBRARY_RELEASE)
+  SET (CRYPTOPP_LIBRARY
+    optimized ${CRYPTOPP_LIBRARY_RELEASE}
+    debug ${CRYPTOPP_LIBRARY_DEBUG} CACHE DOC "CryptoPP library")
+ELSEIF (CRYPTOPP_LIBRARY_RELEASE)
+  SET (CRYPTOPP_LIBRARY ${CRYPTOPP_LIBRARY_RELEASE} CACHE DOC
+    "CryptoPP library")
+ENDIF (CRYPTOPP_LIBRARY_DEBUG AND CRYPTOPP_LIBRARY_RELEASE)
+
+IF (CRYPTOPP_INCLUDE_DIR)
+  SET (_CRYPTOPP_VERSION_HEADER ${CRYPTOPP_INCLUDE_DIR}/cryptopp/config.h)
+
+  IF (EXISTS ${_CRYPTOPP_VERSION_HEADER})
+    FILE (STRINGS ${_CRYPTOPP_VERSION_HEADER} _CRYPTOPP_VERSION_TMP REGEX
+      "^#define CRYPTOPP_VERSION[ \t]+[0-9]+$")
+
+    STRING (REGEX REPLACE
+      "^#define CRYPTOPP_VERSION[ \t]+([0-9]+)" "\\1" _CRYPTOPP_VERSION_TMP
+      ${_CRYPTOPP_VERSION_TMP})
+
+    STRING (REGEX REPLACE "([0-9]+)[0-9][0-9]" "\\1" CRYPTOPP_VERSION_MAJOR
+      ${_CRYPTOPP_VERSION_TMP})
+    STRING (REGEX REPLACE "[0-9]([0-9])[0-9]" "\\1" CRYPTOPP_VERSION_MINOR
+      ${_CRYPTOPP_VERSION_TMP})
+    STRING (REGEX REPLACE "[0-9][0-9]([0-9])" "\\1" CRYPTOPP_VERSION_PATCH
+      ${_CRYPTOPP_VERSION_TMP})
+
+    SET (CRYPTOPP_VERSION_COUNT 3)
+    SET (CRYPTOPP_VERSION
+      ${CRYPTOPP_VERSION_MAJOR}.${CRYPTOPP_VERSION_MINOR}.${CRYPTOPP_VERSION_PATCH})
+  ENDIF (EXISTS ${_CRYPTOPP_VERSION_HEADER})
+ENDIF (CRYPTOPP_INCLUDE_DIR)
+
+SET (CRYPTOPP_INCLUDE_DIRS ${CRYPTOPP_INCLUDE_DIR})
+SET (CRYPTOPP_LIBRARIES ${CRYPTOPP_LIBRARY})
+
+MARK_AS_ADVANCED (CRYPTOPP_INCLUDE_DIR CRYPTOPP_LIBRARY CRYPTOPP_LIBRARY_DEBUG
+  CRYPTOPP_LIBRARY_RELEASE)
+
+FIND_PACKAGE_HANDLE_STANDARD_ARGS (CryptoPP REQUIRED_VARS CRYPTOPP_ROOT_DIR
+  CRYPTOPP_INCLUDE_DIR CRYPTOPP_LIBRARY VERSION_VAR CRYPTOPP_VERSION)
\ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake
new file mode 100644
index 0000000000..cc567c95e7
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/cmake/modules/FindOpenCL.cmake
@@ -0,0 +1,91 @@
+#
+#  This file taken from FindOpenCL project @ http://gitorious.com/findopencl
+#
+# - Try to find OpenCL
+# This module tries to find an OpenCL implementation on your system. It supports
+# AMD / ATI, Apple and NVIDIA implementations, but shoudl work, too.
+#
+# Once done this will define
+#  OPENCL_FOUND        - system has OpenCL
+#  OPENCL_INCLUDE_DIRS  - the OpenCL include directory
+#  OPENCL_LIBRARIES    - link these to use OpenCL
+#
+# WIN32 should work, but is untested
+
+FIND_PACKAGE( PackageHandleStandardArgs )
+
+SET (OPENCL_VERSION_STRING "0.1.0")
+SET (OPENCL_VERSION_MAJOR 0)
+SET (OPENCL_VERSION_MINOR 1)
+SET (OPENCL_VERSION_PATCH 0)
+
+IF (APPLE)
+
+  FIND_LIBRARY(OPENCL_LIBRARIES OpenCL DOC "OpenCL lib for OSX")
+  FIND_PATH(OPENCL_INCLUDE_DIRS OpenCL/cl.h DOC "Include for OpenCL on OSX")
+  FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS OpenCL/cl.hpp DOC "Include for OpenCL CPP bindings on OSX")
+
+ELSE (APPLE)
+
+	IF (WIN32)
+	
+	    FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h)
+	    FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp)
+	
+	    # The AMD SDK currently installs both x86 and x86_64 libraries
+	    # This is only a hack to find out architecture
+	    IF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" )
+	    	SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86_64")
+			SET(OPENCL_LIB_DIR "$ENV{ATIINTERNALSTREAMSDKROOT}/lib/x86_64")
+	    ELSE (${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64")
+	    	SET(OPENCL_LIB_DIR "$ENV{ATISTREAMSDKROOT}/lib/x86")
+	   		SET(OPENCL_LIB_DIR "$ENV{ATIINTERNALSTREAMSDKROOT}/lib/x86")
+	    ENDIF( ${CMAKE_SYSTEM_PROCESSOR} STREQUAL "AMD64" )
+
+	    # find out if the user asked for a 64-bit build, and use the corresponding 
+	    # 64 or 32 bit NVIDIA library paths to the search:
+	    STRING(REGEX MATCH "Win64" ISWIN64 ${CMAKE_GENERATOR})
+	    IF("${ISWIN64}" STREQUAL "Win64") 
+	    	FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR} $ENV{CUDA_LIB_PATH} $ENV{CUDA_PATH}/lib/x64)
+	    ELSE("${ISWIN64}" STREQUAL "Win64") 
+	    	FIND_LIBRARY(OPENCL_LIBRARIES OpenCL.lib ${OPENCL_LIB_DIR} $ENV{CUDA_LIB_PATH} $ENV{CUDA_PATH}/lib/Win32)
+	    ENDIF("${ISWIN64}" STREQUAL "Win64") 
+
+	    GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE)
+	    
+	    # On Win32 search relative to the library
+	    FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS "${_OPENCL_INC_CAND}" $ENV{CUDA_INC_PATH} $ENV{CUDA_PATH}/include)
+	    FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS "${_OPENCL_INC_CAND}" $ENV{CUDA_INC_PATH} $ENV{CUDA_PATH}/include)
+	
+	ELSE (WIN32)
+
+            # Unix style platforms
+            FIND_LIBRARY(OPENCL_LIBRARIES OpenCL
+              ENV LD_LIBRARY_PATH
+            )
+
+            GET_FILENAME_COMPONENT(OPENCL_LIB_DIR ${OPENCL_LIBRARIES} PATH)
+            GET_FILENAME_COMPONENT(_OPENCL_INC_CAND ${OPENCL_LIB_DIR}/../../include ABSOLUTE)
+
+            # The AMD SDK currently does not place its headers
+            # in /usr/include, therefore also search relative
+            # to the library
+            FIND_PATH(OPENCL_INCLUDE_DIRS CL/cl.h PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include")
+            FIND_PATH(_OPENCL_CPP_INCLUDE_DIRS CL/cl.hpp PATHS ${_OPENCL_INC_CAND} "/usr/local/cuda/include")
+
+	ENDIF (WIN32)
+
+ENDIF (APPLE)
+
+FIND_PACKAGE_HANDLE_STANDARD_ARGS( OpenCL DEFAULT_MSG OPENCL_LIBRARIES OPENCL_INCLUDE_DIRS )
+
+IF( _OPENCL_CPP_INCLUDE_DIRS )
+	SET( OPENCL_HAS_CPP_BINDINGS TRUE )
+	LIST( APPEND OPENCL_INCLUDE_DIRS ${_OPENCL_CPP_INCLUDE_DIRS} )
+	# This is often the same, so clean up
+	LIST( REMOVE_DUPLICATES OPENCL_INCLUDE_DIRS )
+ENDIF( _OPENCL_CPP_INCLUDE_DIRS )
+
+MARK_AS_ADVANCED(
+  OPENCL_INCLUDE_DIRS
+)
\ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/cryptopp/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/cryptopp/CMakeLists.txt
new file mode 100644
index 0000000000..4cd9f36c69
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/cryptopp/CMakeLists.txt
@@ -0,0 +1,13 @@
+set(LIBRARY cryptopp)
+
+include_directories(../../cryptopp)
+
+# todo, subset
+file(GLOB HEADERS "../../cryptopp/*.h")
+file(GLOB SOURCE "../../cryptopp/*.cpp")
+
+add_library(${LIBRARY} ${HEADERS} ${SOURCE})
+
+set(CRYPTOPP_INCLUDE_DIRS "../.." PARENT_SCOPE)
+set(CRYPTOPP_LIBRARIES ${LIBRARY} PARENT_SCOPE)
+set(CRYPTOPP_FOUND TRUE PARENT_SCOPE)
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go
new file mode 100644
index 0000000000..32d3f02641
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/ethash.go
@@ -0,0 +1,296 @@
+package ethash
+
+/*
+#cgo CFLAGS: -std=gnu99 -Wall
+#include "libethash/ethash.h"
+#include "libethash/util.c"
+#include "libethash/internal.c"
+#include "libethash/sha3.c"
+*/
+import "C"
+
+import (
+	"bytes"
+	"encoding/binary"
+	"log"
+	"math/big"
+	"math/rand"
+	"sync"
+	"time"
+	"unsafe"
+
+	"github.com/ethereum/go-ethereum/logger"
+	"github.com/ethereum/go-ethereum/pow"
+)
+
+var powlogger = logger.NewLogger("POW")
+
+type DAG struct {
+	SeedBlockNum uint64
+	dag          unsafe.Pointer // full GB of memory for dag
+}
+
+type ParamsAndCache struct {
+	params       *C.ethash_params
+	cache        *C.ethash_cache
+	SeedBlockNum uint64
+}
+
+type Ethash struct {
+	turbo          bool
+	HashRate       int64
+	chainManager   pow.ChainManager
+	dag            *DAG
+	paramsAndCache *ParamsAndCache
+	nextdag        unsafe.Pointer
+	ret            *C.ethash_return_value
+	dagMutex       *sync.Mutex
+	cacheMutex     *sync.Mutex
+}
+
+func parseNonce(nonce []byte) (uint64, error) {
+	nonceBuf := bytes.NewBuffer(nonce)
+	nonceInt, err := binary.ReadUvarint(nonceBuf)
+	if err != nil {
+		return 0, err
+	}
+	return nonceInt, nil
+}
+
+const epochLength uint64 = 30000
+
+func getSeedBlockNum(blockNum uint64) uint64 {
+	var seedBlockNum uint64 = 0
+	if blockNum >= 2*epochLength {
+		seedBlockNum = ((blockNum / epochLength) - 1) * epochLength
+	}
+	return seedBlockNum
+}
+
+func makeParamsAndCache(chainManager pow.ChainManager, blockNum uint64) *ParamsAndCache {
+	seedBlockNum := getSeedBlockNum(blockNum)
+	paramsAndCache := &ParamsAndCache{
+		params:       new(C.ethash_params),
+		cache:        new(C.ethash_cache),
+		SeedBlockNum: seedBlockNum,
+	}
+	C.ethash_params_init(paramsAndCache.params, C.uint32_t(seedBlockNum))
+	paramsAndCache.cache.mem = C.malloc(paramsAndCache.params.cache_size)
+	seedHash := chainManager.GetBlockByNumber(seedBlockNum).Header().Hash()
+	log.Println("Params", paramsAndCache.params)
+
+	log.Println("Making Cache")
+	start := time.Now()
+	C.ethash_mkcache(paramsAndCache.cache, paramsAndCache.params, (*C.uint8_t)((unsafe.Pointer)(&seedHash[0])))
+	log.Println("Took:", time.Since(start))
+	return paramsAndCache
+}
+
+func (pow *Ethash) updateCache() {
+	pow.cacheMutex.Lock()
+	seedNum := getSeedBlockNum(pow.chainManager.CurrentBlock().NumberU64())
+	if pow.paramsAndCache.SeedBlockNum != seedNum {
+		pow.paramsAndCache = makeParamsAndCache(pow.chainManager, pow.chainManager.CurrentBlock().NumberU64())
+	}
+	pow.cacheMutex.Unlock()
+}
+
+func makeDAG(p *ParamsAndCache) *DAG {
+	d := &DAG{
+		dag:          C.malloc(p.params.full_size),
+		SeedBlockNum: p.SeedBlockNum,
+	}
+	C.ethash_compute_full_data(d.dag, p.params, p.cache)
+	return d
+}
+
+func (pow *Ethash) updateDAG() {
+	pow.cacheMutex.Lock()
+	pow.dagMutex.Lock()
+
+	seedNum := getSeedBlockNum(pow.chainManager.CurrentBlock().NumberU64())
+	if pow.dag == nil || pow.dag.SeedBlockNum != seedNum {
+		pow.dag = nil
+		log.Println("Making Dag")
+		start := time.Now()
+		pow.dag = makeDAG(pow.paramsAndCache)
+		log.Println("Took:", time.Since(start))
+	}
+
+	pow.dagMutex.Unlock()
+	pow.cacheMutex.Unlock()
+}
+
+func New(chainManager pow.ChainManager) *Ethash {
+	return &Ethash{
+		turbo:          false,
+		paramsAndCache: makeParamsAndCache(chainManager, chainManager.CurrentBlock().NumberU64()),
+		chainManager:   chainManager,
+		dag:            nil,
+		ret:            new(C.ethash_return_value),
+		cacheMutex:     new(sync.Mutex),
+		dagMutex:       new(sync.Mutex),
+	}
+}
+
+func (pow *Ethash) DAGSize() uint64 {
+	return uint64(pow.paramsAndCache.params.full_size)
+}
+
+func (pow *Ethash) CacheSize() uint64 {
+	return uint64(pow.paramsAndCache.params.cache_size)
+}
+
+func (pow *Ethash) GetSeedHash(blockNum uint64) []byte {
+	return pow.chainManager.GetBlockByNumber(getSeedBlockNum(blockNum)).Header().Hash()
+}
+
+func (pow *Ethash) Stop() {
+	pow.cacheMutex.Lock()
+	pow.dagMutex.Lock()
+	if pow.paramsAndCache.cache != nil {
+		C.free(pow.paramsAndCache.cache.mem)
+	}
+	if pow.dag != nil {
+		C.free(pow.dag.dag)
+	}
+	pow.dagMutex.Unlock()
+	pow.cacheMutex.Unlock()
+}
+
+func (pow *Ethash) Search(block pow.Block, stop <-chan struct{}) ([]byte, []byte, []byte) {
+	pow.updateDAG()
+
+	// Not very elegant, multiple mining instances are not supported
+	pow.dagMutex.Lock()
+	pow.cacheMutex.Lock()
+	defer pow.cacheMutex.Unlock()
+	defer pow.dagMutex.Unlock()
+
+	r := rand.New(rand.NewSource(time.Now().UnixNano()))
+	miningHash := block.HashNoNonce()
+	diff := block.Difficulty()
+	log.Println("difficulty", diff)
+	i := int64(0)
+	start := time.Now().UnixNano()
+	t := time.Now()
+
+	nonce := uint64(r.Int63())
+
+	for {
+		select {
+		case <-stop:
+			powlogger.Infoln("Breaking from mining")
+			pow.HashRate = 0
+			pow.dagMutex.Unlock()
+			return nil, nil, nil
+		default:
+			i++
+
+			if time.Since(t) > (1 * time.Second) {
+				elapsed := time.Now().UnixNano() - start
+				hashes := ((float64(1e9) / float64(elapsed)) * float64(i)) / 1000
+				pow.HashRate = int64(hashes)
+				powlogger.Infoln("Hashing @", pow.HashRate, "khash")
+
+				t = time.Now()
+			}
+
+			cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash))
+			cnonce := C.uint64_t(nonce)
+			log.Printf("seed hash, nonce: %x %x\n", miningHash, nonce)
+			// pow.hash is the output/return of ethash_full
+			C.ethash_full(pow.ret, pow.dag.dag, pow.paramsAndCache.params, cMiningHash, cnonce)
+			res := C.ethash_check_difficulty((*C.uint8_t)(&pow.ret.result[0]), (*C.uint8_t)(unsafe.Pointer(&diff.Bytes()[0])))
+			if res == 1 {
+				mixDigest := C.GoBytes(unsafe.Pointer(&pow.ret.mix_hash[0]), 32)
+				// We don't really nead 32 bytes here
+				buf := make([]byte, 32)
+				binary.PutUvarint(buf, nonce)
+				return buf, mixDigest, pow.GetSeedHash(block.NumberU64())
+			}
+			nonce += 1
+		}
+
+		if !pow.turbo {
+			time.Sleep(20 * time.Microsecond)
+		}
+	}
+}
+
+func (pow *Ethash) Verify(block pow.Block) bool {
+	// Make sure the SeedHash is set correctly
+	if bytes.Compare(block.SeedHash(), pow.GetSeedHash(block.NumberU64())) != 0 {
+		log.Println("Block had wrong SeedHash")
+		log.Println("Expected: ", pow.GetSeedHash(block.NumberU64()))
+		log.Println("Actual: ", block.SeedHash())
+		return false
+	}
+
+	nonceInt, err := parseNonce(block.Nonce())
+	if err != nil {
+		log.Println("nonce to int err:", err)
+		return false
+	}
+	return pow.verify(block.HashNoNonce(), block.MixDigest(), block.Difficulty(), block.NumberU64(), nonceInt)
+}
+
+func (pow *Ethash) verify(hash []byte, mixDigest []byte, difficulty *big.Int, blockNum uint64, nonce uint64) bool {
+	// First check: make sure header, mixDigest, nonce are correct without hitting the DAG
+	// This is to prevent DOS attacks
+	chash := (*C.uint8_t)(unsafe.Pointer(&hash))
+	cnonce := C.uint64_t(nonce)
+	cmixDigest := (*C.uint8_t)(unsafe.Pointer(&mixDigest))
+	cdifficulty := (*C.uint8_t)(unsafe.Pointer(&difficulty.Bytes()[0]))
+	if C.ethash_quick_check_difficulty(chash, cnonce, cmixDigest, cdifficulty) != 1 {
+		log.Println("Failed to pass quick check.  Are you sure that the mix digest is correct?")
+		return false
+	}
+
+	var pAc *ParamsAndCache
+	// If its an old block (doesn't use the current cache)
+	// get the cache for it but don't update (so we don't need the mutex)
+	// Otherwise, it's the current block or a future.
+	// If current, updateCache will do nothing.
+	if getSeedBlockNum(blockNum) < pow.paramsAndCache.SeedBlockNum {
+		pAc = makeParamsAndCache(pow.chainManager, blockNum)
+	} else {
+		pow.updateCache()
+		pow.cacheMutex.Lock()
+		defer pow.cacheMutex.Unlock()
+		pAc = pow.paramsAndCache
+	}
+
+	C.ethash_light(pow.ret, pAc.cache, pAc.params, chash, cnonce)
+	res := C.ethash_check_difficulty((*C.uint8_t)(unsafe.Pointer(&pow.ret.result[0])), cdifficulty)
+	return res == 1
+}
+
+func (pow *Ethash) GetHashrate() int64 {
+	return pow.HashRate
+}
+
+func (pow *Ethash) Turbo(on bool) {
+	pow.turbo = on
+}
+
+func (pow *Ethash) FullHash(nonce uint64, miningHash []byte) []byte {
+	pow.updateDAG()
+	pow.dagMutex.Lock()
+	defer pow.dagMutex.Unlock()
+	cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash))
+	cnonce := C.uint64_t(nonce)
+	log.Println("seed hash, nonce:", miningHash, nonce)
+	// pow.hash is the output/return of ethash_full
+	C.ethash_full(pow.ret, pow.dag.dag, pow.paramsAndCache.params, cMiningHash, cnonce)
+	ghash_full := C.GoBytes(unsafe.Pointer(&pow.ret.result[0]), 32)
+	return ghash_full
+}
+
+func (pow *Ethash) LightHash(nonce uint64, miningHash []byte) []byte {
+	cMiningHash := (*C.uint8_t)(unsafe.Pointer(&miningHash))
+	cnonce := C.uint64_t(nonce)
+	C.ethash_light(pow.ret, pow.paramsAndCache.cache, pow.paramsAndCache.params, cMiningHash, cnonce)
+	ghash_light := C.GoBytes(unsafe.Pointer(&pow.ret.result[0]), 32)
+	return ghash_light
+}
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt
new file mode 100644
index 0000000000..19d2fecbf4
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/CMakeLists.txt
@@ -0,0 +1,12 @@
+set(LIBRARY ethash-cl)
+set(CMAKE_BUILD_TYPE Release)
+
+if (NOT OPENCL_FOUND)
+	find_package(OpenCL)
+endif()
+if (OPENCL_FOUND)
+	include_directories(${OPENCL_INCLUDE_DIRS})
+	include_directories(..)
+	add_library(${LIBRARY} ethash_cl_miner.cpp ethash_cl_miner.h)
+	TARGET_LINK_LIBRARIES(${LIBRARY} ${OPENCL_LIBRARIES} ethash)
+endif()
\ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp
new file mode 100644
index 0000000000..38fac1962a
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/cl.hpp
@@ -0,0 +1,12452 @@
+/*******************************************************************************
+ * Copyright (c) 2008-2013 The Khronos Group Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and/or associated documentation files (the
+ * "Materials"), to deal in the Materials without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Materials, and to
+ * permit persons to whom the Materials are furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included
+ * in all copies or substantial portions of the Materials.
+ *
+ * THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+ * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+ ******************************************************************************/
+
+/*! \file
+ *
+ *   \brief C++ bindings for OpenCL 1.0 (rev 48), OpenCL 1.1 (rev 33) and 
+ *       OpenCL 1.2 (rev 15)    
+ *   \author Benedict R. Gaster, Laurent Morichetti and Lee Howes
+ *   
+ *   Additions and fixes from:
+ *       Brian Cole, March 3rd 2010 and April 2012 
+ *       Matt Gruenke, April 2012.
+ *       Bruce Merry, February 2013.
+ *       Tom Deakin and Simon McIntosh-Smith, July 2013
+ *   
+ *   \version 1.2.6
+ *   \date August 2013
+ *
+ *   Optional extension support
+ *
+ *         cl
+ *         cl_ext_device_fission
+ *				#define USE_CL_DEVICE_FISSION
+ */
+
+/*! \mainpage
+ * \section intro Introduction
+ * For many large applications C++ is the language of choice and so it seems
+ * reasonable to define C++ bindings for OpenCL.
+ *
+ *
+ * The interface is contained with a single C++ header file \em cl.hpp and all
+ * definitions are contained within the namespace \em cl. There is no additional
+ * requirement to include \em cl.h and to use either the C++ or original C
+ * bindings it is enough to simply include \em cl.hpp.
+ *
+ * The bindings themselves are lightweight and correspond closely to the
+ * underlying C API. Using the C++ bindings introduces no additional execution
+ * overhead.
+ *
+ * For detail documentation on the bindings see:
+ *
+ * The OpenCL C++ Wrapper API 1.2 (revision 09)
+ *  http://www.khronos.org/registry/cl/specs/opencl-cplusplus-1.2.pdf
+ *
+ * \section example Example
+ *
+ * The following example shows a general use case for the C++
+ * bindings, including support for the optional exception feature and
+ * also the supplied vector and string classes, see following sections for
+ * decriptions of these features.
+ *
+ * \code
+ * #define __CL_ENABLE_EXCEPTIONS
+ * 
+ * #if defined(__APPLE__) || defined(__MACOSX)
+ * #include <OpenCL/cl.hpp>
+ * #else
+ * #include <CL/cl.hpp>
+ * #endif
+ * #include <cstdio>
+ * #include <cstdlib>
+ * #include <iostream>
+ * 
+ *  const char * helloStr  = "__kernel void "
+ *                           "hello(void) "
+ *                           "{ "
+ *                           "  "
+ *                           "} ";
+ * 
+ *  int
+ *  main(void)
+ *  {
+ *     cl_int err = CL_SUCCESS;
+ *     try {
+ *
+ *       std::vector<cl::Platform> platforms;
+ *       cl::Platform::get(&platforms);
+ *       if (platforms.size() == 0) {
+ *           std::cout << "Platform size 0\n";
+ *           return -1;
+ *       }
+ *
+ *       cl_context_properties properties[] = 
+ *          { CL_CONTEXT_PLATFORM, (cl_context_properties)(platforms[0])(), 0};
+ *       cl::Context context(CL_DEVICE_TYPE_CPU, properties); 
+ * 
+ *       std::vector<cl::Device> devices = context.getInfo<CL_CONTEXT_DEVICES>();
+ * 
+ *       cl::Program::Sources source(1,
+ *           std::make_pair(helloStr,strlen(helloStr)));
+ *       cl::Program program_ = cl::Program(context, source);
+ *       program_.build(devices);
+ * 
+ *       cl::Kernel kernel(program_, "hello", &err);
+ * 
+ *       cl::Event event;
+ *       cl::CommandQueue queue(context, devices[0], 0, &err);
+ *       queue.enqueueNDRangeKernel(
+ *           kernel, 
+ *           cl::NullRange, 
+ *           cl::NDRange(4,4),
+ *           cl::NullRange,
+ *           NULL,
+ *           &event); 
+ * 
+ *       event.wait();
+ *     }
+ *     catch (cl::Error err) {
+ *        std::cerr 
+ *           << "ERROR: "
+ *           << err.what()
+ *           << "("
+ *           << err.err()
+ *           << ")"
+ *           << std::endl;
+ *     }
+ * 
+ *    return EXIT_SUCCESS;
+ *  }
+ * 
+ * \endcode
+ *
+ */
+#ifndef CL_HPP_
+#define CL_HPP_
+
+#ifdef _WIN32
+
+#include <windows.h>
+#include <malloc.h>
+#include <iterator>
+#include <intrin.h>
+
+#if defined(__CL_ENABLE_EXCEPTIONS)
+#include <exception>
+#endif // #if defined(__CL_ENABLE_EXCEPTIONS)
+
+#pragma push_macro("max")
+#undef max
+#if defined(USE_DX_INTEROP)
+#include <CL/cl_d3d10.h>
+#include <CL/cl_dx9_media_sharing.h>
+#endif
+#endif // _WIN32
+
+// 
+#if defined(USE_CL_DEVICE_FISSION)
+#include <CL/cl_ext.h>
+#endif
+
+#if defined(__APPLE__) || defined(__MACOSX)
+#include <OpenGL/OpenGL.h>
+#include <OpenCL/opencl.h>
+#include <libkern/OSAtomic.h>
+#else
+#include <GL/gl.h>
+#include <CL/opencl.h>
+#endif // !__APPLE__
+
+// To avoid accidentally taking ownership of core OpenCL types
+// such as cl_kernel constructors are made explicit
+// under OpenCL 1.2
+#if defined(CL_VERSION_1_2) && !defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+#define __CL_EXPLICIT_CONSTRUCTORS explicit
+#else // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+#define __CL_EXPLICIT_CONSTRUCTORS 
+#endif // #if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+
+// Define deprecated prefixes and suffixes to ensure compilation
+// in case they are not pre-defined
+#if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED)
+#define CL_EXT_PREFIX__VERSION_1_1_DEPRECATED  
+#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED)
+#if !defined(CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED)
+#define CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+#endif // #if !defined(CL_EXT_PREFIX__VERSION_1_1_DEPRECATED)
+
+#if !defined(CL_CALLBACK)
+#define CL_CALLBACK
+#endif //CL_CALLBACK
+
+#include <utility>
+#include <limits>
+
+#if !defined(__NO_STD_VECTOR)
+#include <vector>
+#endif
+
+#if !defined(__NO_STD_STRING)
+#include <string>
+#endif 
+
+#if defined(linux) || defined(__APPLE__) || defined(__MACOSX)
+#include <alloca.h>
+
+#include <emmintrin.h>
+#include <xmmintrin.h>
+#endif // linux
+
+#include <cstring>
+
+
+/*! \namespace cl
+ *
+ * \brief The OpenCL C++ bindings are defined within this namespace.
+ *
+ */
+namespace cl {
+
+class Memory;
+
+/**
+ * Deprecated APIs for 1.2
+ */
+#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) 
+#define __INIT_CL_EXT_FCN_PTR(name) \
+    if(!pfn_##name) { \
+        pfn_##name = (PFN_##name) \
+            clGetExtensionFunctionAddress(#name); \
+        if(!pfn_##name) { \
+        } \
+    }
+#endif // #if defined(CL_VERSION_1_1)
+
+#if defined(CL_VERSION_1_2)
+#define __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, name) \
+    if(!pfn_##name) { \
+        pfn_##name = (PFN_##name) \
+            clGetExtensionFunctionAddressForPlatform(platform, #name); \
+        if(!pfn_##name) { \
+        } \
+    }
+#endif // #if defined(CL_VERSION_1_1)
+
+class Program;
+class Device;
+class Context;
+class CommandQueue;
+class Memory;
+class Buffer;
+
+#if defined(__CL_ENABLE_EXCEPTIONS)
+/*! \brief Exception class 
+ * 
+ *  This may be thrown by API functions when __CL_ENABLE_EXCEPTIONS is defined.
+ */
+class Error : public std::exception
+{
+private:
+    cl_int err_;
+    const char * errStr_;
+public:
+    /*! \brief Create a new CL error exception for a given error code
+     *  and corresponding message.
+     * 
+     *  \param err error code value.
+     *
+     *  \param errStr a descriptive string that must remain in scope until
+     *                handling of the exception has concluded.  If set, it
+     *                will be returned by what().
+     */
+    Error(cl_int err, const char * errStr = NULL) : err_(err), errStr_(errStr)
+    {}
+
+    ~Error() throw() {}
+
+    /*! \brief Get error string associated with exception
+     *
+     * \return A memory pointer to the error message string.
+     */
+    virtual const char * what() const throw ()
+    {
+        if (errStr_ == NULL) {
+            return "empty";
+        }
+        else {
+            return errStr_;
+        }
+    }
+
+    /*! \brief Get error code associated with exception
+     *
+     *  \return The error code.
+     */
+    cl_int err(void) const { return err_; }
+};
+
+#define __ERR_STR(x) #x
+#else
+#define __ERR_STR(x) NULL
+#endif // __CL_ENABLE_EXCEPTIONS
+
+
+namespace detail
+{
+#if defined(__CL_ENABLE_EXCEPTIONS)
+static inline cl_int errHandler (
+    cl_int err,
+    const char * errStr = NULL)
+{
+    if (err != CL_SUCCESS) {
+        throw Error(err, errStr);
+    }
+    return err;
+}
+#else
+static inline cl_int errHandler (cl_int err, const char * errStr = NULL)
+{
+    (void) errStr; // suppress unused variable warning
+    return err;
+}
+#endif // __CL_ENABLE_EXCEPTIONS
+}
+
+
+
+//! \cond DOXYGEN_DETAIL
+#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS)
+#define __GET_DEVICE_INFO_ERR               __ERR_STR(clGetDeviceInfo)
+#define __GET_PLATFORM_INFO_ERR             __ERR_STR(clGetPlatformInfo)
+#define __GET_DEVICE_IDS_ERR                __ERR_STR(clGetDeviceIDs)
+#define __GET_PLATFORM_IDS_ERR              __ERR_STR(clGetPlatformIDs)
+#define __GET_CONTEXT_INFO_ERR              __ERR_STR(clGetContextInfo)
+#define __GET_EVENT_INFO_ERR                __ERR_STR(clGetEventInfo)
+#define __GET_EVENT_PROFILE_INFO_ERR        __ERR_STR(clGetEventProfileInfo)
+#define __GET_MEM_OBJECT_INFO_ERR           __ERR_STR(clGetMemObjectInfo)
+#define __GET_IMAGE_INFO_ERR                __ERR_STR(clGetImageInfo)
+#define __GET_SAMPLER_INFO_ERR              __ERR_STR(clGetSamplerInfo)
+#define __GET_KERNEL_INFO_ERR               __ERR_STR(clGetKernelInfo)
+#if defined(CL_VERSION_1_2)
+#define __GET_KERNEL_ARG_INFO_ERR               __ERR_STR(clGetKernelArgInfo)
+#endif // #if defined(CL_VERSION_1_2)
+#define __GET_KERNEL_WORK_GROUP_INFO_ERR    __ERR_STR(clGetKernelWorkGroupInfo)
+#define __GET_PROGRAM_INFO_ERR              __ERR_STR(clGetProgramInfo)
+#define __GET_PROGRAM_BUILD_INFO_ERR        __ERR_STR(clGetProgramBuildInfo)
+#define __GET_COMMAND_QUEUE_INFO_ERR        __ERR_STR(clGetCommandQueueInfo)
+
+#define __CREATE_CONTEXT_ERR                __ERR_STR(clCreateContext)
+#define __CREATE_CONTEXT_FROM_TYPE_ERR      __ERR_STR(clCreateContextFromType)
+#define __GET_SUPPORTED_IMAGE_FORMATS_ERR   __ERR_STR(clGetSupportedImageFormats)
+
+#define __CREATE_BUFFER_ERR                 __ERR_STR(clCreateBuffer)
+#define __COPY_ERR                          __ERR_STR(cl::copy)
+#define __CREATE_SUBBUFFER_ERR              __ERR_STR(clCreateSubBuffer)
+#define __CREATE_GL_BUFFER_ERR              __ERR_STR(clCreateFromGLBuffer)
+#define __CREATE_GL_RENDER_BUFFER_ERR       __ERR_STR(clCreateFromGLBuffer)
+#define __GET_GL_OBJECT_INFO_ERR            __ERR_STR(clGetGLObjectInfo)
+#if defined(CL_VERSION_1_2)
+#define __CREATE_IMAGE_ERR                  __ERR_STR(clCreateImage)
+#define __CREATE_GL_TEXTURE_ERR             __ERR_STR(clCreateFromGLTexture)
+#define __IMAGE_DIMENSION_ERR               __ERR_STR(Incorrect image dimensions)
+#endif // #if defined(CL_VERSION_1_2)
+#define __CREATE_SAMPLER_ERR                __ERR_STR(clCreateSampler)
+#define __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR __ERR_STR(clSetMemObjectDestructorCallback)
+
+#define __CREATE_USER_EVENT_ERR             __ERR_STR(clCreateUserEvent)
+#define __SET_USER_EVENT_STATUS_ERR         __ERR_STR(clSetUserEventStatus)
+#define __SET_EVENT_CALLBACK_ERR            __ERR_STR(clSetEventCallback)
+#define __WAIT_FOR_EVENTS_ERR               __ERR_STR(clWaitForEvents)
+
+#define __CREATE_KERNEL_ERR                 __ERR_STR(clCreateKernel)
+#define __SET_KERNEL_ARGS_ERR               __ERR_STR(clSetKernelArg)
+#define __CREATE_PROGRAM_WITH_SOURCE_ERR    __ERR_STR(clCreateProgramWithSource)
+#define __CREATE_PROGRAM_WITH_BINARY_ERR    __ERR_STR(clCreateProgramWithBinary)
+#if defined(CL_VERSION_1_2)
+#define __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR    __ERR_STR(clCreateProgramWithBuiltInKernels)
+#endif // #if defined(CL_VERSION_1_2)
+#define __BUILD_PROGRAM_ERR                 __ERR_STR(clBuildProgram)
+#if defined(CL_VERSION_1_2)
+#define __COMPILE_PROGRAM_ERR                  __ERR_STR(clCompileProgram)
+
+#endif // #if defined(CL_VERSION_1_2)
+#define __CREATE_KERNELS_IN_PROGRAM_ERR     __ERR_STR(clCreateKernelsInProgram)
+
+#define __CREATE_COMMAND_QUEUE_ERR          __ERR_STR(clCreateCommandQueue)
+#define __SET_COMMAND_QUEUE_PROPERTY_ERR    __ERR_STR(clSetCommandQueueProperty)
+#define __ENQUEUE_READ_BUFFER_ERR           __ERR_STR(clEnqueueReadBuffer)
+#define __ENQUEUE_READ_BUFFER_RECT_ERR      __ERR_STR(clEnqueueReadBufferRect)
+#define __ENQUEUE_WRITE_BUFFER_ERR          __ERR_STR(clEnqueueWriteBuffer)
+#define __ENQUEUE_WRITE_BUFFER_RECT_ERR     __ERR_STR(clEnqueueWriteBufferRect)
+#define __ENQEUE_COPY_BUFFER_ERR            __ERR_STR(clEnqueueCopyBuffer)
+#define __ENQEUE_COPY_BUFFER_RECT_ERR       __ERR_STR(clEnqueueCopyBufferRect)
+#define __ENQUEUE_FILL_BUFFER_ERR           __ERR_STR(clEnqueueFillBuffer)
+#define __ENQUEUE_READ_IMAGE_ERR            __ERR_STR(clEnqueueReadImage)
+#define __ENQUEUE_WRITE_IMAGE_ERR           __ERR_STR(clEnqueueWriteImage)
+#define __ENQUEUE_COPY_IMAGE_ERR            __ERR_STR(clEnqueueCopyImage)
+#define __ENQUEUE_FILL_IMAGE_ERR           __ERR_STR(clEnqueueFillImage)
+#define __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR  __ERR_STR(clEnqueueCopyImageToBuffer)
+#define __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR  __ERR_STR(clEnqueueCopyBufferToImage)
+#define __ENQUEUE_MAP_BUFFER_ERR            __ERR_STR(clEnqueueMapBuffer)
+#define __ENQUEUE_MAP_IMAGE_ERR             __ERR_STR(clEnqueueMapImage)
+#define __ENQUEUE_UNMAP_MEM_OBJECT_ERR      __ERR_STR(clEnqueueUnMapMemObject)
+#define __ENQUEUE_NDRANGE_KERNEL_ERR        __ERR_STR(clEnqueueNDRangeKernel)
+#define __ENQUEUE_TASK_ERR                  __ERR_STR(clEnqueueTask)
+#define __ENQUEUE_NATIVE_KERNEL             __ERR_STR(clEnqueueNativeKernel)
+#if defined(CL_VERSION_1_2)
+#define __ENQUEUE_MIGRATE_MEM_OBJECTS_ERR   __ERR_STR(clEnqueueMigrateMemObjects)
+#endif // #if defined(CL_VERSION_1_2)
+
+#define __ENQUEUE_ACQUIRE_GL_ERR            __ERR_STR(clEnqueueAcquireGLObjects)
+#define __ENQUEUE_RELEASE_GL_ERR            __ERR_STR(clEnqueueReleaseGLObjects)
+
+
+#define __RETAIN_ERR                        __ERR_STR(Retain Object)
+#define __RELEASE_ERR                       __ERR_STR(Release Object)
+#define __FLUSH_ERR                         __ERR_STR(clFlush)
+#define __FINISH_ERR                        __ERR_STR(clFinish)
+#define __VECTOR_CAPACITY_ERR               __ERR_STR(Vector capacity error)
+
+/**
+ * CL 1.2 version that uses device fission.
+ */
+#if defined(CL_VERSION_1_2)
+#define __CREATE_SUB_DEVICES                __ERR_STR(clCreateSubDevices)
+#else
+#define __CREATE_SUB_DEVICES                __ERR_STR(clCreateSubDevicesEXT)
+#endif // #if defined(CL_VERSION_1_2)
+
+/**
+ * Deprecated APIs for 1.2
+ */
+#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) 
+#define __ENQUEUE_MARKER_ERR                __ERR_STR(clEnqueueMarker)
+#define __ENQUEUE_WAIT_FOR_EVENTS_ERR       __ERR_STR(clEnqueueWaitForEvents)
+#define __ENQUEUE_BARRIER_ERR               __ERR_STR(clEnqueueBarrier)
+#define __UNLOAD_COMPILER_ERR               __ERR_STR(clUnloadCompiler)
+#define __CREATE_GL_TEXTURE_2D_ERR          __ERR_STR(clCreateFromGLTexture2D)
+#define __CREATE_GL_TEXTURE_3D_ERR          __ERR_STR(clCreateFromGLTexture3D)
+#define __CREATE_IMAGE2D_ERR                __ERR_STR(clCreateImage2D)
+#define __CREATE_IMAGE3D_ERR                __ERR_STR(clCreateImage3D)
+#endif // #if defined(CL_VERSION_1_1)
+
+#endif // __CL_USER_OVERRIDE_ERROR_STRINGS
+//! \endcond
+
+/**
+ * CL 1.2 marker and barrier commands
+ */
+#if defined(CL_VERSION_1_2)
+#define __ENQUEUE_MARKER_WAIT_LIST_ERR                __ERR_STR(clEnqueueMarkerWithWaitList)
+#define __ENQUEUE_BARRIER_WAIT_LIST_ERR               __ERR_STR(clEnqueueBarrierWithWaitList)
+#endif // #if defined(CL_VERSION_1_2)
+
+#if !defined(__USE_DEV_STRING) && !defined(__NO_STD_STRING)
+typedef std::string STRING_CLASS;
+#elif !defined(__USE_DEV_STRING) 
+
+/*! \class string
+ * \brief Simple string class, that provides a limited subset of std::string
+ * functionality but avoids many of the issues that come with that class.
+ 
+ *  \note Deprecated. Please use std::string as default or
+ *  re-define the string class to match the std::string
+ *  interface by defining STRING_CLASS
+ */
+class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED string CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+{
+private:
+    ::size_t size_;
+    char * str_;
+public:
+    //! \brief Constructs an empty string, allocating no memory.
+    string(void) : size_(0), str_(NULL)
+    {
+    }
+
+    /*! \brief Constructs a string populated from an arbitrary value of
+     *  specified size.
+     * 
+     *  An extra '\0' is added, in case none was contained in str.
+     *
+     *  \param str the initial value of the string instance.  Note that '\0'     
+     *             characters receive no special treatment.  If NULL,
+     *             the string is left empty, with a size of 0.
+     *
+     *  \param size the number of characters to copy from str.
+     */
+    string(const char * str, ::size_t size) :
+        size_(size),
+        str_(NULL)
+    {
+        if( size > 0 ) {
+            str_ = new char[size_+1];
+            if (str_ != NULL) {
+                memcpy(str_, str, size_  * sizeof(char));
+                str_[size_] = '\0';
+            }
+            else {
+                size_ = 0;
+            }
+        }
+    }
+
+    /*! \brief Constructs a string populated from a null-terminated value.
+     *
+     *  \param str the null-terminated initial value of the string instance.
+     *             If NULL, the string is left empty, with a size of 0.
+     */
+    string(const char * str) :
+        size_(0),
+        str_(NULL)
+    {
+        if( str ) {
+            size_= ::strlen(str);
+        }
+        if( size_ > 0 ) {
+            str_ = new char[size_ + 1];
+            if (str_ != NULL) {
+                memcpy(str_, str, (size_ + 1) * sizeof(char));
+            }
+        }
+    }
+
+    void resize( ::size_t n )
+    {
+        if( size_ == n ) {
+            return;
+        }
+        if (n == 0) {
+            if( str_ ) {
+                delete [] str_;
+            }
+            str_ = NULL;
+            size_ = 0;
+        } 
+        else {
+            char *newString = new char[n + 1];
+            int copySize = n;
+            if( size_ < n ) {
+                copySize = size_;
+            }
+            size_ = n;
+            
+            if(str_) {
+                memcpy(newString, str_, (copySize + 1) * sizeof(char));
+            }
+            if( copySize < size_ ) {
+                memset(newString + copySize, 0, size_ - copySize);
+            }
+            newString[size_] = '\0';
+
+            delete [] str_;
+            str_ = newString;
+        }
+    }
+
+    const char& operator[] ( ::size_t pos ) const
+    {
+        return str_[pos];
+    }
+
+    char& operator[] ( ::size_t pos )
+    {
+        return str_[pos];
+    }
+
+    /*! \brief Copies the value of another string to this one.
+     *
+     *  \param rhs the string to copy.
+     *
+     *  \returns a reference to the modified instance.
+     */
+    string& operator=(const string& rhs)
+    {
+        if (this == &rhs) {
+            return *this;
+        }
+
+        if( str_ != NULL ) {
+            delete [] str_;
+            str_ = NULL;
+            size_ = 0;
+        }
+
+        if (rhs.size_ == 0 || rhs.str_ == NULL) {
+            str_ = NULL;
+            size_ = 0;
+        } 
+        else {
+            str_ = new char[rhs.size_ + 1];
+            size_ = rhs.size_;
+            
+            if (str_ != NULL) {
+                memcpy(str_, rhs.str_, (size_ + 1) * sizeof(char));
+            }
+            else {
+                size_ = 0;
+            }
+        }
+
+        return *this;
+    }
+
+    /*! \brief Constructs a string by copying the value of another instance.
+     *
+     *  \param rhs the string to copy.
+     */
+    string(const string& rhs) :
+        size_(0),
+        str_(NULL)
+    {
+        *this = rhs;
+    }
+
+    //! \brief Destructor - frees memory used to hold the current value.
+    ~string()
+    {
+        delete[] str_;
+        str_ = NULL;
+    }
+    
+    //! \brief Queries the length of the string, excluding any added '\0's.
+    ::size_t size(void) const   { return size_; }
+
+    //! \brief Queries the length of the string, excluding any added '\0's.
+    ::size_t length(void) const { return size(); }
+
+    /*! \brief Returns a pointer to the private copy held by this instance,
+     *  or "" if empty/unset.
+     */
+    const char * c_str(void) const { return (str_) ? str_ : "";}
+};
+typedef cl::string STRING_CLASS;
+#endif // #elif !defined(__USE_DEV_STRING) 
+
+#if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR)
+#define VECTOR_CLASS std::vector
+#elif !defined(__USE_DEV_VECTOR) 
+#define VECTOR_CLASS cl::vector 
+
+#if !defined(__MAX_DEFAULT_VECTOR_SIZE)
+#define __MAX_DEFAULT_VECTOR_SIZE 10
+#endif
+
+/*! \class vector
+ * \brief Fixed sized vector implementation that mirroring 
+ *
+ *  \note Deprecated. Please use std::vector as default or
+ *  re-define the vector class to match the std::vector
+ *  interface by defining VECTOR_CLASS
+
+ *  \note Not recommended for use with custom objects as
+ *  current implementation will construct N elements
+ *
+ * std::vector functionality.
+ *  \brief Fixed sized vector compatible with std::vector.
+ *
+ *  \note
+ *  This differs from std::vector<> not just in memory allocation,
+ *  but also in terms of when members are constructed, destroyed,
+ *  and assigned instead of being copy constructed.
+ *
+ *  \param T type of element contained in the vector.
+ *
+ *  \param N maximum size of the vector.
+ */
+template <typename T, unsigned int N = __MAX_DEFAULT_VECTOR_SIZE>
+class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED vector CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+{
+private:
+    T data_[N];
+    unsigned int size_;
+
+public:
+    //! \brief Constructs an empty vector with no memory allocated.
+    vector() :  
+        size_(static_cast<unsigned int>(0))
+    {}
+
+    //! \brief Deallocates the vector's memory and destroys all of its elements.
+    ~vector() 
+    {
+        clear();
+    }
+
+    //! \brief Returns the number of elements currently contained.
+    unsigned int size(void) const
+    {
+        return size_;
+    }
+    
+    /*! \brief Empties the vector of all elements.
+     *  \note
+     *  This does not deallocate memory but will invoke destructors
+     *  on contained elements.
+     */
+    void clear()
+    {
+        while(!empty()) {
+            pop_back();
+        }
+    }
+
+    /*! \brief Appends an element after the last valid element.
+     * Calling this on a vector that has reached capacity will throw an 
+     * exception if exceptions are enabled.
+     */
+    void push_back (const T& x)
+    { 
+        if (size() < N) {    
+            new (&data_[size_]) T(x);
+            size_++;
+        } else {
+            detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR);
+        }
+    }
+
+    /*! \brief Removes the last valid element from the vector.
+     * Calling this on an empty vector will throw an exception
+     * if exceptions are enabled.
+     */
+    void pop_back(void)
+    {
+        if (size_ != 0) {
+            --size_;
+            data_[size_].~T();
+        } else {
+            detail::errHandler(CL_MEM_OBJECT_ALLOCATION_FAILURE, __VECTOR_CAPACITY_ERR);
+        }
+    }
+  
+    /*! \brief Constructs with a value copied from another.
+     *
+     *  \param vec the vector to copy.
+     */
+    vector(const vector<T, N>& vec) : 
+        size_(vec.size_)
+    {
+        if (size_ != 0) {	
+            assign(vec.begin(), vec.end());
+        }
+    } 
+
+    /*! \brief Constructs with a specified number of initial elements.
+     *
+     *  \param size number of initial elements.
+     *
+     *  \param val value of initial elements.
+     */
+    vector(unsigned int size, const T& val = T()) :
+        size_(0)
+    {
+        for (unsigned int i = 0; i < size; i++) {
+            push_back(val);
+        }
+    }
+
+    /*! \brief Overwrites the current content with that copied from another
+     *         instance.
+     *
+     *  \param rhs vector to copy.
+     *
+     *  \returns a reference to this.
+     */
+    vector<T, N>& operator=(const vector<T, N>& rhs)
+    {
+        if (this == &rhs) {
+            return *this;
+        }
+
+        if (rhs.size_ != 0) {	
+            assign(rhs.begin(), rhs.end());
+        } else {
+            clear();
+        }
+    
+        return *this;
+    }
+
+    /*! \brief Tests equality against another instance.
+     *
+     *  \param vec the vector against which to compare.
+     */
+    bool operator==(vector<T,N> &vec)
+    {
+        if (size() != vec.size()) {
+            return false;
+        }
+
+        for( unsigned int i = 0; i < size(); ++i ) {
+            if( operator[](i) != vec[i] ) {
+                return false;
+            }
+        }
+        return true;
+    }
+  
+    //! \brief Conversion operator to T*.
+    operator T* ()             { return data_; }
+
+    //! \brief Conversion operator to const T*.
+    operator const T* () const { return data_; }
+   
+    //! \brief Tests whether this instance has any elements.
+    bool empty (void) const
+    {
+        return size_==0;
+    }
+  
+    //! \brief Returns the maximum number of elements this instance can hold.
+    unsigned int max_size (void) const
+    {
+        return N;
+    }
+
+    //! \brief Returns the maximum number of elements this instance can hold.
+    unsigned int capacity () const
+    {
+        return N;
+    }
+
+    /*! \brief Returns a reference to a given element.
+     *
+     *  \param index which element to access.     *
+     *  \note
+     *  The caller is responsible for ensuring index is >= 0 and < size().
+     */
+    T& operator[](int index)
+    {
+        return data_[index];
+    }
+  
+    /*! \brief Returns a const reference to a given element.
+     *
+     *  \param index which element to access.
+     *
+     *  \note
+     *  The caller is responsible for ensuring index is >= 0 and < size().
+     */
+    const T& operator[](int index) const
+    {
+        return data_[index];
+    }
+  
+    /*! \brief Assigns elements of the vector based on a source iterator range.
+     *
+     *  \param start Beginning iterator of source range
+     *  \param end Enditerator of source range
+     *
+     *  \note
+     *  Will throw an exception if exceptions are enabled and size exceeded.
+     */
+    template<class I>
+    void assign(I start, I end)
+    {
+        clear();   
+        while(start != end) {
+            push_back(*start);
+            start++;
+        }
+    }
+
+    /*! \class iterator
+     * \brief Const iterator class for vectors
+     */
+    class iterator
+    {
+    private:
+        const vector<T,N> *vec_;
+        int index_;
+
+        /**
+         * Internal iterator constructor to capture reference
+         * to the vector it iterates over rather than taking 
+         * the vector by copy.
+         */
+        iterator (const vector<T,N> &vec, int index) :
+            vec_(&vec)
+        {            
+            if( !vec.empty() ) {
+                index_ = index;
+            } else {
+                index_ = -1;
+            }
+        }
+
+    public:
+        iterator(void) : 
+            index_(-1),
+            vec_(NULL)
+        {
+        }
+
+        iterator(const iterator& rhs) :
+            vec_(rhs.vec_),
+            index_(rhs.index_)
+        {
+        }
+
+        ~iterator(void) {}
+
+        static iterator begin(const cl::vector<T,N> &vec)
+        {
+            iterator i(vec, 0);
+
+            return i;
+        }
+
+        static iterator end(const cl::vector<T,N> &vec)
+        {
+            iterator i(vec, vec.size());
+
+            return i;
+        }
+    
+        bool operator==(iterator i)
+        {
+            return ((vec_ == i.vec_) && 
+                    (index_ == i.index_));
+        }
+
+        bool operator!=(iterator i)
+        {
+            return (!(*this==i));
+        }
+
+        iterator& operator++()
+        {
+            ++index_;
+            return *this;
+        }
+
+        iterator operator++(int)
+        {
+            iterator retVal(*this);
+            ++index_;
+            return retVal;
+        }
+
+        iterator& operator--()
+        {
+            --index_;
+            return *this;
+        }
+
+        iterator operator--(int)
+        {
+            iterator retVal(*this);
+            --index_;
+            return retVal;
+        }
+
+        const T& operator *() const
+        {
+            return (*vec_)[index_];
+        }
+    };
+
+    iterator begin(void)
+    {
+        return iterator::begin(*this);
+    }
+
+    iterator begin(void) const
+    {
+        return iterator::begin(*this);
+    }
+
+    iterator end(void)
+    {
+        return iterator::end(*this);
+    }
+
+    iterator end(void) const
+    {
+        return iterator::end(*this);
+    }
+
+    T& front(void)
+    {
+        return data_[0];
+    }
+
+    T& back(void)
+    {
+        return data_[size_];
+    }
+
+    const T& front(void) const
+    {
+        return data_[0];
+    }
+
+    const T& back(void) const
+    {
+        return data_[size_-1];
+    }
+};  
+#endif // #if !defined(__USE_DEV_VECTOR) && !defined(__NO_STD_VECTOR)
+
+
+
+
+
+namespace detail {
+#define __DEFAULT_NOT_INITIALIZED 1 
+#define __DEFAULT_BEING_INITIALIZED 2
+#define __DEFAULT_INITIALIZED 4
+
+    /*
+     * Compare and exchange primitives are needed for handling of defaults
+    */
+    inline int compare_exchange(volatile int * dest, int exchange, int comparand)
+    {
+#ifdef _WIN32
+        return (int)(InterlockedCompareExchange(
+           (volatile long*)dest, 
+           (long)exchange, 
+           (long)comparand));
+#elif defined(__APPLE__) || defined(__MACOSX)
+		return OSAtomicOr32Orig((uint32_t)exchange, (volatile uint32_t*)dest);
+#else // !_WIN32 || defined(__APPLE__) || defined(__MACOSX)
+        return (__sync_val_compare_and_swap(
+            dest, 
+            comparand, 
+            exchange));
+#endif // !_WIN32
+    }
+
+    inline void fence() { _mm_mfence(); }
+}; // namespace detail
+
+    
+/*! \brief class used to interface between C++ and
+ *  OpenCL C calls that require arrays of size_t values, whose
+ *  size is known statically.
+ */
+template <int N>
+class size_t
+{ 
+private:
+    ::size_t data_[N];
+
+public:
+    //! \brief Initialize size_t to all 0s
+    size_t()
+    {
+        for( int i = 0; i < N; ++i ) {
+            data_[i] = 0;
+        }
+    }
+
+    ::size_t& operator[](int index)
+    {
+        return data_[index];
+    }
+
+    const ::size_t& operator[](int index) const
+    {
+        return data_[index];
+    }
+
+    //! \brief Conversion operator to T*.
+    operator ::size_t* ()             { return data_; }
+
+    //! \brief Conversion operator to const T*.
+    operator const ::size_t* () const { return data_; }
+};
+
+namespace detail {
+
+// Generic getInfoHelper. The final parameter is used to guide overload
+// resolution: the actual parameter passed is an int, which makes this
+// a worse conversion sequence than a specialization that declares the
+// parameter as an int.
+template<typename Functor, typename T>
+inline cl_int getInfoHelper(Functor f, cl_uint name, T* param, long)
+{
+    return f(name, sizeof(T), param, NULL);
+}
+
+// Specialized getInfoHelper for VECTOR_CLASS params
+template <typename Func, typename T>
+inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS<T>* param, long)
+{
+    ::size_t required;
+    cl_int err = f(name, 0, NULL, &required);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+
+    T* value = (T*) alloca(required);
+    err = f(name, required, value, NULL);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+
+    param->assign(&value[0], &value[required/sizeof(T)]);
+    return CL_SUCCESS;
+}
+
+/* Specialization for reference-counted types. This depends on the
+ * existence of Wrapper<T>::cl_type, and none of the other types having the
+ * cl_type member. Note that simplify specifying the parameter as Wrapper<T>
+ * does not work, because when using a derived type (e.g. Context) the generic
+ * template will provide a better match.
+ */
+template <typename Func, typename T>
+inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS<T>* param, int, typename T::cl_type = 0)
+{
+    ::size_t required;
+    cl_int err = f(name, 0, NULL, &required);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+
+    typename T::cl_type * value = (typename T::cl_type *) alloca(required);
+    err = f(name, required, value, NULL);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+
+    ::size_t elements = required / sizeof(typename T::cl_type);
+    param->assign(&value[0], &value[elements]);
+    for (::size_t i = 0; i < elements; i++)
+    {
+        if (value[i] != NULL)
+        {
+            err = (*param)[i].retain();
+            if (err != CL_SUCCESS) {
+                return err;
+            }
+        }
+    }
+    return CL_SUCCESS;
+}
+
+// Specialized for getInfo<CL_PROGRAM_BINARIES>
+template <typename Func>
+inline cl_int getInfoHelper(Func f, cl_uint name, VECTOR_CLASS<char *>* param, int)
+{
+    cl_int err = f(name, param->size() * sizeof(char *), &(*param)[0], NULL);
+
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+
+    return CL_SUCCESS;
+}
+
+// Specialized GetInfoHelper for STRING_CLASS params
+template <typename Func>
+inline cl_int getInfoHelper(Func f, cl_uint name, STRING_CLASS* param, long)
+{
+    ::size_t required;
+    cl_int err = f(name, 0, NULL, &required);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+
+    char* value = (char*) alloca(required);
+    err = f(name, required, value, NULL);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+
+    *param = value;
+    return CL_SUCCESS;
+}
+
+// Specialized GetInfoHelper for cl::size_t params
+template <typename Func, ::size_t N>
+inline cl_int getInfoHelper(Func f, cl_uint name, size_t<N>* param, long)
+{
+    ::size_t required;
+    cl_int err = f(name, 0, NULL, &required);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+
+    ::size_t* value = (::size_t*) alloca(required);
+    err = f(name, required, value, NULL);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+
+    for(int i = 0; i < N; ++i) {
+        (*param)[i] = value[i];
+    }
+
+    return CL_SUCCESS;
+}
+
+template<typename T> struct ReferenceHandler;
+
+/* Specialization for reference-counted types. This depends on the
+ * existence of Wrapper<T>::cl_type, and none of the other types having the
+ * cl_type member. Note that simplify specifying the parameter as Wrapper<T>
+ * does not work, because when using a derived type (e.g. Context) the generic
+ * template will provide a better match.
+ */
+template<typename Func, typename T>
+inline cl_int getInfoHelper(Func f, cl_uint name, T* param, int, typename T::cl_type = 0)
+{
+    typename T::cl_type value;
+    cl_int err = f(name, sizeof(value), &value, NULL);
+    if (err != CL_SUCCESS) {
+        return err;
+    }
+    *param = value;
+    if (value != NULL)
+    {
+        err = param->retain();
+        if (err != CL_SUCCESS) {
+            return err;
+        }
+    }
+    return CL_SUCCESS;
+}
+
+#define __PARAM_NAME_INFO_1_0(F) \
+    F(cl_platform_info, CL_PLATFORM_PROFILE, STRING_CLASS) \
+    F(cl_platform_info, CL_PLATFORM_VERSION, STRING_CLASS) \
+    F(cl_platform_info, CL_PLATFORM_NAME, STRING_CLASS) \
+    F(cl_platform_info, CL_PLATFORM_VENDOR, STRING_CLASS) \
+    F(cl_platform_info, CL_PLATFORM_EXTENSIONS, STRING_CLASS) \
+    \
+    F(cl_device_info, CL_DEVICE_TYPE, cl_device_type) \
+    F(cl_device_info, CL_DEVICE_VENDOR_ID, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_COMPUTE_UNITS, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_WORK_GROUP_SIZE, ::size_t) \
+    F(cl_device_info, CL_DEVICE_MAX_WORK_ITEM_SIZES, VECTOR_CLASS< ::size_t>) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_CHAR, cl_uint) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_SHORT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_INT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_LONG, cl_uint) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_FLOAT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_DOUBLE, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_CLOCK_FREQUENCY, cl_uint) \
+    F(cl_device_info, CL_DEVICE_ADDRESS_BITS, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_READ_IMAGE_ARGS, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_WRITE_IMAGE_ARGS, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MAX_MEM_ALLOC_SIZE, cl_ulong) \
+    F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_WIDTH, ::size_t) \
+    F(cl_device_info, CL_DEVICE_IMAGE2D_MAX_HEIGHT, ::size_t) \
+    F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_WIDTH, ::size_t) \
+    F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_HEIGHT, ::size_t) \
+    F(cl_device_info, CL_DEVICE_IMAGE3D_MAX_DEPTH, ::size_t) \
+    F(cl_device_info, CL_DEVICE_IMAGE_SUPPORT, cl_bool) \
+    F(cl_device_info, CL_DEVICE_MAX_PARAMETER_SIZE, ::size_t) \
+    F(cl_device_info, CL_DEVICE_MAX_SAMPLERS, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MEM_BASE_ADDR_ALIGN, cl_uint) \
+    F(cl_device_info, CL_DEVICE_MIN_DATA_TYPE_ALIGN_SIZE, cl_uint) \
+    F(cl_device_info, CL_DEVICE_SINGLE_FP_CONFIG, cl_device_fp_config) \
+    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_TYPE, cl_device_mem_cache_type) \
+    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE, cl_uint)\
+    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_CACHE_SIZE, cl_ulong) \
+    F(cl_device_info, CL_DEVICE_GLOBAL_MEM_SIZE, cl_ulong) \
+    F(cl_device_info, CL_DEVICE_MAX_CONSTANT_BUFFER_SIZE, cl_ulong) \
+    F(cl_device_info, CL_DEVICE_MAX_CONSTANT_ARGS, cl_uint) \
+    F(cl_device_info, CL_DEVICE_LOCAL_MEM_TYPE, cl_device_local_mem_type) \
+    F(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE, cl_ulong) \
+    F(cl_device_info, CL_DEVICE_ERROR_CORRECTION_SUPPORT, cl_bool) \
+    F(cl_device_info, CL_DEVICE_PROFILING_TIMER_RESOLUTION, ::size_t) \
+    F(cl_device_info, CL_DEVICE_ENDIAN_LITTLE, cl_bool) \
+    F(cl_device_info, CL_DEVICE_AVAILABLE, cl_bool) \
+    F(cl_device_info, CL_DEVICE_COMPILER_AVAILABLE, cl_bool) \
+    F(cl_device_info, CL_DEVICE_EXECUTION_CAPABILITIES, cl_device_exec_capabilities) \
+    F(cl_device_info, CL_DEVICE_QUEUE_PROPERTIES, cl_command_queue_properties) \
+    F(cl_device_info, CL_DEVICE_PLATFORM, cl_platform_id) \
+    F(cl_device_info, CL_DEVICE_NAME, STRING_CLASS) \
+    F(cl_device_info, CL_DEVICE_VENDOR, STRING_CLASS) \
+    F(cl_device_info, CL_DRIVER_VERSION, STRING_CLASS) \
+    F(cl_device_info, CL_DEVICE_PROFILE, STRING_CLASS) \
+    F(cl_device_info, CL_DEVICE_VERSION, STRING_CLASS) \
+    F(cl_device_info, CL_DEVICE_EXTENSIONS, STRING_CLASS) \
+    \
+    F(cl_context_info, CL_CONTEXT_REFERENCE_COUNT, cl_uint) \
+    F(cl_context_info, CL_CONTEXT_DEVICES, VECTOR_CLASS<Device>) \
+    F(cl_context_info, CL_CONTEXT_PROPERTIES, VECTOR_CLASS<cl_context_properties>) \
+    \
+    F(cl_event_info, CL_EVENT_COMMAND_QUEUE, cl::CommandQueue) \
+    F(cl_event_info, CL_EVENT_COMMAND_TYPE, cl_command_type) \
+    F(cl_event_info, CL_EVENT_REFERENCE_COUNT, cl_uint) \
+    F(cl_event_info, CL_EVENT_COMMAND_EXECUTION_STATUS, cl_uint) \
+    \
+    F(cl_profiling_info, CL_PROFILING_COMMAND_QUEUED, cl_ulong) \
+    F(cl_profiling_info, CL_PROFILING_COMMAND_SUBMIT, cl_ulong) \
+    F(cl_profiling_info, CL_PROFILING_COMMAND_START, cl_ulong) \
+    F(cl_profiling_info, CL_PROFILING_COMMAND_END, cl_ulong) \
+    \
+    F(cl_mem_info, CL_MEM_TYPE, cl_mem_object_type) \
+    F(cl_mem_info, CL_MEM_FLAGS, cl_mem_flags) \
+    F(cl_mem_info, CL_MEM_SIZE, ::size_t) \
+    F(cl_mem_info, CL_MEM_HOST_PTR, void*) \
+    F(cl_mem_info, CL_MEM_MAP_COUNT, cl_uint) \
+    F(cl_mem_info, CL_MEM_REFERENCE_COUNT, cl_uint) \
+    F(cl_mem_info, CL_MEM_CONTEXT, cl::Context) \
+    \
+    F(cl_image_info, CL_IMAGE_FORMAT, cl_image_format) \
+    F(cl_image_info, CL_IMAGE_ELEMENT_SIZE, ::size_t) \
+    F(cl_image_info, CL_IMAGE_ROW_PITCH, ::size_t) \
+    F(cl_image_info, CL_IMAGE_SLICE_PITCH, ::size_t) \
+    F(cl_image_info, CL_IMAGE_WIDTH, ::size_t) \
+    F(cl_image_info, CL_IMAGE_HEIGHT, ::size_t) \
+    F(cl_image_info, CL_IMAGE_DEPTH, ::size_t) \
+    \
+    F(cl_sampler_info, CL_SAMPLER_REFERENCE_COUNT, cl_uint) \
+    F(cl_sampler_info, CL_SAMPLER_CONTEXT, cl::Context) \
+    F(cl_sampler_info, CL_SAMPLER_NORMALIZED_COORDS, cl_addressing_mode) \
+    F(cl_sampler_info, CL_SAMPLER_ADDRESSING_MODE, cl_filter_mode) \
+    F(cl_sampler_info, CL_SAMPLER_FILTER_MODE, cl_bool) \
+    \
+    F(cl_program_info, CL_PROGRAM_REFERENCE_COUNT, cl_uint) \
+    F(cl_program_info, CL_PROGRAM_CONTEXT, cl::Context) \
+    F(cl_program_info, CL_PROGRAM_NUM_DEVICES, cl_uint) \
+    F(cl_program_info, CL_PROGRAM_DEVICES, VECTOR_CLASS<Device>) \
+    F(cl_program_info, CL_PROGRAM_SOURCE, STRING_CLASS) \
+    F(cl_program_info, CL_PROGRAM_BINARY_SIZES, VECTOR_CLASS< ::size_t>) \
+    F(cl_program_info, CL_PROGRAM_BINARIES, VECTOR_CLASS<char *>) \
+    \
+    F(cl_program_build_info, CL_PROGRAM_BUILD_STATUS, cl_build_status) \
+    F(cl_program_build_info, CL_PROGRAM_BUILD_OPTIONS, STRING_CLASS) \
+    F(cl_program_build_info, CL_PROGRAM_BUILD_LOG, STRING_CLASS) \
+    \
+    F(cl_kernel_info, CL_KERNEL_FUNCTION_NAME, STRING_CLASS) \
+    F(cl_kernel_info, CL_KERNEL_NUM_ARGS, cl_uint) \
+    F(cl_kernel_info, CL_KERNEL_REFERENCE_COUNT, cl_uint) \
+    F(cl_kernel_info, CL_KERNEL_CONTEXT, cl::Context) \
+    F(cl_kernel_info, CL_KERNEL_PROGRAM, cl::Program) \
+    \
+    F(cl_kernel_work_group_info, CL_KERNEL_WORK_GROUP_SIZE, ::size_t) \
+    F(cl_kernel_work_group_info, CL_KERNEL_COMPILE_WORK_GROUP_SIZE, cl::size_t<3>) \
+    F(cl_kernel_work_group_info, CL_KERNEL_LOCAL_MEM_SIZE, cl_ulong) \
+    \
+    F(cl_command_queue_info, CL_QUEUE_CONTEXT, cl::Context) \
+    F(cl_command_queue_info, CL_QUEUE_DEVICE, cl::Device) \
+    F(cl_command_queue_info, CL_QUEUE_REFERENCE_COUNT, cl_uint) \
+    F(cl_command_queue_info, CL_QUEUE_PROPERTIES, cl_command_queue_properties)
+
+#if defined(CL_VERSION_1_1)
+#define __PARAM_NAME_INFO_1_1(F) \
+    F(cl_context_info, CL_CONTEXT_NUM_DEVICES, cl_uint)\
+    F(cl_device_info, CL_DEVICE_PREFERRED_VECTOR_WIDTH_HALF, cl_uint) \
+    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_CHAR, cl_uint) \
+    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_SHORT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_INT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_LONG, cl_uint) \
+    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_FLOAT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_DOUBLE, cl_uint) \
+    F(cl_device_info, CL_DEVICE_NATIVE_VECTOR_WIDTH_HALF, cl_uint) \
+    F(cl_device_info, CL_DEVICE_DOUBLE_FP_CONFIG, cl_device_fp_config) \
+    F(cl_device_info, CL_DEVICE_HALF_FP_CONFIG, cl_device_fp_config) \
+    F(cl_device_info, CL_DEVICE_HOST_UNIFIED_MEMORY, cl_bool) \
+    F(cl_device_info, CL_DEVICE_OPENCL_C_VERSION, STRING_CLASS) \
+    \
+    F(cl_mem_info, CL_MEM_ASSOCIATED_MEMOBJECT, cl::Memory) \
+    F(cl_mem_info, CL_MEM_OFFSET, ::size_t) \
+    \
+    F(cl_kernel_work_group_info, CL_KERNEL_PREFERRED_WORK_GROUP_SIZE_MULTIPLE, ::size_t) \
+    F(cl_kernel_work_group_info, CL_KERNEL_PRIVATE_MEM_SIZE, cl_ulong) \
+    \
+    F(cl_event_info, CL_EVENT_CONTEXT, cl::Context)
+#endif // CL_VERSION_1_1
+
+    
+#if defined(CL_VERSION_1_2)
+#define __PARAM_NAME_INFO_1_2(F) \
+    F(cl_image_info, CL_IMAGE_BUFFER, cl::Buffer) \
+    \
+    F(cl_program_info, CL_PROGRAM_NUM_KERNELS, ::size_t) \
+    F(cl_program_info, CL_PROGRAM_KERNEL_NAMES, STRING_CLASS) \
+    \
+    F(cl_program_build_info, CL_PROGRAM_BINARY_TYPE, cl_program_binary_type) \
+    \
+    F(cl_kernel_info, CL_KERNEL_ATTRIBUTES, STRING_CLASS) \
+    \
+    F(cl_kernel_arg_info, CL_KERNEL_ARG_ADDRESS_QUALIFIER, cl_kernel_arg_address_qualifier) \
+    F(cl_kernel_arg_info, CL_KERNEL_ARG_ACCESS_QUALIFIER, cl_kernel_arg_access_qualifier) \
+    F(cl_kernel_arg_info, CL_KERNEL_ARG_TYPE_NAME, STRING_CLASS) \
+    F(cl_kernel_arg_info, CL_KERNEL_ARG_NAME, STRING_CLASS) \
+    \
+    F(cl_device_info, CL_DEVICE_PARENT_DEVICE, cl_device_id) \
+    F(cl_device_info, CL_DEVICE_PARTITION_PROPERTIES, VECTOR_CLASS<cl_device_partition_property>) \
+    F(cl_device_info, CL_DEVICE_PARTITION_TYPE, VECTOR_CLASS<cl_device_partition_property>)  \
+    F(cl_device_info, CL_DEVICE_REFERENCE_COUNT, cl_uint) \
+    F(cl_device_info, CL_DEVICE_PREFERRED_INTEROP_USER_SYNC, ::size_t) \
+    F(cl_device_info, CL_DEVICE_PARTITION_AFFINITY_DOMAIN, cl_device_affinity_domain) \
+    F(cl_device_info, CL_DEVICE_BUILT_IN_KERNELS, STRING_CLASS)
+#endif // #if defined(CL_VERSION_1_2)
+
+#if defined(USE_CL_DEVICE_FISSION)
+#define __PARAM_NAME_DEVICE_FISSION(F) \
+    F(cl_device_info, CL_DEVICE_PARENT_DEVICE_EXT, cl_device_id) \
+    F(cl_device_info, CL_DEVICE_PARTITION_TYPES_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) \
+    F(cl_device_info, CL_DEVICE_AFFINITY_DOMAINS_EXT, VECTOR_CLASS<cl_device_partition_property_ext>) \
+    F(cl_device_info, CL_DEVICE_REFERENCE_COUNT_EXT , cl_uint) \
+    F(cl_device_info, CL_DEVICE_PARTITION_STYLE_EXT, VECTOR_CLASS<cl_device_partition_property_ext>)
+#endif // USE_CL_DEVICE_FISSION
+
+template <typename enum_type, cl_int Name>
+struct param_traits {};
+
+#define __CL_DECLARE_PARAM_TRAITS(token, param_name, T) \
+struct token;                                        \
+template<>                                           \
+struct param_traits<detail:: token,param_name>       \
+{                                                    \
+    enum { value = param_name };                     \
+    typedef T param_type;                            \
+};
+
+__PARAM_NAME_INFO_1_0(__CL_DECLARE_PARAM_TRAITS)
+#if defined(CL_VERSION_1_1)
+__PARAM_NAME_INFO_1_1(__CL_DECLARE_PARAM_TRAITS)
+#endif // CL_VERSION_1_1
+#if defined(CL_VERSION_1_2)
+__PARAM_NAME_INFO_1_2(__CL_DECLARE_PARAM_TRAITS)
+#endif // CL_VERSION_1_1
+
+#if defined(USE_CL_DEVICE_FISSION)
+__PARAM_NAME_DEVICE_FISSION(__CL_DECLARE_PARAM_TRAITS);
+#endif // USE_CL_DEVICE_FISSION
+
+#ifdef CL_PLATFORM_ICD_SUFFIX_KHR
+__CL_DECLARE_PARAM_TRAITS(cl_platform_info, CL_PLATFORM_ICD_SUFFIX_KHR, STRING_CLASS)
+#endif
+
+#ifdef CL_DEVICE_PROFILING_TIMER_OFFSET_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_PROFILING_TIMER_OFFSET_AMD, cl_ulong)
+#endif
+
+#ifdef CL_DEVICE_GLOBAL_FREE_MEMORY_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_FREE_MEMORY_AMD, VECTOR_CLASS< ::size_t>)
+#endif
+#ifdef CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_PER_COMPUTE_UNIT_AMD, cl_uint)
+#endif
+#ifdef CL_DEVICE_SIMD_WIDTH_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_WIDTH_AMD, cl_uint)
+#endif
+#ifdef CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_SIMD_INSTRUCTION_WIDTH_AMD, cl_uint)
+#endif
+#ifdef CL_DEVICE_WAVEFRONT_WIDTH_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WAVEFRONT_WIDTH_AMD, cl_uint)
+#endif
+#ifdef CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNELS_AMD, cl_uint)
+#endif
+#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANKS_AMD, cl_uint)
+#endif
+#ifdef CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GLOBAL_MEM_CHANNEL_BANK_WIDTH_AMD, cl_uint)
+#endif
+#ifdef CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_SIZE_PER_COMPUTE_UNIT_AMD, cl_uint)
+#endif
+#ifdef CL_DEVICE_LOCAL_MEM_BANKS_AMD
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_LOCAL_MEM_BANKS_AMD, cl_uint)
+#endif
+
+#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MAJOR_NV, cl_uint)
+#endif
+#ifdef CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_COMPUTE_CAPABILITY_MINOR_NV, cl_uint)
+#endif
+#ifdef CL_DEVICE_REGISTERS_PER_BLOCK_NV
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_REGISTERS_PER_BLOCK_NV, cl_uint)
+#endif
+#ifdef CL_DEVICE_WARP_SIZE_NV
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_WARP_SIZE_NV, cl_uint)
+#endif
+#ifdef CL_DEVICE_GPU_OVERLAP_NV
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_GPU_OVERLAP_NV, cl_bool)
+#endif
+#ifdef CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_KERNEL_EXEC_TIMEOUT_NV, cl_bool)
+#endif
+#ifdef CL_DEVICE_INTEGRATED_MEMORY_NV
+__CL_DECLARE_PARAM_TRAITS(cl_device_info, CL_DEVICE_INTEGRATED_MEMORY_NV, cl_bool)
+#endif
+
+// Convenience functions
+
+template <typename Func, typename T>
+inline cl_int
+getInfo(Func f, cl_uint name, T* param)
+{
+    return getInfoHelper(f, name, param, 0);
+}
+
+template <typename Func, typename Arg0>
+struct GetInfoFunctor0
+{
+    Func f_; const Arg0& arg0_;
+    cl_int operator ()(
+        cl_uint param, ::size_t size, void* value, ::size_t* size_ret)
+    { return f_(arg0_, param, size, value, size_ret); }
+};
+
+template <typename Func, typename Arg0, typename Arg1>
+struct GetInfoFunctor1
+{
+    Func f_; const Arg0& arg0_; const Arg1& arg1_;
+    cl_int operator ()(
+        cl_uint param, ::size_t size, void* value, ::size_t* size_ret)
+    { return f_(arg0_, arg1_, param, size, value, size_ret); }
+};
+
+template <typename Func, typename Arg0, typename T>
+inline cl_int
+getInfo(Func f, const Arg0& arg0, cl_uint name, T* param)
+{
+    GetInfoFunctor0<Func, Arg0> f0 = { f, arg0 };
+    return getInfoHelper(f0, name, param, 0);
+}
+
+template <typename Func, typename Arg0, typename Arg1, typename T>
+inline cl_int
+getInfo(Func f, const Arg0& arg0, const Arg1& arg1, cl_uint name, T* param)
+{
+    GetInfoFunctor1<Func, Arg0, Arg1> f0 = { f, arg0, arg1 };
+    return getInfoHelper(f0, name, param, 0);
+}
+
+template<typename T>
+struct ReferenceHandler
+{ };
+
+#if defined(CL_VERSION_1_2)
+/**
+ * OpenCL 1.2 devices do have retain/release.
+ */
+template <>
+struct ReferenceHandler<cl_device_id>
+{
+    /**
+     * Retain the device.
+     * \param device A valid device created using createSubDevices
+     * \return 
+     *   CL_SUCCESS if the function executed successfully.
+     *   CL_INVALID_DEVICE if device was not a valid subdevice
+     *   CL_OUT_OF_RESOURCES
+     *   CL_OUT_OF_HOST_MEMORY
+     */
+    static cl_int retain(cl_device_id device)
+    { return ::clRetainDevice(device); }
+    /**
+     * Retain the device.
+     * \param device A valid device created using createSubDevices
+     * \return 
+     *   CL_SUCCESS if the function executed successfully.
+     *   CL_INVALID_DEVICE if device was not a valid subdevice
+     *   CL_OUT_OF_RESOURCES
+     *   CL_OUT_OF_HOST_MEMORY
+     */
+    static cl_int release(cl_device_id device)
+    { return ::clReleaseDevice(device); }
+};
+#else // #if defined(CL_VERSION_1_2)
+/**
+ * OpenCL 1.1 devices do not have retain/release.
+ */
+template <>
+struct ReferenceHandler<cl_device_id>
+{
+    // cl_device_id does not have retain().
+    static cl_int retain(cl_device_id)
+    { return CL_SUCCESS; }
+    // cl_device_id does not have release().
+    static cl_int release(cl_device_id)
+    { return CL_SUCCESS; }
+};
+#endif // #if defined(CL_VERSION_1_2)
+
+template <>
+struct ReferenceHandler<cl_platform_id>
+{
+    // cl_platform_id does not have retain().
+    static cl_int retain(cl_platform_id)
+    { return CL_SUCCESS; }
+    // cl_platform_id does not have release().
+    static cl_int release(cl_platform_id)
+    { return CL_SUCCESS; }
+};
+
+template <>
+struct ReferenceHandler<cl_context>
+{
+    static cl_int retain(cl_context context)
+    { return ::clRetainContext(context); }
+    static cl_int release(cl_context context)
+    { return ::clReleaseContext(context); }
+};
+
+template <>
+struct ReferenceHandler<cl_command_queue>
+{
+    static cl_int retain(cl_command_queue queue)
+    { return ::clRetainCommandQueue(queue); }
+    static cl_int release(cl_command_queue queue)
+    { return ::clReleaseCommandQueue(queue); }
+};
+
+template <>
+struct ReferenceHandler<cl_mem>
+{
+    static cl_int retain(cl_mem memory)
+    { return ::clRetainMemObject(memory); }
+    static cl_int release(cl_mem memory)
+    { return ::clReleaseMemObject(memory); }
+};
+
+template <>
+struct ReferenceHandler<cl_sampler>
+{
+    static cl_int retain(cl_sampler sampler)
+    { return ::clRetainSampler(sampler); }
+    static cl_int release(cl_sampler sampler)
+    { return ::clReleaseSampler(sampler); }
+};
+
+template <>
+struct ReferenceHandler<cl_program>
+{
+    static cl_int retain(cl_program program)
+    { return ::clRetainProgram(program); }
+    static cl_int release(cl_program program)
+    { return ::clReleaseProgram(program); }
+};
+
+template <>
+struct ReferenceHandler<cl_kernel>
+{
+    static cl_int retain(cl_kernel kernel)
+    { return ::clRetainKernel(kernel); }
+    static cl_int release(cl_kernel kernel)
+    { return ::clReleaseKernel(kernel); }
+};
+
+template <>
+struct ReferenceHandler<cl_event>
+{
+    static cl_int retain(cl_event event)
+    { return ::clRetainEvent(event); }
+    static cl_int release(cl_event event)
+    { return ::clReleaseEvent(event); }
+};
+
+
+// Extracts version number with major in the upper 16 bits, minor in the lower 16
+static cl_uint getVersion(const char *versionInfo)
+{
+    int highVersion = 0;
+    int lowVersion = 0;
+    int index = 7;
+    while(versionInfo[index] != '.' ) {
+        highVersion *= 10;
+        highVersion += versionInfo[index]-'0';
+        ++index;
+    }
+    ++index;
+    while(versionInfo[index] != ' ' ) {
+        lowVersion *= 10;
+        lowVersion += versionInfo[index]-'0';
+        ++index;
+    }
+    return (highVersion << 16) | lowVersion;
+}
+
+static cl_uint getPlatformVersion(cl_platform_id platform)
+{
+    ::size_t size = 0;
+    clGetPlatformInfo(platform, CL_PLATFORM_VERSION, 0, NULL, &size);
+    char *versionInfo = (char *) alloca(size);
+    clGetPlatformInfo(platform, CL_PLATFORM_VERSION, size, &versionInfo[0], &size);
+    return getVersion(versionInfo);
+}
+
+static cl_uint getDevicePlatformVersion(cl_device_id device)
+{
+    cl_platform_id platform;
+    clGetDeviceInfo(device, CL_DEVICE_PLATFORM, sizeof(platform), &platform, NULL);
+    return getPlatformVersion(platform);
+}
+
+#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+static cl_uint getContextPlatformVersion(cl_context context)
+{
+    // The platform cannot be queried directly, so we first have to grab a
+    // device and obtain its context
+    ::size_t size = 0;
+    clGetContextInfo(context, CL_CONTEXT_DEVICES, 0, NULL, &size);
+    if (size == 0)
+        return 0;
+    cl_device_id *devices = (cl_device_id *) alloca(size);
+    clGetContextInfo(context, CL_CONTEXT_DEVICES, size, devices, NULL);
+    return getDevicePlatformVersion(devices[0]);
+}
+#endif // #if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+
+template <typename T>
+class Wrapper
+{
+public:
+    typedef T cl_type;
+
+protected:
+    cl_type object_;
+
+public:
+    Wrapper() : object_(NULL) { }
+
+    Wrapper(const cl_type &obj) : object_(obj) { }
+
+    ~Wrapper()
+    {
+        if (object_ != NULL) { release(); }
+    }
+
+    Wrapper(const Wrapper<cl_type>& rhs)
+    {
+        object_ = rhs.object_;
+        if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); }
+    }
+
+    Wrapper<cl_type>& operator = (const Wrapper<cl_type>& rhs)
+    {
+        if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); }
+        object_ = rhs.object_;
+        if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); }
+        return *this;
+    }
+
+    Wrapper<cl_type>& operator = (const cl_type &rhs)
+    {
+        if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); }
+        object_ = rhs;
+        return *this;
+    }
+
+    cl_type operator ()() const { return object_; }
+
+    cl_type& operator ()() { return object_; }
+
+protected:
+    template<typename Func, typename U>
+    friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type);
+
+    cl_int retain() const
+    {
+        return ReferenceHandler<cl_type>::retain(object_);
+    }
+
+    cl_int release() const
+    {
+        return ReferenceHandler<cl_type>::release(object_);
+    }
+};
+
+template <>
+class Wrapper<cl_device_id>
+{
+public:
+    typedef cl_device_id cl_type;
+
+protected:
+    cl_type object_;
+    bool referenceCountable_;
+
+    static bool isReferenceCountable(cl_device_id device)
+    {
+        bool retVal = false;
+        if (device != NULL) {
+            int version = getDevicePlatformVersion(device);
+            if(version > ((1 << 16) + 1)) {
+                retVal = true;
+            }
+        }
+        return retVal;
+    }
+
+public:
+    Wrapper() : object_(NULL), referenceCountable_(false) 
+    { 
+    }
+    
+    Wrapper(const cl_type &obj) : object_(obj), referenceCountable_(false) 
+    {
+        referenceCountable_ = isReferenceCountable(obj); 
+    }
+
+    ~Wrapper()
+    {
+        if (object_ != NULL) { release(); }
+    }
+    
+    Wrapper(const Wrapper<cl_type>& rhs)
+    {
+        object_ = rhs.object_;
+        referenceCountable_ = isReferenceCountable(object_); 
+        if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); }
+    }
+
+    Wrapper<cl_type>& operator = (const Wrapper<cl_type>& rhs)
+    {
+        if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); }
+        object_ = rhs.object_;
+        referenceCountable_ = rhs.referenceCountable_;
+        if (object_ != NULL) { detail::errHandler(retain(), __RETAIN_ERR); }
+        return *this;
+    }
+
+    Wrapper<cl_type>& operator = (const cl_type &rhs)
+    {
+        if (object_ != NULL) { detail::errHandler(release(), __RELEASE_ERR); }
+        object_ = rhs;
+        referenceCountable_ = isReferenceCountable(object_); 
+        return *this;
+    }
+
+    cl_type operator ()() const { return object_; }
+
+    cl_type& operator ()() { return object_; }
+
+protected:
+    template<typename Func, typename U>
+    friend inline cl_int getInfoHelper(Func, cl_uint, U*, int, typename U::cl_type);
+
+    template<typename Func, typename U>
+    friend inline cl_int getInfoHelper(Func, cl_uint, VECTOR_CLASS<U>*, int, typename U::cl_type);
+
+    cl_int retain() const
+    {
+        if( referenceCountable_ ) {
+            return ReferenceHandler<cl_type>::retain(object_);
+        }
+        else {
+            return CL_SUCCESS;
+        }
+    }
+
+    cl_int release() const
+    {
+        if( referenceCountable_ ) {
+            return ReferenceHandler<cl_type>::release(object_);
+        }
+        else {
+            return CL_SUCCESS;
+        }
+    }
+};
+
+} // namespace detail
+//! \endcond
+
+/*! \stuct ImageFormat
+ *  \brief Adds constructors and member functions for cl_image_format.
+ *
+ *  \see cl_image_format
+ */
+struct ImageFormat : public cl_image_format
+{
+    //! \brief Default constructor - performs no initialization.
+    ImageFormat(){}
+
+    //! \brief Initializing constructor.
+    ImageFormat(cl_channel_order order, cl_channel_type type)
+    {
+        image_channel_order = order;
+        image_channel_data_type = type;
+    }
+
+    //! \brief Assignment operator.
+    ImageFormat& operator = (const ImageFormat& rhs)
+    {
+        if (this != &rhs) {
+            this->image_channel_data_type = rhs.image_channel_data_type;
+            this->image_channel_order     = rhs.image_channel_order;
+        }
+        return *this;
+    }
+};
+
+/*! \brief Class interface for cl_device_id.
+ *
+ *  \note Copies of these objects are inexpensive, since they don't 'own'
+ *        any underlying resources or data structures.
+ *
+ *  \see cl_device_id
+ */
+class Device : public detail::Wrapper<cl_device_id>
+{
+public:
+    //! \brief Default constructor - initializes to NULL.
+    Device() : detail::Wrapper<cl_type>() { }
+
+    /*! \brief Copy constructor.
+     * 
+     *  This simply copies the device ID value, which is an inexpensive operation.
+     */
+    Device(const Device& device) : detail::Wrapper<cl_type>(device) { }
+
+    /*! \brief Constructor from cl_device_id.
+     * 
+     *  This simply copies the device ID value, which is an inexpensive operation.
+     */
+    Device(const cl_device_id &device) : detail::Wrapper<cl_type>(device) { }
+
+    /*! \brief Returns the first device on the default context.
+     *
+     *  \see Context::getDefault()
+     */
+    static Device getDefault(cl_int * err = NULL);
+
+    /*! \brief Assignment operator from Device.
+     * 
+     *  This simply copies the device ID value, which is an inexpensive operation.
+     */
+    Device& operator = (const Device& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment operator from cl_device_id.
+     * 
+     *  This simply copies the device ID value, which is an inexpensive operation.
+     */
+    Device& operator = (const cl_device_id& rhs)
+    {
+        detail::Wrapper<cl_type>::operator=(rhs);
+        return *this;
+    }
+
+    //! \brief Wrapper for clGetDeviceInfo().
+    template <typename T>
+    cl_int getInfo(cl_device_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetDeviceInfo, object_, name, param),
+            __GET_DEVICE_INFO_ERR);
+    }
+
+    //! \brief Wrapper for clGetDeviceInfo() that returns by value.
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_device_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_device_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    /**
+     * CL 1.2 version
+     */
+#if defined(CL_VERSION_1_2)
+    //! \brief Wrapper for clCreateSubDevicesEXT().
+    cl_int createSubDevices(
+        const cl_device_partition_property * properties,
+        VECTOR_CLASS<Device>* devices)
+    {
+        cl_uint n = 0;
+        cl_int err = clCreateSubDevices(object_, properties, 0, NULL, &n);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __CREATE_SUB_DEVICES);
+        }
+
+        cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
+        err = clCreateSubDevices(object_, properties, n, ids, NULL);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __CREATE_SUB_DEVICES);
+        }
+
+        devices->assign(&ids[0], &ids[n]);
+        return CL_SUCCESS;
+    }
+#endif // #if defined(CL_VERSION_1_2)
+
+/**
+ * CL 1.1 version that uses device fission.
+ */
+#if defined(CL_VERSION_1_1)
+#if defined(USE_CL_DEVICE_FISSION)
+    cl_int createSubDevices(
+        const cl_device_partition_property_ext * properties,
+        VECTOR_CLASS<Device>* devices)
+    {
+        typedef CL_API_ENTRY cl_int 
+            ( CL_API_CALL * PFN_clCreateSubDevicesEXT)(
+                cl_device_id /*in_device*/,
+                const cl_device_partition_property_ext * /* properties */,
+                cl_uint /*num_entries*/,
+                cl_device_id * /*out_devices*/,
+                cl_uint * /*num_devices*/ ) CL_EXT_SUFFIX__VERSION_1_1;
+
+        static PFN_clCreateSubDevicesEXT pfn_clCreateSubDevicesEXT = NULL;
+        __INIT_CL_EXT_FCN_PTR(clCreateSubDevicesEXT);
+
+        cl_uint n = 0;
+        cl_int err = pfn_clCreateSubDevicesEXT(object_, properties, 0, NULL, &n);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __CREATE_SUB_DEVICES);
+        }
+
+        cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
+        err = pfn_clCreateSubDevicesEXT(object_, properties, n, ids, NULL);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __CREATE_SUB_DEVICES);
+        }
+
+        devices->assign(&ids[0], &ids[n]);
+        return CL_SUCCESS;
+    }
+#endif // #if defined(USE_CL_DEVICE_FISSION)
+#endif // #if defined(CL_VERSION_1_1)
+};
+
+/*! \brief Class interface for cl_platform_id.
+ *
+ *  \note Copies of these objects are inexpensive, since they don't 'own'
+ *        any underlying resources or data structures.
+ *
+ *  \see cl_platform_id
+ */
+class Platform : public detail::Wrapper<cl_platform_id>
+{
+public:
+    //! \brief Default constructor - initializes to NULL.
+    Platform() : detail::Wrapper<cl_type>()  { }
+
+    /*! \brief Copy constructor.
+     * 
+     *  This simply copies the platform ID value, which is an inexpensive operation.
+     */
+    Platform(const Platform& platform) : detail::Wrapper<cl_type>(platform) { }
+
+    /*! \brief Constructor from cl_platform_id.
+     * 
+     *  This simply copies the platform ID value, which is an inexpensive operation.
+     */
+    Platform(const cl_platform_id &platform) : detail::Wrapper<cl_type>(platform) { }
+
+    /*! \brief Assignment operator from Platform.
+     * 
+     *  This simply copies the platform ID value, which is an inexpensive operation.
+     */
+    Platform& operator = (const Platform& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment operator from cl_platform_id.
+     * 
+     *  This simply copies the platform ID value, which is an inexpensive operation.
+     */
+    Platform& operator = (const cl_platform_id& rhs)
+    {
+        detail::Wrapper<cl_type>::operator=(rhs);
+        return *this;
+    }
+
+    //! \brief Wrapper for clGetPlatformInfo().
+    cl_int getInfo(cl_platform_info name, STRING_CLASS* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetPlatformInfo, object_, name, param),
+            __GET_PLATFORM_INFO_ERR);
+    }
+
+    //! \brief Wrapper for clGetPlatformInfo() that returns by value.
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_platform_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_platform_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    /*! \brief Gets a list of devices for this platform.
+     * 
+     *  Wraps clGetDeviceIDs().
+     */
+    cl_int getDevices(
+        cl_device_type type,
+        VECTOR_CLASS<Device>* devices) const
+    {
+        cl_uint n = 0;
+        if( devices == NULL ) {
+            return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR);
+        }
+        cl_int err = ::clGetDeviceIDs(object_, type, 0, NULL, &n);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
+        }
+
+        cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
+        err = ::clGetDeviceIDs(object_, type, n, ids, NULL);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
+        }
+
+        devices->assign(&ids[0], &ids[n]);
+        return CL_SUCCESS;
+    }
+
+#if defined(USE_DX_INTEROP)
+   /*! \brief Get the list of available D3D10 devices.
+     *
+     *  \param d3d_device_source.
+     *
+     *  \param d3d_object.
+     *
+     *  \param d3d_device_set.
+     *
+     *  \param devices returns a vector of OpenCL D3D10 devices found. The cl::Device
+     *  values returned in devices can be used to identify a specific OpenCL
+     *  device. If \a devices argument is NULL, this argument is ignored.
+     *
+     *  \return One of the following values:
+     *    - CL_SUCCESS if the function is executed successfully.
+     *
+     *  The application can query specific capabilities of the OpenCL device(s)
+     *  returned by cl::getDevices. This can be used by the application to
+     *  determine which device(s) to use.
+     *
+     * \note In the case that exceptions are enabled and a return value
+     * other than CL_SUCCESS is generated, then cl::Error exception is
+     * generated.
+     */
+    cl_int getDevices(
+        cl_d3d10_device_source_khr d3d_device_source,
+        void *                     d3d_object,
+        cl_d3d10_device_set_khr    d3d_device_set,
+        VECTOR_CLASS<Device>* devices) const
+    {
+        typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clGetDeviceIDsFromD3D10KHR)(
+            cl_platform_id platform, 
+            cl_d3d10_device_source_khr d3d_device_source, 
+            void * d3d_object,
+            cl_d3d10_device_set_khr d3d_device_set,
+            cl_uint num_entries,
+            cl_device_id * devices,
+            cl_uint* num_devices);
+
+        if( devices == NULL ) {
+            return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_DEVICE_IDS_ERR);
+        }
+
+        static PFN_clGetDeviceIDsFromD3D10KHR pfn_clGetDeviceIDsFromD3D10KHR = NULL;
+        __INIT_CL_EXT_FCN_PTR_PLATFORM(object_, clGetDeviceIDsFromD3D10KHR);
+
+        cl_uint n = 0;
+        cl_int err = pfn_clGetDeviceIDsFromD3D10KHR(
+            object_, 
+            d3d_device_source, 
+            d3d_object,
+            d3d_device_set, 
+            0, 
+            NULL, 
+            &n);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
+        }
+
+        cl_device_id* ids = (cl_device_id*) alloca(n * sizeof(cl_device_id));
+        err = pfn_clGetDeviceIDsFromD3D10KHR(
+            object_, 
+            d3d_device_source, 
+            d3d_object,
+            d3d_device_set,
+            n, 
+            ids, 
+            NULL);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_DEVICE_IDS_ERR);
+        }
+
+        devices->assign(&ids[0], &ids[n]);
+        return CL_SUCCESS;
+    }
+#endif
+
+    /*! \brief Gets a list of available platforms.
+     * 
+     *  Wraps clGetPlatformIDs().
+     */
+    static cl_int get(
+        VECTOR_CLASS<Platform>* platforms)
+    {
+        cl_uint n = 0;
+
+        if( platforms == NULL ) {
+            return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR);
+        }
+
+        cl_int err = ::clGetPlatformIDs(0, NULL, &n);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
+        }
+
+        cl_platform_id* ids = (cl_platform_id*) alloca(
+            n * sizeof(cl_platform_id));
+        err = ::clGetPlatformIDs(n, ids, NULL);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
+        }
+
+        platforms->assign(&ids[0], &ids[n]);
+        return CL_SUCCESS;
+    }
+
+    /*! \brief Gets the first available platform.
+     * 
+     *  Wraps clGetPlatformIDs(), returning the first result.
+     */
+    static cl_int get(
+        Platform * platform)
+    {
+        cl_uint n = 0;
+
+        if( platform == NULL ) {
+            return detail::errHandler(CL_INVALID_ARG_VALUE, __GET_PLATFORM_IDS_ERR);
+        }
+
+        cl_int err = ::clGetPlatformIDs(0, NULL, &n);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
+        }
+
+        cl_platform_id* ids = (cl_platform_id*) alloca(
+            n * sizeof(cl_platform_id));
+        err = ::clGetPlatformIDs(n, ids, NULL);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
+        }
+
+        *platform = ids[0];
+        return CL_SUCCESS;
+    }
+
+    /*! \brief Gets the first available platform, returning it by value.
+     * 
+     *  Wraps clGetPlatformIDs(), returning the first result.
+     */
+    static Platform get(
+        cl_int * errResult = NULL)
+    {
+        Platform platform;
+        cl_uint n = 0;
+        cl_int err = ::clGetPlatformIDs(0, NULL, &n);
+        if (err != CL_SUCCESS) {
+            detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
+            if (errResult != NULL) {
+                *errResult = err;
+            }
+        }
+
+        cl_platform_id* ids = (cl_platform_id*) alloca(
+            n * sizeof(cl_platform_id));
+        err = ::clGetPlatformIDs(n, ids, NULL);
+
+        if (err != CL_SUCCESS) {
+            detail::errHandler(err, __GET_PLATFORM_IDS_ERR);
+        }
+
+        if (errResult != NULL) {
+            *errResult = err;
+        }
+        
+        return ids[0];
+    }
+
+    static Platform getDefault( 
+        cl_int *errResult = NULL )
+    {
+        return get(errResult);
+    }
+
+    
+#if defined(CL_VERSION_1_2)
+    //! \brief Wrapper for clUnloadCompiler().
+    cl_int
+    unloadCompiler()
+    {
+        return ::clUnloadPlatformCompiler(object_);
+    }
+#endif // #if defined(CL_VERSION_1_2)
+}; // class Platform
+
+/**
+ * Deprecated APIs for 1.2
+ */
+#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2))
+/**
+ * Unload the OpenCL compiler.
+ * \note Deprecated for OpenCL 1.2. Use Platform::unloadCompiler instead.
+ */
+inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED cl_int
+UnloadCompiler() CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+inline cl_int
+UnloadCompiler()
+{
+    return ::clUnloadCompiler();
+}
+#endif // #if defined(CL_VERSION_1_1)
+
+/*! \brief Class interface for cl_context.
+ *
+ *  \note Copies of these objects are shallow, meaning that the copy will refer
+ *        to the same underlying cl_context as the original.  For details, see
+ *        clRetainContext() and clReleaseContext().
+ *
+ *  \see cl_context
+ */
+class Context 
+    : public detail::Wrapper<cl_context>
+{
+private:
+    static volatile int default_initialized_;
+    static Context default_;
+    static volatile cl_int default_error_;
+public:
+    /*! \brief Destructor.
+     *
+     *  This calls clReleaseContext() on the value held by this instance.
+     */
+    ~Context() { }
+
+    /*! \brief Constructs a context including a list of specified devices.
+     *
+     *  Wraps clCreateContext().
+     */
+    Context(
+        const VECTOR_CLASS<Device>& devices,
+        cl_context_properties* properties = NULL,
+        void (CL_CALLBACK * notifyFptr)(
+            const char *,
+            const void *,
+            ::size_t,
+            void *) = NULL,
+        void* data = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+
+        ::size_t numDevices = devices.size();
+        cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id));
+        for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) {
+            deviceIDs[deviceIndex] = (devices[deviceIndex])();
+        }
+
+        object_ = ::clCreateContext(
+            properties, (cl_uint) numDevices,
+            deviceIDs,
+            notifyFptr, data, &error);
+
+        detail::errHandler(error, __CREATE_CONTEXT_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    Context(
+        const Device& device,
+        cl_context_properties* properties = NULL,
+        void (CL_CALLBACK * notifyFptr)(
+            const char *,
+            const void *,
+            ::size_t,
+            void *) = NULL,
+        void* data = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+
+        cl_device_id deviceID = device();
+
+        object_ = ::clCreateContext(
+            properties, 1,
+            &deviceID,
+            notifyFptr, data, &error);
+
+        detail::errHandler(error, __CREATE_CONTEXT_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    /*! \brief Constructs a context including all or a subset of devices of a specified type.
+     *
+     *  Wraps clCreateContextFromType().
+     */
+    Context(
+        cl_device_type type,
+        cl_context_properties* properties = NULL,
+        void (CL_CALLBACK * notifyFptr)(
+            const char *,
+            const void *,
+            ::size_t,
+            void *) = NULL,
+        void* data = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+
+#if !defined(__APPLE__) || !defined(__MACOS)
+        cl_context_properties prop[4] = {CL_CONTEXT_PLATFORM, 0, 0, 0 };
+
+        if (properties == NULL) {
+            // Get a valid platform ID as we cannot send in a blank one
+            VECTOR_CLASS<Platform> platforms;
+            error = Platform::get(&platforms);
+            if (error != CL_SUCCESS) {
+                detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
+                if (err != NULL) {
+                    *err = error;
+                }
+                return;
+            }
+
+            // Check the platforms we found for a device of our specified type
+            cl_context_properties platform_id = 0;
+            for (unsigned int i = 0; i < platforms.size(); i++) {
+
+                VECTOR_CLASS<Device> devices;
+
+#if defined(__CL_ENABLE_EXCEPTIONS)
+                try {
+#endif
+
+                    error = platforms[i].getDevices(type, &devices);
+
+#if defined(__CL_ENABLE_EXCEPTIONS)
+                } catch (Error) {}
+    // Catch if exceptions are enabled as we don't want to exit if first platform has no devices of type
+    // We do error checking next anyway, and can throw there if needed
+#endif
+
+                // Only squash CL_SUCCESS and CL_DEVICE_NOT_FOUND
+                if (error != CL_SUCCESS && error != CL_DEVICE_NOT_FOUND) {
+                    detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
+                    if (err != NULL) {
+                        *err = error;
+                    }
+                }
+
+                if (devices.size() > 0) {
+                    platform_id = (cl_context_properties)platforms[i]();
+                    break;
+                }
+            }
+
+            if (platform_id == 0) {
+                detail::errHandler(CL_DEVICE_NOT_FOUND, __CREATE_CONTEXT_FROM_TYPE_ERR);
+                if (err != NULL) {
+                    *err = CL_DEVICE_NOT_FOUND;
+                }
+                return;
+            }
+
+            prop[1] = platform_id;
+            properties = &prop[0];
+        }
+#endif
+        object_ = ::clCreateContextFromType(
+            properties, type, notifyFptr, data, &error);
+
+        detail::errHandler(error, __CREATE_CONTEXT_FROM_TYPE_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    /*! \brief Returns a singleton context including all devices of CL_DEVICE_TYPE_DEFAULT.
+     *
+     *  \note All calls to this function return the same cl_context as the first.
+     */
+    static Context getDefault(cl_int * err = NULL) 
+    {
+        int state = detail::compare_exchange(
+            &default_initialized_, 
+            __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED);
+        
+        if (state & __DEFAULT_INITIALIZED) {
+            if (err != NULL) {
+                *err = default_error_;
+            }
+            return default_;
+        }
+
+        if (state & __DEFAULT_BEING_INITIALIZED) {
+              // Assume writes will propagate eventually...
+              while(default_initialized_ != __DEFAULT_INITIALIZED) {
+                  detail::fence();
+              }
+
+            if (err != NULL) {
+                *err = default_error_;
+            }
+            return default_;
+        }
+
+        cl_int error;
+        default_ = Context(
+            CL_DEVICE_TYPE_DEFAULT,
+            NULL,
+            NULL,
+            NULL,
+            &error);
+
+        detail::fence();
+
+        default_error_ = error;
+        // Assume writes will propagate eventually...
+        default_initialized_ = __DEFAULT_INITIALIZED;
+
+        detail::fence();
+
+        if (err != NULL) {
+            *err = default_error_;
+        }
+        return default_;
+
+    }
+
+    //! \brief Default constructor - initializes to NULL.
+    Context() : detail::Wrapper<cl_type>() { }
+
+    /*! \brief Copy constructor.
+     * 
+     *  This calls clRetainContext() on the parameter's cl_context.
+     */
+    Context(const Context& context) : detail::Wrapper<cl_type>(context) { }
+
+    /*! \brief Constructor from cl_context - takes ownership.
+     * 
+     *  This effectively transfers ownership of a refcount on the cl_context
+     *  into the new Context object.
+     */
+    __CL_EXPLICIT_CONSTRUCTORS Context(const cl_context& context) : detail::Wrapper<cl_type>(context) { }
+
+    /*! \brief Assignment operator from Context.
+     * 
+     *  This calls clRetainContext() on the parameter and clReleaseContext() on
+     *  the previous value held by this instance.
+     */
+    Context& operator = (const Context& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment operator from cl_context - takes ownership.
+     * 
+     *  This effectively transfers ownership of a refcount on the rhs and calls
+     *  clReleaseContext() on the value previously held by this instance.
+     */
+    Context& operator = (const cl_context& rhs)
+    {
+        detail::Wrapper<cl_type>::operator=(rhs);
+        return *this;
+    }
+
+    //! \brief Wrapper for clGetContextInfo().
+    template <typename T>
+    cl_int getInfo(cl_context_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetContextInfo, object_, name, param),
+            __GET_CONTEXT_INFO_ERR);
+    }
+
+    //! \brief Wrapper for clGetContextInfo() that returns by value.
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_context_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_context_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    /*! \brief Gets a list of supported image formats.
+     *  
+     *  Wraps clGetSupportedImageFormats().
+     */
+    cl_int getSupportedImageFormats(
+        cl_mem_flags flags,
+        cl_mem_object_type type,
+        VECTOR_CLASS<ImageFormat>* formats) const
+    {
+        cl_uint numEntries;
+        cl_int err = ::clGetSupportedImageFormats(
+           object_, 
+           flags,
+           type, 
+           0, 
+           NULL, 
+           &numEntries);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR);
+        }
+
+        ImageFormat* value = (ImageFormat*)
+            alloca(numEntries * sizeof(ImageFormat));
+        err = ::clGetSupportedImageFormats(
+            object_, 
+            flags, 
+            type, 
+            numEntries,
+            (cl_image_format*) value, 
+            NULL);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __GET_SUPPORTED_IMAGE_FORMATS_ERR);
+        }
+
+        formats->assign(&value[0], &value[numEntries]);
+        return CL_SUCCESS;
+    }
+};
+
+inline Device Device::getDefault(cl_int * err)
+{
+    cl_int error;
+    Device device;
+
+    Context context = Context::getDefault(&error);
+    detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+
+    if (error != CL_SUCCESS) {
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+    else {
+        device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
+        if (err != NULL) {
+            *err = CL_SUCCESS;
+        }
+    }
+
+    return device;
+}
+
+
+#ifdef _WIN32
+__declspec(selectany) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED;
+__declspec(selectany) Context Context::default_;
+__declspec(selectany) volatile cl_int Context::default_error_ = CL_SUCCESS;
+#else
+__attribute__((weak)) volatile int Context::default_initialized_ = __DEFAULT_NOT_INITIALIZED;
+__attribute__((weak)) Context Context::default_;
+__attribute__((weak)) volatile cl_int Context::default_error_ = CL_SUCCESS;
+#endif
+
+/*! \brief Class interface for cl_event.
+ *
+ *  \note Copies of these objects are shallow, meaning that the copy will refer
+ *        to the same underlying cl_event as the original.  For details, see
+ *        clRetainEvent() and clReleaseEvent().
+ *
+ *  \see cl_event
+ */
+class Event : public detail::Wrapper<cl_event>
+{
+public:
+    /*! \brief Destructor.
+     *
+     *  This calls clReleaseEvent() on the value held by this instance.
+     */
+    ~Event() { }
+ 
+    //! \brief Default constructor - initializes to NULL.
+    Event() : detail::Wrapper<cl_type>() { }
+
+    /*! \brief Copy constructor.
+     * 
+     *  This calls clRetainEvent() on the parameter's cl_event.
+     */
+    Event(const Event& event) : detail::Wrapper<cl_type>(event) { }
+
+    /*! \brief Constructor from cl_event - takes ownership.
+     * 
+     *  This effectively transfers ownership of a refcount on the cl_event
+     *  into the new Event object.
+     */
+    Event(const cl_event& event) : detail::Wrapper<cl_type>(event) { }
+
+    /*! \brief Assignment operator from cl_event - takes ownership.
+     *
+     *  This effectively transfers ownership of a refcount on the rhs and calls
+     *  clReleaseEvent() on the value previously held by this instance.
+     */
+    Event& operator = (const Event& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment operator from cl_event.
+     * 
+     *  This calls clRetainEvent() on the parameter and clReleaseEvent() on
+     *  the previous value held by this instance.
+     */
+    Event& operator = (const cl_event& rhs)
+    {
+        detail::Wrapper<cl_type>::operator=(rhs);
+        return *this;
+    }
+
+    //! \brief Wrapper for clGetEventInfo().
+    template <typename T>
+    cl_int getInfo(cl_event_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetEventInfo, object_, name, param),
+            __GET_EVENT_INFO_ERR);
+    }
+
+    //! \brief Wrapper for clGetEventInfo() that returns by value.
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_event_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_event_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    //! \brief Wrapper for clGetEventProfilingInfo().
+    template <typename T>
+    cl_int getProfilingInfo(cl_profiling_info name, T* param) const
+    {
+        return detail::errHandler(detail::getInfo(
+            &::clGetEventProfilingInfo, object_, name, param),
+            __GET_EVENT_PROFILE_INFO_ERR);
+    }
+
+    //! \brief Wrapper for clGetEventProfilingInfo() that returns by value.
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_profiling_info, name>::param_type
+    getProfilingInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_profiling_info, name>::param_type param;
+        cl_int result = getProfilingInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    /*! \brief Blocks the calling thread until this event completes.
+     * 
+     *  Wraps clWaitForEvents().
+     */
+    cl_int wait() const
+    {
+        return detail::errHandler(
+            ::clWaitForEvents(1, &object_),
+            __WAIT_FOR_EVENTS_ERR);
+    }
+
+#if defined(CL_VERSION_1_1)
+    /*! \brief Registers a user callback function for a specific command execution status.
+     *
+     *  Wraps clSetEventCallback().
+     */
+    cl_int setCallback(
+        cl_int type,
+        void (CL_CALLBACK * pfn_notify)(cl_event, cl_int, void *),		
+        void * user_data = NULL)
+    {
+        return detail::errHandler(
+            ::clSetEventCallback(
+                object_,
+                type,
+                pfn_notify,
+                user_data), 
+            __SET_EVENT_CALLBACK_ERR);
+    }
+#endif
+
+    /*! \brief Blocks the calling thread until every event specified is complete.
+     * 
+     *  Wraps clWaitForEvents().
+     */
+    static cl_int
+    waitForEvents(const VECTOR_CLASS<Event>& events)
+    {
+        return detail::errHandler(
+            ::clWaitForEvents(
+                (cl_uint) events.size(), (cl_event*)&events.front()),
+            __WAIT_FOR_EVENTS_ERR);
+    }
+};
+
+#if defined(CL_VERSION_1_1)
+/*! \brief Class interface for user events (a subset of cl_event's).
+ * 
+ *  See Event for details about copy semantics, etc.
+ */
+class UserEvent : public Event
+{
+public:
+    /*! \brief Constructs a user event on a given context.
+     *
+     *  Wraps clCreateUserEvent().
+     */
+    UserEvent(
+        const Context& context,
+        cl_int * err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateUserEvent(
+            context(),
+            &error);
+
+        detail::errHandler(error, __CREATE_USER_EVENT_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    //! \brief Default constructor - initializes to NULL.
+    UserEvent() : Event() { }
+
+    //! \brief Copy constructor - performs shallow copy.
+    UserEvent(const UserEvent& event) : Event(event) { }
+
+    //! \brief Assignment Operator - performs shallow copy.
+    UserEvent& operator = (const UserEvent& rhs)
+    {
+        if (this != &rhs) {
+            Event::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Sets the execution status of a user event object.
+     *
+     *  Wraps clSetUserEventStatus().
+     */
+    cl_int setStatus(cl_int status)
+    {
+        return detail::errHandler(
+            ::clSetUserEventStatus(object_,status), 
+            __SET_USER_EVENT_STATUS_ERR);
+    }
+};
+#endif
+
+/*! \brief Blocks the calling thread until every event specified is complete.
+ * 
+ *  Wraps clWaitForEvents().
+ */
+inline static cl_int
+WaitForEvents(const VECTOR_CLASS<Event>& events)
+{
+    return detail::errHandler(
+        ::clWaitForEvents(
+            (cl_uint) events.size(), (cl_event*)&events.front()),
+        __WAIT_FOR_EVENTS_ERR);
+}
+
+/*! \brief Class interface for cl_mem.
+ *
+ *  \note Copies of these objects are shallow, meaning that the copy will refer
+ *        to the same underlying cl_mem as the original.  For details, see
+ *        clRetainMemObject() and clReleaseMemObject().
+ *
+ *  \see cl_mem
+ */
+class Memory : public detail::Wrapper<cl_mem>
+{
+public:
+ 
+    /*! \brief Destructor.
+     *
+     *  This calls clReleaseMemObject() on the value held by this instance.
+     */
+    ~Memory() {}
+
+    //! \brief Default constructor - initializes to NULL.
+    Memory() : detail::Wrapper<cl_type>() { }
+
+    /*! \brief Copy constructor - performs shallow copy.
+     * 
+     *  This calls clRetainMemObject() on the parameter's cl_mem.
+     */
+    Memory(const Memory& memory) : detail::Wrapper<cl_type>(memory) { }
+
+    /*! \brief Constructor from cl_mem - takes ownership.
+     * 
+     *  This effectively transfers ownership of a refcount on the cl_mem
+     *  into the new Memory object.
+     */
+    __CL_EXPLICIT_CONSTRUCTORS Memory(const cl_mem& memory) : detail::Wrapper<cl_type>(memory) { }
+
+    /*! \brief Assignment operator from Memory.
+     * 
+     *  This calls clRetainMemObject() on the parameter and clReleaseMemObject()
+     *  on the previous value held by this instance.
+     */
+    Memory& operator = (const Memory& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment operator from cl_mem - takes ownership.
+     *
+     *  This effectively transfers ownership of a refcount on the rhs and calls
+     *  clReleaseMemObject() on the value previously held by this instance.
+     */
+    Memory& operator = (const cl_mem& rhs)
+    {
+        detail::Wrapper<cl_type>::operator=(rhs);
+        return *this;
+    }
+
+    //! \brief Wrapper for clGetMemObjectInfo().
+    template <typename T>
+    cl_int getInfo(cl_mem_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetMemObjectInfo, object_, name, param),
+            __GET_MEM_OBJECT_INFO_ERR);
+    }
+
+    //! \brief Wrapper for clGetMemObjectInfo() that returns by value.
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_mem_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_mem_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+#if defined(CL_VERSION_1_1)
+    /*! \brief Registers a callback function to be called when the memory object
+     *         is no longer needed.
+     *
+     *  Wraps clSetMemObjectDestructorCallback().
+     *
+     *  Repeated calls to this function, for a given cl_mem value, will append
+     *  to the list of functions called (in reverse order) when memory object's
+     *  resources are freed and the memory object is deleted.
+     *
+     *  \note
+     *  The registered callbacks are associated with the underlying cl_mem
+     *  value - not the Memory class instance.
+     */
+    cl_int setDestructorCallback(
+        void (CL_CALLBACK * pfn_notify)(cl_mem, void *),		
+        void * user_data = NULL)
+    {
+        return detail::errHandler(
+            ::clSetMemObjectDestructorCallback(
+                object_,
+                pfn_notify,
+                user_data), 
+            __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR);
+    }
+#endif
+
+};
+
+// Pre-declare copy functions
+class Buffer;
+template< typename IteratorType >
+cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer );
+template< typename IteratorType >
+cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator );
+template< typename IteratorType >
+cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer );
+template< typename IteratorType >
+cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator );
+
+
+/*! \brief Class interface for Buffer Memory Objects.
+ * 
+ *  See Memory for details about copy semantics, etc.
+ *
+ *  \see Memory
+ */
+class Buffer : public Memory
+{
+public:
+
+    /*! \brief Constructs a Buffer in a specified context.
+     *
+     *  Wraps clCreateBuffer().
+     *
+     *  \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was
+     *                  specified.  Note alignment & exclusivity requirements.
+     */
+    Buffer(
+        const Context& context,
+        cl_mem_flags flags,
+        ::size_t size,
+        void* host_ptr = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error);
+
+        detail::errHandler(error, __CREATE_BUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    /*! \brief Constructs a Buffer in the default context.
+     *
+     *  Wraps clCreateBuffer().
+     *
+     *  \param host_ptr Storage to be used if the CL_MEM_USE_HOST_PTR flag was
+     *                  specified.  Note alignment & exclusivity requirements.
+     *
+     *  \see Context::getDefault()
+     */
+    Buffer(
+         cl_mem_flags flags,
+        ::size_t size,
+        void* host_ptr = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+
+        Context context = Context::getDefault(err);
+
+        object_ = ::clCreateBuffer(context(), flags, size, host_ptr, &error);
+
+        detail::errHandler(error, __CREATE_BUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    /*!
+     * \brief Construct a Buffer from a host container via iterators.
+     * IteratorType must be random access.
+     * If useHostPtr is specified iterators must represent contiguous data.
+     */
+    template< typename IteratorType >
+    Buffer(
+        IteratorType startIterator,
+        IteratorType endIterator,
+        bool readOnly,
+        bool useHostPtr = false,
+        cl_int* err = NULL)
+    {
+        typedef typename std::iterator_traits<IteratorType>::value_type DataType;
+        cl_int error;
+
+        cl_mem_flags flags = 0;
+        if( readOnly ) {
+            flags |= CL_MEM_READ_ONLY;
+        }
+        else {
+            flags |= CL_MEM_READ_WRITE;
+        }
+        if( useHostPtr ) {
+            flags |= CL_MEM_USE_HOST_PTR;
+        }
+        
+        ::size_t size = sizeof(DataType)*(endIterator - startIterator);
+
+        Context context = Context::getDefault(err);
+
+        if( useHostPtr ) {
+            object_ = ::clCreateBuffer(context(), flags, size, static_cast<DataType*>(&*startIterator), &error);
+        } else {
+            object_ = ::clCreateBuffer(context(), flags, size, 0, &error);
+        }
+
+        detail::errHandler(error, __CREATE_BUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+
+        if( !useHostPtr ) {
+            error = cl::copy(startIterator, endIterator, *this);
+            detail::errHandler(error, __CREATE_BUFFER_ERR);
+            if (err != NULL) {
+                *err = error;
+            }
+        }
+    }
+
+    /*!
+     * \brief Construct a Buffer from a host container via iterators using a specified context.
+     * IteratorType must be random access.
+     * If useHostPtr is specified iterators must represent contiguous data.
+     */
+    template< typename IteratorType >
+    Buffer(const Context &context, IteratorType startIterator, IteratorType endIterator,
+        bool readOnly, bool useHostPtr = false, cl_int* err = NULL);
+
+    //! \brief Default constructor - initializes to NULL.
+    Buffer() : Memory() { }
+
+    /*! \brief Copy constructor - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Buffer(const Buffer& buffer) : Memory(buffer) { }
+
+    /*! \brief Constructor from cl_mem - takes ownership.
+     *
+     *  See Memory for further details.
+     */
+    __CL_EXPLICIT_CONSTRUCTORS Buffer(const cl_mem& buffer) : Memory(buffer) { }
+
+    /*! \brief Assignment from Buffer - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Buffer& operator = (const Buffer& rhs)
+    {
+        if (this != &rhs) {
+            Memory::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment from cl_mem - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Buffer& operator = (const cl_mem& rhs)
+    {
+        Memory::operator=(rhs);
+        return *this;
+    }
+
+#if defined(CL_VERSION_1_1)
+    /*! \brief Creates a new buffer object from this.
+     *
+     *  Wraps clCreateSubBuffer().
+     */
+    Buffer createSubBuffer(
+        cl_mem_flags flags,
+        cl_buffer_create_type buffer_create_type,
+        const void * buffer_create_info,
+        cl_int * err = NULL)
+    {
+        Buffer result;
+        cl_int error;
+        result.object_ = ::clCreateSubBuffer(
+            object_, 
+            flags, 
+            buffer_create_type, 
+            buffer_create_info, 
+            &error);
+
+        detail::errHandler(error, __CREATE_SUBBUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+
+        return result;
+    }		
+#endif
+};
+
+#if defined (USE_DX_INTEROP)
+/*! \brief Class interface for creating OpenCL buffers from ID3D10Buffer's.
+ *
+ *  This is provided to facilitate interoperability with Direct3D.
+ * 
+ *  See Memory for details about copy semantics, etc.
+ *
+ *  \see Memory
+ */
+class BufferD3D10 : public Buffer
+{
+public:
+    typedef CL_API_ENTRY cl_mem (CL_API_CALL *PFN_clCreateFromD3D10BufferKHR)(
+    cl_context context, cl_mem_flags flags, ID3D10Buffer*  buffer,
+    cl_int* errcode_ret);
+
+    /*! \brief Constructs a BufferD3D10, in a specified context, from a
+     *         given ID3D10Buffer.
+     *
+     *  Wraps clCreateFromD3D10BufferKHR().
+     */
+    BufferD3D10(
+        const Context& context,
+        cl_mem_flags flags,
+        ID3D10Buffer* bufobj,
+        cl_int * err = NULL)
+    {
+        static PFN_clCreateFromD3D10BufferKHR pfn_clCreateFromD3D10BufferKHR = NULL;
+
+#if defined(CL_VERSION_1_2)
+        vector<cl_context_properties> props = context.getInfo<CL_CONTEXT_PROPERTIES>();
+        cl_platform platform = -1;
+        for( int i = 0; i < props.size(); ++i ) {
+            if( props[i] == CL_CONTEXT_PLATFORM ) {
+                platform = props[i+1];
+            }
+        }
+        __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clCreateFromD3D10BufferKHR);
+#endif
+#if defined(CL_VERSION_1_1)
+        __INIT_CL_EXT_FCN_PTR(clCreateFromD3D10BufferKHR);
+#endif
+
+        cl_int error;
+        object_ = pfn_clCreateFromD3D10BufferKHR(
+            context(),
+            flags,
+            bufobj,
+            &error);
+
+        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    //! \brief Default constructor - initializes to NULL.
+    BufferD3D10() : Buffer() { }
+
+    /*! \brief Copy constructor - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    BufferD3D10(const BufferD3D10& buffer) : Buffer(buffer) { }
+
+    /*! \brief Constructor from cl_mem - takes ownership.
+     *
+     *  See Memory for further details.
+     */
+    __CL_EXPLICIT_CONSTRUCTORS BufferD3D10(const cl_mem& buffer) : Buffer(buffer) { }
+
+    /*! \brief Assignment from BufferD3D10 - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    BufferD3D10& operator = (const BufferD3D10& rhs)
+    {
+        if (this != &rhs) {
+            Buffer::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment from cl_mem - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    BufferD3D10& operator = (const cl_mem& rhs)
+    {
+        Buffer::operator=(rhs);
+        return *this;
+    }
+};
+#endif
+
+/*! \brief Class interface for GL Buffer Memory Objects.
+ *
+ *  This is provided to facilitate interoperability with OpenGL.
+ * 
+ *  See Memory for details about copy semantics, etc.
+ * 
+ *  \see Memory
+ */
+class BufferGL : public Buffer
+{
+public:
+    /*! \brief Constructs a BufferGL in a specified context, from a given
+     *         GL buffer.
+     *
+     *  Wraps clCreateFromGLBuffer().
+     */
+    BufferGL(
+        const Context& context,
+        cl_mem_flags flags,
+        GLuint bufobj,
+        cl_int * err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateFromGLBuffer(
+            context(),
+            flags,
+            bufobj,
+            &error);
+
+        detail::errHandler(error, __CREATE_GL_BUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    //! \brief Default constructor - initializes to NULL.
+    BufferGL() : Buffer() { }
+
+    /*! \brief Copy constructor - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    BufferGL(const BufferGL& buffer) : Buffer(buffer) { }
+
+    /*! \brief Constructor from cl_mem - takes ownership.
+     *
+     *  See Memory for further details.
+     */
+    __CL_EXPLICIT_CONSTRUCTORS BufferGL(const cl_mem& buffer) : Buffer(buffer) { }
+
+    /*! \brief Assignment from BufferGL - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    BufferGL& operator = (const BufferGL& rhs)
+    {
+        if (this != &rhs) {
+            Buffer::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment from cl_mem - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    BufferGL& operator = (const cl_mem& rhs)
+    {
+        Buffer::operator=(rhs);
+        return *this;
+    }
+
+    //! \brief Wrapper for clGetGLObjectInfo().
+    cl_int getObjectInfo(
+        cl_gl_object_type *type,
+        GLuint * gl_object_name)
+    {
+        return detail::errHandler(
+            ::clGetGLObjectInfo(object_,type,gl_object_name),
+            __GET_GL_OBJECT_INFO_ERR);
+    }
+};
+
+/*! \brief Class interface for GL Render Buffer Memory Objects.
+ *
+ *  This is provided to facilitate interoperability with OpenGL.
+ * 
+ *  See Memory for details about copy semantics, etc.
+ * 
+ *  \see Memory
+ */
+class BufferRenderGL : public Buffer
+{
+public:
+    /*! \brief Constructs a BufferRenderGL in a specified context, from a given
+     *         GL Renderbuffer.
+     *
+     *  Wraps clCreateFromGLRenderbuffer().
+     */
+    BufferRenderGL(
+        const Context& context,
+        cl_mem_flags flags,
+        GLuint bufobj,
+        cl_int * err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateFromGLRenderbuffer(
+            context(),
+            flags,
+            bufobj,
+            &error);
+
+        detail::errHandler(error, __CREATE_GL_RENDER_BUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    //! \brief Default constructor - initializes to NULL.
+    BufferRenderGL() : Buffer() { }
+
+    /*! \brief Copy constructor - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    BufferRenderGL(const BufferGL& buffer) : Buffer(buffer) { }
+
+    /*! \brief Constructor from cl_mem - takes ownership.
+     *
+     *  See Memory for further details.
+     */
+    __CL_EXPLICIT_CONSTRUCTORS BufferRenderGL(const cl_mem& buffer) : Buffer(buffer) { }
+
+    /*! \brief Assignment from BufferGL - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    BufferRenderGL& operator = (const BufferRenderGL& rhs)
+    {
+        if (this != &rhs) {
+            Buffer::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment from cl_mem - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    BufferRenderGL& operator = (const cl_mem& rhs)
+    {
+        Buffer::operator=(rhs);
+        return *this;
+    }
+
+    //! \brief Wrapper for clGetGLObjectInfo().
+    cl_int getObjectInfo(
+        cl_gl_object_type *type,
+        GLuint * gl_object_name)
+    {
+        return detail::errHandler(
+            ::clGetGLObjectInfo(object_,type,gl_object_name),
+            __GET_GL_OBJECT_INFO_ERR);
+    }
+};
+
+/*! \brief C++ base class for Image Memory objects.
+ *
+ *  See Memory for details about copy semantics, etc.
+ * 
+ *  \see Memory
+ */
+class Image : public Memory
+{
+protected:
+    //! \brief Default constructor - initializes to NULL.
+    Image() : Memory() { }
+
+    /*! \brief Copy constructor - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image(const Image& image) : Memory(image) { }
+
+    /*! \brief Constructor from cl_mem - takes ownership.
+     *
+     *  See Memory for further details.
+     */
+    __CL_EXPLICIT_CONSTRUCTORS Image(const cl_mem& image) : Memory(image) { }
+
+    /*! \brief Assignment from Image - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image& operator = (const Image& rhs)
+    {
+        if (this != &rhs) {
+            Memory::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment from cl_mem - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image& operator = (const cl_mem& rhs)
+    {
+        Memory::operator=(rhs);
+        return *this;
+    }
+
+public:
+    //! \brief Wrapper for clGetImageInfo().
+    template <typename T>
+    cl_int getImageInfo(cl_image_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetImageInfo, object_, name, param),
+            __GET_IMAGE_INFO_ERR);
+    }
+    
+    //! \brief Wrapper for clGetImageInfo() that returns by value.
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_image_info, name>::param_type
+    getImageInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_image_info, name>::param_type param;
+        cl_int result = getImageInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+};
+
+#if defined(CL_VERSION_1_2)
+/*! \brief Class interface for 1D Image Memory objects.
+ *
+ *  See Memory for details about copy semantics, etc.
+ * 
+ *  \see Memory
+ */
+class Image1D : public Image
+{
+public:
+    /*! \brief Constructs a 1D Image in a specified context.
+     *
+     *  Wraps clCreateImage().
+     */
+    Image1D(
+        const Context& context,
+        cl_mem_flags flags,
+        ImageFormat format,
+        ::size_t width,
+        void* host_ptr = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        cl_image_desc desc =
+        {
+            CL_MEM_OBJECT_IMAGE1D,
+            width,
+            0, 0, 0, 0, 0, 0, 0, 0
+        };
+        object_ = ::clCreateImage(
+            context(), 
+            flags, 
+            &format, 
+            &desc, 
+            host_ptr, 
+            &error);
+
+        detail::errHandler(error, __CREATE_IMAGE_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    //! \brief Default constructor - initializes to NULL.
+    Image1D() { }
+
+    /*! \brief Copy constructor - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image1D(const Image1D& image1D) : Image(image1D) { }
+
+    /*! \brief Constructor from cl_mem - takes ownership.
+     *
+     *  See Memory for further details.
+     */
+    __CL_EXPLICIT_CONSTRUCTORS Image1D(const cl_mem& image1D) : Image(image1D) { }
+
+    /*! \brief Assignment from Image1D - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image1D& operator = (const Image1D& rhs)
+    {
+        if (this != &rhs) {
+            Image::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment from cl_mem - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image1D& operator = (const cl_mem& rhs)
+    {
+        Image::operator=(rhs);
+        return *this;
+    }
+};
+
+/*! \class Image1DBuffer
+ * \brief Image interface for 1D buffer images.
+ */
+class Image1DBuffer : public Image
+{
+public:
+    Image1DBuffer(
+        const Context& context,
+        cl_mem_flags flags,
+        ImageFormat format,
+        ::size_t width,
+        const Buffer &buffer,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        cl_image_desc desc =
+        {
+            CL_MEM_OBJECT_IMAGE1D_BUFFER,
+            width,
+            0, 0, 0, 0, 0, 0, 0,
+            buffer()
+        };
+        object_ = ::clCreateImage(
+            context(), 
+            flags, 
+            &format, 
+            &desc, 
+            NULL, 
+            &error);
+
+        detail::errHandler(error, __CREATE_IMAGE_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    Image1DBuffer() { }
+
+    Image1DBuffer(const Image1DBuffer& image1D) : Image(image1D) { }
+
+    __CL_EXPLICIT_CONSTRUCTORS Image1DBuffer(const cl_mem& image1D) : Image(image1D) { }
+
+    Image1DBuffer& operator = (const Image1DBuffer& rhs)
+    {
+        if (this != &rhs) {
+            Image::operator=(rhs);
+        }
+        return *this;
+    }
+
+    Image1DBuffer& operator = (const cl_mem& rhs)
+    {
+        Image::operator=(rhs);
+        return *this;
+    }
+};
+
+/*! \class Image1DArray
+ * \brief Image interface for arrays of 1D images.
+ */
+class Image1DArray : public Image
+{
+public:
+    Image1DArray(
+        const Context& context,
+        cl_mem_flags flags,
+        ImageFormat format,
+        ::size_t arraySize,
+        ::size_t width,
+        ::size_t rowPitch,
+        void* host_ptr = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        cl_image_desc desc =
+        {
+            CL_MEM_OBJECT_IMAGE1D_ARRAY,
+            width,
+            0, 0,  // height, depth (unused)
+            arraySize,
+            rowPitch,
+            0, 0, 0, 0
+        };
+        object_ = ::clCreateImage(
+            context(), 
+            flags, 
+            &format, 
+            &desc, 
+            host_ptr, 
+            &error);
+
+        detail::errHandler(error, __CREATE_IMAGE_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    Image1DArray() { }
+
+    Image1DArray(const Image1DArray& imageArray) : Image(imageArray) { }
+
+    __CL_EXPLICIT_CONSTRUCTORS Image1DArray(const cl_mem& imageArray) : Image(imageArray) { }
+
+    Image1DArray& operator = (const Image1DArray& rhs)
+    {
+        if (this != &rhs) {
+            Image::operator=(rhs);
+        }
+        return *this;
+    }
+
+    Image1DArray& operator = (const cl_mem& rhs)
+    {
+        Image::operator=(rhs);
+        return *this;
+    }
+};
+#endif // #if defined(CL_VERSION_1_2)
+
+
+/*! \brief Class interface for 2D Image Memory objects.
+ *
+ *  See Memory for details about copy semantics, etc.
+ * 
+ *  \see Memory
+ */
+class Image2D : public Image
+{
+public:
+    /*! \brief Constructs a 1D Image in a specified context.
+     *
+     *  Wraps clCreateImage().
+     */
+    Image2D(
+        const Context& context,
+        cl_mem_flags flags,
+        ImageFormat format,
+        ::size_t width,
+        ::size_t height,
+        ::size_t row_pitch = 0,
+        void* host_ptr = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        bool useCreateImage;
+
+#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+        // Run-time decision based on the actual platform
+        {
+            cl_uint version = detail::getContextPlatformVersion(context());
+            useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above
+        }
+#elif defined(CL_VERSION_1_2)
+        useCreateImage = true;
+#else
+        useCreateImage = false;
+#endif
+
+#if defined(CL_VERSION_1_2)
+        if (useCreateImage)
+        {
+            cl_image_desc desc =
+            {
+                CL_MEM_OBJECT_IMAGE2D,
+                width,
+                height,
+                0, 0, // depth, array size (unused)
+                row_pitch,
+                0, 0, 0, 0
+            };
+            object_ = ::clCreateImage(
+                context(),
+                flags,
+                &format,
+                &desc,
+                host_ptr,
+                &error);
+
+            detail::errHandler(error, __CREATE_IMAGE_ERR);
+            if (err != NULL) {
+                *err = error;
+            }
+        }
+#endif // #if defined(CL_VERSION_1_2)
+#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+        if (!useCreateImage)
+        {
+            object_ = ::clCreateImage2D(
+                context(), flags,&format, width, height, row_pitch, host_ptr, &error);
+
+            detail::errHandler(error, __CREATE_IMAGE2D_ERR);
+            if (err != NULL) {
+                *err = error;
+            }
+        }
+#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+    }
+
+    //! \brief Default constructor - initializes to NULL.
+    Image2D() { }
+
+    /*! \brief Copy constructor - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image2D(const Image2D& image2D) : Image(image2D) { }
+
+    /*! \brief Constructor from cl_mem - takes ownership.
+     *
+     *  See Memory for further details.
+     */
+    __CL_EXPLICIT_CONSTRUCTORS Image2D(const cl_mem& image2D) : Image(image2D) { }
+
+    /*! \brief Assignment from Image2D - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image2D& operator = (const Image2D& rhs)
+    {
+        if (this != &rhs) {
+            Image::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment from cl_mem - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image2D& operator = (const cl_mem& rhs)
+    {
+        Image::operator=(rhs);
+        return *this;
+    }
+};
+
+
+#if !defined(CL_VERSION_1_2)
+/*! \brief Class interface for GL 2D Image Memory objects.
+ *
+ *  This is provided to facilitate interoperability with OpenGL.
+ * 
+ *  See Memory for details about copy semantics, etc.
+ * 
+ *  \see Memory
+ *  \note Deprecated for OpenCL 1.2. Please use ImageGL instead.
+ */
+class CL_EXT_PREFIX__VERSION_1_1_DEPRECATED Image2DGL CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED : public Image2D
+{
+public:
+    /*! \brief Constructs an Image2DGL in a specified context, from a given
+     *         GL Texture.
+     *
+     *  Wraps clCreateFromGLTexture2D().
+     */
+    Image2DGL(
+        const Context& context,
+        cl_mem_flags flags,
+        GLenum target,
+        GLint  miplevel,
+        GLuint texobj,
+        cl_int * err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateFromGLTexture2D(
+            context(),
+            flags,
+            target,
+            miplevel,
+            texobj,
+            &error);
+
+        detail::errHandler(error, __CREATE_GL_TEXTURE_2D_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+
+    }
+    
+    //! \brief Default constructor - initializes to NULL.
+    Image2DGL() : Image2D() { }
+
+    /*! \brief Copy constructor - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image2DGL(const Image2DGL& image) : Image2D(image) { }
+
+    /*! \brief Constructor from cl_mem - takes ownership.
+     *
+     *  See Memory for further details.
+     */
+    __CL_EXPLICIT_CONSTRUCTORS Image2DGL(const cl_mem& image) : Image2D(image) { }
+
+    /*! \brief Assignment from Image2DGL - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image2DGL& operator = (const Image2DGL& rhs)
+    {
+        if (this != &rhs) {
+            Image2D::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment from cl_mem - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image2DGL& operator = (const cl_mem& rhs)
+    {
+        Image2D::operator=(rhs);
+        return *this;
+    }
+};
+#endif // #if !defined(CL_VERSION_1_2)
+
+#if defined(CL_VERSION_1_2)
+/*! \class Image2DArray
+ * \brief Image interface for arrays of 2D images.
+ */
+class Image2DArray : public Image
+{
+public:
+    Image2DArray(
+        const Context& context,
+        cl_mem_flags flags,
+        ImageFormat format,
+        ::size_t arraySize,
+        ::size_t width,
+        ::size_t height,
+        ::size_t rowPitch,
+        ::size_t slicePitch,
+        void* host_ptr = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        cl_image_desc desc =
+        {
+            CL_MEM_OBJECT_IMAGE2D_ARRAY,
+            width,
+            height,
+            0,       // depth (unused)
+            arraySize,
+            rowPitch,
+            slicePitch,
+            0, 0, 0
+        };
+        object_ = ::clCreateImage(
+            context(), 
+            flags, 
+            &format, 
+            &desc, 
+            host_ptr, 
+            &error);
+
+        detail::errHandler(error, __CREATE_IMAGE_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    Image2DArray() { }
+
+    Image2DArray(const Image2DArray& imageArray) : Image(imageArray) { }
+
+    __CL_EXPLICIT_CONSTRUCTORS Image2DArray(const cl_mem& imageArray) : Image(imageArray) { }
+
+    Image2DArray& operator = (const Image2DArray& rhs)
+    {
+        if (this != &rhs) {
+            Image::operator=(rhs);
+        }
+        return *this;
+    }
+
+    Image2DArray& operator = (const cl_mem& rhs)
+    {
+        Image::operator=(rhs);
+        return *this;
+    }
+};
+#endif // #if defined(CL_VERSION_1_2)
+
+/*! \brief Class interface for 3D Image Memory objects.
+ *
+ *  See Memory for details about copy semantics, etc.
+ * 
+ *  \see Memory
+ */
+class Image3D : public Image
+{
+public:
+    /*! \brief Constructs a 3D Image in a specified context.
+     *
+     *  Wraps clCreateImage().
+     */
+    Image3D(
+        const Context& context,
+        cl_mem_flags flags,
+        ImageFormat format,
+        ::size_t width,
+        ::size_t height,
+        ::size_t depth,
+        ::size_t row_pitch = 0,
+        ::size_t slice_pitch = 0,
+        void* host_ptr = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        bool useCreateImage;
+
+#if defined(CL_VERSION_1_2) && defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+        // Run-time decision based on the actual platform
+        {
+            cl_uint version = detail::getContextPlatformVersion(context());
+            useCreateImage = (version >= 0x10002); // OpenCL 1.2 or above
+        }
+#elif defined(CL_VERSION_1_2)
+        useCreateImage = true;
+#else
+        useCreateImage = false;
+#endif
+
+#if defined(CL_VERSION_1_2)
+        if (useCreateImage)
+        {
+            cl_image_desc desc =
+            {
+                CL_MEM_OBJECT_IMAGE3D,
+                width,
+                height,
+                depth,
+                0,      // array size (unused)
+                row_pitch,
+                slice_pitch,
+                0, 0, 0
+            };
+            object_ = ::clCreateImage(
+                context(), 
+                flags, 
+                &format, 
+                &desc, 
+                host_ptr, 
+                &error);
+
+            detail::errHandler(error, __CREATE_IMAGE_ERR);
+            if (err != NULL) {
+                *err = error;
+            }
+        }
+#endif  // #if defined(CL_VERSION_1_2)
+#if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+        if (!useCreateImage)
+        {
+            object_ = ::clCreateImage3D(
+                context(), flags, &format, width, height, depth, row_pitch,
+                slice_pitch, host_ptr, &error);
+
+            detail::errHandler(error, __CREATE_IMAGE3D_ERR);
+            if (err != NULL) {
+                *err = error;
+            }
+        }
+#endif // #if !defined(CL_VERSION_1_2) || defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS)
+    }
+
+    //! \brief Default constructor - initializes to NULL.
+    Image3D() { }
+
+    /*! \brief Copy constructor - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image3D(const Image3D& image3D) : Image(image3D) { }
+
+    /*! \brief Constructor from cl_mem - takes ownership.
+     *
+     *  See Memory for further details.
+     */
+    __CL_EXPLICIT_CONSTRUCTORS Image3D(const cl_mem& image3D) : Image(image3D) { }
+
+    /*! \brief Assignment from Image3D - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image3D& operator = (const Image3D& rhs)
+    {
+        if (this != &rhs) {
+            Image::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment from cl_mem - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image3D& operator = (const cl_mem& rhs)
+    {
+        Image::operator=(rhs);
+        return *this;
+    }
+};
+
+#if !defined(CL_VERSION_1_2)
+/*! \brief Class interface for GL 3D Image Memory objects.
+ *
+ *  This is provided to facilitate interoperability with OpenGL.
+ * 
+ *  See Memory for details about copy semantics, etc.
+ * 
+ *  \see Memory
+ */
+class Image3DGL : public Image3D
+{
+public:
+    /*! \brief Constructs an Image3DGL in a specified context, from a given
+     *         GL Texture.
+     *
+     *  Wraps clCreateFromGLTexture3D().
+     */
+    Image3DGL(
+        const Context& context,
+        cl_mem_flags flags,
+        GLenum target,
+        GLint  miplevel,
+        GLuint texobj,
+        cl_int * err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateFromGLTexture3D(
+            context(),
+            flags,
+            target,
+            miplevel,
+            texobj,
+            &error);
+
+        detail::errHandler(error, __CREATE_GL_TEXTURE_3D_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    //! \brief Default constructor - initializes to NULL.
+    Image3DGL() : Image3D() { }
+
+    /*! \brief Copy constructor - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image3DGL(const Image3DGL& image) : Image3D(image) { }
+
+    /*! \brief Constructor from cl_mem - takes ownership.
+     *
+     *  See Memory for further details.
+     */
+    __CL_EXPLICIT_CONSTRUCTORS Image3DGL(const cl_mem& image) : Image3D(image) { }
+
+    /*! \brief Assignment from Image3DGL - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image3DGL& operator = (const Image3DGL& rhs)
+    {
+        if (this != &rhs) {
+            Image3D::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment from cl_mem - performs shallow copy.
+     *
+     *  See Memory for further details.
+     */
+    Image3DGL& operator = (const cl_mem& rhs)
+    {
+        Image3D::operator=(rhs);
+        return *this;
+    }
+};
+#endif // #if !defined(CL_VERSION_1_2)
+
+#if defined(CL_VERSION_1_2)
+/*! \class ImageGL
+ * \brief general image interface for GL interop.
+ * We abstract the 2D and 3D GL images into a single instance here
+ * that wraps all GL sourced images on the grounds that setup information
+ * was performed by OpenCL anyway.
+ */
+class ImageGL : public Image
+{
+public:
+    ImageGL(
+        const Context& context,
+        cl_mem_flags flags,
+        GLenum target,
+        GLint  miplevel,
+        GLuint texobj,
+        cl_int * err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateFromGLTexture(
+            context(), 
+            flags, 
+            target,
+            miplevel,
+            texobj,
+            &error);
+
+        detail::errHandler(error, __CREATE_GL_TEXTURE_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    ImageGL() : Image() { }
+
+    ImageGL(const ImageGL& image) : Image(image) { }
+
+    __CL_EXPLICIT_CONSTRUCTORS ImageGL(const cl_mem& image) : Image(image) { }
+
+    ImageGL& operator = (const ImageGL& rhs)
+    {
+        if (this != &rhs) {
+            Image::operator=(rhs);
+        }
+        return *this;
+    }
+
+    ImageGL& operator = (const cl_mem& rhs)
+    {
+        Image::operator=(rhs);
+        return *this;
+    }
+};
+#endif // #if defined(CL_VERSION_1_2)
+
+/*! \brief Class interface for cl_sampler.
+ *
+ *  \note Copies of these objects are shallow, meaning that the copy will refer
+ *        to the same underlying cl_sampler as the original.  For details, see
+ *        clRetainSampler() and clReleaseSampler().
+ *
+ *  \see cl_sampler 
+ */
+class Sampler : public detail::Wrapper<cl_sampler>
+{
+public:
+    /*! \brief Destructor.
+     *
+     *  This calls clReleaseSampler() on the value held by this instance.
+     */
+    ~Sampler() { }
+
+    //! \brief Default constructor - initializes to NULL.
+    Sampler() { }
+
+    /*! \brief Constructs a Sampler in a specified context.
+     *
+     *  Wraps clCreateSampler().
+     */
+    Sampler(
+        const Context& context,
+        cl_bool normalized_coords,
+        cl_addressing_mode addressing_mode,
+        cl_filter_mode filter_mode,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateSampler(
+            context(), 
+            normalized_coords,
+            addressing_mode,
+            filter_mode,
+            &error);
+
+        detail::errHandler(error, __CREATE_SAMPLER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    /*! \brief Copy constructor - performs shallow copy.
+     * 
+     *  This calls clRetainSampler() on the parameter's cl_sampler.
+     */
+    Sampler(const Sampler& sampler) : detail::Wrapper<cl_type>(sampler) { }
+
+    /*! \brief Constructor from cl_sampler - takes ownership.
+     * 
+     *  This effectively transfers ownership of a refcount on the cl_sampler
+     *  into the new Sampler object.
+     */
+    Sampler(const cl_sampler& sampler) : detail::Wrapper<cl_type>(sampler) { }
+
+    /*! \brief Assignment operator from Sampler.
+     * 
+     *  This calls clRetainSampler() on the parameter and clReleaseSampler()
+     *  on the previous value held by this instance.
+     */
+    Sampler& operator = (const Sampler& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment operator from cl_sampler - takes ownership.
+     *
+     *  This effectively transfers ownership of a refcount on the rhs and calls
+     *  clReleaseSampler() on the value previously held by this instance.
+     */
+    Sampler& operator = (const cl_sampler& rhs)
+    {
+        detail::Wrapper<cl_type>::operator=(rhs);
+        return *this;
+    }
+
+    //! \brief Wrapper for clGetSamplerInfo().
+    template <typename T>
+    cl_int getInfo(cl_sampler_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetSamplerInfo, object_, name, param),
+            __GET_SAMPLER_INFO_ERR);
+    }
+
+    //! \brief Wrapper for clGetSamplerInfo() that returns by value.
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_sampler_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_sampler_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+};
+
+class Program;
+class CommandQueue;
+class Kernel;
+
+//! \brief Class interface for specifying NDRange values.
+class NDRange
+{
+private:
+    size_t<3> sizes_;
+    cl_uint dimensions_;
+
+public:
+    //! \brief Default constructor - resulting range has zero dimensions.
+    NDRange()
+        : dimensions_(0)
+    { }
+
+    //! \brief Constructs one-dimensional range.
+    NDRange(::size_t size0)
+        : dimensions_(1)
+    {
+        sizes_[0] = size0;
+    }
+
+    //! \brief Constructs two-dimensional range.
+    NDRange(::size_t size0, ::size_t size1)
+        : dimensions_(2)
+    {
+        sizes_[0] = size0;
+        sizes_[1] = size1;
+    }
+
+    //! \brief Constructs three-dimensional range.
+    NDRange(::size_t size0, ::size_t size1, ::size_t size2)
+        : dimensions_(3)
+    {
+        sizes_[0] = size0;
+        sizes_[1] = size1;
+        sizes_[2] = size2;
+    }
+
+    /*! \brief Conversion operator to const ::size_t *.
+     *  
+     *  \returns a pointer to the size of the first dimension.
+     */
+    operator const ::size_t*() const { 
+        return (const ::size_t*) sizes_; 
+    }
+
+    //! \brief Queries the number of dimensions in the range.
+    ::size_t dimensions() const { return dimensions_; }
+};
+
+//! \brief A zero-dimensional range.
+static const NDRange NullRange;
+
+//! \brief Local address wrapper for use with Kernel::setArg
+struct LocalSpaceArg
+{
+    ::size_t size_;
+};
+
+namespace detail {
+
+template <typename T>
+struct KernelArgumentHandler
+{
+    static ::size_t size(const T&) { return sizeof(T); }
+    static T* ptr(T& value) { return &value; }
+};
+
+template <>
+struct KernelArgumentHandler<LocalSpaceArg>
+{
+    static ::size_t size(const LocalSpaceArg& value) { return value.size_; }
+    static void* ptr(LocalSpaceArg&) { return NULL; }
+};
+
+} 
+//! \endcond
+
+/*! __local
+ * \brief Helper function for generating LocalSpaceArg objects.
+ * Deprecated. Replaced with Local.
+ */
+inline CL_EXT_PREFIX__VERSION_1_1_DEPRECATED LocalSpaceArg
+__local(::size_t size) CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED;
+inline LocalSpaceArg
+__local(::size_t size)
+{
+    LocalSpaceArg ret = { size };
+    return ret;
+}
+
+/*! Local
+ * \brief Helper function for generating LocalSpaceArg objects.
+ */
+inline LocalSpaceArg
+Local(::size_t size)
+{
+    LocalSpaceArg ret = { size };
+    return ret;
+}
+
+//class KernelFunctor;
+
+/*! \brief Class interface for cl_kernel.
+ *
+ *  \note Copies of these objects are shallow, meaning that the copy will refer
+ *        to the same underlying cl_kernel as the original.  For details, see
+ *        clRetainKernel() and clReleaseKernel().
+ *
+ *  \see cl_kernel
+ */
+class Kernel : public detail::Wrapper<cl_kernel>
+{
+public:
+    inline Kernel(const Program& program, const char* name, cl_int* err = NULL);
+
+    /*! \brief Destructor.
+     *
+     *  This calls clReleaseKernel() on the value held by this instance.
+     */
+    ~Kernel() { }
+
+    //! \brief Default constructor - initializes to NULL.
+    Kernel() { }
+
+    /*! \brief Copy constructor - performs shallow copy.
+     * 
+     *  This calls clRetainKernel() on the parameter's cl_kernel.
+     */
+    Kernel(const Kernel& kernel) : detail::Wrapper<cl_type>(kernel) { }
+
+    /*! \brief Constructor from cl_kernel - takes ownership.
+     * 
+     *  This effectively transfers ownership of a refcount on the cl_kernel
+     *  into the new Kernel object.
+     */
+    __CL_EXPLICIT_CONSTRUCTORS Kernel(const cl_kernel& kernel) : detail::Wrapper<cl_type>(kernel) { }
+
+    /*! \brief Assignment operator from Kernel.
+     * 
+     *  This calls clRetainKernel() on the parameter and clReleaseKernel()
+     *  on the previous value held by this instance.
+     */
+    Kernel& operator = (const Kernel& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    /*! \brief Assignment operator from cl_kernel - takes ownership.
+     *
+     *  This effectively transfers ownership of a refcount on the rhs and calls
+     *  clReleaseKernel() on the value previously held by this instance.
+     */
+    Kernel& operator = (const cl_kernel& rhs)
+    {
+        detail::Wrapper<cl_type>::operator=(rhs);
+        return *this;
+    }
+
+    template <typename T>
+    cl_int getInfo(cl_kernel_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetKernelInfo, object_, name, param),
+            __GET_KERNEL_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_kernel_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_kernel_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+#if defined(CL_VERSION_1_2)
+    template <typename T>
+    cl_int getArgInfo(cl_uint argIndex, cl_kernel_arg_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetKernelArgInfo, object_, argIndex, name, param),
+            __GET_KERNEL_ARG_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_kernel_arg_info, name>::param_type
+    getArgInfo(cl_uint argIndex, cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_kernel_arg_info, name>::param_type param;
+        cl_int result = getArgInfo(argIndex, name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+#endif // #if defined(CL_VERSION_1_2)
+
+    template <typename T>
+    cl_int getWorkGroupInfo(
+        const Device& device, cl_kernel_work_group_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(
+                &::clGetKernelWorkGroupInfo, object_, device(), name, param),
+                __GET_KERNEL_WORK_GROUP_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_kernel_work_group_info, name>::param_type
+        getWorkGroupInfo(const Device& device, cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+        detail::cl_kernel_work_group_info, name>::param_type param;
+        cl_int result = getWorkGroupInfo(device, name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    template <typename T>
+    cl_int setArg(cl_uint index, T value)
+    {
+        return detail::errHandler(
+            ::clSetKernelArg(
+                object_,
+                index,
+                detail::KernelArgumentHandler<T>::size(value),
+                detail::KernelArgumentHandler<T>::ptr(value)),
+            __SET_KERNEL_ARGS_ERR);
+    }
+
+    cl_int setArg(cl_uint index, ::size_t size, void* argPtr)
+    {
+        return detail::errHandler(
+            ::clSetKernelArg(object_, index, size, argPtr),
+            __SET_KERNEL_ARGS_ERR);
+    }
+};
+
+/*! \class Program
+ * \brief Program interface that implements cl_program.
+ */
+class Program : public detail::Wrapper<cl_program>
+{
+public:
+    typedef VECTOR_CLASS<std::pair<const void*, ::size_t> > Binaries;
+    typedef VECTOR_CLASS<std::pair<const char*, ::size_t> > Sources;
+
+    Program(
+        const STRING_CLASS& source,
+		bool build = false,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+
+        const char * strings = source.c_str();
+        const ::size_t length  = source.size();
+
+        Context context = Context::getDefault(err);
+
+        object_ = ::clCreateProgramWithSource(
+            context(), (cl_uint)1, &strings, &length, &error);
+
+        detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR);
+
+        if (error == CL_SUCCESS && build) {
+
+            error = ::clBuildProgram(
+                object_,
+                0,
+                NULL,
+                "",
+                NULL,
+                NULL);
+
+            detail::errHandler(error, __BUILD_PROGRAM_ERR);
+        }
+
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    Program(
+        const Context& context,
+        const STRING_CLASS& source,
+        bool build = false,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+
+        const char * strings = source.c_str();
+        const ::size_t length  = source.size();
+
+        object_ = ::clCreateProgramWithSource(
+            context(), (cl_uint)1, &strings, &length, &error);
+
+        detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR);
+
+        if (error == CL_SUCCESS && build) {
+
+            error = ::clBuildProgram(
+                object_,
+                0,
+                NULL,
+                "",
+                NULL,
+                NULL);
+
+            detail::errHandler(error, __BUILD_PROGRAM_ERR);
+        }
+
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    Program(
+        const Context& context,
+        const Sources& sources,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+
+        const ::size_t n = (::size_t)sources.size();
+        ::size_t* lengths = (::size_t*) alloca(n * sizeof(::size_t));
+        const char** strings = (const char**) alloca(n * sizeof(const char*));
+
+        for (::size_t i = 0; i < n; ++i) {
+            strings[i] = sources[(int)i].first;
+            lengths[i] = sources[(int)i].second;
+        }
+
+        object_ = ::clCreateProgramWithSource(
+            context(), (cl_uint)n, strings, lengths, &error);
+
+        detail::errHandler(error, __CREATE_PROGRAM_WITH_SOURCE_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    /**
+     * Construct a program object from a list of devices and a per-device list of binaries.
+     * \param context A valid OpenCL context in which to construct the program.
+     * \param devices A vector of OpenCL device objects for which the program will be created.
+     * \param binaries A vector of pairs of a pointer to a binary object and its length.
+     * \param binaryStatus An optional vector that on completion will be resized to
+     *   match the size of binaries and filled with values to specify if each binary
+     *   was successfully loaded.
+     *   Set to CL_SUCCESS if the binary was successfully loaded.
+     *   Set to CL_INVALID_VALUE if the length is 0 or the binary pointer is NULL.
+     *   Set to CL_INVALID_BINARY if the binary provided is not valid for the matching device.
+     * \param err if non-NULL will be set to CL_SUCCESS on successful operation or one of the following errors:
+     *   CL_INVALID_CONTEXT if context is not a valid context.
+     *   CL_INVALID_VALUE if the length of devices is zero; or if the length of binaries does not match the length of devices; 
+     *     or if any entry in binaries is NULL or has length 0.
+     *   CL_INVALID_DEVICE if OpenCL devices listed in devices are not in the list of devices associated with context.
+     *   CL_INVALID_BINARY if an invalid program binary was encountered for any device. binaryStatus will return specific status for each device.
+     *   CL_OUT_OF_HOST_MEMORY if there is a failure to allocate resources required by the OpenCL implementation on the host.
+     */
+    Program(
+        const Context& context,
+        const VECTOR_CLASS<Device>& devices,
+        const Binaries& binaries,
+        VECTOR_CLASS<cl_int>* binaryStatus = NULL,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        
+        const ::size_t numDevices = devices.size();
+        
+        // Catch size mismatch early and return
+        if(binaries.size() != numDevices) {
+            error = CL_INVALID_VALUE;
+            detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR);
+            if (err != NULL) {
+                *err = error;
+            }
+            return;
+        }
+
+        ::size_t* lengths = (::size_t*) alloca(numDevices * sizeof(::size_t));
+        const unsigned char** images = (const unsigned char**) alloca(numDevices * sizeof(const unsigned char**));
+
+        for (::size_t i = 0; i < numDevices; ++i) {
+            images[i] = (const unsigned char*)binaries[i].first;
+            lengths[i] = binaries[(int)i].second;
+        }
+
+        cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id));
+        for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) {
+            deviceIDs[deviceIndex] = (devices[deviceIndex])();
+        }
+
+        if(binaryStatus) {
+            binaryStatus->resize(numDevices);
+        }
+        
+        object_ = ::clCreateProgramWithBinary(
+            context(), (cl_uint) devices.size(),
+            deviceIDs,
+            lengths, images, binaryStatus != NULL
+               ? &binaryStatus->front()
+               : NULL, &error);
+
+        detail::errHandler(error, __CREATE_PROGRAM_WITH_BINARY_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    
+#if defined(CL_VERSION_1_2)
+    /**
+     * Create program using builtin kernels.
+     * \param kernelNames Semi-colon separated list of builtin kernel names
+     */
+    Program(
+        const Context& context,
+        const VECTOR_CLASS<Device>& devices,
+        const STRING_CLASS& kernelNames,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+
+
+        ::size_t numDevices = devices.size();
+        cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id));
+        for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) {
+            deviceIDs[deviceIndex] = (devices[deviceIndex])();
+        }
+        
+        object_ = ::clCreateProgramWithBuiltInKernels(
+            context(), 
+            (cl_uint) devices.size(),
+            deviceIDs,
+            kernelNames.c_str(), 
+            &error);
+
+        detail::errHandler(error, __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+#endif // #if defined(CL_VERSION_1_2)
+
+    Program() { }
+
+    Program(const Program& program) : detail::Wrapper<cl_type>(program) { }
+
+    __CL_EXPLICIT_CONSTRUCTORS Program(const cl_program& program) : detail::Wrapper<cl_type>(program) { }
+
+    Program& operator = (const Program& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    Program& operator = (const cl_program& rhs)
+    {
+        detail::Wrapper<cl_type>::operator=(rhs);
+        return *this;
+    }
+
+    cl_int build(
+        const VECTOR_CLASS<Device>& devices,
+        const char* options = NULL,
+        void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
+        void* data = NULL) const
+    {
+        ::size_t numDevices = devices.size();
+        cl_device_id* deviceIDs = (cl_device_id*) alloca(numDevices * sizeof(cl_device_id));
+        for( ::size_t deviceIndex = 0; deviceIndex < numDevices; ++deviceIndex ) {
+            deviceIDs[deviceIndex] = (devices[deviceIndex])();
+        }
+
+        return detail::errHandler(
+            ::clBuildProgram(
+                object_,
+                (cl_uint)
+                devices.size(),
+                deviceIDs,
+                options,
+                notifyFptr,
+                data),
+                __BUILD_PROGRAM_ERR);
+    }
+
+    cl_int build(
+        const char* options = NULL,
+        void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
+        void* data = NULL) const
+    {
+        return detail::errHandler(
+            ::clBuildProgram(
+                object_,
+                0,
+                NULL,
+                options,
+                notifyFptr,
+                data),
+                __BUILD_PROGRAM_ERR);
+    }
+
+#if defined(CL_VERSION_1_2)
+	cl_int compile(
+        const char* options = NULL,
+        void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
+        void* data = NULL) const
+    {
+        return detail::errHandler(
+            ::clCompileProgram(
+                object_,
+                0,
+                NULL,
+                options,
+				0,
+				NULL,
+				NULL,
+                notifyFptr,
+                data),
+                __COMPILE_PROGRAM_ERR);
+    }
+#endif
+
+    template <typename T>
+    cl_int getInfo(cl_program_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(&::clGetProgramInfo, object_, name, param),
+            __GET_PROGRAM_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_program_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_program_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    template <typename T>
+    cl_int getBuildInfo(
+        const Device& device, cl_program_build_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(
+                &::clGetProgramBuildInfo, object_, device(), name, param),
+                __GET_PROGRAM_BUILD_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_program_build_info, name>::param_type
+    getBuildInfo(const Device& device, cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_program_build_info, name>::param_type param;
+        cl_int result = getBuildInfo(device, name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    cl_int createKernels(VECTOR_CLASS<Kernel>* kernels)
+    {
+        cl_uint numKernels;
+        cl_int err = ::clCreateKernelsInProgram(object_, 0, NULL, &numKernels);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR);
+        }
+
+        Kernel* value = (Kernel*) alloca(numKernels * sizeof(Kernel));
+        err = ::clCreateKernelsInProgram(
+            object_, numKernels, (cl_kernel*) value, NULL);
+        if (err != CL_SUCCESS) {
+            return detail::errHandler(err, __CREATE_KERNELS_IN_PROGRAM_ERR);
+        }
+
+        kernels->assign(&value[0], &value[numKernels]);
+        return CL_SUCCESS;
+    }
+};
+
+#if defined(CL_VERSION_1_2)
+inline Program linkProgram(
+    Program input1,
+    Program input2,
+    const char* options = NULL,
+    void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
+    void* data = NULL,
+    cl_int* err = NULL) 
+{
+    cl_int err_local = CL_SUCCESS;
+
+    cl_program programs[2] = { input1(), input2() };
+
+    Context ctx = input1.getInfo<CL_PROGRAM_CONTEXT>();
+
+    cl_program prog = ::clLinkProgram(
+        ctx(),
+        0,
+        NULL,
+        options,
+        2,
+        programs,
+        notifyFptr,
+        data,
+        &err_local);
+
+    detail::errHandler(err_local,__COMPILE_PROGRAM_ERR);
+    if (err != NULL) {
+        *err = err_local;
+    }
+
+    return Program(prog);
+}
+
+inline Program linkProgram(
+    VECTOR_CLASS<Program> inputPrograms,
+    const char* options = NULL,
+    void (CL_CALLBACK * notifyFptr)(cl_program, void *) = NULL,
+    void* data = NULL,
+    cl_int* err = NULL) 
+{
+    cl_int err_local = CL_SUCCESS;
+
+    cl_program * programs = (cl_program*) alloca(inputPrograms.size() * sizeof(cl_program));
+
+    if (programs != NULL) {
+        for (unsigned int i = 0; i < inputPrograms.size(); i++) {
+          programs[i] = inputPrograms[i]();
+        }
+    } 
+
+    cl_program prog = ::clLinkProgram(
+        Context::getDefault()(),
+        0,
+        NULL,
+        options,
+        (cl_uint)inputPrograms.size(),
+        programs,
+        notifyFptr,
+        data,
+        &err_local);
+
+    detail::errHandler(err_local,__COMPILE_PROGRAM_ERR);
+    if (err != NULL) {
+        *err = err_local;
+    }
+
+    return Program(prog);
+}
+#endif
+
+template<>
+inline VECTOR_CLASS<char *> cl::Program::getInfo<CL_PROGRAM_BINARIES>(cl_int* err) const
+{
+    VECTOR_CLASS< ::size_t> sizes = getInfo<CL_PROGRAM_BINARY_SIZES>();
+    VECTOR_CLASS<char *> binaries;
+    for (VECTOR_CLASS< ::size_t>::iterator s = sizes.begin(); s != sizes.end(); ++s) 
+    {
+        char *ptr = NULL;
+        if (*s != 0) 
+            ptr = new char[*s];
+        binaries.push_back(ptr);
+    }
+    
+    cl_int result = getInfo(CL_PROGRAM_BINARIES, &binaries);
+    if (err != NULL) {
+        *err = result;
+    }
+    return binaries;
+}
+
+inline Kernel::Kernel(const Program& program, const char* name, cl_int* err)
+{
+    cl_int error;
+
+    object_ = ::clCreateKernel(program(), name, &error);
+    detail::errHandler(error, __CREATE_KERNEL_ERR);
+
+    if (err != NULL) {
+        *err = error;
+    }
+
+}
+
+/*! \class CommandQueue
+ * \brief CommandQueue interface for cl_command_queue.
+ */
+class CommandQueue : public detail::Wrapper<cl_command_queue>
+{
+private:
+    static volatile int default_initialized_;
+    static CommandQueue default_;
+    static volatile cl_int default_error_;
+public:
+   CommandQueue(
+        cl_command_queue_properties properties,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+
+        Context context = Context::getDefault(&error);
+        detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+
+        if (error != CL_SUCCESS) {
+            if (err != NULL) {
+                *err = error;
+            }
+        }
+        else {
+            Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
+
+            object_ = ::clCreateCommandQueue(
+                context(), device(), properties, &error);
+
+            detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+            if (err != NULL) {
+                *err = error;
+            }
+        }
+    }
+    /*!
+    * \brief Constructs a CommandQueue for an implementation defined device in the given context
+    */
+    explicit CommandQueue(
+        const Context& context,
+        cl_command_queue_properties properties = 0,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        VECTOR_CLASS<cl::Device> devices;
+        error = context.getInfo(CL_CONTEXT_DEVICES, &devices);
+
+        detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+
+        if (error != CL_SUCCESS)
+        {
+            if (err != NULL) {
+                *err = error;
+            }
+            return;
+        }
+
+        object_ = ::clCreateCommandQueue(context(), devices[0](), properties, &error);
+
+        detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+
+        if (err != NULL) {
+            *err = error;
+        }
+
+    }
+
+    CommandQueue(
+        const Context& context,
+        const Device& device,
+        cl_command_queue_properties properties = 0,
+        cl_int* err = NULL)
+    {
+        cl_int error;
+        object_ = ::clCreateCommandQueue(
+            context(), device(), properties, &error);
+
+        detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+
+    static CommandQueue getDefault(cl_int * err = NULL) 
+    {
+        int state = detail::compare_exchange(
+            &default_initialized_, 
+            __DEFAULT_BEING_INITIALIZED, __DEFAULT_NOT_INITIALIZED);
+        
+        if (state & __DEFAULT_INITIALIZED) {
+            if (err != NULL) {
+                *err = default_error_;
+            }
+            return default_;
+        }
+
+        if (state & __DEFAULT_BEING_INITIALIZED) {
+              // Assume writes will propagate eventually...
+              while(default_initialized_ != __DEFAULT_INITIALIZED) {
+                  detail::fence();
+              }
+
+            if (err != NULL) {
+                *err = default_error_;
+            }
+            return default_;
+        }
+
+        cl_int error;
+
+        Context context = Context::getDefault(&error);
+        detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+
+        if (error != CL_SUCCESS) {
+            if (err != NULL) {
+                *err = error;
+            }
+        }
+        else {
+            Device device = context.getInfo<CL_CONTEXT_DEVICES>()[0];
+
+            default_ = CommandQueue(context, device, 0, &error);
+
+            detail::errHandler(error, __CREATE_COMMAND_QUEUE_ERR);
+            if (err != NULL) {
+                *err = error;
+            }
+        }
+
+        detail::fence();
+
+        default_error_ = error;
+        // Assume writes will propagate eventually...
+        default_initialized_ = __DEFAULT_INITIALIZED;
+
+        detail::fence();
+
+        if (err != NULL) {
+            *err = default_error_;
+        }
+        return default_;
+
+    }
+
+    CommandQueue() { }
+
+    CommandQueue(const CommandQueue& commandQueue) : detail::Wrapper<cl_type>(commandQueue) { }
+
+    CommandQueue(const cl_command_queue& commandQueue) : detail::Wrapper<cl_type>(commandQueue) { }
+
+    CommandQueue& operator = (const CommandQueue& rhs)
+    {
+        if (this != &rhs) {
+            detail::Wrapper<cl_type>::operator=(rhs);
+        }
+        return *this;
+    }
+
+    CommandQueue& operator = (const cl_command_queue& rhs)
+    {
+        detail::Wrapper<cl_type>::operator=(rhs);
+        return *this;
+    }
+
+    template <typename T>
+    cl_int getInfo(cl_command_queue_info name, T* param) const
+    {
+        return detail::errHandler(
+            detail::getInfo(
+                &::clGetCommandQueueInfo, object_, name, param),
+                __GET_COMMAND_QUEUE_INFO_ERR);
+    }
+
+    template <cl_int name> typename
+    detail::param_traits<detail::cl_command_queue_info, name>::param_type
+    getInfo(cl_int* err = NULL) const
+    {
+        typename detail::param_traits<
+            detail::cl_command_queue_info, name>::param_type param;
+        cl_int result = getInfo(name, &param);
+        if (err != NULL) {
+            *err = result;
+        }
+        return param;
+    }
+
+    cl_int enqueueReadBuffer(
+        const Buffer& buffer,
+        cl_bool blocking,
+        ::size_t offset,
+        ::size_t size,
+        void* ptr,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueReadBuffer(
+                object_, buffer(), blocking, offset, size,
+                ptr,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+            __ENQUEUE_READ_BUFFER_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+    cl_int enqueueWriteBuffer(
+        const Buffer& buffer,
+        cl_bool blocking,
+        ::size_t offset,
+        ::size_t size,
+        const void* ptr,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueWriteBuffer(
+                object_, buffer(), blocking, offset, size,
+                ptr,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+                __ENQUEUE_WRITE_BUFFER_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+    cl_int enqueueCopyBuffer(
+        const Buffer& src,
+        const Buffer& dst,
+        ::size_t src_offset,
+        ::size_t dst_offset,
+        ::size_t size,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueCopyBuffer(
+                object_, src(), dst(), src_offset, dst_offset, size,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+            __ENQEUE_COPY_BUFFER_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+    cl_int enqueueReadBufferRect(
+        const Buffer& buffer,
+        cl_bool blocking,
+        const size_t<3>& buffer_offset,
+        const size_t<3>& host_offset,
+        const size_t<3>& region,
+        ::size_t buffer_row_pitch,
+        ::size_t buffer_slice_pitch,
+        ::size_t host_row_pitch,
+        ::size_t host_slice_pitch,
+        void *ptr,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueReadBufferRect(
+                object_, 
+                buffer(), 
+                blocking, 
+                (const ::size_t *)buffer_offset,
+                (const ::size_t *)host_offset,
+                (const ::size_t *)region,
+                buffer_row_pitch,
+                buffer_slice_pitch,
+                host_row_pitch,
+                host_slice_pitch,
+                ptr,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+                __ENQUEUE_READ_BUFFER_RECT_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+    cl_int enqueueWriteBufferRect(
+        const Buffer& buffer,
+        cl_bool blocking,
+        const size_t<3>& buffer_offset,
+        const size_t<3>& host_offset,
+        const size_t<3>& region,
+        ::size_t buffer_row_pitch,
+        ::size_t buffer_slice_pitch,
+        ::size_t host_row_pitch,
+        ::size_t host_slice_pitch,
+        void *ptr,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueWriteBufferRect(
+                object_, 
+                buffer(), 
+                blocking, 
+                (const ::size_t *)buffer_offset,
+                (const ::size_t *)host_offset,
+                (const ::size_t *)region,
+                buffer_row_pitch,
+                buffer_slice_pitch,
+                host_row_pitch,
+                host_slice_pitch,
+                ptr,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+                __ENQUEUE_WRITE_BUFFER_RECT_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+    cl_int enqueueCopyBufferRect(
+        const Buffer& src,
+        const Buffer& dst,
+        const size_t<3>& src_origin,
+        const size_t<3>& dst_origin,
+        const size_t<3>& region,
+        ::size_t src_row_pitch,
+        ::size_t src_slice_pitch,
+        ::size_t dst_row_pitch,
+        ::size_t dst_slice_pitch,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueCopyBufferRect(
+                object_, 
+                src(), 
+                dst(), 
+                (const ::size_t *)src_origin, 
+                (const ::size_t *)dst_origin, 
+                (const ::size_t *)region,
+                src_row_pitch,
+                src_slice_pitch,
+                dst_row_pitch,
+                dst_slice_pitch,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+            __ENQEUE_COPY_BUFFER_RECT_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+#if defined(CL_VERSION_1_2)
+    /**
+     * Enqueue a command to fill a buffer object with a pattern
+     * of a given size. The pattern is specified a as vector.
+     * \tparam PatternType The datatype of the pattern field. 
+     *     The pattern type must be an accepted OpenCL data type.
+     */
+    template<typename PatternType>
+    cl_int enqueueFillBuffer(
+        const Buffer& buffer,
+        PatternType pattern,
+        ::size_t offset,
+        ::size_t size,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueFillBuffer(
+                object_, 
+                buffer(),
+                static_cast<void*>(&pattern),
+                sizeof(PatternType), 
+                offset, 
+                size,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+                __ENQUEUE_FILL_BUFFER_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+#endif // #if defined(CL_VERSION_1_2)
+
+    cl_int enqueueReadImage(
+        const Image& image,
+        cl_bool blocking,
+        const size_t<3>& origin,
+        const size_t<3>& region,
+        ::size_t row_pitch,
+        ::size_t slice_pitch,
+        void* ptr,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueReadImage(
+                object_, image(), blocking, (const ::size_t *) origin,
+                (const ::size_t *) region, row_pitch, slice_pitch, ptr,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+            __ENQUEUE_READ_IMAGE_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+    cl_int enqueueWriteImage(
+        const Image& image,
+        cl_bool blocking,
+        const size_t<3>& origin,
+        const size_t<3>& region,
+        ::size_t row_pitch,
+        ::size_t slice_pitch,
+        void* ptr,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueWriteImage(
+                object_, image(), blocking, (const ::size_t *) origin,
+                (const ::size_t *) region, row_pitch, slice_pitch, ptr,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+            __ENQUEUE_WRITE_IMAGE_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+    cl_int enqueueCopyImage(
+        const Image& src,
+        const Image& dst,
+        const size_t<3>& src_origin,
+        const size_t<3>& dst_origin,
+        const size_t<3>& region,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueCopyImage(
+                object_, src(), dst(), (const ::size_t *) src_origin,
+                (const ::size_t *)dst_origin, (const ::size_t *) region,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+            __ENQUEUE_COPY_IMAGE_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+#if defined(CL_VERSION_1_2)
+    /**
+     * Enqueue a command to fill an image object with a specified color.
+     * \param fillColor is the color to use to fill the image.
+     *     This is a four component RGBA floating-point color value if
+     *     the image channel data type is not an unnormalized signed or
+     *     unsigned data type.
+     */
+    cl_int enqueueFillImage(
+        const Image& image,
+        cl_float4 fillColor,
+        const size_t<3>& origin,
+        const size_t<3>& region,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueFillImage(
+                object_, 
+                image(),
+                static_cast<void*>(&fillColor), 
+                (const ::size_t *) origin, 
+                (const ::size_t *) region,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+                __ENQUEUE_FILL_IMAGE_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+    /**
+     * Enqueue a command to fill an image object with a specified color.
+     * \param fillColor is the color to use to fill the image.
+     *     This is a four component RGBA signed integer color value if
+     *     the image channel data type is an unnormalized signed integer
+     *     type.
+     */
+    cl_int enqueueFillImage(
+        const Image& image,
+        cl_int4 fillColor,
+        const size_t<3>& origin,
+        const size_t<3>& region,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueFillImage(
+                object_, 
+                image(),
+                static_cast<void*>(&fillColor), 
+                (const ::size_t *) origin, 
+                (const ::size_t *) region,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+                __ENQUEUE_FILL_IMAGE_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+    /**
+     * Enqueue a command to fill an image object with a specified color.
+     * \param fillColor is the color to use to fill the image.
+     *     This is a four component RGBA unsigned integer color value if
+     *     the image channel data type is an unnormalized unsigned integer
+     *     type.
+     */
+    cl_int enqueueFillImage(
+        const Image& image,
+        cl_uint4 fillColor,
+        const size_t<3>& origin,
+        const size_t<3>& region,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueFillImage(
+                object_, 
+                image(),
+                static_cast<void*>(&fillColor), 
+                (const ::size_t *) origin, 
+                (const ::size_t *) region,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+                __ENQUEUE_FILL_IMAGE_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+#endif // #if defined(CL_VERSION_1_2)
+
+    cl_int enqueueCopyImageToBuffer(
+        const Image& src,
+        const Buffer& dst,
+        const size_t<3>& src_origin,
+        const size_t<3>& region,
+        ::size_t dst_offset,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueCopyImageToBuffer(
+                object_, src(), dst(), (const ::size_t *) src_origin,
+                (const ::size_t *) region, dst_offset,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+            __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+    cl_int enqueueCopyBufferToImage(
+        const Buffer& src,
+        const Image& dst,
+        ::size_t src_offset,
+        const size_t<3>& dst_origin,
+        const size_t<3>& region,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueCopyBufferToImage(
+                object_, src(), dst(), src_offset,
+                (const ::size_t *) dst_origin, (const ::size_t *) region,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+            __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+    void* enqueueMapBuffer(
+        const Buffer& buffer,
+        cl_bool blocking,
+        cl_map_flags flags,
+        ::size_t offset,
+        ::size_t size,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL,
+        cl_int* err = NULL) const
+    {
+        cl_int error;
+        void * result = ::clEnqueueMapBuffer(
+            object_, buffer(), blocking, flags, offset, size,
+            (events != NULL) ? (cl_uint) events->size() : 0,
+            (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+            (cl_event*) event,
+            &error);
+
+        detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+        return result;
+    }
+
+    void* enqueueMapImage(
+        const Image& buffer,
+        cl_bool blocking,
+        cl_map_flags flags,
+        const size_t<3>& origin,
+        const size_t<3>& region,
+        ::size_t * row_pitch,
+        ::size_t * slice_pitch,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL,
+        cl_int* err = NULL) const
+    {
+        cl_int error;
+        void * result = ::clEnqueueMapImage(
+            object_, buffer(), blocking, flags,
+            (const ::size_t *) origin, (const ::size_t *) region,
+            row_pitch, slice_pitch,
+            (events != NULL) ? (cl_uint) events->size() : 0,
+            (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+            (cl_event*) event,
+            &error);
+
+        detail::errHandler(error, __ENQUEUE_MAP_IMAGE_ERR);
+        if (err != NULL) {
+              *err = error;
+        }
+        return result;
+    }
+
+    cl_int enqueueUnmapMemObject(
+        const Memory& memory,
+        void* mapped_ptr,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueUnmapMemObject(
+                object_, memory(), mapped_ptr,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+            __ENQUEUE_UNMAP_MEM_OBJECT_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+#if defined(CL_VERSION_1_2)
+    /**
+     * Enqueues a marker command which waits for either a list of events to complete, 
+     * or all previously enqueued commands to complete.
+     *
+     * Enqueues a marker command which waits for either a list of events to complete, 
+     * or if the list is empty it waits for all commands previously enqueued in command_queue 
+     * to complete before it completes. This command returns an event which can be waited on, 
+     * i.e. this event can be waited on to insure that all events either in the event_wait_list 
+     * or all previously enqueued commands, queued before this command to command_queue, 
+     * have completed.
+     */
+    cl_int enqueueMarkerWithWaitList(
+        const VECTOR_CLASS<Event> *events = 0,
+        Event *event = 0)
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueMarkerWithWaitList(
+                object_,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+            __ENQUEUE_MARKER_WAIT_LIST_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+    /**
+     * A synchronization point that enqueues a barrier operation.
+     *
+     * Enqueues a barrier command which waits for either a list of events to complete, 
+     * or if the list is empty it waits for all commands previously enqueued in command_queue 
+     * to complete before it completes. This command blocks command execution, that is, any 
+     * following commands enqueued after it do not execute until it completes. This command 
+     * returns an event which can be waited on, i.e. this event can be waited on to insure that 
+     * all events either in the event_wait_list or all previously enqueued commands, queued 
+     * before this command to command_queue, have completed.
+     */
+    cl_int enqueueBarrierWithWaitList(
+        const VECTOR_CLASS<Event> *events = 0,
+        Event *event = 0)
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueBarrierWithWaitList(
+                object_,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+            __ENQUEUE_BARRIER_WAIT_LIST_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+    
+    /**
+     * Enqueues a command to indicate with which device a set of memory objects
+     * should be associated.
+     */
+    cl_int enqueueMigrateMemObjects(
+        const VECTOR_CLASS<Memory> &memObjects,
+        cl_mem_migration_flags flags,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL
+        )
+    {
+        cl_event tmp;
+        
+        cl_mem* localMemObjects = static_cast<cl_mem*>(alloca(memObjects.size() * sizeof(cl_mem)));
+        for( int i = 0; i < (int)memObjects.size(); ++i ) {
+            localMemObjects[i] = memObjects[i]();
+        }
+
+
+        cl_int err = detail::errHandler(
+            ::clEnqueueMigrateMemObjects(
+                object_, 
+                (cl_uint)memObjects.size(), 
+                static_cast<const cl_mem*>(localMemObjects),
+                flags,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+            __ENQUEUE_UNMAP_MEM_OBJECT_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+#endif // #if defined(CL_VERSION_1_2)
+
+    cl_int enqueueNDRangeKernel(
+        const Kernel& kernel,
+        const NDRange& offset,
+        const NDRange& global,
+        const NDRange& local = NullRange,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueNDRangeKernel(
+                object_, kernel(), (cl_uint) global.dimensions(),
+                offset.dimensions() != 0 ? (const ::size_t*) offset : NULL,
+                (const ::size_t*) global,
+                local.dimensions() != 0 ? (const ::size_t*) local : NULL,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+            __ENQUEUE_NDRANGE_KERNEL_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+    cl_int enqueueTask(
+        const Kernel& kernel,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueTask(
+                object_, kernel(),
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+            __ENQUEUE_TASK_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+    cl_int enqueueNativeKernel(
+        void (CL_CALLBACK *userFptr)(void *),
+        std::pair<void*, ::size_t> args,
+        const VECTOR_CLASS<Memory>* mem_objects = NULL,
+        const VECTOR_CLASS<const void*>* mem_locs = NULL,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL) const
+    {
+        cl_mem * mems = (mem_objects != NULL && mem_objects->size() > 0) 
+            ? (cl_mem*) alloca(mem_objects->size() * sizeof(cl_mem))
+            : NULL;
+
+        if (mems != NULL) {
+            for (unsigned int i = 0; i < mem_objects->size(); i++) {
+                mems[i] = ((*mem_objects)[i])();
+            }
+        }
+
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            ::clEnqueueNativeKernel(
+                object_, userFptr, args.first, args.second,
+                (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+                mems,
+                (mem_locs != NULL) ? (const void **) &mem_locs->front() : NULL,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+            __ENQUEUE_NATIVE_KERNEL);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+
+/**
+ * Deprecated APIs for 1.2
+ */
+#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) 
+    CL_EXT_PREFIX__VERSION_1_1_DEPRECATED 
+    cl_int enqueueMarker(Event* event = NULL) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+    {
+        return detail::errHandler(
+            ::clEnqueueMarker(object_, (cl_event*) event),
+            __ENQUEUE_MARKER_ERR);
+    }
+
+    CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+    cl_int enqueueWaitForEvents(const VECTOR_CLASS<Event>& events) const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+    {
+        return detail::errHandler(
+            ::clEnqueueWaitForEvents(
+                object_,
+                (cl_uint) events.size(),
+                (const cl_event*) &events.front()),
+            __ENQUEUE_WAIT_FOR_EVENTS_ERR);
+    }
+#endif // #if defined(CL_VERSION_1_1)
+
+    cl_int enqueueAcquireGLObjects(
+         const VECTOR_CLASS<Memory>* mem_objects = NULL,
+         const VECTOR_CLASS<Event>* events = NULL,
+         Event* event = NULL) const
+     {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+             ::clEnqueueAcquireGLObjects(
+                 object_,
+                 (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+                 (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
+                 (events != NULL) ? (cl_uint) events->size() : 0,
+                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                 (event != NULL) ? &tmp : NULL),
+             __ENQUEUE_ACQUIRE_GL_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+     }
+
+    cl_int enqueueReleaseGLObjects(
+         const VECTOR_CLASS<Memory>* mem_objects = NULL,
+         const VECTOR_CLASS<Event>* events = NULL,
+         Event* event = NULL) const
+     {
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+             ::clEnqueueReleaseGLObjects(
+                 object_,
+                 (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+                 (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
+                 (events != NULL) ? (cl_uint) events->size() : 0,
+                 (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+                 (event != NULL) ? &tmp : NULL),
+             __ENQUEUE_RELEASE_GL_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+     }
+
+#if defined (USE_DX_INTEROP)
+typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueAcquireD3D10ObjectsKHR)(
+    cl_command_queue command_queue, cl_uint num_objects,
+    const cl_mem* mem_objects, cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list, cl_event* event);
+typedef CL_API_ENTRY cl_int (CL_API_CALL *PFN_clEnqueueReleaseD3D10ObjectsKHR)(
+    cl_command_queue command_queue, cl_uint num_objects,
+    const cl_mem* mem_objects,  cl_uint num_events_in_wait_list,
+    const cl_event* event_wait_list, cl_event* event);
+
+    cl_int enqueueAcquireD3D10Objects(
+         const VECTOR_CLASS<Memory>* mem_objects = NULL,
+         const VECTOR_CLASS<Event>* events = NULL,
+         Event* event = NULL) const
+    {
+        static PFN_clEnqueueAcquireD3D10ObjectsKHR pfn_clEnqueueAcquireD3D10ObjectsKHR = NULL;
+#if defined(CL_VERSION_1_2)
+        cl_context context = getInfo<CL_QUEUE_CONTEXT>();
+        cl::Device device(getInfo<CL_QUEUE_DEVICE>());
+        cl_platform_id platform = device.getInfo<CL_DEVICE_PLATFORM>();
+        __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueAcquireD3D10ObjectsKHR);
+#endif
+#if defined(CL_VERSION_1_1)
+        __INIT_CL_EXT_FCN_PTR(clEnqueueAcquireD3D10ObjectsKHR);
+#endif
+        
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+             pfn_clEnqueueAcquireD3D10ObjectsKHR(
+                 object_,
+                 (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+                 (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
+                 (events != NULL) ? (cl_uint) events->size() : 0,
+                 (events != NULL) ? (cl_event*) &events->front() : NULL,
+                 (event != NULL) ? &tmp : NULL),
+             __ENQUEUE_ACQUIRE_GL_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+     }
+
+    cl_int enqueueReleaseD3D10Objects(
+         const VECTOR_CLASS<Memory>* mem_objects = NULL,
+         const VECTOR_CLASS<Event>* events = NULL,
+         Event* event = NULL) const
+    {
+        static PFN_clEnqueueReleaseD3D10ObjectsKHR pfn_clEnqueueReleaseD3D10ObjectsKHR = NULL;
+#if defined(CL_VERSION_1_2)
+        cl_context context = getInfo<CL_QUEUE_CONTEXT>();
+        cl::Device device(getInfo<CL_QUEUE_DEVICE>());
+        cl_platform_id platform = device.getInfo<CL_DEVICE_PLATFORM>();
+        __INIT_CL_EXT_FCN_PTR_PLATFORM(platform, clEnqueueReleaseD3D10ObjectsKHR);
+#endif // #if defined(CL_VERSION_1_2)
+#if defined(CL_VERSION_1_1)
+        __INIT_CL_EXT_FCN_PTR(clEnqueueReleaseD3D10ObjectsKHR);
+#endif // #if defined(CL_VERSION_1_1)
+
+        cl_event tmp;
+        cl_int err = detail::errHandler(
+            pfn_clEnqueueReleaseD3D10ObjectsKHR(
+                object_,
+                (mem_objects != NULL) ? (cl_uint) mem_objects->size() : 0,
+                (mem_objects != NULL) ? (const cl_mem *) &mem_objects->front(): NULL,
+                (events != NULL) ? (cl_uint) events->size() : 0,
+                (events != NULL) ? (cl_event*) &events->front() : NULL,
+                (event != NULL) ? &tmp : NULL),
+            __ENQUEUE_RELEASE_GL_ERR);
+
+        if (event != NULL && err == CL_SUCCESS)
+            *event = tmp;
+
+        return err;
+    }
+#endif
+
+/**
+ * Deprecated APIs for 1.2
+ */
+#if defined(CL_USE_DEPRECATED_OPENCL_1_1_APIS) || (defined(CL_VERSION_1_1) && !defined(CL_VERSION_1_2)) 
+    CL_EXT_PREFIX__VERSION_1_1_DEPRECATED
+    cl_int enqueueBarrier() const CL_EXT_SUFFIX__VERSION_1_1_DEPRECATED
+    {
+        return detail::errHandler(
+            ::clEnqueueBarrier(object_),
+            __ENQUEUE_BARRIER_ERR);
+    }
+#endif // #if defined(CL_VERSION_1_1)
+
+    cl_int flush() const
+    {
+        return detail::errHandler(::clFlush(object_), __FLUSH_ERR);
+    }
+
+    cl_int finish() const
+    {
+        return detail::errHandler(::clFinish(object_), __FINISH_ERR);
+    }
+};
+
+#ifdef _WIN32
+__declspec(selectany) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED;
+__declspec(selectany) CommandQueue CommandQueue::default_;
+__declspec(selectany) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS;
+#else
+__attribute__((weak)) volatile int CommandQueue::default_initialized_ = __DEFAULT_NOT_INITIALIZED;
+__attribute__((weak)) CommandQueue CommandQueue::default_;
+__attribute__((weak)) volatile cl_int CommandQueue::default_error_ = CL_SUCCESS;
+#endif
+
+template< typename IteratorType >
+Buffer::Buffer(
+    const Context &context,
+    IteratorType startIterator,
+    IteratorType endIterator,
+    bool readOnly,
+    bool useHostPtr,
+    cl_int* err)
+{
+    typedef typename std::iterator_traits<IteratorType>::value_type DataType;
+    cl_int error;
+
+    cl_mem_flags flags = 0;
+    if( readOnly ) {
+        flags |= CL_MEM_READ_ONLY;
+    }
+    else {
+        flags |= CL_MEM_READ_WRITE;
+    }
+    if( useHostPtr ) {
+        flags |= CL_MEM_USE_HOST_PTR;
+    }
+    
+    ::size_t size = sizeof(DataType)*(endIterator - startIterator);
+
+    if( useHostPtr ) {
+        object_ = ::clCreateBuffer(context(), flags, size, static_cast<DataType*>(&*startIterator), &error);
+    } else {
+        object_ = ::clCreateBuffer(context(), flags, size, 0, &error);
+    }
+
+    detail::errHandler(error, __CREATE_BUFFER_ERR);
+    if (err != NULL) {
+        *err = error;
+    }
+
+    if( !useHostPtr ) {
+        CommandQueue queue(context, 0, &error);
+        detail::errHandler(error, __CREATE_BUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+
+        error = cl::copy(queue, startIterator, endIterator, *this);
+        detail::errHandler(error, __CREATE_BUFFER_ERR);
+        if (err != NULL) {
+            *err = error;
+        }
+    }
+}
+
+inline cl_int enqueueReadBuffer(
+    const Buffer& buffer,
+    cl_bool blocking,
+    ::size_t offset,
+    ::size_t size,
+    void* ptr,
+    const VECTOR_CLASS<Event>* events = NULL,
+    Event* event = NULL)
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+
+    if (error != CL_SUCCESS) {
+        return error;
+    }
+
+    return queue.enqueueReadBuffer(buffer, blocking, offset, size, ptr, events, event);
+}
+
+inline cl_int enqueueWriteBuffer(
+        const Buffer& buffer,
+        cl_bool blocking,
+        ::size_t offset,
+        ::size_t size,
+        const void* ptr,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL)
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+
+    if (error != CL_SUCCESS) {
+        return error;
+    }
+
+    return queue.enqueueWriteBuffer(buffer, blocking, offset, size, ptr, events, event);
+}
+
+inline void* enqueueMapBuffer(
+        const Buffer& buffer,
+        cl_bool blocking,
+        cl_map_flags flags,
+        ::size_t offset,
+        ::size_t size,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL,
+        cl_int* err = NULL)
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+    detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR);
+    if (err != NULL) {
+        *err = error;
+    }
+
+    void * result = ::clEnqueueMapBuffer(
+            queue(), buffer(), blocking, flags, offset, size,
+            (events != NULL) ? (cl_uint) events->size() : 0,
+            (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+            (cl_event*) event,
+            &error);
+
+    detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR);
+    if (err != NULL) {
+        *err = error;
+    }
+    return result;
+}
+
+inline cl_int enqueueUnmapMemObject(
+    const Memory& memory,
+    void* mapped_ptr,
+    const VECTOR_CLASS<Event>* events = NULL,
+    Event* event = NULL)
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+    detail::errHandler(error, __ENQUEUE_MAP_BUFFER_ERR);
+    if (error != CL_SUCCESS) {
+        return error;
+    }
+
+    cl_event tmp;
+    cl_int err = detail::errHandler(
+        ::clEnqueueUnmapMemObject(
+            queue(), memory(), mapped_ptr,
+            (events != NULL) ? (cl_uint) events->size() : 0,
+            (events != NULL && events->size() > 0) ? (cl_event*) &events->front() : NULL,
+            (event != NULL) ? &tmp : NULL),
+        __ENQUEUE_UNMAP_MEM_OBJECT_ERR);
+
+    if (event != NULL && err == CL_SUCCESS)
+        *event = tmp;
+
+    return err;
+}
+
+inline cl_int enqueueCopyBuffer(
+        const Buffer& src,
+        const Buffer& dst,
+        ::size_t src_offset,
+        ::size_t dst_offset,
+        ::size_t size,
+        const VECTOR_CLASS<Event>* events = NULL,
+        Event* event = NULL)
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+
+    if (error != CL_SUCCESS) {
+        return error;
+    }
+
+    return queue.enqueueCopyBuffer(src, dst, src_offset, dst_offset, size, events, event);
+}
+
+/**
+ * Blocking copy operation between iterators and a buffer.
+ * Host to Device.
+ * Uses default command queue.
+ */
+template< typename IteratorType >
+inline cl_int copy( IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer )
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+    if (error != CL_SUCCESS)
+        return error;
+
+    return cl::copy(queue, startIterator, endIterator, buffer);
+}
+
+/**
+ * Blocking copy operation between iterators and a buffer.
+ * Device to Host.
+ * Uses default command queue.
+ */
+template< typename IteratorType >
+inline cl_int copy( const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator )
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+    if (error != CL_SUCCESS)
+        return error;
+
+    return cl::copy(queue, buffer, startIterator, endIterator);
+}
+
+/**
+ * Blocking copy operation between iterators and a buffer.
+ * Host to Device.
+ * Uses specified queue.
+ */
+template< typename IteratorType >
+inline cl_int copy( const CommandQueue &queue, IteratorType startIterator, IteratorType endIterator, cl::Buffer &buffer )
+{
+    typedef typename std::iterator_traits<IteratorType>::value_type DataType;
+    cl_int error;
+    
+    ::size_t length = endIterator-startIterator;
+    ::size_t byteLength = length*sizeof(DataType);
+
+    DataType *pointer = 
+        static_cast<DataType*>(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_WRITE, 0, byteLength, 0, 0, &error));
+    // if exceptions enabled, enqueueMapBuffer will throw
+    if( error != CL_SUCCESS ) {
+        return error;
+    }
+#if defined(_MSC_VER)
+    std::copy(
+        startIterator, 
+        endIterator, 
+        stdext::checked_array_iterator<DataType*>(
+            pointer, length));
+#else
+    std::copy(startIterator, endIterator, pointer);
+#endif
+    Event endEvent;
+    error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent);
+    // if exceptions enabled, enqueueUnmapMemObject will throw
+    if( error != CL_SUCCESS ) { 
+        return error;
+    }
+    endEvent.wait();
+    return CL_SUCCESS;
+}
+
+/**
+ * Blocking copy operation between iterators and a buffer.
+ * Device to Host.
+ * Uses specified queue.
+ */
+template< typename IteratorType >
+inline cl_int copy( const CommandQueue &queue, const cl::Buffer &buffer, IteratorType startIterator, IteratorType endIterator )
+{
+    typedef typename std::iterator_traits<IteratorType>::value_type DataType;
+    cl_int error;
+        
+    ::size_t length = endIterator-startIterator;
+    ::size_t byteLength = length*sizeof(DataType);
+
+    DataType *pointer = 
+        static_cast<DataType*>(queue.enqueueMapBuffer(buffer, CL_TRUE, CL_MAP_READ, 0, byteLength, 0, 0, &error));
+    // if exceptions enabled, enqueueMapBuffer will throw
+    if( error != CL_SUCCESS ) {
+        return error;
+    }
+    std::copy(pointer, pointer + length, startIterator);
+    Event endEvent;
+    error = queue.enqueueUnmapMemObject(buffer, pointer, 0, &endEvent);
+    // if exceptions enabled, enqueueUnmapMemObject will throw
+    if( error != CL_SUCCESS ) { 
+        return error;
+    }
+    endEvent.wait();
+    return CL_SUCCESS;
+}
+
+#if defined(CL_VERSION_1_1)
+inline cl_int enqueueReadBufferRect(
+    const Buffer& buffer,
+    cl_bool blocking,
+    const size_t<3>& buffer_offset,
+    const size_t<3>& host_offset,
+    const size_t<3>& region,
+    ::size_t buffer_row_pitch,
+    ::size_t buffer_slice_pitch,
+    ::size_t host_row_pitch,
+    ::size_t host_slice_pitch,
+    void *ptr,
+    const VECTOR_CLASS<Event>* events = NULL,
+    Event* event = NULL)
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+
+    if (error != CL_SUCCESS) {
+        return error;
+    }
+
+    return queue.enqueueReadBufferRect(
+        buffer, 
+        blocking, 
+        buffer_offset, 
+        host_offset,
+        region,
+        buffer_row_pitch,
+        buffer_slice_pitch,
+        host_row_pitch,
+        host_slice_pitch,
+        ptr, 
+        events, 
+        event);
+}
+
+inline cl_int enqueueWriteBufferRect(
+    const Buffer& buffer,
+    cl_bool blocking,
+    const size_t<3>& buffer_offset,
+    const size_t<3>& host_offset,
+    const size_t<3>& region,
+    ::size_t buffer_row_pitch,
+    ::size_t buffer_slice_pitch,
+    ::size_t host_row_pitch,
+    ::size_t host_slice_pitch,
+    void *ptr,
+    const VECTOR_CLASS<Event>* events = NULL,
+    Event* event = NULL)
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+
+    if (error != CL_SUCCESS) {
+        return error;
+    }
+
+    return queue.enqueueWriteBufferRect(
+        buffer, 
+        blocking, 
+        buffer_offset, 
+        host_offset,
+        region,
+        buffer_row_pitch,
+        buffer_slice_pitch,
+        host_row_pitch,
+        host_slice_pitch,
+        ptr, 
+        events, 
+        event);
+}
+
+inline cl_int enqueueCopyBufferRect(
+    const Buffer& src,
+    const Buffer& dst,
+    const size_t<3>& src_origin,
+    const size_t<3>& dst_origin,
+    const size_t<3>& region,
+    ::size_t src_row_pitch,
+    ::size_t src_slice_pitch,
+    ::size_t dst_row_pitch,
+    ::size_t dst_slice_pitch,
+    const VECTOR_CLASS<Event>* events = NULL,
+    Event* event = NULL)
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+
+    if (error != CL_SUCCESS) {
+        return error;
+    }
+
+    return queue.enqueueCopyBufferRect(
+        src,
+        dst,
+        src_origin,
+        dst_origin,
+        region,
+        src_row_pitch,
+        src_slice_pitch,
+        dst_row_pitch,
+        dst_slice_pitch,
+        events, 
+        event);
+}
+#endif
+
+inline cl_int enqueueReadImage(
+    const Image& image,
+    cl_bool blocking,
+    const size_t<3>& origin,
+    const size_t<3>& region,
+    ::size_t row_pitch,
+    ::size_t slice_pitch,
+    void* ptr,
+    const VECTOR_CLASS<Event>* events = NULL,
+    Event* event = NULL) 
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+
+    if (error != CL_SUCCESS) {
+        return error;
+    }
+
+    return queue.enqueueReadImage(
+        image,
+        blocking,
+        origin,
+        region,
+        row_pitch,
+        slice_pitch,
+        ptr,
+        events, 
+        event);
+}
+
+inline cl_int enqueueWriteImage(
+    const Image& image,
+    cl_bool blocking,
+    const size_t<3>& origin,
+    const size_t<3>& region,
+    ::size_t row_pitch,
+    ::size_t slice_pitch,
+    void* ptr,
+    const VECTOR_CLASS<Event>* events = NULL,
+    Event* event = NULL)
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+
+    if (error != CL_SUCCESS) {
+        return error;
+    }
+
+    return queue.enqueueWriteImage(
+        image,
+        blocking,
+        origin,
+        region,
+        row_pitch,
+        slice_pitch,
+        ptr,
+        events, 
+        event);
+}
+
+inline cl_int enqueueCopyImage(
+    const Image& src,
+    const Image& dst,
+    const size_t<3>& src_origin,
+    const size_t<3>& dst_origin,
+    const size_t<3>& region,
+    const VECTOR_CLASS<Event>* events = NULL,
+    Event* event = NULL)
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+
+    if (error != CL_SUCCESS) {
+        return error;
+    }
+
+    return queue.enqueueCopyImage(
+        src,
+        dst,
+        src_origin,
+        dst_origin,
+        region,
+        events,
+        event);
+}
+
+inline cl_int enqueueCopyImageToBuffer(
+    const Image& src,
+    const Buffer& dst,
+    const size_t<3>& src_origin,
+    const size_t<3>& region,
+    ::size_t dst_offset,
+    const VECTOR_CLASS<Event>* events = NULL,
+    Event* event = NULL)
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+
+    if (error != CL_SUCCESS) {
+        return error;
+    }
+
+    return queue.enqueueCopyImageToBuffer(
+        src,
+        dst,
+        src_origin,
+        region,
+        dst_offset,
+        events,
+        event);
+}
+
+inline cl_int enqueueCopyBufferToImage(
+    const Buffer& src,
+    const Image& dst,
+    ::size_t src_offset,
+    const size_t<3>& dst_origin,
+    const size_t<3>& region,
+    const VECTOR_CLASS<Event>* events = NULL,
+    Event* event = NULL)
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+
+    if (error != CL_SUCCESS) {
+        return error;
+    }
+
+    return queue.enqueueCopyBufferToImage(
+        src,
+        dst,
+        src_offset,
+        dst_origin,
+        region,
+        events,
+        event);
+}
+
+
+inline cl_int flush(void)
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+
+    if (error != CL_SUCCESS) {
+        return error;
+    }
+
+    return queue.flush();
+}
+
+inline cl_int finish(void)
+{
+    cl_int error;
+    CommandQueue queue = CommandQueue::getDefault(&error);
+
+    if (error != CL_SUCCESS) {
+        return error;
+    } 
+
+
+    return queue.finish();
+}
+
+// Kernel Functor support
+// New interface as of September 2011
+// Requires the C++11 std::tr1::function (note do not support TR1)
+// Visual Studio 2010 and GCC 4.2
+
+struct EnqueueArgs
+{
+    CommandQueue queue_;
+    const NDRange offset_;
+    const NDRange global_;
+    const NDRange local_;
+    VECTOR_CLASS<Event> events_;
+
+    EnqueueArgs(NDRange global) : 
+      queue_(CommandQueue::getDefault()),
+      offset_(NullRange), 
+      global_(global),
+      local_(NullRange)
+    {
+
+    }
+
+    EnqueueArgs(NDRange global, NDRange local) : 
+      queue_(CommandQueue::getDefault()),
+      offset_(NullRange), 
+      global_(global),
+      local_(local)
+    {
+
+    }
+
+    EnqueueArgs(NDRange offset, NDRange global, NDRange local) : 
+      queue_(CommandQueue::getDefault()),
+      offset_(offset), 
+      global_(global),
+      local_(local)
+    {
+
+    }
+
+    EnqueueArgs(Event e, NDRange global) : 
+      queue_(CommandQueue::getDefault()),
+      offset_(NullRange), 
+      global_(global),
+      local_(NullRange)
+    {
+        events_.push_back(e);
+    }
+
+    EnqueueArgs(Event e, NDRange global, NDRange local) : 
+      queue_(CommandQueue::getDefault()),
+      offset_(NullRange), 
+      global_(global),
+      local_(local)
+    {
+        events_.push_back(e);
+    }
+
+    EnqueueArgs(Event e, NDRange offset, NDRange global, NDRange local) : 
+      queue_(CommandQueue::getDefault()),
+      offset_(offset), 
+      global_(global),
+      local_(local)
+    {
+        events_.push_back(e);
+    }
+
+    EnqueueArgs(const VECTOR_CLASS<Event> &events, NDRange global) : 
+      queue_(CommandQueue::getDefault()),
+      offset_(NullRange), 
+      global_(global),
+      local_(NullRange),
+      events_(events)
+    {
+
+    }
+
+    EnqueueArgs(const VECTOR_CLASS<Event> &events, NDRange global, NDRange local) : 
+      queue_(CommandQueue::getDefault()),
+      offset_(NullRange), 
+      global_(global),
+      local_(local),
+      events_(events)
+    {
+
+    }
+
+    EnqueueArgs(const VECTOR_CLASS<Event> &events, NDRange offset, NDRange global, NDRange local) : 
+      queue_(CommandQueue::getDefault()),
+      offset_(offset), 
+      global_(global),
+      local_(local),
+      events_(events)
+    {
+
+    }
+
+    EnqueueArgs(CommandQueue &queue, NDRange global) : 
+      queue_(queue),
+      offset_(NullRange), 
+      global_(global),
+      local_(NullRange)
+    {
+
+    }
+
+    EnqueueArgs(CommandQueue &queue, NDRange global, NDRange local) : 
+      queue_(queue),
+      offset_(NullRange), 
+      global_(global),
+      local_(local)
+    {
+
+    }
+
+    EnqueueArgs(CommandQueue &queue, NDRange offset, NDRange global, NDRange local) : 
+      queue_(queue),
+      offset_(offset), 
+      global_(global),
+      local_(local)
+    {
+
+    }
+
+    EnqueueArgs(CommandQueue &queue, Event e, NDRange global) : 
+      queue_(queue),
+      offset_(NullRange), 
+      global_(global),
+      local_(NullRange)
+    {
+        events_.push_back(e);
+    }
+
+    EnqueueArgs(CommandQueue &queue, Event e, NDRange global, NDRange local) : 
+      queue_(queue),
+      offset_(NullRange), 
+      global_(global),
+      local_(local)
+    {
+        events_.push_back(e);
+    }
+
+    EnqueueArgs(CommandQueue &queue, Event e, NDRange offset, NDRange global, NDRange local) : 
+      queue_(queue),
+      offset_(offset), 
+      global_(global),
+      local_(local)
+    {
+        events_.push_back(e);
+    }
+
+    EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS<Event> &events, NDRange global) : 
+      queue_(queue),
+      offset_(NullRange), 
+      global_(global),
+      local_(NullRange),
+      events_(events)
+    {
+
+    }
+
+    EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS<Event> &events, NDRange global, NDRange local) : 
+      queue_(queue),
+      offset_(NullRange), 
+      global_(global),
+      local_(local),
+      events_(events)
+    {
+
+    }
+
+    EnqueueArgs(CommandQueue &queue, const VECTOR_CLASS<Event> &events, NDRange offset, NDRange global, NDRange local) : 
+      queue_(queue),
+      offset_(offset), 
+      global_(global),
+      local_(local),
+      events_(events)
+    {
+
+    }
+};
+
+namespace detail {
+
+class NullType {};
+
+template<int index, typename T0>
+struct SetArg
+{
+    static void set (Kernel kernel, T0 arg)
+    {
+        kernel.setArg(index, arg);
+    }
+};  
+
+template<int index>
+struct SetArg<index, NullType>
+{
+    static void set (Kernel, NullType)
+    { 
+    }
+};
+
+template <
+   typename T0,   typename T1,   typename T2,   typename T3,
+   typename T4,   typename T5,   typename T6,   typename T7,
+   typename T8,   typename T9,   typename T10,   typename T11,
+   typename T12,   typename T13,   typename T14,   typename T15,
+   typename T16,   typename T17,   typename T18,   typename T19,
+   typename T20,   typename T21,   typename T22,   typename T23,
+   typename T24,   typename T25,   typename T26,   typename T27,
+   typename T28,   typename T29,   typename T30,   typename T31
+>
+class KernelFunctorGlobal
+{
+private:
+    Kernel kernel_;
+
+public:
+   KernelFunctorGlobal(
+        Kernel kernel) :
+            kernel_(kernel)
+    {}
+
+   KernelFunctorGlobal(
+        const Program& program,
+        const STRING_CLASS name,
+        cl_int * err = NULL) :
+            kernel_(program, name.c_str(), err)
+    {}
+
+    Event operator() (
+        const EnqueueArgs& args,
+        T0 t0,
+        T1 t1 = NullType(),
+        T2 t2 = NullType(),
+        T3 t3 = NullType(),
+        T4 t4 = NullType(),
+        T5 t5 = NullType(),
+        T6 t6 = NullType(),
+        T7 t7 = NullType(),
+        T8 t8 = NullType(),
+        T9 t9 = NullType(),
+        T10 t10 = NullType(),
+        T11 t11 = NullType(),
+        T12 t12 = NullType(),
+        T13 t13 = NullType(),
+        T14 t14 = NullType(),
+        T15 t15 = NullType(),
+        T16 t16 = NullType(),
+        T17 t17 = NullType(),
+        T18 t18 = NullType(),
+        T19 t19 = NullType(),
+        T20 t20 = NullType(),
+        T21 t21 = NullType(),
+        T22 t22 = NullType(),
+        T23 t23 = NullType(),
+        T24 t24 = NullType(),
+        T25 t25 = NullType(),
+        T26 t26 = NullType(),
+        T27 t27 = NullType(),
+        T28 t28 = NullType(),
+        T29 t29 = NullType(),
+        T30 t30 = NullType(),
+        T31 t31 = NullType()
+        )
+    {
+        Event event;
+        SetArg<0, T0>::set(kernel_, t0);
+        SetArg<1, T1>::set(kernel_, t1);
+        SetArg<2, T2>::set(kernel_, t2);
+        SetArg<3, T3>::set(kernel_, t3);
+        SetArg<4, T4>::set(kernel_, t4);
+        SetArg<5, T5>::set(kernel_, t5);
+        SetArg<6, T6>::set(kernel_, t6);
+        SetArg<7, T7>::set(kernel_, t7);
+        SetArg<8, T8>::set(kernel_, t8);
+        SetArg<9, T9>::set(kernel_, t9);
+        SetArg<10, T10>::set(kernel_, t10);
+        SetArg<11, T11>::set(kernel_, t11);
+        SetArg<12, T12>::set(kernel_, t12);
+        SetArg<13, T13>::set(kernel_, t13);
+        SetArg<14, T14>::set(kernel_, t14);
+        SetArg<15, T15>::set(kernel_, t15);
+        SetArg<16, T16>::set(kernel_, t16);
+        SetArg<17, T17>::set(kernel_, t17);
+        SetArg<18, T18>::set(kernel_, t18);
+        SetArg<19, T19>::set(kernel_, t19);
+        SetArg<20, T20>::set(kernel_, t20);
+        SetArg<21, T21>::set(kernel_, t21);
+        SetArg<22, T22>::set(kernel_, t22);
+        SetArg<23, T23>::set(kernel_, t23);
+        SetArg<24, T24>::set(kernel_, t24);
+        SetArg<25, T25>::set(kernel_, t25);
+        SetArg<26, T26>::set(kernel_, t26);
+        SetArg<27, T27>::set(kernel_, t27);
+        SetArg<28, T28>::set(kernel_, t28);
+        SetArg<29, T29>::set(kernel_, t29);
+        SetArg<30, T30>::set(kernel_, t30);
+        SetArg<31, T31>::set(kernel_, t31);
+        
+        args.queue_.enqueueNDRangeKernel(
+            kernel_,
+            args.offset_,
+            args.global_,
+            args.local_,
+            &args.events_,
+            &event);
+        
+        return event;
+    }
+
+};
+
+//------------------------------------------------------------------------------------------------------
+
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15,
+	typename T16,
+	typename T17,
+	typename T18,
+	typename T19,
+	typename T20,
+	typename T21,
+	typename T22,
+	typename T23,
+	typename T24,
+	typename T25,
+	typename T26,
+	typename T27,
+	typename T28,
+	typename T29,
+	typename T30,
+	typename T31>
+struct functionImplementation_
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		T24,
+		T25,
+		T26,
+		T27,
+		T28,
+		T29,
+		T30,
+		T31> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 32))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		T24,
+		T25,
+		T26,
+		T27,
+		T28,
+		T29,
+		T30,
+		T31);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15,
+		T16 arg16,
+		T17 arg17,
+		T18 arg18,
+		T19 arg19,
+		T20 arg20,
+		T21 arg21,
+		T22 arg22,
+		T23 arg23,
+		T24 arg24,
+		T25 arg25,
+		T26 arg26,
+		T27 arg27,
+		T28 arg28,
+		T29 arg29,
+		T30 arg30,
+		T31 arg31)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15,
+			arg16,
+			arg17,
+			arg18,
+			arg19,
+			arg20,
+			arg21,
+			arg22,
+			arg23,
+			arg24,
+			arg25,
+			arg26,
+			arg27,
+			arg28,
+			arg29,
+			arg30,
+			arg31);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15,
+	typename T16,
+	typename T17,
+	typename T18,
+	typename T19,
+	typename T20,
+	typename T21,
+	typename T22,
+	typename T23,
+	typename T24,
+	typename T25,
+	typename T26,
+	typename T27,
+	typename T28,
+	typename T29,
+	typename T30>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	T15,
+	T16,
+	T17,
+	T18,
+	T19,
+	T20,
+	T21,
+	T22,
+	T23,
+	T24,
+	T25,
+	T26,
+	T27,
+	T28,
+	T29,
+	T30,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		T24,
+		T25,
+		T26,
+		T27,
+		T28,
+		T29,
+		T30,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 31))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		T24,
+		T25,
+		T26,
+		T27,
+		T28,
+		T29,
+		T30);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15,
+		T16 arg16,
+		T17 arg17,
+		T18 arg18,
+		T19 arg19,
+		T20 arg20,
+		T21 arg21,
+		T22 arg22,
+		T23 arg23,
+		T24 arg24,
+		T25 arg25,
+		T26 arg26,
+		T27 arg27,
+		T28 arg28,
+		T29 arg29,
+		T30 arg30)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15,
+			arg16,
+			arg17,
+			arg18,
+			arg19,
+			arg20,
+			arg21,
+			arg22,
+			arg23,
+			arg24,
+			arg25,
+			arg26,
+			arg27,
+			arg28,
+			arg29,
+			arg30);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15,
+	typename T16,
+	typename T17,
+	typename T18,
+	typename T19,
+	typename T20,
+	typename T21,
+	typename T22,
+	typename T23,
+	typename T24,
+	typename T25,
+	typename T26,
+	typename T27,
+	typename T28,
+	typename T29>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	T15,
+	T16,
+	T17,
+	T18,
+	T19,
+	T20,
+	T21,
+	T22,
+	T23,
+	T24,
+	T25,
+	T26,
+	T27,
+	T28,
+	T29,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		T24,
+		T25,
+		T26,
+		T27,
+		T28,
+		T29,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 30))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		T24,
+		T25,
+		T26,
+		T27,
+		T28,
+		T29);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15,
+		T16 arg16,
+		T17 arg17,
+		T18 arg18,
+		T19 arg19,
+		T20 arg20,
+		T21 arg21,
+		T22 arg22,
+		T23 arg23,
+		T24 arg24,
+		T25 arg25,
+		T26 arg26,
+		T27 arg27,
+		T28 arg28,
+		T29 arg29)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15,
+			arg16,
+			arg17,
+			arg18,
+			arg19,
+			arg20,
+			arg21,
+			arg22,
+			arg23,
+			arg24,
+			arg25,
+			arg26,
+			arg27,
+			arg28,
+			arg29);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15,
+	typename T16,
+	typename T17,
+	typename T18,
+	typename T19,
+	typename T20,
+	typename T21,
+	typename T22,
+	typename T23,
+	typename T24,
+	typename T25,
+	typename T26,
+	typename T27,
+	typename T28>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	T15,
+	T16,
+	T17,
+	T18,
+	T19,
+	T20,
+	T21,
+	T22,
+	T23,
+	T24,
+	T25,
+	T26,
+	T27,
+	T28,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		T24,
+		T25,
+		T26,
+		T27,
+		T28,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 29))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		T24,
+		T25,
+		T26,
+		T27,
+		T28);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15,
+		T16 arg16,
+		T17 arg17,
+		T18 arg18,
+		T19 arg19,
+		T20 arg20,
+		T21 arg21,
+		T22 arg22,
+		T23 arg23,
+		T24 arg24,
+		T25 arg25,
+		T26 arg26,
+		T27 arg27,
+		T28 arg28)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15,
+			arg16,
+			arg17,
+			arg18,
+			arg19,
+			arg20,
+			arg21,
+			arg22,
+			arg23,
+			arg24,
+			arg25,
+			arg26,
+			arg27,
+			arg28);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15,
+	typename T16,
+	typename T17,
+	typename T18,
+	typename T19,
+	typename T20,
+	typename T21,
+	typename T22,
+	typename T23,
+	typename T24,
+	typename T25,
+	typename T26,
+	typename T27>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	T15,
+	T16,
+	T17,
+	T18,
+	T19,
+	T20,
+	T21,
+	T22,
+	T23,
+	T24,
+	T25,
+	T26,
+	T27,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		T24,
+		T25,
+		T26,
+		T27,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 28))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		T24,
+		T25,
+		T26,
+		T27);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15,
+		T16 arg16,
+		T17 arg17,
+		T18 arg18,
+		T19 arg19,
+		T20 arg20,
+		T21 arg21,
+		T22 arg22,
+		T23 arg23,
+		T24 arg24,
+		T25 arg25,
+		T26 arg26,
+		T27 arg27)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15,
+			arg16,
+			arg17,
+			arg18,
+			arg19,
+			arg20,
+			arg21,
+			arg22,
+			arg23,
+			arg24,
+			arg25,
+			arg26,
+			arg27);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15,
+	typename T16,
+	typename T17,
+	typename T18,
+	typename T19,
+	typename T20,
+	typename T21,
+	typename T22,
+	typename T23,
+	typename T24,
+	typename T25,
+	typename T26>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	T15,
+	T16,
+	T17,
+	T18,
+	T19,
+	T20,
+	T21,
+	T22,
+	T23,
+	T24,
+	T25,
+	T26,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		T24,
+		T25,
+		T26,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 27))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		T24,
+		T25,
+		T26);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15,
+		T16 arg16,
+		T17 arg17,
+		T18 arg18,
+		T19 arg19,
+		T20 arg20,
+		T21 arg21,
+		T22 arg22,
+		T23 arg23,
+		T24 arg24,
+		T25 arg25,
+		T26 arg26)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15,
+			arg16,
+			arg17,
+			arg18,
+			arg19,
+			arg20,
+			arg21,
+			arg22,
+			arg23,
+			arg24,
+			arg25,
+			arg26);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15,
+	typename T16,
+	typename T17,
+	typename T18,
+	typename T19,
+	typename T20,
+	typename T21,
+	typename T22,
+	typename T23,
+	typename T24,
+	typename T25>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	T15,
+	T16,
+	T17,
+	T18,
+	T19,
+	T20,
+	T21,
+	T22,
+	T23,
+	T24,
+	T25,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		T24,
+		T25,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 26))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		T24,
+		T25);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15,
+		T16 arg16,
+		T17 arg17,
+		T18 arg18,
+		T19 arg19,
+		T20 arg20,
+		T21 arg21,
+		T22 arg22,
+		T23 arg23,
+		T24 arg24,
+		T25 arg25)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15,
+			arg16,
+			arg17,
+			arg18,
+			arg19,
+			arg20,
+			arg21,
+			arg22,
+			arg23,
+			arg24,
+			arg25);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15,
+	typename T16,
+	typename T17,
+	typename T18,
+	typename T19,
+	typename T20,
+	typename T21,
+	typename T22,
+	typename T23,
+	typename T24>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	T15,
+	T16,
+	T17,
+	T18,
+	T19,
+	T20,
+	T21,
+	T22,
+	T23,
+	T24,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		T24,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 25))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		T24);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15,
+		T16 arg16,
+		T17 arg17,
+		T18 arg18,
+		T19 arg19,
+		T20 arg20,
+		T21 arg21,
+		T22 arg22,
+		T23 arg23,
+		T24 arg24)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15,
+			arg16,
+			arg17,
+			arg18,
+			arg19,
+			arg20,
+			arg21,
+			arg22,
+			arg23,
+			arg24);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15,
+	typename T16,
+	typename T17,
+	typename T18,
+	typename T19,
+	typename T20,
+	typename T21,
+	typename T22,
+	typename T23>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	T15,
+	T16,
+	T17,
+	T18,
+	T19,
+	T20,
+	T21,
+	T22,
+	T23,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 24))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		T23);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15,
+		T16 arg16,
+		T17 arg17,
+		T18 arg18,
+		T19 arg19,
+		T20 arg20,
+		T21 arg21,
+		T22 arg22,
+		T23 arg23)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15,
+			arg16,
+			arg17,
+			arg18,
+			arg19,
+			arg20,
+			arg21,
+			arg22,
+			arg23);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15,
+	typename T16,
+	typename T17,
+	typename T18,
+	typename T19,
+	typename T20,
+	typename T21,
+	typename T22>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	T15,
+	T16,
+	T17,
+	T18,
+	T19,
+	T20,
+	T21,
+	T22,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 23))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		T22);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15,
+		T16 arg16,
+		T17 arg17,
+		T18 arg18,
+		T19 arg19,
+		T20 arg20,
+		T21 arg21,
+		T22 arg22)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15,
+			arg16,
+			arg17,
+			arg18,
+			arg19,
+			arg20,
+			arg21,
+			arg22);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15,
+	typename T16,
+	typename T17,
+	typename T18,
+	typename T19,
+	typename T20,
+	typename T21>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	T15,
+	T16,
+	T17,
+	T18,
+	T19,
+	T20,
+	T21,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 22))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		T21);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15,
+		T16 arg16,
+		T17 arg17,
+		T18 arg18,
+		T19 arg19,
+		T20 arg20,
+		T21 arg21)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15,
+			arg16,
+			arg17,
+			arg18,
+			arg19,
+			arg20,
+			arg21);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15,
+	typename T16,
+	typename T17,
+	typename T18,
+	typename T19,
+	typename T20>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	T15,
+	T16,
+	T17,
+	T18,
+	T19,
+	T20,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 21))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		T20);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15,
+		T16 arg16,
+		T17 arg17,
+		T18 arg18,
+		T19 arg19,
+		T20 arg20)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15,
+			arg16,
+			arg17,
+			arg18,
+			arg19,
+			arg20);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15,
+	typename T16,
+	typename T17,
+	typename T18,
+	typename T19>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	T15,
+	T16,
+	T17,
+	T18,
+	T19,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 20))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		T19);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15,
+		T16 arg16,
+		T17 arg17,
+		T18 arg18,
+		T19 arg19)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15,
+			arg16,
+			arg17,
+			arg18,
+			arg19);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15,
+	typename T16,
+	typename T17,
+	typename T18>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	T15,
+	T16,
+	T17,
+	T18,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 19))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		T18);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15,
+		T16 arg16,
+		T17 arg17,
+		T18 arg18)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15,
+			arg16,
+			arg17,
+			arg18);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15,
+	typename T16,
+	typename T17>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	T15,
+	T16,
+	T17,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 18))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		T17);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15,
+		T16 arg16,
+		T17 arg17)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15,
+			arg16,
+			arg17);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15,
+	typename T16>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	T15,
+	T16,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 17))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		T16);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15,
+		T16 arg16)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15,
+			arg16);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14,
+	typename T15>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	T15,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 16))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		T15);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14,
+		T15 arg15)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14,
+			arg15);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13,
+	typename T14>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	T14,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 15))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		T14);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13,
+		T14 arg14)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13,
+			arg14);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12,
+	typename T13>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	T13,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 14))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		T13);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12,
+		T13 arg13)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12,
+			arg13);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11,
+	typename T12>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	T12,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 13))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		T12);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11,
+		T12 arg12)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11,
+			arg12);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10,
+	typename T11>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	T11,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 12))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		T11);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10,
+		T11 arg11)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10,
+			arg11);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9,
+	typename T10>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	T10,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 11))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		T10);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9,
+		T10 arg10)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9,
+			arg10);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8,
+	typename T9>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	T9,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 10))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		T9);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8,
+		T9 arg9)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8,
+			arg9);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7,
+	typename T8>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	T8,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 9))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		T8);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7,
+		T8 arg8)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7,
+			arg8);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6,
+	typename T7>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	T7,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 8))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		T7);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6,
+		T7 arg7)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6,
+			arg7);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5,
+	typename T6>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	T6,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 7))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		T6);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5,
+		T6 arg6)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5,
+			arg6);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4,
+	typename T5>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	T5,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 6))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		T5);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4,
+		T5 arg5)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4,
+			arg5);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3,
+	typename T4>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	T4,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		T4,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 5))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3,
+		T4);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3,
+		T4 arg4)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3,
+			arg4);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2,
+	typename T3>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	T3,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		T3,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 4))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2,
+		T3);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2,
+		T3 arg3)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2,
+			arg3);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1,
+	typename T2>
+struct functionImplementation_
+<	T0,
+	T1,
+	T2,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		T2,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 3))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1,
+		T2);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1,
+		T2 arg2)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1,
+			arg2);
+	}
+
+
+};
+
+template<
+	typename T0,
+	typename T1>
+struct functionImplementation_
+<	T0,
+	T1,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		T1,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 2))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0,
+		T1);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0,
+		T1 arg1)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0,
+			arg1);
+	}
+
+
+};
+
+template<
+	typename T0>
+struct functionImplementation_
+<	T0,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType,
+	NullType>
+{
+	typedef detail::KernelFunctorGlobal<
+		T0,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType,
+		NullType> FunctorType;
+
+    FunctorType functor_;
+
+    functionImplementation_(const FunctorType &functor) :
+        functor_(functor)
+    {
+    
+        #if (defined(_WIN32) && defined(_VARIADIC_MAX) && (_VARIADIC_MAX < 1))
+        // Fail variadic expansion for dev11
+        static_assert(0, "Visual Studio has a hard limit of argument count for a std::function expansion. Please define _VARIADIC_MAX to be 10. If you need more arguments than that VC12 and below cannot support it.");
+        #endif
+            
+    }
+
+	//! \brief Return type of the functor
+	typedef Event result_type;
+
+	//! \brief Function signature of kernel functor with no event dependency.
+	typedef Event type_(
+		const EnqueueArgs&,
+		T0);
+
+	Event operator()(
+		const EnqueueArgs& enqueueArgs,
+		T0 arg0)
+	{
+		return functor_(
+			enqueueArgs,
+			arg0);
+	}
+
+
+};
+
+
+
+
+
+} // namespace detail
+
+//----------------------------------------------------------------------------------------------
+
+template <
+   typename T0,   typename T1 = detail::NullType,   typename T2 = detail::NullType,
+   typename T3 = detail::NullType,   typename T4 = detail::NullType,
+   typename T5 = detail::NullType,   typename T6 = detail::NullType,
+   typename T7 = detail::NullType,   typename T8 = detail::NullType,
+   typename T9 = detail::NullType,   typename T10 = detail::NullType,
+   typename T11 = detail::NullType,   typename T12 = detail::NullType,
+   typename T13 = detail::NullType,   typename T14 = detail::NullType,
+   typename T15 = detail::NullType,   typename T16 = detail::NullType,
+   typename T17 = detail::NullType,   typename T18 = detail::NullType,
+   typename T19 = detail::NullType,   typename T20 = detail::NullType,
+   typename T21 = detail::NullType,   typename T22 = detail::NullType,
+   typename T23 = detail::NullType,   typename T24 = detail::NullType,
+   typename T25 = detail::NullType,   typename T26 = detail::NullType,
+   typename T27 = detail::NullType,   typename T28 = detail::NullType,
+   typename T29 = detail::NullType,   typename T30 = detail::NullType,
+   typename T31 = detail::NullType
+>
+struct make_kernel :
+    public detail::functionImplementation_<
+               T0,   T1,   T2,   T3,
+               T4,   T5,   T6,   T7,
+               T8,   T9,   T10,   T11,
+               T12,   T13,   T14,   T15,
+               T16,   T17,   T18,   T19,
+               T20,   T21,   T22,   T23,
+               T24,   T25,   T26,   T27,
+               T28,   T29,   T30,   T31
+    >
+{
+public:
+	typedef detail::KernelFunctorGlobal<             
+		       T0,   T1,   T2,   T3,
+               T4,   T5,   T6,   T7,
+               T8,   T9,   T10,   T11,
+               T12,   T13,   T14,   T15,
+               T16,   T17,   T18,   T19,
+               T20,   T21,   T22,   T23,
+               T24,   T25,   T26,   T27,
+               T28,   T29,   T30,   T31
+    > FunctorType;
+
+    make_kernel(
+        const Program& program,
+        const STRING_CLASS name,
+        cl_int * err = NULL) :
+           detail::functionImplementation_<
+                    T0,   T1,   T2,   T3,
+                       T4,   T5,   T6,   T7,
+                       T8,   T9,   T10,   T11,
+                       T12,   T13,   T14,   T15,
+                       T16,   T17,   T18,   T19,
+                       T20,   T21,   T22,   T23,
+                       T24,   T25,   T26,   T27,
+                       T28,   T29,   T30,   T31
+           >(
+            FunctorType(program, name, err)) 
+    {}
+
+    make_kernel(
+        const Kernel kernel) :
+           detail::functionImplementation_<
+                    T0,   T1,   T2,   T3,
+                       T4,   T5,   T6,   T7,
+                       T8,   T9,   T10,   T11,
+                       T12,   T13,   T14,   T15,
+                       T16,   T17,   T18,   T19,
+                       T20,   T21,   T22,   T23,
+                       T24,   T25,   T26,   T27,
+                       T28,   T29,   T30,   T31
+           >(
+            FunctorType(kernel)) 
+    {}    
+};
+
+
+//----------------------------------------------------------------------------------------------------------------------
+
+#undef __ERR_STR
+#if !defined(__CL_USER_OVERRIDE_ERROR_STRINGS)
+#undef __GET_DEVICE_INFO_ERR
+#undef __GET_PLATFORM_INFO_ERR
+#undef __GET_DEVICE_IDS_ERR
+#undef __GET_CONTEXT_INFO_ERR
+#undef __GET_EVENT_INFO_ERR
+#undef __GET_EVENT_PROFILE_INFO_ERR
+#undef __GET_MEM_OBJECT_INFO_ERR
+#undef __GET_IMAGE_INFO_ERR
+#undef __GET_SAMPLER_INFO_ERR
+#undef __GET_KERNEL_INFO_ERR
+#undef __GET_KERNEL_ARG_INFO_ERR
+#undef __GET_KERNEL_WORK_GROUP_INFO_ERR
+#undef __GET_PROGRAM_INFO_ERR
+#undef __GET_PROGRAM_BUILD_INFO_ERR
+#undef __GET_COMMAND_QUEUE_INFO_ERR
+
+#undef __CREATE_CONTEXT_ERR
+#undef __CREATE_CONTEXT_FROM_TYPE_ERR
+#undef __GET_SUPPORTED_IMAGE_FORMATS_ERR
+
+#undef __CREATE_BUFFER_ERR
+#undef __CREATE_SUBBUFFER_ERR
+#undef __CREATE_IMAGE2D_ERR
+#undef __CREATE_IMAGE3D_ERR
+#undef __CREATE_SAMPLER_ERR
+#undef __SET_MEM_OBJECT_DESTRUCTOR_CALLBACK_ERR
+
+#undef __CREATE_USER_EVENT_ERR
+#undef __SET_USER_EVENT_STATUS_ERR
+#undef __SET_EVENT_CALLBACK_ERR
+#undef __SET_PRINTF_CALLBACK_ERR
+
+#undef __WAIT_FOR_EVENTS_ERR
+
+#undef __CREATE_KERNEL_ERR
+#undef __SET_KERNEL_ARGS_ERR
+#undef __CREATE_PROGRAM_WITH_SOURCE_ERR
+#undef __CREATE_PROGRAM_WITH_BINARY_ERR
+#undef __CREATE_PROGRAM_WITH_BUILT_IN_KERNELS_ERR
+#undef __BUILD_PROGRAM_ERR
+#undef __CREATE_KERNELS_IN_PROGRAM_ERR
+
+#undef __CREATE_COMMAND_QUEUE_ERR
+#undef __SET_COMMAND_QUEUE_PROPERTY_ERR
+#undef __ENQUEUE_READ_BUFFER_ERR
+#undef __ENQUEUE_WRITE_BUFFER_ERR
+#undef __ENQUEUE_READ_BUFFER_RECT_ERR
+#undef __ENQUEUE_WRITE_BUFFER_RECT_ERR
+#undef __ENQEUE_COPY_BUFFER_ERR
+#undef __ENQEUE_COPY_BUFFER_RECT_ERR
+#undef __ENQUEUE_READ_IMAGE_ERR
+#undef __ENQUEUE_WRITE_IMAGE_ERR
+#undef __ENQUEUE_COPY_IMAGE_ERR
+#undef __ENQUEUE_COPY_IMAGE_TO_BUFFER_ERR
+#undef __ENQUEUE_COPY_BUFFER_TO_IMAGE_ERR
+#undef __ENQUEUE_MAP_BUFFER_ERR
+#undef __ENQUEUE_MAP_IMAGE_ERR
+#undef __ENQUEUE_UNMAP_MEM_OBJECT_ERR
+#undef __ENQUEUE_NDRANGE_KERNEL_ERR
+#undef __ENQUEUE_TASK_ERR
+#undef __ENQUEUE_NATIVE_KERNEL
+
+#undef __CL_EXPLICIT_CONSTRUCTORS
+
+#undef __UNLOAD_COMPILER_ERR
+#endif //__CL_USER_OVERRIDE_ERROR_STRINGS
+
+#undef __CL_FUNCTION_TYPE
+
+// Extensions
+/**
+ * Deprecated APIs for 1.2
+ */
+#if defined(CL_VERSION_1_1)
+#undef __INIT_CL_EXT_FCN_PTR
+#endif // #if defined(CL_VERSION_1_1)
+#undef __CREATE_SUB_DEVICES
+
+#if defined(USE_CL_DEVICE_FISSION)
+#undef __PARAM_NAME_DEVICE_FISSION
+#endif // USE_CL_DEVICE_FISSION
+
+#undef __DEFAULT_NOT_INITIALIZED 
+#undef __DEFAULT_BEING_INITIALIZED 
+#undef __DEFAULT_INITIALIZED
+
+} // namespace cl
+
+#ifdef _WIN32
+#pragma pop_macro("max")
+#endif // _WIN32
+
+#endif // CL_HPP_
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp
new file mode 100644
index 0000000000..11b29333ce
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.cpp
@@ -0,0 +1,754 @@
+/*
+  This file is part of c-ethash.
+
+  c-ethash is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  c-ethash is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file ethash_cl_miner.cpp
+* @author Tim Hughes <tim@twistedfury.com>
+* @date 2015
+*/
+
+
+#define _CRT_SECURE_NO_WARNINGS
+
+#include <assert.h>
+#include <queue>
+#include "ethash_cl_miner.h"
+#include <libethash/util.h>
+
+#undef min
+#undef max
+
+#define HASH_BYTES 32
+
+static char const ethash_inner_code[] = R"(
+
+// author Tim Hughes <tim@twistedfury.com>
+// Tested on Radeon HD 7850
+// Hashrate: 15940347 hashes/s
+// Bandwidth: 124533 MB/s
+// search kernel should fit in <= 84 VGPRS (3 wavefronts)
+
+#define THREADS_PER_HASH (128 / 16)
+#define HASHES_PER_LOOP (GROUP_SIZE / THREADS_PER_HASH)
+
+#define FNV_PRIME	0x01000193
+
+__constant uint2 const Keccak_f1600_RC[24] = {
+	(uint2)(0x00000001, 0x00000000),
+	(uint2)(0x00008082, 0x00000000),
+	(uint2)(0x0000808a, 0x80000000),
+	(uint2)(0x80008000, 0x80000000),
+	(uint2)(0x0000808b, 0x00000000),
+	(uint2)(0x80000001, 0x00000000),
+	(uint2)(0x80008081, 0x80000000),
+	(uint2)(0x00008009, 0x80000000),
+	(uint2)(0x0000008a, 0x00000000),
+	(uint2)(0x00000088, 0x00000000),
+	(uint2)(0x80008009, 0x00000000),
+	(uint2)(0x8000000a, 0x00000000),
+	(uint2)(0x8000808b, 0x00000000),
+	(uint2)(0x0000008b, 0x80000000),
+	(uint2)(0x00008089, 0x80000000),
+	(uint2)(0x00008003, 0x80000000),
+	(uint2)(0x00008002, 0x80000000),
+	(uint2)(0x00000080, 0x80000000),
+	(uint2)(0x0000800a, 0x00000000),
+	(uint2)(0x8000000a, 0x80000000),
+	(uint2)(0x80008081, 0x80000000),
+	(uint2)(0x00008080, 0x80000000),
+	(uint2)(0x80000001, 0x00000000),
+	(uint2)(0x80008008, 0x80000000),
+};
+
+void keccak_f1600_round(uint2* a, uint r, uint out_size)
+{
+   #if !__ENDIAN_LITTLE__
+	for (uint i = 0; i != 25; ++i)
+		a[i] = a[i].yx;
+   #endif
+
+	uint2 b[25];
+	uint2 t;
+
+	// Theta
+	b[0] = a[0] ^ a[5] ^ a[10] ^ a[15] ^ a[20];
+	b[1] = a[1] ^ a[6] ^ a[11] ^ a[16] ^ a[21];
+	b[2] = a[2] ^ a[7] ^ a[12] ^ a[17] ^ a[22];
+	b[3] = a[3] ^ a[8] ^ a[13] ^ a[18] ^ a[23];
+	b[4] = a[4] ^ a[9] ^ a[14] ^ a[19] ^ a[24];
+	t = b[4] ^ (uint2)(b[1].x << 1 | b[1].y >> 31, b[1].y << 1 | b[1].x >> 31);
+	a[0] ^= t;
+	a[5] ^= t;
+	a[10] ^= t;
+	a[15] ^= t;
+	a[20] ^= t;
+	t = b[0] ^ (uint2)(b[2].x << 1 | b[2].y >> 31, b[2].y << 1 | b[2].x >> 31);
+	a[1] ^= t;
+	a[6] ^= t;
+	a[11] ^= t;
+	a[16] ^= t;
+	a[21] ^= t;
+	t = b[1] ^ (uint2)(b[3].x << 1 | b[3].y >> 31, b[3].y << 1 | b[3].x >> 31);
+	a[2] ^= t;
+	a[7] ^= t;
+	a[12] ^= t;
+	a[17] ^= t;
+	a[22] ^= t;
+	t = b[2] ^ (uint2)(b[4].x << 1 | b[4].y >> 31, b[4].y << 1 | b[4].x >> 31);
+	a[3] ^= t;
+	a[8] ^= t;
+	a[13] ^= t;
+	a[18] ^= t;
+	a[23] ^= t;
+	t = b[3] ^ (uint2)(b[0].x << 1 | b[0].y >> 31, b[0].y << 1 | b[0].x >> 31);
+	a[4] ^= t;
+	a[9] ^= t;
+	a[14] ^= t;
+	a[19] ^= t;
+	a[24] ^= t;
+
+	// Rho Pi
+	b[0] = a[0];
+	b[10] = (uint2)(a[1].x << 1 | a[1].y >> 31, a[1].y << 1 | a[1].x >> 31);
+	b[7] = (uint2)(a[10].x << 3 | a[10].y >> 29, a[10].y << 3 | a[10].x >> 29);
+	b[11] = (uint2)(a[7].x << 6 | a[7].y >> 26, a[7].y << 6 | a[7].x >> 26);
+	b[17] = (uint2)(a[11].x << 10 | a[11].y >> 22, a[11].y << 10 | a[11].x >> 22);
+	b[18] = (uint2)(a[17].x << 15 | a[17].y >> 17, a[17].y << 15 | a[17].x >> 17);
+	b[3] = (uint2)(a[18].x << 21 | a[18].y >> 11, a[18].y << 21 | a[18].x >> 11);
+	b[5] = (uint2)(a[3].x << 28 | a[3].y >> 4, a[3].y << 28 | a[3].x >> 4);
+	b[16] = (uint2)(a[5].y << 4 | a[5].x >> 28, a[5].x << 4 | a[5].y >> 28);
+	b[8] = (uint2)(a[16].y << 13 | a[16].x >> 19, a[16].x << 13 | a[16].y >> 19);
+	b[21] = (uint2)(a[8].y << 23 | a[8].x >> 9, a[8].x << 23 | a[8].y >> 9);
+	b[24] = (uint2)(a[21].x << 2 | a[21].y >> 30, a[21].y << 2 | a[21].x >> 30);
+	b[4] = (uint2)(a[24].x << 14 | a[24].y >> 18, a[24].y << 14 | a[24].x >> 18);
+	b[15] = (uint2)(a[4].x << 27 | a[4].y >> 5, a[4].y << 27 | a[4].x >> 5);
+	b[23] = (uint2)(a[15].y << 9 | a[15].x >> 23, a[15].x << 9 | a[15].y >> 23);
+	b[19] = (uint2)(a[23].y << 24 | a[23].x >> 8, a[23].x << 24 | a[23].y >> 8);
+	b[13] = (uint2)(a[19].x << 8 | a[19].y >> 24, a[19].y << 8 | a[19].x >> 24);
+	b[12] = (uint2)(a[13].x << 25 | a[13].y >> 7, a[13].y << 25 | a[13].x >> 7);
+	b[2] = (uint2)(a[12].y << 11 | a[12].x >> 21, a[12].x << 11 | a[12].y >> 21);
+	b[20] = (uint2)(a[2].y << 30 | a[2].x >> 2, a[2].x << 30 | a[2].y >> 2);
+	b[14] = (uint2)(a[20].x << 18 | a[20].y >> 14, a[20].y << 18 | a[20].x >> 14);
+	b[22] = (uint2)(a[14].y << 7 | a[14].x >> 25, a[14].x << 7 | a[14].y >> 25);
+	b[9] = (uint2)(a[22].y << 29 | a[22].x >> 3, a[22].x << 29 | a[22].y >> 3);
+	b[6] = (uint2)(a[9].x << 20 | a[9].y >> 12, a[9].y << 20 | a[9].x >> 12);
+	b[1] = (uint2)(a[6].y << 12 | a[6].x >> 20, a[6].x << 12 | a[6].y >> 20);
+
+	// Chi
+	a[0] = bitselect(b[0] ^ b[2], b[0], b[1]);
+	a[1] = bitselect(b[1] ^ b[3], b[1], b[2]);
+	a[2] = bitselect(b[2] ^ b[4], b[2], b[3]);
+	a[3] = bitselect(b[3] ^ b[0], b[3], b[4]);
+	if (out_size >= 4)
+	{
+		a[4] = bitselect(b[4] ^ b[1], b[4], b[0]);
+		a[5] = bitselect(b[5] ^ b[7], b[5], b[6]);
+		a[6] = bitselect(b[6] ^ b[8], b[6], b[7]);
+		a[7] = bitselect(b[7] ^ b[9], b[7], b[8]);
+		a[8] = bitselect(b[8] ^ b[5], b[8], b[9]);
+		if (out_size >= 8)
+		{
+			a[9] = bitselect(b[9] ^ b[6], b[9], b[5]);
+			a[10] = bitselect(b[10] ^ b[12], b[10], b[11]);
+			a[11] = bitselect(b[11] ^ b[13], b[11], b[12]);
+			a[12] = bitselect(b[12] ^ b[14], b[12], b[13]);
+			a[13] = bitselect(b[13] ^ b[10], b[13], b[14]);
+			a[14] = bitselect(b[14] ^ b[11], b[14], b[10]);
+			a[15] = bitselect(b[15] ^ b[17], b[15], b[16]);
+			a[16] = bitselect(b[16] ^ b[18], b[16], b[17]);
+			a[17] = bitselect(b[17] ^ b[19], b[17], b[18]);
+			a[18] = bitselect(b[18] ^ b[15], b[18], b[19]);
+			a[19] = bitselect(b[19] ^ b[16], b[19], b[15]);
+			a[20] = bitselect(b[20] ^ b[22], b[20], b[21]);
+			a[21] = bitselect(b[21] ^ b[23], b[21], b[22]);
+			a[22] = bitselect(b[22] ^ b[24], b[22], b[23]);
+			a[23] = bitselect(b[23] ^ b[20], b[23], b[24]);
+			a[24] = bitselect(b[24] ^ b[21], b[24], b[20]);
+		}
+	}
+
+	// Iota
+	a[0] ^= Keccak_f1600_RC[r];
+
+   #if !__ENDIAN_LITTLE__
+	for (uint i = 0; i != 25; ++i)
+		a[i] = a[i].yx;
+   #endif
+}
+
+void keccak_f1600_no_absorb(ulong* a, uint in_size, uint out_size, uint isolate)
+{
+	for (uint i = in_size; i != 25; ++i)
+	{
+		a[i] = 0;
+	}
+#if __ENDIAN_LITTLE__
+	a[in_size] ^= 0x0000000000000001;
+	a[24-out_size*2] ^= 0x8000000000000000;
+#else
+	a[in_size] ^= 0x0100000000000000;
+	a[24-out_size*2] ^= 0x0000000000000080;
+#endif
+
+	// Originally I unrolled the first and last rounds to interface
+	// better with surrounding code, however I haven't done this
+	// without causing the AMD compiler to blow up the VGPR usage.
+	uint r = 0;
+	do
+	{
+		// This dynamic branch stops the AMD compiler unrolling the loop
+		// and additionally saves about 33% of the VGPRs, enough to gain another
+		// wavefront. Ideally we'd get 4 in flight, but 3 is the best I can
+		// massage out of the compiler. It doesn't really seem to matter how
+		// much we try and help the compiler save VGPRs because it seems to throw
+		// that information away, hence the implementation of keccak here
+		// doesn't bother.
+		if (isolate) 
+		{
+			keccak_f1600_round((uint2*)a, r++, 25);
+		}
+	}
+	while (r < 23);
+	
+	// final round optimised for digest size
+	keccak_f1600_round((uint2*)a, r++, out_size);
+}
+
+#define copy(dst, src, count) for (uint i = 0; i != count; ++i) { (dst)[i] = (src)[i]; }
+
+#define countof(x) (sizeof(x) / sizeof(x[0]))
+
+uint fnv(uint x, uint y)
+{
+	return x * FNV_PRIME ^ y;
+}
+
+uint4 fnv4(uint4 x, uint4 y)
+{
+	return x * FNV_PRIME ^ y;
+}
+
+uint fnv_reduce(uint4 v)
+{
+	return fnv(fnv(fnv(v.x, v.y), v.z), v.w);
+}
+
+typedef union
+{
+	ulong ulongs[32 / sizeof(ulong)];
+	uint uints[32 / sizeof(uint)];
+} hash32_t;
+
+typedef union
+{
+	ulong ulongs[64 / sizeof(ulong)];
+	uint4 uint4s[64 / sizeof(uint4)];
+} hash64_t;
+
+typedef union
+{
+	uint uints[128 / sizeof(uint)];
+	uint4 uint4s[128 / sizeof(uint4)];
+} hash128_t;
+
+hash64_t init_hash(__constant hash32_t const* header, ulong nonce, uint isolate)
+{
+	hash64_t init;
+	uint const init_size = countof(init.ulongs);
+	uint const hash_size = countof(header->ulongs);
+	
+	// sha3_512(header .. nonce)
+	ulong state[25];
+	copy(state, header->ulongs, hash_size);
+	state[hash_size] = nonce;
+	keccak_f1600_no_absorb(state, hash_size + 1, init_size, isolate);
+
+	copy(init.ulongs, state, init_size);
+	return init;
+}
+
+uint inner_loop(uint4 init, uint thread_id, __local uint* share, __global hash128_t const* g_dag, uint isolate)
+{
+	uint4 mix = init;
+
+	// share init0
+	if (thread_id == 0)
+		*share = mix.x;
+	barrier(CLK_LOCAL_MEM_FENCE);
+	uint init0 = *share;
+
+	uint a = 0;
+	do
+	{
+		bool update_share = thread_id == (a/4) % THREADS_PER_HASH;
+
+		#pragma unroll
+		for (uint i = 0; i != 4; ++i)
+		{
+			if (update_share)
+			{
+				uint m[4] = { mix.x, mix.y, mix.z, mix.w };
+				*share = fnv(init0 ^ (a+i), m[i]) % DAG_SIZE;
+			}
+			barrier(CLK_LOCAL_MEM_FENCE);
+
+			mix = fnv4(mix, g_dag[*share].uint4s[thread_id]);
+		}
+	}
+	while ((a += 4) != (ACCESSES & isolate));
+
+	return fnv_reduce(mix);
+}
+
+hash32_t final_hash(hash64_t const* init, hash32_t const* mix, uint isolate)
+{
+	ulong state[25];
+
+	hash32_t hash;
+	uint const hash_size = countof(hash.ulongs);
+	uint const init_size = countof(init->ulongs);
+	uint const mix_size = countof(mix->ulongs);
+
+	// keccak_256(keccak_512(header..nonce) .. mix);
+	copy(state, init->ulongs, init_size);
+	copy(state + init_size, mix->ulongs, mix_size);
+	keccak_f1600_no_absorb(state, init_size+mix_size, hash_size, isolate);
+
+	// copy out
+	copy(hash.ulongs, state, hash_size);
+	return hash;
+}
+
+hash32_t compute_hash_simple(
+	__constant hash32_t const* g_header,
+	__global hash128_t const* g_dag,
+	ulong nonce,
+	uint isolate
+	)
+{
+	hash64_t init = init_hash(g_header, nonce, isolate);
+
+	hash128_t mix;
+	for (uint i = 0; i != countof(mix.uint4s); ++i)
+	{
+		mix.uint4s[i] = init.uint4s[i % countof(init.uint4s)];
+	}
+	
+	uint mix_val = mix.uints[0];
+	uint init0 = mix.uints[0];
+	uint a = 0;
+	do
+	{
+		uint pi = fnv(init0 ^ a, mix_val) % DAG_SIZE;
+		uint n = (a+1) % countof(mix.uints);
+
+		#pragma unroll
+		for (uint i = 0; i != countof(mix.uints); ++i)
+		{
+			mix.uints[i] = fnv(mix.uints[i], g_dag[pi].uints[i]);
+			mix_val = i == n ? mix.uints[i] : mix_val;
+		}
+	}
+	while (++a != (ACCESSES & isolate));
+
+	// reduce to output
+	hash32_t fnv_mix;
+	for (uint i = 0; i != countof(fnv_mix.uints); ++i)
+	{
+		fnv_mix.uints[i] = fnv_reduce(mix.uint4s[i]);
+	}
+	
+	return final_hash(&init, &fnv_mix, isolate);
+}
+
+typedef union
+{
+	struct
+	{
+		hash64_t init;
+		uint pad; // avoid lds bank conflicts
+	};
+	hash32_t mix;
+} compute_hash_share;
+
+hash32_t compute_hash(
+	__local compute_hash_share* share,
+	__constant hash32_t const* g_header,
+	__global hash128_t const* g_dag,
+	ulong nonce,
+	uint isolate
+	)
+{
+	uint const gid = get_global_id(0);
+
+	// Compute one init hash per work item.
+	hash64_t init = init_hash(g_header, nonce, isolate);
+
+	// Threads work together in this phase in groups of 8.
+	uint const thread_id = gid % THREADS_PER_HASH;
+	uint const hash_id = (gid % GROUP_SIZE) / THREADS_PER_HASH;
+
+	hash32_t mix;
+	uint i = 0;
+	do
+	{
+		// share init with other threads
+		if (i == thread_id)
+			share[hash_id].init = init;
+		barrier(CLK_LOCAL_MEM_FENCE);
+
+		uint4 thread_init = share[hash_id].init.uint4s[thread_id % (64 / sizeof(uint4))];
+		barrier(CLK_LOCAL_MEM_FENCE);
+
+		uint thread_mix = inner_loop(thread_init, thread_id, share[hash_id].mix.uints, g_dag, isolate);
+
+		share[hash_id].mix.uints[thread_id] = thread_mix;
+		barrier(CLK_LOCAL_MEM_FENCE);
+
+		if (i == thread_id)
+			mix = share[hash_id].mix;
+		barrier(CLK_LOCAL_MEM_FENCE);
+	}
+	while (++i != (THREADS_PER_HASH & isolate));
+
+	return final_hash(&init, &mix, isolate);
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_hash_simple(
+	__global hash32_t* g_hashes,
+	__constant hash32_t const* g_header,
+	__global hash128_t const* g_dag,
+	ulong start_nonce,
+	uint isolate
+	)
+{
+	uint const gid = get_global_id(0);
+	g_hashes[gid] = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_search_simple(
+	__global volatile uint* restrict g_output,
+	__constant hash32_t const* g_header,
+	__global hash128_t const* g_dag,
+	ulong start_nonce,
+	ulong target,
+	uint isolate
+	)
+{
+	uint const gid = get_global_id(0);
+	hash32_t hash = compute_hash_simple(g_header, g_dag, start_nonce + gid, isolate);
+
+	if (hash.ulongs[countof(hash.ulongs)-1] < target)
+	{
+		uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1);
+		g_output[slot] = gid;
+	}
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_hash(
+	__global hash32_t* g_hashes,
+	__constant hash32_t const* g_header,
+	__global hash128_t const* g_dag,
+	ulong start_nonce,
+	uint isolate
+	)
+{
+	__local compute_hash_share share[HASHES_PER_LOOP];
+
+	uint const gid = get_global_id(0);
+	g_hashes[gid] = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate);
+}
+
+__attribute__((reqd_work_group_size(GROUP_SIZE, 1, 1)))
+__kernel void ethash_search(
+	__global volatile uint* restrict g_output,
+	__constant hash32_t const* g_header,
+	__global hash128_t const* g_dag,
+	ulong start_nonce,
+	ulong target,
+	uint isolate
+	)
+{
+	__local compute_hash_share share[HASHES_PER_LOOP];
+
+	uint const gid = get_global_id(0);
+	hash32_t hash = compute_hash(share, g_header, g_dag, start_nonce + gid, isolate);
+
+	if (hash.ulongs[countof(hash.ulongs)-1] < target)
+	{
+		uint slot = min(MAX_OUTPUTS, atomic_inc(&g_output[0]) + 1);
+		g_output[slot] = gid;
+	}
+}
+
+)";
+
+static void add_definition(std::string& source, char const* id, unsigned value)
+{
+	char buf[256];
+	sprintf(buf, "#define %s %uu\n", id, value);
+	source.insert(source.begin(), buf, buf + strlen(buf));
+}
+
+ethash_cl_miner::ethash_cl_miner()
+{
+}
+
+bool ethash_cl_miner::init(ethash_params const& params, const uint8_t seed[32], unsigned workgroup_size)
+{
+	// store params
+	m_params = params;
+
+	// get all platforms
+    std::vector<cl::Platform> platforms;
+    cl::Platform::get(&platforms);
+	if (platforms.empty())
+	{
+		debugf("No OpenCL platforms found.\n");
+		return false;
+	}
+
+	// use default platform
+	debugf("Using platform: %s\n", platforms[0].getInfo<CL_PLATFORM_NAME>().c_str());
+
+    // get GPU device of the default platform
+    std::vector<cl::Device> devices;
+    platforms[0].getDevices(CL_DEVICE_TYPE_ALL, &devices);
+    if (devices.empty())
+	{
+		debugf("No OpenCL devices found.\n");
+		return false;
+	}
+
+	// use default device
+	cl::Device& device = devices[0];
+	debugf("Using device: %s\n", device.getInfo<CL_DEVICE_NAME>().c_str());
+
+	// create context
+	m_context = cl::Context({device});
+	m_queue = cl::CommandQueue(m_context, device);
+
+	// use requested workgroup size, but we require multiple of 8
+	m_workgroup_size = ((workgroup_size + 7) / 8) * 8;
+
+	// patch source code
+	std::string code = ethash_inner_code;
+	add_definition(code, "GROUP_SIZE", m_workgroup_size);
+	add_definition(code, "DAG_SIZE", (unsigned)(params.full_size / MIX_BYTES));
+	add_definition(code, "ACCESSES", ACCESSES);
+	add_definition(code, "MAX_OUTPUTS", c_max_search_results);
+	//debugf("%s", code.c_str());
+
+	// create miner OpenCL program
+	cl::Program::Sources sources;
+	sources.push_back({code.c_str(), code.size()});
+
+	cl::Program program(m_context, sources);
+	try
+	{
+		program.build({device});
+	}
+	catch (cl::Error err)
+	{
+		debugf("%s\n", program.getBuildInfo<CL_PROGRAM_BUILD_LOG>(device).c_str());
+		return false;
+	}
+	m_hash_kernel = cl::Kernel(program, "ethash_hash");
+	m_search_kernel = cl::Kernel(program, "ethash_search");
+
+	// create buffer for dag
+	m_dag = cl::Buffer(m_context, CL_MEM_READ_ONLY, params.full_size);
+	
+	// create buffer for header
+	m_header = cl::Buffer(m_context, CL_MEM_READ_ONLY, 32);
+
+	// compute dag on CPU
+	{
+		void* cache_mem = malloc(params.cache_size + 63);
+		ethash_cache cache;
+		cache.mem = (void*)(((uintptr_t)cache_mem + 63) & ~63);
+		ethash_mkcache(&cache, &params, seed);
+
+		// if this throws then it's because we probably need to subdivide the dag uploads for compatibility
+		void* dag_ptr = m_queue.enqueueMapBuffer(m_dag, true, CL_MAP_WRITE_INVALIDATE_REGION, 0, params.full_size);
+		ethash_compute_full_data(dag_ptr, &params, &cache);
+		m_queue.enqueueUnmapMemObject(m_dag, dag_ptr);
+
+		free(cache_mem);
+	}
+
+	// create mining buffers
+	for (unsigned i = 0; i != c_num_buffers; ++i)
+	{
+		m_hash_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY | CL_MEM_HOST_READ_ONLY, 32*c_hash_batch_size);
+		m_search_buf[i] = cl::Buffer(m_context, CL_MEM_WRITE_ONLY, (c_max_search_results + 1) * sizeof(uint32_t));
+	}
+	return true;
+}
+
+void ethash_cl_miner::hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count)
+{
+	struct pending_batch
+	{
+		unsigned base;
+		unsigned count;
+		unsigned buf;
+	};
+	std::queue<pending_batch> pending;
+	
+	// update header constant buffer
+	m_queue.enqueueWriteBuffer(m_header, true, 0, 32, header);
+
+	/*
+	__kernel void ethash_combined_hash(
+		__global hash32_t* g_hashes,
+		__constant hash32_t const* g_header,
+		__global hash128_t const* g_dag,
+		ulong start_nonce,
+		uint isolate
+		)
+	*/
+	m_hash_kernel.setArg(1, m_header);
+	m_hash_kernel.setArg(2, m_dag);
+	m_hash_kernel.setArg(3, nonce);
+	m_hash_kernel.setArg(4, ~0u); // have to pass this to stop the compile unrolling the loop
+
+	unsigned buf = 0;
+	for (unsigned i = 0; i < count || !pending.empty(); )
+	{
+		// how many this batch
+		if (i < count)
+		{
+			unsigned const this_count = std::min(count - i, c_hash_batch_size);
+			unsigned const batch_count = std::max(this_count, m_workgroup_size);
+
+			// supply output hash buffer to kernel
+			m_hash_kernel.setArg(0, m_hash_buf[buf]);
+
+			// execute it!
+			clock_t start_time = clock();
+			m_queue.enqueueNDRangeKernel(
+				m_hash_kernel,
+				cl::NullRange,
+				cl::NDRange(batch_count),
+				cl::NDRange(m_workgroup_size)
+				);
+			m_queue.flush();
+		
+			pending.push({i, this_count, buf});
+			i += this_count;
+			buf = (buf + 1) % c_num_buffers;
+		}
+
+		// read results
+		if (i == count || pending.size() == c_num_buffers)
+		{
+			pending_batch const& batch = pending.front();
+
+			// could use pinned host pointer instead, but this path isn't that important.
+			uint8_t* hashes = (uint8_t*)m_queue.enqueueMapBuffer(m_hash_buf[batch.buf], true, CL_MAP_READ, 0, batch.count * HASH_BYTES);
+			memcpy(ret + batch.base*HASH_BYTES, hashes, batch.count*HASH_BYTES);
+			m_queue.enqueueUnmapMemObject(m_hash_buf[batch.buf], hashes);
+
+			pending.pop();
+		}
+	}
+}
+
+
+void ethash_cl_miner::search(uint8_t const* header, uint64_t target, search_hook& hook)
+{
+	struct pending_batch
+	{
+		uint64_t start_nonce;
+		unsigned buf;
+	};
+	std::queue<pending_batch> pending;
+
+	static uint32_t const c_zero = 0;
+
+	// update header constant buffer
+	m_queue.enqueueWriteBuffer(m_header, false, 0, 32, header);
+	for (unsigned i = 0; i != c_num_buffers; ++i)
+	{
+		m_queue.enqueueWriteBuffer(m_search_buf[i], false, 0, 4, &c_zero);
+	}
+	cl::Event pre_return_event;
+	m_queue.enqueueBarrierWithWaitList(NULL, &pre_return_event);
+
+	/*
+	__kernel void ethash_combined_search(
+		__global hash32_t* g_hashes,			// 0
+		__constant hash32_t const* g_header,	// 1
+		__global hash128_t const* g_dag,		// 2
+		ulong start_nonce,						// 3
+		ulong target,							// 4
+		uint isolate							// 5
+	)
+	*/
+	m_search_kernel.setArg(1, m_header);
+	m_search_kernel.setArg(2, m_dag);
+
+	// pass these to stop the compiler unrolling the loops
+	m_search_kernel.setArg(4, target);
+	m_search_kernel.setArg(5, ~0u);
+
+
+	unsigned buf = 0;
+	for (uint64_t start_nonce = 0; ; start_nonce += c_search_batch_size)
+	{
+		// supply output buffer to kernel
+		m_search_kernel.setArg(0, m_search_buf[buf]);
+		m_search_kernel.setArg(3, start_nonce);
+
+		// execute it!
+		m_queue.enqueueNDRangeKernel(m_search_kernel, cl::NullRange, c_search_batch_size, m_workgroup_size);
+		
+		pending.push({start_nonce, buf});
+		buf = (buf + 1) % c_num_buffers;
+
+		// read results
+		if (pending.size() == c_num_buffers)
+		{
+			pending_batch const& batch = pending.front();
+
+			// could use pinned host pointer instead
+			uint32_t* results = (uint32_t*)m_queue.enqueueMapBuffer(m_search_buf[batch.buf], true, CL_MAP_READ, 0, (1+c_max_search_results) * sizeof(uint32_t));
+			unsigned num_found = std::min(results[0], c_max_search_results);
+
+			uint64_t nonces[c_max_search_results];
+			for (unsigned i = 0; i != num_found; ++i)
+			{
+				nonces[i] = batch.start_nonce + results[i+1];
+			}
+			
+			m_queue.enqueueUnmapMemObject(m_search_buf[batch.buf], results);
+			
+			bool exit = num_found && hook.found(nonces, num_found);
+			exit |= hook.searched(batch.start_nonce, c_search_batch_size); // always report searched before exit
+			if (exit)
+				break;
+
+			pending.pop();
+		}
+	}
+
+	// not safe to return until this is ready
+	pre_return_event.wait();
+}
+
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h
new file mode 100644
index 0000000000..f37100d91d
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cl/ethash_cl_miner.h
@@ -0,0 +1,43 @@
+#pragma once
+
+#define __CL_ENABLE_EXCEPTIONS 
+#define CL_USE_DEPRECATED_OPENCL_2_0_APIS
+#include "cl.hpp"
+#include <time.h>
+#include <libethash/ethash.h>
+
+class ethash_cl_miner
+{
+public:
+	struct search_hook
+	{
+		// reports progress, return true to abort
+		virtual bool found(uint64_t const* nonces, uint32_t count) = 0;
+		virtual bool searched(uint64_t start_nonce, uint32_t count) = 0;
+	};
+
+public:
+	ethash_cl_miner();
+
+	bool init(ethash_params const& params, const uint8_t seed[32], unsigned workgroup_size = 64);
+
+	void hash(uint8_t* ret, uint8_t const* header, uint64_t nonce, unsigned count);
+	void search(uint8_t const* header, uint64_t target, search_hook& hook);
+
+private:
+	static unsigned const c_max_search_results = 63;
+	static unsigned const c_num_buffers = 2;
+	static unsigned const c_hash_batch_size = 1024;
+	static unsigned const c_search_batch_size = 1024*256;
+
+	ethash_params m_params;
+	cl::Context m_context;
+	cl::CommandQueue m_queue;
+	cl::Kernel m_hash_kernel;
+	cl::Kernel m_search_kernel;
+	cl::Buffer m_dag;
+	cl::Buffer m_header;
+	cl::Buffer m_hash_buf[c_num_buffers];
+	cl::Buffer m_search_buf[c_num_buffers];
+	unsigned m_workgroup_size;
+};
\ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt
new file mode 100644
index 0000000000..b30ed3e2dc
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/CMakeLists.txt
@@ -0,0 +1,15 @@
+find_package(CUDA)
+ 
+# Pass options to NVCC
+
+ 
+if (CUDA_FOUND)
+set(CUDA_NVCC_FLAGS "   -gencode;arch=compute_30,code=sm_30;
+                        -gencode;arch=compute_20,code=sm_20;
+                        -gencode;arch=compute_11,code=sm_11;
+                        -gencode;arch=compute_12,code=sm_12;
+                        -gencode;arch=compute_13,code=sm_13;")
+cuda_add_executable(
+    ethash-cuda
+    libethash.cu)
+endif()
\ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu
new file mode 100644
index 0000000000..f06653f2db
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cu
@@ -0,0 +1,879 @@
+/*
+	Copyright 2009 NVIDIA Corporation.  All rights reserved.
+
+	NOTICE TO LICENSEE:   
+
+	This source code and/or documentation ("Licensed Deliverables") are subject 
+	to NVIDIA intellectual property rights under U.S. and international Copyright 
+	laws.  
+
+	These Licensed Deliverables contained herein is PROPRIETARY and CONFIDENTIAL 
+	to NVIDIA and is being provided under the terms and conditions of a form of 
+	NVIDIA software license agreement by and between NVIDIA and Licensee ("License 
+	Agreement") or electronically accepted by Licensee.  Notwithstanding any terms 
+	or conditions to the contrary in the License Agreement, reproduction or 
+	disclosure of the Licensed Deliverables to any third party without the express 
+	written consent of NVIDIA is prohibited.     
+
+	NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, 
+	NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THESE LICENSED 
+	DELIVERABLES FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED 
+	WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE 
+	LICENSED DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 
+	NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.   NOTWITHSTANDING ANY 
+	TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, IN NO EVENT SHALL 
+	NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, 
+	OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,	WHETHER 
+	IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,  ARISING OUT OF 
+	OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THESE LICENSED DELIVERABLES.  
+
+	U.S. Government End Users. These Licensed Deliverables are a "commercial item" 
+	as that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of 
+	"commercial computer  software"  and "commercial computer software documentation" 
+	as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995) and is provided to the 
+	U.S. Government only as a commercial end item.  Consistent with 48 C.F.R.12.212 
+	and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all U.S. Government 
+	End Users acquire the Licensed Deliverables with only those rights set forth 
+	herein. 
+
+	Any use of the Licensed Deliverables in individual and commercial software must 
+	include, in the user documentation and internal comments to the code, the above 
+	Disclaimer and U.S. Government End Users Notice.
+ */
+
+/*
+ *	cuPrintf.cu
+ *
+ *	This is a printf command callable from within a kernel. It is set
+ *	up so that output is sent to a memory buffer, which is emptied from
+ *	the host side - but only after a cudaThreadSynchronize() on the host.
+ *
+ *	Currently, there is a limitation of around 200 characters of output
+ *	and no more than 10 arguments to a single cuPrintf() call. Issue
+ *	multiple calls if longer format strings are required.
+ *
+ *	It requires minimal setup, and is *NOT* optimised for performance.
+ *	For example, writes are not coalesced - this is because there is an
+ *	assumption that people will not want to printf from every single one
+ *	of thousands of threads, but only from individual threads at a time.
+ *
+ *	Using this is simple - it requires one host-side call to initialise
+ *	everything, and then kernels can call cuPrintf at will. Sample code
+ *	is the easiest way to demonstrate:
+ *
+	#include "cuPrintf.cu"
+ 	
+	__global__ void testKernel(int val)
+	{
+		cuPrintf("Value is: %d\n", val);
+	}
+
+	int main()
+	{
+		cudaPrintfInit();
+		testKernel<<< 2, 3 >>>(10);
+		cudaPrintfDisplay(stdout, true);
+		cudaPrintfEnd();
+        return 0;
+	}
+ *
+ *	See the header file, "cuPrintf.cuh" for more info, especially
+ *	arguments to cudaPrintfInit() and cudaPrintfDisplay();
+ */
+
+#ifndef CUPRINTF_CU
+#define CUPRINTF_CU
+
+#include "cuPrintf.cuh"
+#if __CUDA_ARCH__ > 100      // Atomics only used with > sm_10 architecture
+#include <sm_11_atomic_functions.h>
+#endif
+
+// This is the smallest amount of memory, per-thread, which is allowed.
+// It is also the largest amount of space a single printf() can take up
+const static int CUPRINTF_MAX_LEN = 256;
+
+// This structure is used internally to track block/thread output restrictions.
+typedef struct __align__(8) {
+	int threadid;				// CUPRINTF_UNRESTRICTED for unrestricted
+	int blockid;				// CUPRINTF_UNRESTRICTED for unrestricted
+} cuPrintfRestriction;
+
+// The main storage is in a global print buffer, which has a known
+// start/end/length. These are atomically updated so it works as a
+// circular buffer.
+// Since the only control primitive that can be used is atomicAdd(),
+// we cannot wrap the pointer as such. The actual address must be
+// calculated from printfBufferPtr by mod-ing with printfBufferLength.
+// For sm_10 architecture, we must subdivide the buffer per-thread
+// since we do not even have an atomic primitive.
+__constant__ static char *globalPrintfBuffer = NULL;         // Start of circular buffer (set up by host)
+__constant__ static int printfBufferLength = 0;              // Size of circular buffer (set up by host)
+__device__ static cuPrintfRestriction restrictRules;         // Output restrictions
+__device__ volatile static char *printfBufferPtr = NULL;     // Current atomically-incremented non-wrapped offset
+
+// This is the header preceeding all printf entries.
+// NOTE: It *must* be size-aligned to the maximum entity size (size_t)
+typedef struct __align__(8) {
+    unsigned short magic;                   // Magic number says we're valid
+    unsigned short fmtoffset;               // Offset of fmt string into buffer
+    unsigned short blockid;                 // Block ID of author
+    unsigned short threadid;                // Thread ID of author
+} cuPrintfHeader;
+
+// Special header for sm_10 architecture
+#define CUPRINTF_SM10_MAGIC   0xC810        // Not a valid ascii character
+typedef struct __align__(16) {
+    unsigned short magic;                   // sm_10 specific magic number
+    unsigned short unused;
+    unsigned int thread_index;              // thread ID for this buffer
+    unsigned int thread_buf_len;            // per-thread buffer length
+    unsigned int offset;                    // most recent printf's offset
+} cuPrintfHeaderSM10;
+
+
+// Because we can't write an element which is not aligned to its bit-size,
+// we have to align all sizes and variables on maximum-size boundaries.
+// That means sizeof(double) in this case, but we'll use (long long) for
+// better arch<1.3 support
+#define CUPRINTF_ALIGN_SIZE      sizeof(long long)
+
+// All our headers are prefixed with a magic number so we know they're ready
+#define CUPRINTF_SM11_MAGIC  (unsigned short)0xC811        // Not a valid ascii character
+
+
+//
+//  getNextPrintfBufPtr
+//
+//  Grabs a block of space in the general circular buffer, using an
+//  atomic function to ensure that it's ours. We handle wrapping
+//  around the circular buffer and return a pointer to a place which
+//  can be written to.
+//
+//  Important notes:
+//      1. We always grab CUPRINTF_MAX_LEN bytes
+//      2. Because of 1, we never worry about wrapping around the end
+//      3. Because of 1, printfBufferLength *must* be a factor of CUPRINTF_MAX_LEN
+//
+//  This returns a pointer to the place where we own.
+//
+__device__ static char *getNextPrintfBufPtr()
+{
+    // Initialisation check
+    if(!printfBufferPtr)
+        return NULL;
+
+	// Thread/block restriction check
+	if((restrictRules.blockid != CUPRINTF_UNRESTRICTED) && (restrictRules.blockid != (blockIdx.x + gridDim.x*blockIdx.y)))
+		return NULL;
+	if((restrictRules.threadid != CUPRINTF_UNRESTRICTED) && (restrictRules.threadid != (threadIdx.x + blockDim.x*threadIdx.y + blockDim.x*blockDim.y*threadIdx.z)))
+		return NULL;
+
+	// Conditional section, dependent on architecture
+#if __CUDA_ARCH__ == 100
+    // For sm_10 architectures, we have no atomic add - this means we must split the
+    // entire available buffer into per-thread blocks. Inefficient, but what can you do.
+    int thread_count = (gridDim.x * gridDim.y) * (blockDim.x * blockDim.y * blockDim.z);
+    int thread_index = threadIdx.x + blockDim.x*threadIdx.y + blockDim.x*blockDim.y*threadIdx.z +
+                       (blockIdx.x + gridDim.x*blockIdx.y) * (blockDim.x * blockDim.y * blockDim.z);
+    
+    // Find our own block of data and go to it. Make sure the per-thread length
+	// is a precise multiple of CUPRINTF_MAX_LEN, otherwise we risk size and
+	// alignment issues! We must round down, of course.
+    unsigned int thread_buf_len = printfBufferLength / thread_count;
+	thread_buf_len &= ~(CUPRINTF_MAX_LEN-1);
+
+	// We *must* have a thread buffer length able to fit at least two printfs (one header, one real)
+	if(thread_buf_len < (CUPRINTF_MAX_LEN * 2))
+		return NULL;
+
+	// Now address our section of the buffer. The first item is a header.
+    char *myPrintfBuffer = globalPrintfBuffer + (thread_buf_len * thread_index);
+    cuPrintfHeaderSM10 hdr = *(cuPrintfHeaderSM10 *)(void *)myPrintfBuffer;
+    if(hdr.magic != CUPRINTF_SM10_MAGIC)
+    {
+        // If our header is not set up, initialise it
+        hdr.magic = CUPRINTF_SM10_MAGIC;
+        hdr.thread_index = thread_index;
+        hdr.thread_buf_len = thread_buf_len;
+        hdr.offset = 0;         // Note we start at 0! We pre-increment below.
+        *(cuPrintfHeaderSM10 *)(void *)myPrintfBuffer = hdr;       // Write back the header
+
+        // For initial setup purposes, we might need to init thread0's header too
+        // (so that cudaPrintfDisplay() below will work). This is only run once.
+        cuPrintfHeaderSM10 *tophdr = (cuPrintfHeaderSM10 *)(void *)globalPrintfBuffer;
+        tophdr->thread_buf_len = thread_buf_len;
+    }
+
+    // Adjust the offset by the right amount, and wrap it if need be
+    unsigned int offset = hdr.offset + CUPRINTF_MAX_LEN;
+    if(offset >= hdr.thread_buf_len)
+        offset = CUPRINTF_MAX_LEN;
+
+    // Write back the new offset for next time and return a pointer to it
+    ((cuPrintfHeaderSM10 *)(void *)myPrintfBuffer)->offset = offset;
+    return myPrintfBuffer + offset;
+#else
+    // Much easier with an atomic operation!
+    size_t offset = atomicAdd((unsigned int *)&printfBufferPtr, CUPRINTF_MAX_LEN) - (size_t)globalPrintfBuffer;
+    offset %= printfBufferLength;
+    return globalPrintfBuffer + offset;
+#endif
+}
+
+
+//
+//  writePrintfHeader
+//
+//  Inserts the header for containing our UID, fmt position and
+//  block/thread number. We generate it dynamically to avoid
+//	issues arising from requiring pre-initialisation.
+//
+__device__ static void writePrintfHeader(char *ptr, char *fmtptr)
+{
+    if(ptr)
+    {
+        cuPrintfHeader header;
+        header.magic = CUPRINTF_SM11_MAGIC;
+        header.fmtoffset = (unsigned short)(fmtptr - ptr);
+        header.blockid = blockIdx.x + gridDim.x*blockIdx.y;
+        header.threadid = threadIdx.x + blockDim.x*threadIdx.y + blockDim.x*blockDim.y*threadIdx.z;
+        *(cuPrintfHeader *)(void *)ptr = header;
+    }
+}
+
+
+//
+//  cuPrintfStrncpy
+//
+//  This special strncpy outputs an aligned length value, followed by the
+//  string. It then zero-pads the rest of the string until a 64-aligned
+//  boundary. The length *includes* the padding. A pointer to the byte
+//  just after the \0 is returned.
+//
+//  This function could overflow CUPRINTF_MAX_LEN characters in our buffer.
+//  To avoid it, we must count as we output and truncate where necessary.
+//
+__device__ static char *cuPrintfStrncpy(char *dest, const char *src, int n, char *end)
+{
+    // Initialisation and overflow check
+    if(!dest || !src || (dest >= end))
+        return NULL;
+
+    // Prepare to write the length specifier. We're guaranteed to have
+    // at least "CUPRINTF_ALIGN_SIZE" bytes left because we only write out in
+    // chunks that size, and CUPRINTF_MAX_LEN is aligned with CUPRINTF_ALIGN_SIZE.
+    int *lenptr = (int *)(void *)dest;
+    int len = 0;
+    dest += CUPRINTF_ALIGN_SIZE;
+
+    // Now copy the string
+    while(n--)
+    {
+        if(dest >= end)     // Overflow check
+            break;
+
+        len++;
+        *dest++ = *src;
+        if(*src++ == '\0')
+            break;
+    }
+
+    // Now write out the padding bytes, and we have our length.
+    while((dest < end) && (((long)dest & (CUPRINTF_ALIGN_SIZE-1)) != 0))
+    {
+        len++;
+        *dest++ = 0;
+    }
+    *lenptr = len;
+    return (dest < end) ? dest : NULL;        // Overflow means return NULL
+}
+
+
+//
+//  copyArg
+//
+//  This copies a length specifier and then the argument out to the
+//  data buffer. Templates let the compiler figure all this out at
+//  compile-time, making life much simpler from the programming
+//  point of view. I'm assuimg all (const char *) is a string, and
+//  everything else is the variable it points at. I'd love to see
+//  a better way of doing it, but aside from parsing the format
+//  string I can't think of one.
+//
+//  The length of the data type is inserted at the beginning (so that
+//  the display can distinguish between float and double), and the
+//  pointer to the end of the entry is returned.
+//
+__device__ static char *copyArg(char *ptr, const char *arg, char *end)
+{
+    // Initialisation check
+    if(!ptr || !arg)
+        return NULL;
+
+    // strncpy does all our work. We just terminate.
+    if((ptr = cuPrintfStrncpy(ptr, arg, CUPRINTF_MAX_LEN, end)) != NULL)
+        *ptr = 0;
+
+    return ptr;
+}
+
+template <typename T>
+__device__ static char *copyArg(char *ptr, T &arg, char *end)
+{
+    // Initisalisation and overflow check. Alignment rules mean that
+    // we're at least CUPRINTF_ALIGN_SIZE away from "end", so we only need
+    // to check that one offset.
+    if(!ptr || ((ptr+CUPRINTF_ALIGN_SIZE) >= end))
+        return NULL;
+
+    // Write the length and argument
+    *(int *)(void *)ptr = sizeof(arg);
+    ptr += CUPRINTF_ALIGN_SIZE;
+    *(T *)(void *)ptr = arg;
+    ptr += CUPRINTF_ALIGN_SIZE;
+    *ptr = 0;
+
+    return ptr;
+}
+
+
+//
+//  cuPrintf
+//
+//  Templated printf functions to handle multiple arguments.
+//  Note we return the total amount of data copied, not the number
+//  of characters output. But then again, who ever looks at the
+//  return from printf() anyway?
+//
+//  The format is to grab a block of circular buffer space, the
+//  start of which will hold a header and a pointer to the format
+//  string. We then write in all the arguments, and finally the
+//  format string itself. This is to make it easy to prevent
+//  overflow of our buffer (we support up to 10 arguments, each of
+//  which can be 12 bytes in length - that means that only the
+//  format string (or a %s) can actually overflow; so the overflow
+//  check need only be in the strcpy function.
+//
+//  The header is written at the very last because that's what
+//  makes it look like we're done.
+//
+//  Errors, which are basically lack-of-initialisation, are ignored
+//  in the called functions because NULL pointers are passed around
+//
+
+// All printf variants basically do the same thing, setting up the
+// buffer, writing all arguments, then finalising the header. For
+// clarity, we'll pack the code into some big macros.
+#define CUPRINTF_PREAMBLE \
+    char *start, *end, *bufptr, *fmtstart; \
+    if((start = getNextPrintfBufPtr()) == NULL) return 0; \
+    end = start + CUPRINTF_MAX_LEN; \
+    bufptr = start + sizeof(cuPrintfHeader);
+
+// Posting an argument is easy
+#define CUPRINTF_ARG(argname) \
+	bufptr = copyArg(bufptr, argname, end);
+
+// After args are done, record start-of-fmt and write the fmt and header
+#define CUPRINTF_POSTAMBLE \
+    fmtstart = bufptr; \
+    end = cuPrintfStrncpy(bufptr, fmt, CUPRINTF_MAX_LEN, end); \
+    writePrintfHeader(start, end ? fmtstart : NULL); \
+    return end ? (int)(end - start) : 0;
+
+__device__ int cuPrintf(const char *fmt)
+{
+	CUPRINTF_PREAMBLE;
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1> __device__ int cuPrintf(const char *fmt, T1 arg1)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+	CUPRINTF_ARG(arg3);
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+	CUPRINTF_ARG(arg3);
+	CUPRINTF_ARG(arg4);
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+	CUPRINTF_ARG(arg3);
+	CUPRINTF_ARG(arg4);
+	CUPRINTF_ARG(arg5);
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+	CUPRINTF_ARG(arg3);
+	CUPRINTF_ARG(arg4);
+	CUPRINTF_ARG(arg5);
+	CUPRINTF_ARG(arg6);
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+	CUPRINTF_ARG(arg3);
+	CUPRINTF_ARG(arg4);
+	CUPRINTF_ARG(arg5);
+	CUPRINTF_ARG(arg6);
+	CUPRINTF_ARG(arg7);
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8)
+{
+	CUPRINTF_PREAMBLE;
+
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+	CUPRINTF_ARG(arg3);
+	CUPRINTF_ARG(arg4);
+	CUPRINTF_ARG(arg5);
+	CUPRINTF_ARG(arg6);
+	CUPRINTF_ARG(arg7);
+	CUPRINTF_ARG(arg8);
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+	CUPRINTF_ARG(arg3);
+	CUPRINTF_ARG(arg4);
+	CUPRINTF_ARG(arg5);
+	CUPRINTF_ARG(arg6);
+	CUPRINTF_ARG(arg7);
+	CUPRINTF_ARG(arg8);
+	CUPRINTF_ARG(arg9);
+
+	CUPRINTF_POSTAMBLE;
+}
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9, typename T10> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10)
+{
+	CUPRINTF_PREAMBLE;
+	    
+	CUPRINTF_ARG(arg1);
+	CUPRINTF_ARG(arg2);
+	CUPRINTF_ARG(arg3);
+	CUPRINTF_ARG(arg4);
+	CUPRINTF_ARG(arg5);
+	CUPRINTF_ARG(arg6);
+	CUPRINTF_ARG(arg7);
+	CUPRINTF_ARG(arg8);
+	CUPRINTF_ARG(arg9);
+	CUPRINTF_ARG(arg10);
+
+	CUPRINTF_POSTAMBLE;
+}
+#undef CUPRINTF_PREAMBLE
+#undef CUPRINTF_ARG
+#undef CUPRINTF_POSTAMBLE
+
+
+//
+//	cuPrintfRestrict
+//
+//	Called to restrict output to a given thread/block.
+//	We store the info in "restrictRules", which is set up at
+//	init time by the host. It's not the cleanest way to do this
+//	because it means restrictions will last between
+//	invocations, but given the output-pointer continuity,
+//	I feel this is reasonable.
+//
+__device__ void cuPrintfRestrict(int threadid, int blockid)
+{
+    int thread_count = blockDim.x * blockDim.y * blockDim.z;
+	if(((threadid < thread_count) && (threadid >= 0)) || (threadid == CUPRINTF_UNRESTRICTED))
+		restrictRules.threadid = threadid;
+
+	int block_count = gridDim.x * gridDim.y;
+	if(((blockid < block_count) && (blockid >= 0)) || (blockid == CUPRINTF_UNRESTRICTED))
+		restrictRules.blockid = blockid;
+}
+
+
+///////////////////////////////////////////////////////////////////////////////
+// HOST SIDE
+
+#include <stdio.h>
+static FILE *printf_fp;
+
+static char *printfbuf_start=NULL;
+static char *printfbuf_device=NULL;
+static int printfbuf_len=0;
+
+
+//
+//  outputPrintfData
+//
+//  Our own internal function, which takes a pointer to a data buffer
+//  and passes it through libc's printf for output.
+//
+//  We receive the formate string and a pointer to where the data is
+//  held. We then run through and print it out.
+//
+//  Returns 0 on failure, 1 on success
+//
+static int outputPrintfData(char *fmt, char *data)
+{
+    // Format string is prefixed by a length that we don't need
+    fmt += CUPRINTF_ALIGN_SIZE;
+
+    // Now run through it, printing everything we can. We must
+    // run to every % character, extract only that, and use printf
+    // to format it.
+    char *p = strchr(fmt, '%');
+    while(p != NULL)
+    {
+        // Print up to the % character
+        *p = '\0';
+        fputs(fmt, printf_fp);
+        *p = '%';           // Put back the %
+
+        // Now handle the format specifier
+        char *format = p++;         // Points to the '%'
+        p += strcspn(p, "%cdiouxXeEfgGaAnps");
+        if(*p == '\0')              // If no format specifier, print the whole thing
+        {
+            fmt = format;
+            break;
+        }
+
+        // Cut out the format bit and use printf to print it. It's prefixed
+        // by its length.
+        int arglen = *(int *)data;
+        if(arglen > CUPRINTF_MAX_LEN)
+        {
+            fputs("Corrupt printf buffer data - aborting\n", printf_fp);
+            return 0;
+        }
+
+        data += CUPRINTF_ALIGN_SIZE;
+        
+        char specifier = *p++;
+        char c = *p;        // Store for later
+        *p = '\0';
+        switch(specifier)
+        {
+            // These all take integer arguments
+            case 'c':
+            case 'd':
+            case 'i':
+            case 'o':
+            case 'u':
+            case 'x':
+            case 'X':
+            case 'p':
+                fprintf(printf_fp, format, *((int *)data));
+                break;
+
+            // These all take double arguments
+            case 'e':
+            case 'E':
+            case 'f':
+            case 'g':
+            case 'G':
+            case 'a':
+            case 'A':
+                if(arglen == 4)     // Float vs. Double thing
+                    fprintf(printf_fp, format, *((float *)data));
+                else
+                    fprintf(printf_fp, format, *((double *)data));
+                break;
+
+            // Strings are handled in a special way
+            case 's':
+                fprintf(printf_fp, format, (char *)data);
+                break;
+
+            // % is special
+            case '%':
+                fprintf(printf_fp, "%%");
+                break;
+
+            // Everything else is just printed out as-is
+            default:
+                fprintf(printf_fp, format);
+                break;
+        }
+        data += CUPRINTF_ALIGN_SIZE;         // Move on to next argument
+        *p = c;                     // Restore what we removed
+        fmt = p;                    // Adjust fmt string to be past the specifier
+        p = strchr(fmt, '%');       // and get the next specifier
+    }
+
+    // Print out the last of the string
+    fputs(fmt, printf_fp);
+    return 1;
+}
+
+
+//
+//  doPrintfDisplay
+//
+//  This runs through the blocks of CUPRINTF_MAX_LEN-sized data, calling the
+//  print function above to display them. We've got this separate from
+//  cudaPrintfDisplay() below so we can handle the SM_10 architecture
+//  partitioning.
+//
+static int doPrintfDisplay(int headings, int clear, char *bufstart, char *bufend, char *bufptr, char *endptr)
+{
+    // Grab, piece-by-piece, each output element until we catch
+    // up with the circular buffer end pointer
+    int printf_count=0;
+    char printfbuf_local[CUPRINTF_MAX_LEN+1];
+    printfbuf_local[CUPRINTF_MAX_LEN] = '\0';
+
+    while(bufptr != endptr)
+    {
+        // Wrap ourselves at the end-of-buffer
+        if(bufptr == bufend)
+            bufptr = bufstart;
+
+        // Adjust our start pointer to within the circular buffer and copy a block.
+        cudaMemcpy(printfbuf_local, bufptr, CUPRINTF_MAX_LEN, cudaMemcpyDeviceToHost);
+
+        // If the magic number isn't valid, then this write hasn't gone through
+        // yet and we'll wait until it does (or we're past the end for non-async printfs).
+        cuPrintfHeader *hdr = (cuPrintfHeader *)printfbuf_local;
+        if((hdr->magic != CUPRINTF_SM11_MAGIC) || (hdr->fmtoffset >= CUPRINTF_MAX_LEN))
+        {
+            //fprintf(printf_fp, "Bad magic number in printf header\n");
+            break;
+        }
+
+        // Extract all the info and get this printf done
+        if(headings)
+            fprintf(printf_fp, "[%d, %d]: ", hdr->blockid, hdr->threadid);
+        if(hdr->fmtoffset == 0)
+            fprintf(printf_fp, "printf buffer overflow\n");
+        else if(!outputPrintfData(printfbuf_local+hdr->fmtoffset, printfbuf_local+sizeof(cuPrintfHeader)))
+            break;
+        printf_count++;
+
+        // Clear if asked
+        if(clear)
+            cudaMemset(bufptr, 0, CUPRINTF_MAX_LEN);
+
+        // Now advance our start location, because we're done, and keep copying
+        bufptr += CUPRINTF_MAX_LEN;
+    }
+
+    return printf_count;
+}
+
+
+//
+//  cudaPrintfInit
+//
+//  Takes a buffer length to allocate, creates the memory on the device and
+//  returns a pointer to it for when a kernel is called. It's up to the caller
+//  to free it.
+//
+extern "C" cudaError_t cudaPrintfInit(size_t bufferLen)
+{
+    // Fix up bufferlen to be a multiple of CUPRINTF_MAX_LEN
+    bufferLen = (bufferLen < CUPRINTF_MAX_LEN) ? CUPRINTF_MAX_LEN : bufferLen;
+    if((bufferLen % CUPRINTF_MAX_LEN) > 0)
+        bufferLen += (CUPRINTF_MAX_LEN - (bufferLen % CUPRINTF_MAX_LEN));
+    printfbuf_len = (int)bufferLen;
+
+    // Allocate a print buffer on the device and zero it
+    if(cudaMalloc((void **)&printfbuf_device, printfbuf_len) != cudaSuccess)
+		return cudaErrorInitializationError;
+    cudaMemset(printfbuf_device, 0, printfbuf_len);
+    printfbuf_start = printfbuf_device;         // Where we start reading from
+
+	// No restrictions to begin with
+	cuPrintfRestriction restrict;
+	restrict.threadid = restrict.blockid = CUPRINTF_UNRESTRICTED;
+	cudaMemcpyToSymbol(restrictRules, &restrict, sizeof(restrict));
+
+    // Initialise the buffer and the respective lengths/pointers.
+    cudaMemcpyToSymbol(globalPrintfBuffer, &printfbuf_device, sizeof(char *));
+    cudaMemcpyToSymbol(printfBufferPtr, &printfbuf_device, sizeof(char *));
+    cudaMemcpyToSymbol(printfBufferLength, &printfbuf_len, sizeof(printfbuf_len));
+
+    return cudaSuccess;
+}
+
+
+//
+//  cudaPrintfEnd
+//
+//  Frees up the memory which we allocated
+//
+extern "C" void cudaPrintfEnd()
+{
+    if(!printfbuf_start || !printfbuf_device)
+        return;
+
+    cudaFree(printfbuf_device);
+    printfbuf_start = printfbuf_device = NULL;
+}
+
+
+//
+//  cudaPrintfDisplay
+//
+//  Each call to this function dumps the entire current contents
+//	of the printf buffer to the pre-specified FILE pointer. The
+//	circular "start" pointer is advanced so that subsequent calls
+//	dumps only new stuff.
+//
+//  In the case of async memory access (via streams), call this
+//  repeatedly to keep trying to empty the buffer. If it's a sync
+//  access, then the whole buffer should empty in one go.
+//
+//	Arguments:
+//		outputFP     - File descriptor to output to (NULL => stdout)
+//		showThreadID - If true, prints [block,thread] before each line
+//
+extern "C" cudaError_t cudaPrintfDisplay(void *outputFP, bool showThreadID)
+{
+	printf_fp = (FILE *)((outputFP == NULL) ? stdout : outputFP);
+
+    // For now, we force "synchronous" mode which means we're not concurrent
+	// with kernel execution. This also means we don't need clearOnPrint.
+	// If you're patching it for async operation, here's where you want it.
+    bool sync_printfs = true;
+	bool clearOnPrint = false;
+
+    // Initialisation check
+    if(!printfbuf_start || !printfbuf_device || !printf_fp)
+        return cudaErrorMissingConfiguration;
+
+    // To determine which architecture we're using, we read the
+    // first short from the buffer - it'll be the magic number
+    // relating to the version.
+    unsigned short magic;
+    cudaMemcpy(&magic, printfbuf_device, sizeof(unsigned short), cudaMemcpyDeviceToHost);
+
+    // For SM_10 architecture, we've split our buffer into one-per-thread.
+    // That means we must do each thread block separately. It'll require
+    // extra reading. We also, for now, don't support async printfs because
+    // that requires tracking one start pointer per thread.
+    if(magic == CUPRINTF_SM10_MAGIC)
+    {
+        sync_printfs = true;
+	    clearOnPrint = false;
+        int blocklen = 0;
+        char *blockptr = printfbuf_device;
+        while(blockptr < (printfbuf_device + printfbuf_len))
+        {
+            cuPrintfHeaderSM10 hdr;
+            cudaMemcpy(&hdr, blockptr, sizeof(hdr), cudaMemcpyDeviceToHost);
+
+            // We get our block-size-step from the very first header
+            if(hdr.thread_buf_len != 0)
+                blocklen = hdr.thread_buf_len;
+
+            // No magic number means no printfs from this thread
+            if(hdr.magic != CUPRINTF_SM10_MAGIC)
+            {
+                if(blocklen == 0)
+                {
+                    fprintf(printf_fp, "No printf headers found at all!\n");
+                    break;                              // No valid headers!
+                }
+                blockptr += blocklen;
+                continue;
+            }
+
+            // "offset" is non-zero then we can print the block contents
+            if(hdr.offset > 0)
+            {
+                // For synchronous printfs, we must print from endptr->bufend, then from start->end
+                if(sync_printfs)
+                    doPrintfDisplay(showThreadID, clearOnPrint, blockptr+CUPRINTF_MAX_LEN, blockptr+hdr.thread_buf_len, blockptr+hdr.offset+CUPRINTF_MAX_LEN, blockptr+hdr.thread_buf_len);
+                doPrintfDisplay(showThreadID, clearOnPrint, blockptr+CUPRINTF_MAX_LEN, blockptr+hdr.thread_buf_len, blockptr+CUPRINTF_MAX_LEN, blockptr+hdr.offset+CUPRINTF_MAX_LEN);
+            }
+
+            // Move on to the next block and loop again
+            blockptr += hdr.thread_buf_len;
+        }
+    }
+    // For SM_11 and up, everything is a single buffer and it's simple
+    else if(magic == CUPRINTF_SM11_MAGIC)
+    {
+	    // Grab the current "end of circular buffer" pointer.
+        char *printfbuf_end = NULL;
+        cudaMemcpyFromSymbol(&printfbuf_end, printfBufferPtr, sizeof(char *));
+
+        // Adjust our starting and ending pointers to within the block
+        char *bufptr = ((printfbuf_start - printfbuf_device) % printfbuf_len) + printfbuf_device;
+        char *endptr = ((printfbuf_end - printfbuf_device) % printfbuf_len) + printfbuf_device;
+
+        // For synchronous (i.e. after-kernel-exit) printf display, we have to handle circular
+        // buffer wrap carefully because we could miss those past "end".
+        if(sync_printfs)
+            doPrintfDisplay(showThreadID, clearOnPrint, printfbuf_device, printfbuf_device+printfbuf_len, endptr, printfbuf_device+printfbuf_len);
+        doPrintfDisplay(showThreadID, clearOnPrint, printfbuf_device, printfbuf_device+printfbuf_len, bufptr, endptr);
+
+        printfbuf_start = printfbuf_end;
+    }
+    else
+        ;//printf("Bad magic number in cuPrintf buffer header\n");
+
+    // If we were synchronous, then we must ensure that the memory is cleared on exit
+    // otherwise another kernel launch with a different grid size could conflict.
+    if(sync_printfs)
+        cudaMemset(printfbuf_device, 0, printfbuf_len);
+
+    return cudaSuccess;
+}
+
+// Cleanup
+#undef CUPRINTF_MAX_LEN
+#undef CUPRINTF_ALIGN_SIZE
+#undef CUPRINTF_SM10_MAGIC
+#undef CUPRINTF_SM11_MAGIC
+
+#endif
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh
new file mode 100644
index 0000000000..cf3fe48688
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/cuPrintf.cuh
@@ -0,0 +1,162 @@
+/*
+	Copyright 2009 NVIDIA Corporation.  All rights reserved.
+
+	NOTICE TO LICENSEE:   
+
+	This source code and/or documentation ("Licensed Deliverables") are subject 
+	to NVIDIA intellectual property rights under U.S. and international Copyright 
+	laws.  
+
+	These Licensed Deliverables contained herein is PROPRIETARY and CONFIDENTIAL 
+	to NVIDIA and is being provided under the terms and conditions of a form of 
+	NVIDIA software license agreement by and between NVIDIA and Licensee ("License 
+	Agreement") or electronically accepted by Licensee.  Notwithstanding any terms 
+	or conditions to the contrary in the License Agreement, reproduction or 
+	disclosure of the Licensed Deliverables to any third party without the express 
+	written consent of NVIDIA is prohibited.     
+
+	NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, 
+	NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THESE LICENSED 
+	DELIVERABLES FOR ANY PURPOSE.  IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED 
+	WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE 
+	LICENSED DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY, 
+	NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.   NOTWITHSTANDING ANY 
+	TERMS OR CONDITIONS TO THE CONTRARY IN THE LICENSE AGREEMENT, IN NO EVENT SHALL 
+	NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, 
+	OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,	WHETHER 
+	IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,  ARISING OUT OF 
+	OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THESE LICENSED DELIVERABLES.  
+
+	U.S. Government End Users. These Licensed Deliverables are a "commercial item" 
+	as that term is defined at  48 C.F.R. 2.101 (OCT 1995), consisting  of 
+	"commercial computer  software"  and "commercial computer software documentation" 
+	as such terms are  used in 48 C.F.R. 12.212 (SEPT 1995) and is provided to the 
+	U.S. Government only as a commercial end item.  Consistent with 48 C.F.R.12.212 
+	and 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all U.S. Government 
+	End Users acquire the Licensed Deliverables with only those rights set forth 
+	herein. 
+
+	Any use of the Licensed Deliverables in individual and commercial software must 
+	include, in the user documentation and internal comments to the code, the above 
+	Disclaimer and U.S. Government End Users Notice.
+ */
+
+#ifndef CUPRINTF_H
+#define CUPRINTF_H
+
+/*
+ *	This is the header file supporting cuPrintf.cu and defining both
+ *	the host and device-side interfaces. See that file for some more
+ *	explanation and sample use code. See also below for details of the
+ *	host-side interfaces.
+ *
+ *  Quick sample code:
+ *
+	#include "cuPrintf.cu"
+ 	
+	__global__ void testKernel(int val)
+	{
+		cuPrintf("Value is: %d\n", val);
+	}
+
+	int main()
+	{
+		cudaPrintfInit();
+		testKernel<<< 2, 3 >>>(10);
+		cudaPrintfDisplay(stdout, true);
+		cudaPrintfEnd();
+        return 0;
+	}
+ */
+
+///////////////////////////////////////////////////////////////////////////////
+// DEVICE SIDE
+// External function definitions for device-side code
+
+// Abuse of templates to simulate varargs
+__device__ int cuPrintf(const char *fmt);
+template <typename T1> __device__ int cuPrintf(const char *fmt, T1 arg1);
+template <typename T1, typename T2> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2);
+template <typename T1, typename T2, typename T3> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3);
+template <typename T1, typename T2, typename T3, typename T4> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4);
+template <typename T1, typename T2, typename T3, typename T4, typename T5> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5);
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6);
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7);
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8);
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9);
+template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6, typename T7, typename T8, typename T9, typename T10> __device__ int cuPrintf(const char *fmt, T1 arg1, T2 arg2, T3 arg3, T4 arg4, T5 arg5, T6 arg6, T7 arg7, T8 arg8, T9 arg9, T10 arg10);
+
+
+//
+//	cuPrintfRestrict
+//
+//	Called to restrict output to a given thread/block. Pass
+//	the constant CUPRINTF_UNRESTRICTED to unrestrict output
+//	for thread/block IDs. Note you can therefore allow
+//	"all printfs from block 3" or "printfs from thread 2
+//	on all blocks", or "printfs only from block 1, thread 5".
+//
+//	Arguments:
+//		threadid - Thread ID to allow printfs from
+//		blockid - Block ID to allow printfs from
+//
+//	NOTE: Restrictions last between invocations of
+//	kernels unless cudaPrintfInit() is called again.
+//
+#define CUPRINTF_UNRESTRICTED	-1
+__device__ void cuPrintfRestrict(int threadid, int blockid);
+
+
+
+///////////////////////////////////////////////////////////////////////////////
+// HOST SIDE
+// External function definitions for host-side code
+
+//
+//	cudaPrintfInit
+//
+//	Call this once to initialise the printf system. If the output
+//	file or buffer size needs to be changed, call cudaPrintfEnd()
+//	before re-calling cudaPrintfInit().
+//
+//	The default size for the buffer is 1 megabyte. For CUDA
+//	architecture 1.1 and above, the buffer is filled linearly and
+//	is completely used;	however for architecture 1.0, the buffer
+//	is divided into as many segments are there are threads, even
+//	if some threads do not call cuPrintf().
+//
+//	Arguments:
+//		bufferLen - Length, in bytes, of total space to reserve
+//		            (in device global memory) for output.
+//
+//	Returns:
+//		cudaSuccess if all is well.
+//
+extern "C" cudaError_t cudaPrintfInit(size_t bufferLen=1048576);   // 1-meg - that's enough for 4096 printfs by all threads put together
+
+//
+//	cudaPrintfEnd
+//
+//	Cleans up all memories allocated by cudaPrintfInit().
+//	Call this at exit, or before calling cudaPrintfInit() again.
+//
+extern "C" void cudaPrintfEnd();
+
+//
+//	cudaPrintfDisplay
+//
+//	Dumps the contents of the output buffer to the specified
+//	file pointer. If the output pointer is not specified,
+//	the default "stdout" is used.
+//
+//	Arguments:
+//		outputFP     - A file pointer to an output stream.
+//		showThreadID - If "true", output strings are prefixed
+//		               by "[blockid, threadid] " at output.
+//
+//	Returns:
+//		cudaSuccess if all is well.
+//
+extern "C" cudaError_t cudaPrintfDisplay(void *outputFP=NULL, bool showThreadID=false);
+
+#endif  // CUPRINTF_H
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu
new file mode 100644
index 0000000000..3e53c88531
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash-cuda/libethash.cu
@@ -0,0 +1,27 @@
+#include "cuPrintf.cu"
+#include <stdio.h>
+
+__global__ void device_greetings(void)
+{
+	cuPrintf("Hello, world from the device!\n");
+}
+
+int main(void)
+{
+	// greet from the host
+	printf("Hello, world from the host!\n");
+
+	// initialize cuPrintf
+	cudaPrintfInit();
+
+	// launch a kernel with a single thread to greet from the device
+	device_greetings<<<1,1>>>();
+
+	// display the device's greeting
+	cudaPrintfDisplay();
+
+	// clean up after cuPrintf
+	cudaPrintfEnd();
+
+	return 0;
+}
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt
new file mode 100644
index 0000000000..bef63ef0a1
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/CMakeLists.txt
@@ -0,0 +1,33 @@
+set(LIBRARY ethash)
+set(CMAKE_BUILD_TYPE Release)
+
+if (NOT MSVC)
+	set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=gnu99")
+endif()
+
+set(FILES 	util.c
+          	util.h
+          	internal.c
+          	ethash.h
+          	endian.h
+          	compiler.h
+          	fnv.h
+          	data_sizes.h)
+
+if (NOT CRYPTOPP_FOUND)
+	find_package(CryptoPP 5.6.2)
+endif()
+
+if (CRYPTOPP_FOUND)
+	add_definitions(-DWITH_CRYPTOPP)
+	include_directories( ${CRYPTOPP_INCLUDE_DIRS} )
+	list(APPEND FILES sha3_cryptopp.cpp sha3_cryptopp.h)
+else()
+	list(APPEND FILES sha3.c sha3.h)
+endif()
+
+add_library(${LIBRARY} ${FILES})
+
+if (CRYPTOPP_FOUND)
+	TARGET_LINK_LIBRARIES(${LIBRARY} ${CRYPTOPP_LIBRARIES})
+endif()
\ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h
new file mode 100644
index 0000000000..9695871cdc
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/compiler.h
@@ -0,0 +1,33 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file compiler.h
+ * @date 2014
+ */
+#pragma once
+
+// Visual Studio doesn't support the inline keyword in C mode
+#if defined(_MSC_VER) && !defined(__cplusplus)
+#define inline __inline
+#endif
+
+// pretend restrict is a standard keyword
+#if defined(_MSC_VER)
+#define restrict __restrict
+#else
+#define restrict __restrict__
+#endif
+
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h
new file mode 100644
index 0000000000..40417cb718
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/data_sizes.h
@@ -0,0 +1,248 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software FoundationUUU,either version 3 of the LicenseUUU,or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be usefulU,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If notUUU,see <http://www.gnu.org/licenses/>.
+*/
+
+/** @file nth_prime.h
+* @author Matthew Wampler-Doty <negacthulhu@gmail.com>
+* @date 2015
+*/
+
+// TODO: Update this after ~7 years
+
+#pragma once
+
+#include <stdint.h>
+//#include <Security/Security.h>
+#include "compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdint.h>
+
+// 500 Epochs worth of tabulated DAG sizes (~3.5 Years)
+
+// Generated with the following Mathematica Code:
+// GetDataSizes[n_] := Module[{
+//        DAGSizeBytesInit = 2^30,
+//        MixBytes = 128,
+//        DAGGrowth = 113000000,
+//        j = 0},
+//        Reap[
+//          While[j < n,
+//            Module[{i =
+//              Floor[(DAGSizeBytesInit + DAGGrowth * j) / MixBytes]},
+//              While[! PrimeQ[i], i--];
+//              Sow[i*MixBytes]; j++]]]][[2]][[1]]
+
+static const size_t dag_sizes[] = {
+        1073739904U, 1186739584U, 1299741568U, 1412741248U, 1525741696U,
+        1638736768U, 1751741312U, 1864740736U, 1977740672U, 2090740864U,
+        2203740544U, 2316741248U, 2429739392U, 2542740352U, 2655741824U,
+        2768739712U, 2881740416U, 2994741632U, 3107740544U, 3220741504U,
+        3333738112U, 3446741632U, 3559741312U, 3672740224U, 3785740928U,
+        3898738304U, 4011741824U, 4124739712U, 4237735808U, 4350740864U,
+        4463741824U, 4576741504U, 4689741184U, 4802739328U, 4915741568U,
+        5028740224U, 5141740672U, 5254738304U, 5367741824U, 5480737664U,
+        5593738112U, 5706741632U, 5819740544U, 5932734592U, 6045739904U,
+        6158740096U, 6271740032U, 6384731776U, 6497732992U, 6610740352U,
+        6723741056U, 6836741504U, 6949740416U, 7062740096U, 7175741824U,
+        7288740224U, 7401741184U, 7514741632U, 7627741568U, 7740739712U,
+        7853739136U, 7966740352U, 8079741568U, 8192739712U, 8305738624U,
+        8418740864U, 8531740288U, 8644740736U, 8757735808U, 8870738816U,
+        8983739264U, 9096740992U, 9209740928U, 9322739584U, 9435741824U,
+        9548741504U, 9661739392U, 9774738304U, 9887741312U, 10000738688U,
+        10113739136U, 10226741632U, 10339739776U, 10452741248U, 10565740928U,
+        10678736512U, 10791734656U, 10904741248U, 11017738112U, 11130741632U,
+        11243741312U, 11356739456U, 11469740416U, 11582734976U, 11695739008U,
+        11808741248U, 11921734784U, 12034739072U, 12147741568U, 12260737408U,
+        12373741696U, 12486738304U, 12599740544U, 12712740224U, 12825741184U,
+        12938736256U, 13051741312U, 13164737408U, 13277738368U, 13390738048U,
+        13503741824U, 13616741504U, 13729737088U, 13842740096U, 13955741312U,
+        14068741504U, 14181740416U, 14294741632U, 14407739776U, 14520740224U,
+        14633740928U, 14746736512U, 14859741824U, 14972740736U, 15085740928U,
+        15198738304U, 15311732096U, 15424740736U, 15537739904U, 15650741632U,
+        15763741568U, 15876737152U, 15989741696U, 16102740608U, 16215741056U,
+        16328741248U, 16441740416U, 16554737792U, 16667740288U, 16780740992U,
+        16893738112U, 17006741632U, 17119739008U, 17232735616U, 17345739392U,
+        17458740352U, 17571736192U, 17684739712U, 17797739392U, 17910740096U,
+        18023741312U, 18136740736U, 18249738112U, 18362738816U, 18475735424U,
+        18588740224U, 18701738368U, 18814736768U, 18927737216U, 19040739968U,
+        19153739648U, 19266736768U, 19379737984U, 19492739456U, 19605738368U,
+        19718740352U, 19831741312U, 19944736384U, 20057741696U, 20170741376U,
+        20283741824U, 20396737408U, 20509741696U, 20622741376U, 20735739008U,
+        20848741504U, 20961740672U, 21074739328U, 21187740032U, 21300739456U,
+        21413741696U, 21526740608U, 21639741824U, 21752737408U, 21865741696U,
+        21978741376U, 22091741824U, 22204738432U, 22317740672U, 22430740096U,
+        22543736704U, 22656741248U, 22769739904U, 22882739584U, 22995740288U,
+        23108740736U, 23221740928U, 23334741376U, 23447737216U, 23560740992U,
+        23673741184U, 23786740864U, 23899737728U, 24012741248U, 24125734784U,
+        24238736512U, 24351741824U, 24464740736U, 24577737088U, 24690741632U,
+        24803739776U, 24916740736U, 25029740416U, 25142740864U, 25255741568U,
+        25368741248U, 25481740672U, 25594741376U, 25707741568U, 25820741504U,
+        25933730432U, 26046739072U, 26159741824U, 26272741504U, 26385740672U,
+        26498740096U, 26611741568U, 26724740992U, 26837739904U, 26950735232U,
+        27063738496U, 27176741248U, 27289741184U, 27402740864U, 27515740544U,
+        27628737152U, 27741740672U, 27854741632U, 27967740544U, 28080739712U,
+        28193738368U, 28306741376U, 28419737728U, 28532739968U, 28645739648U,
+        28758740096U, 28871741312U, 28984739456U, 29097740416U, 29210740864U,
+        29323741312U, 29436740224U, 29549741696U, 29662738304U, 29775741568U,
+        29888741504U, 30001740928U, 30114737024U, 30227735168U, 30340737664U,
+        30453738368U, 30566737024U, 30679733632U, 30792740224U, 30905740928U,
+        31018740352U, 31131740032U, 31244738944U, 31357737344U, 31470741376U,
+        31583740544U, 31696740224U, 31809738112U, 31922739328U, 32035737472U,
+        32148740992U, 32261741696U, 32374740352U, 32487741824U, 32600740736U,
+        32713739648U, 32826740608U, 32939729792U, 33052740992U, 33165740672U,
+        33278739584U, 33391741312U, 33504739712U, 33617740928U, 33730740608U,
+        33843738496U, 33956739968U, 34069741696U, 34182739328U, 34295741824U,
+        34408739968U, 34521740672U, 34634736512U, 34747741568U, 34860741248U,
+        34973739392U, 35086738304U, 35199741056U, 35312736896U, 35425741184U,
+        35538741376U, 35651740288U, 35764737152U, 35877741184U, 35990739584U,
+        36103740544U, 36216740992U, 36329739392U, 36442737536U, 36555741568U,
+        36668740736U, 36781741184U, 36894737024U, 37007741312U, 37120739456U,
+        37233741184U, 37346736256U, 37459736192U, 37572734336U, 37685739904U,
+        37798740352U, 37911737728U, 38024741504U, 38137739648U, 38250740608U,
+        38363741824U, 38476740992U, 38589741184U, 38702740096U, 38815741312U,
+        38928741248U, 39041738368U, 39154739584U, 39267741824U, 39380739712U,
+        39493735808U, 39606741632U, 39719741312U, 39832741504U, 39945739648U,
+        40058740352U, 40171740032U, 40284740992U, 40397740672U, 40510740352U,
+        40623740288U, 40736738176U, 40849737856U, 40962741376U, 41075739776U,
+        41188737664U, 41301735808U, 41414738048U, 41527741312U, 41640740992U,
+        41753739904U, 41866739072U, 41979738496U, 42092740736U, 42205739648U,
+        42318740608U, 42431741312U, 42544738688U, 42657741184U, 42770738048U,
+        42883741568U, 42996741248U, 43109740928U, 43222736512U, 43335741056U,
+        43448730496U, 43561740416U, 43674741632U, 43787740544U, 43900741504U,
+        44013739648U, 44126740864U, 44239740544U, 44352741248U, 44465738368U,
+        44578735232U, 44691739264U, 44804741504U, 44917741696U, 45030741376U,
+        45143741824U, 45256740992U, 45369739136U, 45482740096U, 45595739776U,
+        45708739712U, 45821740672U, 45934741376U, 46047741056U, 46160741248U,
+        46273737088U, 46386740864U, 46499739008U, 46612739968U, 46725735296U,
+        46838740864U, 46951741568U, 47064737152U, 47177741696U, 47290741376U,
+        47403738752U, 47516741248U, 47629739648U, 47742741632U, 47855737984U,
+        47968740224U, 48081738368U, 48194741632U, 48307739264U, 48420739712U,
+        48533739136U, 48646738304U, 48759741824U, 48872741504U, 48985739392U,
+        49098741376U, 49211741056U, 49324740992U, 49437738368U, 49550740864U,
+        49663735424U, 49776737408U, 49889740672U, 50002738816U, 50115738752U,
+        50228739712U, 50341741696U, 50454736768U, 50567738752U, 50680739968U,
+        50793736832U, 50906734976U, 51019741568U, 51132739456U, 51245741696U,
+        51358741376U, 51471741056U, 51584738944U, 51697734272U, 51810739072U,
+        51923736448U, 52036740736U, 52149741184U, 52262737024U, 52375738496U,
+        52488740992U, 52601739136U, 52714740352U, 52827736448U, 52940738176U,
+        53053741696U, 53166740864U, 53279741824U, 53392741504U, 53505739136U,
+        53618739584U, 53731741312U, 53844741248U, 53957741696U, 54070741376U,
+        54183740288U, 54296741504U, 54409741696U, 54522739072U, 54635737472U,
+        54748741504U, 54861736064U, 54974740096U, 55087741568U, 55200733568U,
+        55313741696U, 55426734464U, 55539741056U, 55652741504U, 55765741184U,
+        55878741376U, 55991730304U, 56104740992U, 56217740672U, 56330731648U,
+        56443737472U, 56556724352U, 56669740672U, 56782739072U, 56895740032U,
+        57008741248U, 57121741696U, 57234740096U, 57347741312U, 57460741504U
+};
+
+// 500 Epochs worth of tabulated DAG sizes (~3.5 Years)
+
+// Generated with the following Mathematica Code:
+// GetCacheSizes[n_] := Module[{
+//        DAGSizeBytesInit = 2^30,
+//        MixBytes = 128,
+//        DAGGrowth = 113000000,
+//        HashBytes = 64,
+//        DAGParents = 1024,
+//        j = 0},
+//    Reap[
+//      While[j < n,
+//       Module[{i = Floor[(DAGSizeBytesInit + DAGGrowth * j) / (DAGParents * HashBytes)]},
+//        While[! PrimeQ[i], i--];
+//        Sow[i*HashBytes]; j++]]]][[2]][[1]]
+
+const size_t cache_sizes[] = {
+        1048384U, 1158208U, 1268416U, 1377856U, 1489856U, 1599296U, 1710656U,
+        1820608U, 1930816U, 2041024U, 2151872U, 2261696U, 2371904U, 2482624U,
+        2593216U, 2703296U, 2814016U, 2924224U, 3034816U, 3144896U, 3255488U,
+        3365312U, 3475904U, 3586624U, 3696064U, 3806272U, 3917504U, 4027456U,
+        4138304U, 4248512U, 4359104U, 4469312U, 4579264U, 4689728U, 4797376U,
+        4909888U, 5020096U, 5131328U, 5241664U, 5351744U, 5461312U, 5572544U,
+        5683264U, 5793472U, 5903552U, 6014144U, 6121664U, 6235072U, 6344896U,
+        6454592U, 6565952U, 6675904U, 6786112U, 6896704U, 7006784U, 7117888U,
+        7228096U, 7338304U, 7448768U, 7557952U, 7669184U, 7779776U, 7889216U,
+        8000192U, 8110912U, 8220736U, 8331712U, 8441536U, 8552384U, 8662592U,
+        8772928U, 8883136U, 8993728U, 9103168U, 9214528U, 9323968U, 9434816U,
+        9545152U, 9655616U, 9766336U, 9876544U, 9986624U, 10097344U, 10207424U,
+        10316864U, 10427968U, 10538432U, 10649152U, 10758976U, 10869568U, 10979776U,
+        11089472U, 11200832U, 11309632U, 11420608U, 11531584U, 11641792U, 11751104U,
+        11862976U, 11973184U, 12083264U, 12193856U, 12304064U, 12414656U, 12524608U,
+        12635072U, 12745792U, 12855616U, 12965824U, 13076416U, 13187008U, 13297216U,
+        13407808U, 13518016U, 13627072U, 13738688U, 13848256U, 13959488U, 14069696U,
+        14180288U, 14290624U, 14399552U, 14511424U, 14621504U, 14732096U, 14841664U,
+        14951744U, 15062336U, 15172672U, 15283264U, 15393088U, 15504448U, 15614272U,
+        15723712U, 15834944U, 15945152U, 16055744U, 16165696U, 16277056U, 16387136U,
+        16494784U, 16607936U, 16718272U, 16828736U, 16938176U, 17048384U, 17159872U,
+        17266624U, 17380544U, 17490496U, 17600192U, 17711296U, 17821376U, 17931968U,
+        18041152U, 18152896U, 18261952U, 18373568U, 18483392U, 18594112U, 18703936U,
+        18814912U, 18924992U, 19034944U, 19145408U, 19256128U, 19366208U, 19477184U,
+        19587136U, 19696576U, 19808192U, 19916992U, 20028352U, 20137664U, 20249024U,
+        20358848U, 20470336U, 20580544U, 20689472U, 20801344U, 20911424U, 21020096U,
+        21130688U, 21242176U, 21352384U, 21462208U, 21573824U, 21683392U, 21794624U,
+        21904448U, 22013632U, 22125248U, 22235968U, 22344512U, 22456768U, 22566848U,
+        22677056U, 22786496U, 22897984U, 23008064U, 23118272U, 23228992U, 23338816U,
+        23449408U, 23560256U, 23670464U, 23780672U, 23891264U, 24001216U, 24110656U,
+        24221888U, 24332608U, 24442688U, 24552512U, 24662464U, 24773696U, 24884032U,
+        24994496U, 25105216U, 25215296U, 25324864U, 25435712U, 25546432U, 25655744U,
+        25767232U, 25876672U, 25986368U, 26098112U, 26207936U, 26318912U, 26428736U,
+        26539712U, 26650048U, 26760256U, 26869184U, 26979776U, 27091136U, 27201728U,
+        27311552U, 27422272U, 27532352U, 27642304U, 27752896U, 27863744U, 27973952U,
+        28082752U, 28194752U, 28305344U, 28415168U, 28524992U, 28636352U, 28746304U,
+        28857152U, 28967104U, 29077184U, 29187904U, 29298496U, 29408576U, 29518912U,
+        29628992U, 29739968U, 29850176U, 29960512U, 30070336U, 30180544U, 30290752U,
+        30398912U, 30512192U, 30622784U, 30732992U, 30842176U, 30953536U, 31063744U,
+        31174336U, 31284544U, 31395136U, 31504448U, 31615552U, 31725632U, 31835072U,
+        31946176U, 32057024U, 32167232U, 32277568U, 32387008U, 32497984U, 32608832U,
+        32719168U, 32829376U, 32939584U, 33050048U, 33160768U, 33271232U, 33381184U,
+        33491648U, 33601856U, 33712576U, 33822016U, 33932992U, 34042816U, 34153024U,
+        34263104U, 34373824U, 34485056U, 34594624U, 34704832U, 34816064U, 34926272U,
+        35036224U, 35146816U, 35255104U, 35367104U, 35478208U, 35588416U, 35698496U,
+        35808832U, 35918656U, 36029888U, 36139456U, 36250688U, 36360512U, 36471104U,
+        36581696U, 36691136U, 36802112U, 36912448U, 37022912U, 37132864U, 37242944U,
+        37354048U, 37464512U, 37574848U, 37684928U, 37794752U, 37904704U, 38015552U,
+        38125888U, 38236864U, 38345792U, 38457152U, 38567744U, 38678336U, 38787776U,
+        38897216U, 39009088U, 39117632U, 39230144U, 39340352U, 39450304U, 39560384U,
+        39671488U, 39781312U, 39891392U, 40002112U, 40112704U, 40223168U, 40332608U,
+        40443968U, 40553792U, 40664768U, 40774208U, 40884416U, 40993984U, 41105984U,
+        41215424U, 41326528U, 41436992U, 41546048U, 41655872U, 41768128U, 41878336U,
+        41988928U, 42098752U, 42209344U, 42319168U, 42429248U, 42540352U, 42649792U,
+        42761024U, 42871616U, 42981824U, 43092032U, 43201856U, 43312832U, 43423552U,
+        43533632U, 43643584U, 43753792U, 43864384U, 43974976U, 44084032U, 44195392U,
+        44306368U, 44415296U, 44526016U, 44637248U, 44746816U, 44858048U, 44967872U,
+        45078848U, 45188288U, 45299264U, 45409216U, 45518272U, 45630272U, 45740224U,
+        45850432U, 45960896U, 46069696U, 46182208U, 46292416U, 46402624U, 46512064U,
+        46623296U, 46733888U, 46843712U, 46953664U, 47065024U, 47175104U, 47285696U,
+        47395904U, 47506496U, 47615296U, 47726912U, 47837632U, 47947712U, 48055232U,
+        48168128U, 48277952U, 48387392U, 48499648U, 48609472U, 48720064U, 48830272U,
+        48940096U, 49050944U, 49160896U, 49271744U, 49381568U, 49492288U, 49602752U,
+        49712576U, 49822016U, 49934272U, 50042816U, 50154304U, 50264128U, 50374336U,
+        50484416U, 50596288U, 50706752U, 50816704U, 50927168U, 51035456U, 51146944U,
+        51258176U, 51366976U, 51477824U, 51589568U, 51699776U, 51809728U, 51920576U,
+        52030016U, 52140736U, 52251328U, 52361152U, 52470592U, 52582592U, 52691776U,
+        52803136U, 52912576U, 53020736U, 53132224U, 53242688U, 53354816U, 53465536U,
+        53575232U, 53685568U, 53796544U, 53906752U, 54016832U, 54126656U, 54236992U,
+        54347456U, 54457408U, 54569024U, 54679232U, 54789184U, 54899776U, 55008832U,
+        55119296U, 55231168U, 55341248U, 55451584U, 55562048U, 55672256U, 55782208U,
+        55893184U, 56002112U, 56113216U
+};
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h
new file mode 100644
index 0000000000..9ca842e47f
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/endian.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#include <stdint.h>
+#include "compiler.h"
+
+static const uint8_t BitReverseTable256[] =
+        {
+                0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
+                0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
+                0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
+                0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
+                0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
+                0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
+                0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
+                0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
+                0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
+                0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
+                0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
+                0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
+                0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
+                0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
+                0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
+                0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
+        };
+
+static inline uint32_t bitfn_swap32(uint32_t a) {
+    return (BitReverseTable256[a & 0xff] << 24) |
+            (BitReverseTable256[(a >> 8) & 0xff] << 16) |
+            (BitReverseTable256[(a >> 16) & 0xff] << 8) |
+            (BitReverseTable256[(a >> 24) & 0xff]);
+}
+
+static inline uint64_t bitfn_swap64(uint64_t a) {
+    return ((uint64_t) bitfn_swap32((uint32_t) (a >> 32))) |
+            (((uint64_t) bitfn_swap32((uint32_t) a)) << 32);
+}
+
+#if defined(__MINGW32__) || defined(_WIN32)
+  # define LITTLE_ENDIAN 1234
+  # define BYTE_ORDER    LITTLE_ENDIAN
+#elif defined(__FreeBSD__) || defined(__DragonFly__) || defined(__NetBSD__)
+  # include <sys/endian.h>
+#elif defined(__OpenBSD__) || defined(__SVR4)
+  # include <sys/types.h>
+#elif defined(__APPLE__)
+# include <machine/endian.h>
+#elif defined( BSD ) && (BSD >= 199103)
+  # include <machine/endian.h>
+#elif defined( __QNXNTO__ ) && defined( __LITTLEENDIAN__ )
+  # define LITTLE_ENDIAN 1234
+  # define BYTE_ORDER    LITTLE_ENDIAN
+#elif defined( __QNXNTO__ ) && defined( __BIGENDIAN__ )
+  # define BIG_ENDIAN 1234
+  # define BYTE_ORDER    BIG_ENDIAN
+#else
+
+# include <endian.h>
+
+#endif
+
+
+#if LITTLE_ENDIAN == BYTE_ORDER
+
+#define fix_endian32(x) (x)
+#define fix_endian64(x) (x)
+
+#elif BIG_ENDIAN == BYTE_ORDER
+
+#define fix_endian32(x) bitfn_swap32(x)
+#define fix_endian64(x) bitfn_swap64(x)
+
+#else
+# error "endian not supported"
+#endif // BYTE_ORDER
\ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h
new file mode 100644
index 0000000000..62edd0082c
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/ethash.h
@@ -0,0 +1,88 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file ethash.h
+* @date 2015
+*/
+#pragma once
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <string.h>
+#include <stddef.h>
+#include "compiler.h"
+
+#define REVISION 18
+#define DAGSIZE_BYTES_INIT 1073741824U
+#define DAG_GROWTH 113000000U
+#define EPOCH_LENGTH 30000U
+#define MIX_BYTES 128
+#define DAG_PARENTS 256
+#define CACHE_ROUNDS 3
+#define ACCESSES 64
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+typedef struct ethash_params {
+    size_t full_size;               // Size of full data set (in bytes, multiple of mix size (128)).
+    size_t cache_size;              // Size of compute cache (in bytes, multiple of node size (64)).
+} ethash_params;
+
+typedef struct ethash_return_value {
+    uint8_t result[32];
+    uint8_t mix_hash[32];
+} ethash_return_value;
+
+size_t const ethash_get_datasize(const uint32_t block_number);
+size_t const ethash_get_cachesize(const uint32_t block_number);
+
+// initialize the parameters
+static inline void ethash_params_init(ethash_params *params, const uint32_t block_number) {
+    params->full_size = ethash_get_datasize(block_number);
+    params->cache_size = ethash_get_cachesize(block_number);
+}
+
+typedef struct ethash_cache {
+    void *mem;
+} ethash_cache;
+
+void ethash_mkcache(ethash_cache *cache, ethash_params const *params, const uint8_t seed[32]);
+void ethash_compute_full_data(void *mem, ethash_params const *params, ethash_cache const *cache);
+void ethash_full(ethash_return_value *ret, void const *full_mem, ethash_params const *params, const uint8_t header_hash[32], const uint64_t nonce);
+void ethash_light(ethash_return_value *ret, ethash_cache const *cache, ethash_params const *params, const uint8_t header_hash[32], const uint64_t nonce);
+
+static inline int ethash_check_difficulty(
+        const uint8_t hash[32],
+        const uint8_t difficulty[32]) {
+    // Difficulty is big endian
+    for (int i = 0; i < 32; i++) {
+        if (hash[i] == difficulty[i]) continue;
+        return hash[i] < difficulty[i];
+    }
+    return 0;
+}
+
+int ethash_quick_check_difficulty(
+        const uint8_t header_hash[32],
+        const uint64_t nonce,
+        const uint8_t mix_hash[32],
+        const uint8_t difficulty[32]);
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h
new file mode 100644
index 0000000000..edabeaae2e
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/fnv.h
@@ -0,0 +1,38 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file fnv.h
+* @author Matthew Wampler-Doty <negacthulhu@gmail.com>
+* @date 2015
+*/
+
+#pragma once
+#include <stdint.h>
+#include "compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define FNV_PRIME 0x01000193
+
+static inline uint32_t fnv_hash(const uint32_t x, const uint32_t y) {
+	return x*FNV_PRIME ^ y;
+}
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c
new file mode 100644
index 0000000000..cc48717d09
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.c
@@ -0,0 +1,297 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file dash.cpp
+* @author Tim Hughes <tim@twistedfury.com>
+* @author Matthew Wampler-Doty
+* @date 2015
+*/
+
+#include <assert.h>
+#include <inttypes.h>
+#include <stddef.h>
+#include "ethash.h"
+#include "fnv.h"
+#include "endian.h"
+#include "internal.h"
+#include "data_sizes.h"
+
+#ifdef WITH_CRYPTOPP
+
+#include "SHA3_cryptopp.h"
+
+#else
+#include "sha3.h"
+#endif // WITH_CRYPTOPP
+
+size_t const ethash_get_datasize(const uint32_t block_number) {
+    assert(block_number / EPOCH_LENGTH < 500);
+    return dag_sizes[block_number / EPOCH_LENGTH];
+}
+
+size_t const ethash_get_cachesize(const uint32_t block_number) {
+    assert(block_number / EPOCH_LENGTH < 500);
+    return cache_sizes[block_number / EPOCH_LENGTH];
+}
+
+// Follows Sergio's "STRICT MEMORY HARD HASHING FUNCTIONS" (2014)
+// https://bitslog.files.wordpress.com/2013/12/memohash-v0-3.pdf
+// SeqMemoHash(s, R, N)
+void static ethash_compute_cache_nodes(
+        node *const nodes,
+        ethash_params const *params,
+        const uint8_t seed[32]) {
+    assert((params->cache_size % sizeof(node)) == 0);
+    uint32_t const num_nodes = (uint32_t)(params->cache_size / sizeof(node));
+
+    SHA3_512(nodes[0].bytes, seed, 32);
+
+    for (unsigned i = 1; i != num_nodes; ++i) {
+        SHA3_512(nodes[i].bytes, nodes[i - 1].bytes, 64);
+    }
+
+    for (unsigned j = 0; j != CACHE_ROUNDS; j++) {
+        for (unsigned i = 0; i != num_nodes; i++) {
+            uint32_t const idx = nodes[i].words[0] % num_nodes;
+            node data;
+            data = nodes[(num_nodes - 1 + i) % num_nodes];
+			for (unsigned w = 0; w != NODE_WORDS; ++w)
+			{
+				data.words[w] ^= nodes[idx].words[w];
+			}
+            SHA3_512(nodes[i].bytes, data.bytes, sizeof(data));
+        }
+    }
+
+    // now perform endian conversion
+#if BYTE_ORDER != LITTLE_ENDIAN
+    for (unsigned w = 0; w != (num_nodes*NODE_WORDS); ++w)
+    {
+        nodes->words[w] = fix_endian32(nodes->words[w]);
+    }
+#endif
+}
+
+void ethash_mkcache(
+  ethash_cache *cache,
+        ethash_params const *params,
+        const uint8_t seed[32]) {
+    node *nodes = (node *) cache->mem;
+    ethash_compute_cache_nodes(nodes, params, seed);
+}
+
+void ethash_calculate_dag_item(
+        node *const ret,
+        const unsigned node_index,
+        const struct ethash_params *params,
+        const struct ethash_cache *cache) {
+
+    uint32_t num_parent_nodes = (uint32_t)(params->cache_size / sizeof(node));
+    node const *cache_nodes = (node const *) cache->mem;
+    node const *init = &cache_nodes[node_index % num_parent_nodes];
+
+	memcpy(ret, init, sizeof(node));
+	ret->words[0] ^= node_index;
+	SHA3_512(ret->bytes, ret->bytes, sizeof(node));
+
+#if defined(_M_X64) && ENABLE_SSE
+    __m128i const fnv_prime = _mm_set1_epi32(FNV_PRIME);
+    __m128i xmm0 = ret->xmm[0];
+    __m128i xmm1 = ret->xmm[1];
+    __m128i xmm2 = ret->xmm[2];
+    __m128i xmm3 = ret->xmm[3];
+#endif
+
+    for (unsigned i = 0; i != DAG_PARENTS; ++i)
+	{
+        uint32_t parent_index = ((node_index ^ i)*FNV_PRIME ^ ret->words[i % NODE_WORDS]) % num_parent_nodes;
+        node const *parent = &cache_nodes[parent_index];
+
+		#if defined(_M_X64) && ENABLE_SSE
+        {
+            xmm0 = _mm_mullo_epi32(xmm0, fnv_prime);
+            xmm1 = _mm_mullo_epi32(xmm1, fnv_prime);
+            xmm2 = _mm_mullo_epi32(xmm2, fnv_prime);
+            xmm3 = _mm_mullo_epi32(xmm3, fnv_prime);
+            xmm0 = _mm_xor_si128(xmm0, parent->xmm[0]);
+            xmm1 = _mm_xor_si128(xmm1, parent->xmm[1]);
+            xmm2 = _mm_xor_si128(xmm2, parent->xmm[2]);
+            xmm3 = _mm_xor_si128(xmm3, parent->xmm[3]);
+
+            // have to write to ret as values are used to compute index
+            ret->xmm[0] = xmm0;
+            ret->xmm[1] = xmm1;
+            ret->xmm[2] = xmm2;
+            ret->xmm[3] = xmm3;
+        }
+        #else
+        {
+            for (unsigned w = 0; w != NODE_WORDS; ++w) {
+                ret->words[w] = fnv_hash(ret->words[w], parent->words[w]);
+            }
+        }
+		#endif
+    }
+
+	SHA3_512(ret->bytes, ret->bytes, sizeof(node));
+}
+
+void ethash_compute_full_data(
+        void *mem,
+        ethash_params const *params,
+        ethash_cache const *cache) {
+    assert((params->full_size % (sizeof(uint32_t) * MIX_WORDS)) == 0);
+    assert((params->full_size % sizeof(node)) == 0);
+    node *full_nodes = mem;
+
+    // now compute full nodes
+    for (unsigned n = 0; n != (params->full_size / sizeof(node)); ++n) {
+        ethash_calculate_dag_item(&(full_nodes[n]), n, params, cache);
+    }
+}
+
+static void ethash_hash(
+        ethash_return_value * ret,
+        node const *full_nodes,
+        ethash_cache const *cache,
+        ethash_params const *params,
+        const uint8_t header_hash[32],
+        const uint64_t nonce) {
+
+    assert((params->full_size % MIX_WORDS) == 0);
+
+    // pack hash and nonce together into first 40 bytes of s_mix
+    assert(sizeof(node)*8 == 512);
+    node s_mix[MIX_NODES + 1];
+    memcpy(s_mix[0].bytes, header_hash, 32);
+
+#if BYTE_ORDER != LITTLE_ENDIAN
+    s_mix[0].double_words[4] = fix_endian64(nonce);
+#else
+    s_mix[0].double_words[4] = nonce;
+#endif
+
+    // compute sha3-512 hash and replicate across mix
+    SHA3_512(s_mix->bytes, s_mix->bytes, 40);
+
+#if BYTE_ORDER != LITTLE_ENDIAN
+    for (unsigned w = 0; w != 16; ++w) {
+        s_mix[0].words[w] = fix_endian32(s_mix[0].words[w]);
+    }
+#endif
+
+    node* const mix = s_mix + 1;
+    for (unsigned w = 0; w != MIX_WORDS; ++w) {
+        mix->words[w] = s_mix[0].words[w % NODE_WORDS];
+    }
+
+    unsigned const
+            page_size = sizeof(uint32_t) * MIX_WORDS,
+            num_full_pages = (unsigned)(params->full_size / page_size);
+
+
+    for (unsigned i = 0; i != ACCESSES; ++i)
+	{
+        uint32_t const index = ((s_mix->words[0] ^ i)*FNV_PRIME ^ mix->words[i % MIX_WORDS]) % num_full_pages;
+
+        for (unsigned n = 0; n != MIX_NODES; ++n)
+		{
+            const node * dag_node = &full_nodes[MIX_NODES * index + n];
+
+            if (!full_nodes) {
+                node tmp_node;
+                ethash_calculate_dag_item(&tmp_node, index * MIX_NODES + n, params, cache);
+                dag_node = &tmp_node;
+            }
+
+			#if defined(_M_X64) && ENABLE_SSE
+            {
+                __m128i fnv_prime = _mm_set1_epi32(FNV_PRIME);
+                __m128i xmm0 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[0]);
+                __m128i xmm1 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[1]);
+                __m128i xmm2 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[2]);
+                __m128i xmm3 = _mm_mullo_epi32(fnv_prime, mix[n].xmm[3]);
+                mix[n].xmm[0] = _mm_xor_si128(xmm0, dag_node->xmm[0]);
+                mix[n].xmm[1] = _mm_xor_si128(xmm1, dag_node->xmm[1]);
+                mix[n].xmm[2] = _mm_xor_si128(xmm2, dag_node->xmm[2]);
+                mix[n].xmm[3] = _mm_xor_si128(xmm3, dag_node->xmm[3]);
+            }
+            #else
+            {
+                for (unsigned w = 0; w != NODE_WORDS; ++w) {
+                    mix[n].words[w] = fnv_hash(mix[n].words[w], dag_node->words[w]);
+                }
+            }
+            #endif
+        }
+
+    }
+
+	// compress mix
+	for (unsigned w = 0; w != MIX_WORDS; w += 4)
+	{
+		uint32_t reduction = mix->words[w+0];
+		reduction = reduction*FNV_PRIME ^ mix->words[w+1];
+		reduction = reduction*FNV_PRIME ^ mix->words[w+2];
+		reduction = reduction*FNV_PRIME ^ mix->words[w+3];
+		mix->words[w/4] = reduction;
+	}
+
+#if BYTE_ORDER != LITTLE_ENDIAN
+    for (unsigned w = 0; w != MIX_WORDS/4; ++w) {
+        mix->words[w] = fix_endian32(mix->words[w]);
+    }
+#endif
+
+    memcpy(ret->mix_hash, mix->bytes, 32);
+    // final Keccak hash
+    SHA3_256(ret->result, s_mix->bytes, 64+32);	// Keccak-256(s + compressed_mix)
+}
+
+void ethash_quick_hash(
+        uint8_t return_hash[32],
+        const uint8_t header_hash[32],
+        const uint64_t nonce,
+        const uint8_t mix_hash[32]) {
+
+    uint8_t buf[64+32];
+    memcpy(buf, header_hash, 32);
+#if BYTE_ORDER != LITTLE_ENDIAN
+    nonce = fix_endian64(nonce);
+#endif
+    memcpy(&(buf[32]), &nonce, 8);
+    SHA3_512(buf, buf, 40);
+    memcpy(&(buf[64]), mix_hash, 32);
+    SHA3_256(return_hash, buf, 64+32);
+}
+
+int ethash_quick_check_difficulty(
+        const uint8_t header_hash[32],
+        const uint64_t nonce,
+        const uint8_t mix_hash[32],
+        const uint8_t difficulty[32]) {
+    uint8_t return_hash[32];
+    ethash_quick_hash(return_hash, header_hash, nonce, mix_hash);
+    return ethash_check_difficulty(return_hash, difficulty);
+}
+
+void ethash_full(ethash_return_value * ret, void const *full_mem, ethash_params const *params, const uint8_t previous_hash[32], const uint64_t nonce) {
+    ethash_hash(ret, (node const *) full_mem, NULL, params, previous_hash, nonce);
+}
+
+void ethash_light(ethash_return_value * ret, ethash_cache const *cache, ethash_params const *params, const uint8_t previous_hash[32], const uint64_t nonce) {
+    ethash_hash(ret, NULL, cache, params, previous_hash, nonce);
+}
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h
new file mode 100644
index 0000000000..bcbacdaa49
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/internal.h
@@ -0,0 +1,48 @@
+#pragma once
+#include "compiler.h"
+#include "endian.h"
+#include "ethash.h"
+
+#define ENABLE_SSE 1
+
+#if defined(_M_X64) && ENABLE_SSE
+#include <smmintrin.h>
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// compile time settings
+#define NODE_WORDS (64/4)
+#define MIX_WORDS (MIX_BYTES/4)
+#define MIX_NODES (MIX_WORDS / NODE_WORDS)
+#include <stdint.h>
+
+typedef union node {
+    uint8_t bytes[NODE_WORDS * 4];
+    uint32_t words[NODE_WORDS];
+    uint64_t double_words[NODE_WORDS / 2];
+
+#if defined(_M_X64) && ENABLE_SSE
+	__m128i xmm[NODE_WORDS/4];
+#endif
+
+} node;
+
+void ethash_calculate_dag_item(
+        node *const ret,
+        const unsigned node_index,
+        ethash_params const *params,
+        ethash_cache const *cache
+);
+
+void ethash_quick_hash(
+        uint8_t return_hash[32],
+        const uint8_t header_hash[32],
+        const uint64_t nonce,
+        const uint8_t mix_hash[32]);
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c
new file mode 100644
index 0000000000..0c28230b86
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.c
@@ -0,0 +1,151 @@
+/** libkeccak-tiny
+*
+* A single-file implementation of SHA-3 and SHAKE.
+*
+* Implementor: David Leon Gil
+* License: CC0, attribution kindly requested. Blame taken too,
+* but not liability.
+*/
+#include "sha3.h"
+
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+/******** The Keccak-f[1600] permutation ********/
+
+/*** Constants. ***/
+static const uint8_t rho[24] = \
+  { 1,  3,   6, 10, 15, 21,
+        28, 36, 45, 55,  2, 14,
+        27, 41, 56,  8, 25, 43,
+        62, 18, 39, 61, 20, 44};
+static const uint8_t pi[24] = \
+  {10,  7, 11, 17, 18, 3,
+        5, 16,  8, 21, 24, 4,
+        15, 23, 19, 13, 12, 2,
+        20, 14, 22,  9, 6,  1};
+static const uint64_t RC[24] = \
+  {1ULL, 0x8082ULL, 0x800000000000808aULL, 0x8000000080008000ULL,
+        0x808bULL, 0x80000001ULL, 0x8000000080008081ULL, 0x8000000000008009ULL,
+        0x8aULL, 0x88ULL, 0x80008009ULL, 0x8000000aULL,
+        0x8000808bULL, 0x800000000000008bULL, 0x8000000000008089ULL, 0x8000000000008003ULL,
+        0x8000000000008002ULL, 0x8000000000000080ULL, 0x800aULL, 0x800000008000000aULL,
+        0x8000000080008081ULL, 0x8000000000008080ULL, 0x80000001ULL, 0x8000000080008008ULL};
+
+/*** Helper macros to unroll the permutation. ***/
+#define rol(x, s) (((x) << s) | ((x) >> (64 - s)))
+#define REPEAT6(e) e e e e e e
+#define REPEAT24(e) REPEAT6(e e e e)
+#define REPEAT5(e) e e e e e
+#define FOR5(v, s, e) \
+  v = 0;            \
+  REPEAT5(e; v += s;)
+
+/*** Keccak-f[1600] ***/
+static inline void keccakf(void* state) {
+    uint64_t* a = (uint64_t*)state;
+    uint64_t b[5] = {0};
+    uint64_t t = 0;
+    uint8_t x, y;
+
+    for (int i = 0; i < 24; i++) {
+        // Theta
+        FOR5(x, 1,
+                b[x] = 0;
+                FOR5(y, 5,
+                        b[x] ^= a[x + y]; ))
+        FOR5(x, 1,
+                FOR5(y, 5,
+                        a[y + x] ^= b[(x + 4) % 5] ^ rol(b[(x + 1) % 5], 1); ))
+        // Rho and pi
+        t = a[1];
+        x = 0;
+        REPEAT24(b[0] = a[pi[x]];
+                a[pi[x]] = rol(t, rho[x]);
+                t = b[0];
+                x++; )
+        // Chi
+        FOR5(y,
+                5,
+                FOR5(x, 1,
+                        b[x] = a[y + x];)
+                FOR5(x, 1,
+                a[y + x] = b[x] ^ ((~b[(x + 1) % 5]) & b[(x + 2) % 5]); ))
+        // Iota
+        a[0] ^= RC[i];
+    }
+}
+
+/******** The FIPS202-defined functions. ********/
+
+/*** Some helper macros. ***/
+
+#define _(S) do { S } while (0)
+#define FOR(i, ST, L, S) \
+  _(for (size_t i = 0; i < L; i += ST) { S; })
+#define mkapply_ds(NAME, S)                                          \
+  static inline void NAME(uint8_t* dst,                              \
+                          const uint8_t* src,                        \
+                          size_t len) {                              \
+    FOR(i, 1, len, S);                                               \
+  }
+#define mkapply_sd(NAME, S)                                          \
+  static inline void NAME(const uint8_t* src,                        \
+                          uint8_t* dst,                              \
+                          size_t len) {                              \
+    FOR(i, 1, len, S);                                               \
+  }
+
+mkapply_ds(xorin, dst[i] ^= src[i])  // xorin
+mkapply_sd(setout, dst[i] = src[i])  // setout
+
+#define P keccakf
+#define Plen 200
+
+// Fold P*F over the full blocks of an input.
+#define foldP(I, L, F) \
+  while (L >= rate) {  \
+    F(a, I, rate);     \
+    P(a);              \
+    I += rate;         \
+    L -= rate;         \
+  }
+
+/** The sponge-based hash construction. **/
+static inline int hash(uint8_t* out, size_t outlen,
+        const uint8_t* in, size_t inlen,
+        size_t rate, uint8_t delim) {
+    if ((out == NULL) || ((in == NULL) && inlen != 0) || (rate >= Plen)) {
+        return -1;
+    }
+    uint8_t a[Plen] = {0};
+    // Absorb input.
+    foldP(in, inlen, xorin);
+    // Xor in the DS and pad frame.
+    a[inlen] ^= delim;
+    a[rate - 1] ^= 0x80;
+    // Xor in the last block.
+    xorin(a, in, inlen);
+    // Apply P
+    P(a);
+    // Squeeze output.
+    foldP(out, outlen, setout);
+    setout(a, out, outlen);
+    memset(a, 0, 200);
+    return 0;
+}
+
+#define defsha3(bits)                                             \
+  int sha3_##bits(uint8_t* out, size_t outlen,                    \
+                  const uint8_t* in, size_t inlen) {              \
+    if (outlen > (bits/8)) {                                      \
+      return -1;                                                  \
+    }                                                             \
+    return hash(out, outlen, in, inlen, 200 - (bits / 4), 0x01);  \
+  }
+
+/*** FIPS202 SHA3 FOFs ***/
+defsha3(256)
+defsha3(512)
\ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h
new file mode 100644
index 0000000000..36a0a53010
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3.h
@@ -0,0 +1,27 @@
+#pragma once
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include "compiler.h"
+#include <stdint.h>
+#include <stdlib.h>
+
+#define decsha3(bits) \
+  int sha3_##bits(uint8_t*, size_t, const uint8_t*, size_t);
+
+decsha3(256)
+decsha3(512)
+
+static inline void SHA3_256(uint8_t * const ret, uint8_t const *data, const size_t size) {
+    sha3_256(ret, 32, data, size);
+}
+
+static inline void SHA3_512(uint8_t * const ret, uint8_t const *data, const size_t size) {
+    sha3_512(ret, 64, data, size);
+}
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp
new file mode 100644
index 0000000000..9454ce04a2
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.cpp
@@ -0,0 +1,34 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+
+/** @file sha3.cpp
+* @author Tim Hughes <tim@twistedfury.com>
+* @date 2015
+*/
+
+#include <stdint.h>
+#include <cryptopp/sha3.h>
+
+extern "C" {
+void SHA3_256(uint8_t *const ret, const uint8_t *data, size_t size) {
+  CryptoPP::SHA3_256().CalculateDigest(ret, data, size);
+}
+
+void SHA3_512(uint8_t *const ret, const uint8_t *data, size_t size) {
+  CryptoPP::SHA3_512().CalculateDigest(ret, data, size);
+}
+}
\ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h
new file mode 100644
index 0000000000..f910960e1b
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/sha3_cryptopp.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "compiler.h"
+#include <stdint.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void SHA3_256(uint8_t *const ret, const uint8_t *data, size_t size);
+void SHA3_512(uint8_t *const ret, const uint8_t *data, size_t size);
+
+#ifdef __cplusplus
+}
+#endif
\ No newline at end of file
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c
new file mode 100644
index 0000000000..fbf268b7d4
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.c
@@ -0,0 +1,41 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file util.c
+ * @author Tim Hughes <tim@twistedfury.com>
+ * @date 2015
+ */
+#include <stdarg.h>
+#include <stdio.h>
+#include "util.h"
+
+#ifdef _MSC_VER
+
+// foward declare without all of Windows.h
+__declspec(dllimport) void __stdcall OutputDebugStringA(const char* lpOutputString);
+
+void debugf(const char *str, ...)
+{
+	va_list args;
+    va_start(args, str);
+
+	char buf[1<<16];
+	_vsnprintf_s(buf, sizeof(buf), sizeof(buf), str, args);
+	buf[sizeof(buf)-1] = '\0';
+	OutputDebugStringA(buf);
+}
+
+#endif
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h
new file mode 100644
index 0000000000..2f59076f61
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/libethash/util.h
@@ -0,0 +1,47 @@
+/*
+  This file is part of cpp-ethereum.
+
+  cpp-ethereum is free software: you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation, either version 3 of the License, or
+  (at your option) any later version.
+
+  cpp-ethereum is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+
+  You should have received a copy of the GNU General Public License
+  along with cpp-ethereum.  If not, see <http://www.gnu.org/licenses/>.
+*/
+/** @file util.h
+ * @author Tim Hughes <tim@twistedfury.com>
+ * @date 2015
+ */
+#pragma once
+#include <stdint.h>
+#include "compiler.h"
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#ifdef _MSC_VER
+void debugf(const char *str, ...);
+#else
+#define debugf printf
+#endif
+
+static inline uint32_t min_u32(uint32_t a, uint32_t b)
+{
+	return a < b ? a : b;
+}
+
+static inline uint32_t clamp_u32(uint32_t x, uint32_t min_, uint32_t max_)
+{
+	return x < min_ ? min_ : (x > max_ ? max_ : x);
+}
+
+#ifdef __cplusplus
+}
+#endif
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp
new file mode 100644
index 0000000000..642c33cb33
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/binding.gyp
@@ -0,0 +1,29 @@
+{
+  "targets":
+    [{
+      "target_name": "ethash",
+        "sources": [
+            './ethash.cc',
+            '../libethash/ethash.h',
+            '../libethash/util.c',
+            '../libethash/util.h',
+            '../libethash/blum_blum_shub.h',
+            '../libethash/blum_blum_shub.c',
+            '../libethash/sha3.h',
+            '../libethash/sha3.c',
+            '../libethash/internal.h',
+            '../libethash/internal.c'
+          ],
+        "include_dirs": [
+          "../",
+          "<!(node -e \"require('nan')\")"
+        ],
+        "cflags": [
+        "-Wall",
+        "-Wno-maybe-uninitialized",
+        "-Wno-uninitialized",
+        "-Wno-unused-function",
+        "-Wextra"
+          ]
+    }]
+}
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/ethash.cc b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/ethash.cc
new file mode 100644
index 0000000000..6ab9730be4
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/ethash.cc
@@ -0,0 +1,587 @@
+#include <nan.h>
+#include <iostream>
+#include <node.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include "../libethash/ethash.h"
+
+using namespace v8;
+
+class EthashValidator : public NanAsyncWorker {
+ public:
+  // Constructor
+  EthashValidator(NanCallback *callback, const unsigned blocknumber, const unsigned char * seed)
+    : NanAsyncWorker(callback), blocknumber(blocknumber), seed(seed) {}
+  // Destructor
+  ~EthashValidator() {
+  	free(this->cache);
+	free(this->params);
+  }
+
+  // Executed inside the worker-thread.
+  // It is not safe to access V8, or V8 data structures
+  // here, so everything we need for input and output
+  // should go on `this`.
+  void Execute () {
+	
+    /* this->result = secp256k1_ecdsa_sign(this->msg, this->sig , &this->sig_len, this->pk, NULL, NULL); */
+  }
+
+  // Executed when the async work is complete
+  // this function will be run inside the main event loop
+  // so it is safe to use V8 again
+  void HandleOKCallback () {
+    NanScope();
+    Handle<Value> argv[] = {
+      NanNew<Number>(this->result)
+    };
+    callback->Call(2, argv);
+  }
+
+ protected:
+  const unsigned blocknumber; 
+  const unsigned char * seed;
+  ethash_params * params;
+  ethash_cache * cache;
+  bool result;
+  bool ready = 0;
+};
+
+/* class CompactSignWorker : public SignWorker { */
+/*  public: */
+/*   CompactSignWorker(NanCallback *callback, const unsigned char *msg, const unsigned char *pk ) */
+/*     : SignWorker(callback, msg, pk){} */
+
+/*   void Execute () { */
+/*     this->result = secp256k1_ecdsa_sign_compact(this->msg, this->sig , this->pk, NULL, NULL,  &this->sig_len); */
+/*   } */
+
+/*   void HandleOKCallback () { */
+/*     NanScope(); */
+/*     Handle<Value> argv[] = { */
+/*       NanNew<Number>(this->result), */
+/*       NanNewBufferHandle((char *)this->sig, 64), */
+/*       NanNew<Number>(this->sig_len) */
+/*     }; */
+/*     callback->Call(3, argv); */
+/*   } */
+/* }; */
+
+/* class RecoverWorker : public NanAsyncWorker { */
+/*  public: */
+/*   // Constructor */
+/*   RecoverWorker(NanCallback *callback, const unsigned char *msg, const unsigned char *sig, int compressed, int rec_id) */
+/*     : NanAsyncWorker(callback), msg(msg), sig(sig), compressed(compressed), rec_id(rec_id) {} */
+/*   // Destructor */
+/*   ~RecoverWorker() {} */
+
+/*   void Execute () { */
+/*     if(this->compressed == 1){ */
+/*       this->pubkey = new unsigned char[33]; */ 
+/*     }else{ */
+/*       this->pubkey = new unsigned char[65]; */ 
+/*     } */
+
+/*     this->result = secp256k1_ecdsa_recover_compact(this->msg, this->sig, this->pubkey, &this->pubkey_len, this->compressed, this->rec_id); */
+/*   } */
+
+/*   void HandleOKCallback () { */
+/*     NanScope(); */
+/*     Handle<Value> argv[] = { */
+/*       NanNew<Number>(this->result), */
+/*       NanNewBufferHandle((char *)this->pubkey, this->pubkey_len) */
+/*     }; */
+/*     callback->Call(2, argv); */
+/*   } */
+
+/*  protected: */
+/*   const unsigned char * msg; */
+/*   const unsigned char * sig; */ 
+/*   int compressed; */
+/*   int rec_id; */
+/*   int result; */
+/*   unsigned char * pubkey; */
+/*   int pubkey_len; */
+/* }; */
+
+/* class VerifyWorker : public NanAsyncWorker { */
+/*  public: */
+/*   // Constructor */
+/*   VerifyWorker(NanCallback *callback, const unsigned char *msg, const unsigned char *sig, int sig_len, const unsigned char *pub_key, int pub_key_len) */
+/*     : NanAsyncWorker(callback), msg(msg), sig(sig), sig_len(sig_len), pub_key(pub_key), pub_key_len(pub_key_len) {} */
+/*   // Destructor */
+/*   ~VerifyWorker() {} */
+
+/*   void Execute () { */
+/*     this->result = secp256k1_ecdsa_verify(this->msg, this->sig, this->sig_len,  this->pub_key, this->pub_key_len); */
+/*   } */
+
+/*   void HandleOKCallback () { */
+/*     NanScope(); */
+/*     Handle<Value> argv[] = { */
+/*       NanNew<Number>(this->result), */
+/*     }; */
+/*     callback->Call(1, argv); */
+/*   } */
+
+/*  protected: */
+/*   int result; */
+/*   const unsigned char * msg; */
+/*   const unsigned char * sig; */
+/*   int sig_len; */ 
+/*   const unsigned char * pub_key; */
+/*   int pub_key_len; */
+/* }; */
+
+/* NAN_METHOD(Verify){ */
+/*   NanScope(); */
+
+/*   Local<Object> pub_buf = args[0].As<Object>(); */
+/*   const unsigned char *pub_data = (unsigned char *) node::Buffer::Data(pub_buf); */
+/*   int pub_len = node::Buffer::Length(args[0]); */
+
+/*   Local<Object> msg_buf = args[1].As<Object>(); */
+/*   const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+
+/*   Local<Object> sig_buf = args[2].As<Object>(); */
+/*   const unsigned char *sig_data = (unsigned char *) node::Buffer::Data(sig_buf); */
+/*   int sig_len = node::Buffer::Length(args[2]); */
+
+/*   int result = secp256k1_ecdsa_verify(msg_data, sig_data, sig_len, pub_data, pub_len ); */ 
+
+/*   NanReturnValue(NanNew<Number>(result)); */
+/* } */
+
+/* NAN_METHOD(Verify_Async){ */
+/*   NanScope(); */
+
+/*   Local<Object> pub_buf = args[0].As<Object>(); */
+/*   const unsigned char *pub_data = (unsigned char *) node::Buffer::Data(pub_buf); */
+/*   int pub_len = node::Buffer::Length(args[0]); */
+
+/*   Local<Object> msg_buf = args[1].As<Object>(); */
+/*   const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+
+/*   Local<Object> sig_buf = args[2].As<Object>(); */
+/*   const unsigned char *sig_data = (unsigned char *) node::Buffer::Data(sig_buf); */
+/*   int sig_len = node::Buffer::Length(args[2]); */
+
+/*   Local<Function> callback = args[3].As<Function>(); */
+/*   NanCallback* nanCallback = new NanCallback(callback); */
+
+/*   VerifyWorker* worker = new VerifyWorker(nanCallback, msg_data, sig_data, sig_len, pub_data, pub_len); */
+/*   NanAsyncQueueWorker(worker); */
+
+/*   NanReturnUndefined(); */
+/* } */
+
+/* NAN_METHOD(Sign){ */
+/*   NanScope(); */
+
+/*   //the first argument should be the private key as a buffer */
+/*   Local<Object> pk_buf = args[0].As<Object>(); */
+/*   const unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */
+/*   int sec_len = node::Buffer::Length(args[0]); */
+/*   //the second argument is the message that we are signing */
+/*   Local<Object> msg_buf = args[1].As<Object>(); */
+/*   const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+
+/*   unsigned char sig[72]; */
+/*   int sig_len = 72; */
+/*   int msg_len = node::Buffer::Length(args[1]); */
+
+/*   if(sec_len != 32){ */
+/*     return NanThrowError("the secret key needs tobe 32 bytes"); */
+/*   } */
+
+/*   if(msg_len == 0){ */
+/*     return NanThrowError("messgae cannot be null"); */ 
+/*   } */
+
+/*   int result = secp256k1_ecdsa_sign(msg_data, sig , &sig_len, pk_data, NULL, NULL); */
+
+/*   if(result == 1){ */
+/*     NanReturnValue(NanNewBufferHandle((char *)sig, sig_len)); */
+/*   }else{ */
+/*     return NanThrowError("nonce invalid, try another one"); */
+/*   } */
+/* } */
+
+/* NAN_METHOD(Sign_Async){ */
+
+/*   NanScope(); */
+/*   //the first argument should be the private key as a buffer */
+/*   Local<Object> sec_buf = args[0].As<Object>(); */
+/*   const unsigned char *sec_data = (unsigned char *) node::Buffer::Data(sec_buf); */
+/*   int sec_len = node::Buffer::Length(args[0]); */
+/*   //the second argument is the message that we are signing */
+/*   Local<Object> msg_buf = args[1].As<Object>(); */
+/*   const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+
+/*   Local<Function> callback = args[2].As<Function>(); */
+/*   NanCallback* nanCallback = new NanCallback(callback); */
+
+/*   int msg_len = node::Buffer::Length(args[1]); */
+
+/*   if(sec_len != 32){ */
+/*     return NanThrowError("the secret key needs tobe 32 bytes"); */
+/*   } */
+
+/*   if(msg_len == 0){ */
+/*     return NanThrowError("messgae cannot be null"); */ 
+/*   } */
+
+/*   SignWorker* worker = new SignWorker(nanCallback, msg_data, sec_data); */
+/*   NanAsyncQueueWorker(worker); */
+
+/*   NanReturnUndefined(); */
+/* } */
+
+/* NAN_METHOD(Sign_Compact){ */
+
+/*   NanScope(); */
+
+/*   Local<Object> seckey_buf = args[0].As<Object>(); */
+/*   const unsigned char *seckey_data = (unsigned char *) node::Buffer::Data(seckey_buf); */
+/*   int sec_len = node::Buffer::Length(args[0]); */
+
+/*   Local<Object> msg_buf = args[1].As<Object>(); */
+/*   const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+/*   int msg_len = node::Buffer::Length(args[1]); */
+
+/*   if(sec_len != 32){ */
+/*     return NanThrowError("the secret key needs tobe 32 bytes"); */
+/*   } */
+
+/*   if(msg_len == 0){ */
+/*     return NanThrowError("messgae cannot be null"); */ 
+/*   } */
+
+/*   unsigned char sig[64]; */
+/*   int rec_id; */
+
+/*   //TODO: change the nonce */
+/*   int valid_nonce = secp256k1_ecdsa_sign_compact(msg_data, sig, seckey_data, NULL, NULL, &rec_id ); */
+
+/*   Local<Array> array = NanNew<Array>(3); */
+/*   array->Set(0, NanNew<Integer>(valid_nonce)); */
+/*   array->Set(1, NanNew<Integer>(rec_id)); */
+/*   array->Set(2, NanNewBufferHandle((char *)sig, 64)); */
+
+/*   NanReturnValue(array); */
+/* } */
+
+/* NAN_METHOD(Sign_Compact_Async){ */
+/*   NanScope(); */
+/*   //the first argument should be the private key as a buffer */
+/*   Local<Object> sec_buf = args[0].As<Object>(); */
+/*   const unsigned char *sec_data = (unsigned char *) node::Buffer::Data(sec_buf); */
+/*   int sec_len = node::Buffer::Length(args[0]); */
+
+/*   //the second argument is the message that we are signing */
+/*   Local<Object> msg_buf = args[1].As<Object>(); */
+/*   const unsigned char *msg_data = (unsigned char *) node::Buffer::Data(msg_buf); */
+
+
+/*   Local<Function> callback = args[2].As<Function>(); */
+/*   NanCallback* nanCallback = new NanCallback(callback); */
+
+/*   int msg_len = node::Buffer::Length(args[1]); */
+
+/*   if(sec_len != 32){ */
+/*     return NanThrowError("the secret key needs tobe 32 bytes"); */
+/*   } */
+
+/*   if(msg_len == 0){ */
+/*     return NanThrowError("messgae cannot be null"); */ 
+/*   } */
+
+/*   CompactSignWorker* worker = new CompactSignWorker(nanCallback, msg_data, sec_data); */ 
+/*   NanAsyncQueueWorker(worker); */
+
+/*   NanReturnUndefined(); */
+/* } */
+
+/* NAN_METHOD(Recover_Compact){ */
+
+/*   NanScope(); */
+  
+/*   Local<Object> msg_buf = args[0].As<Object>(); */
+/*   const unsigned char *msg = (unsigned char *) node::Buffer::Data(msg_buf); */
+/*   int msg_len = node::Buffer::Length(args[0]); */
+
+/*   Local<Object> sig_buf = args[1].As<Object>(); */
+/*   const unsigned char *sig = (unsigned char *) node::Buffer::Data(sig_buf); */
+
+/*   Local<Number> compressed = args[2].As<Number>(); */
+/*   int int_compressed = compressed->IntegerValue(); */
+
+/*   Local<Number> rec_id = args[3].As<Number>(); */
+/*   int int_rec_id = rec_id->IntegerValue(); */
+
+/*   if(msg_len == 0){ */
+/*     return NanThrowError("messgae cannot be null"); */ 
+/*   } */
+
+/*   unsigned char pubKey[65]; */ 
+
+/*   int pubKeyLen; */
+
+/*   int result = secp256k1_ecdsa_recover_compact(msg, sig, pubKey, &pubKeyLen, int_compressed, int_rec_id); */
+/*   if(result == 1){ */
+/*     NanReturnValue(NanNewBufferHandle((char *)pubKey, pubKeyLen)); */
+/*   }else{ */
+    
+/*     NanReturnValue(NanFalse()); */
+/*   } */
+/* } */
+
+/* NAN_METHOD(Recover_Compact_Async){ */
+
+/*   NanScope(); */
+  
+/*   //the message */
+/*   Local<Object> msg_buf = args[0].As<Object>(); */
+/*   const unsigned char *msg = (unsigned char *) node::Buffer::Data(msg_buf); */
+/*   int msg_len = node::Buffer::Length(args[0]); */
+
+/*   //the signature length */
+/*   Local<Object> sig_buf = args[1].As<Object>(); */
+/*   const unsigned char *sig = (unsigned char *) node::Buffer::Data(sig_buf); */
+/*   //todo sig len needs tobe 64 */
+/*   int sig_len = node::Buffer::Length(args[1]); */
+
+/*   //to compress or not? */
+/*   Local<Number> compressed = args[2].As<Number>(); */
+/*   int int_compressed = compressed->IntegerValue(); */
+
+/*   //the rec_id */
+/*   Local<Number> rec_id = args[3].As<Number>(); */
+/*   int int_rec_id = rec_id->IntegerValue(); */
+
+/*   //the callback */
+/*   Local<Function> callback = args[4].As<Function>(); */
+/*   NanCallback* nanCallback = new NanCallback(callback); */
+
+/*   if(sig_len != 64){ */
+/*     return NanThrowError("the signature needs to be 64 bytes"); */
+/*   } */
+
+/*   if(msg_len == 0){ */
+/*     return NanThrowError("messgae cannot be null"); */ 
+/*   } */
+
+/*   RecoverWorker* worker = new RecoverWorker(nanCallback, msg, sig, int_compressed, int_rec_id); */
+/*   NanAsyncQueueWorker(worker); */
+
+/*   NanReturnUndefined(); */
+/* } */
+
+/* NAN_METHOD(Seckey_Verify){ */
+/*   NanScope(); */
+
+/*   const unsigned char *data = (const unsigned char*) node::Buffer::Data(args[0]); */
+/*   int result =  secp256k1_ec_seckey_verify(data); */ 
+/*   NanReturnValue(NanNew<Number>(result)); */ 
+/* } */
+
+/* NAN_METHOD(Pubkey_Verify){ */
+
+/*   NanScope(); */
+  
+/*   Local<Object> pub_buf = args[0].As<Object>(); */
+/*   const unsigned char *pub_key = (unsigned char *) node::Buffer::Data(pub_buf); */
+/*   int pub_key_len = node::Buffer::Length(args[0]); */
+
+/*   int result = secp256k1_ec_pubkey_verify(pub_key, pub_key_len); */
+
+/*   NanReturnValue(NanNew<Number>(result)); */ 
+/* } */
+
+/* NAN_METHOD(Pubkey_Create){ */
+/*   NanScope(); */
+
+/*   Handle<Object> pk_buf = args[0].As<Object>(); */
+/*   const unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */
+/*   int pk_len = node::Buffer::Length(args[0]); */
+
+/*   Local<Number> l_compact = args[1].As<Number>(); */
+/*   int compact = l_compact->IntegerValue(); */
+/*   int pubKeyLen; */
+
+/*   if(pk_len != 32){ */
+/*     return NanThrowError("the secert key need to be 32 bytes"); */
+/*   } */
+
+/*   unsigned char *pubKey; */
+/*   if(compact == 1){ */
+/*     pubKey = new unsigned char[33]; */ 
+/*   }else{ */
+/*     pubKey = new unsigned char[65]; */ 
+/*   } */
+
+/*   int results = secp256k1_ec_pubkey_create(pubKey,&pubKeyLen, pk_data, compact ); */
+/*   if(results == 0){ */
+/*     return NanThrowError("secret was invalid, try again."); */
+/*   }else{ */
+/*     NanReturnValue(NanNewBufferHandle((char *)pubKey, pubKeyLen)); */
+/*   } */
+/* } */
+
+/* NAN_METHOD(Pubkey_Decompress){ */
+/*   NanScope(); */
+
+/*   //the first argument should be the private key as a buffer */
+/*   Local<Object> pk_buf = args[0].As<Object>(); */
+/*   unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */
+
+/*   int pk_len = node::Buffer::Length(args[0]); */
+
+/*   int results = secp256k1_ec_pubkey_decompress(pk_data, &pk_len); */
+
+/*   if(results == 0){ */
+/*     return NanThrowError("invalid public key"); */
+/*   }else{ */
+/*     NanReturnValue(NanNewBufferHandle((char *)pk_data, pk_len)); */
+/*   } */
+/* } */
+
+
+/* NAN_METHOD(Privkey_Import){ */
+/*   NanScope(); */
+
+/*   //the first argument should be the private key as a buffer */
+/*   Handle<Object> pk_buf = args[0].As<Object>(); */
+/*   const unsigned char *pk_data = (unsigned char *) node::Buffer::Data(pk_buf); */
+
+/*   int pk_len = node::Buffer::Length(args[0]); */
+
+/*   unsigned char sec_key[32]; */
+/*   int results = secp256k1_ec_privkey_import(sec_key, pk_data, pk_len); */
+
+/*   if(results == 0){ */
+/*     return NanThrowError("invalid private key"); */
+/*   }else{ */
+/*     NanReturnValue(NanNewBufferHandle((char *)sec_key, 32)); */
+/*   } */
+/* } */
+
+/* NAN_METHOD(Privkey_Export){ */
+/*   NanScope(); */
+
+/*   //the first argument should be the private key as a buffer */
+/*   Handle<Object> sk_buf = args[0].As<Object>(); */
+/*   const unsigned char *sk_data = (unsigned char *) node::Buffer::Data(sk_buf); */
+
+/*   Local<Number> l_compressed = args[1].As<Number>(); */
+/*   int compressed = l_compressed->IntegerValue(); */
+
+/*   unsigned char *privKey; */
+/*   int pk_len; */
+/*   int results = secp256k1_ec_privkey_export(sk_data, privKey, &pk_len, compressed); */
+/*   if(results == 0){ */
+/*     return NanThrowError("invalid private key"); */
+/*   }else{ */
+/*     NanReturnValue(NanNewBufferHandle((char *)privKey, pk_len)); */
+/*   } */
+/* } */
+
+/* NAN_METHOD(Privkey_Tweak_Add){ */
+/*   NanScope(); */
+
+/*   //the first argument should be the private key as a buffer */
+/*   Handle<Object> sk_buf = args[0].As<Object>(); */
+/*   unsigned char *sk = (unsigned char *) node::Buffer::Data(sk_buf); */
+
+/*   Handle<Object> tweak_buf = args[1].As<Object>(); */
+/*   const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */
+
+/*   int results = secp256k1_ec_privkey_tweak_add(sk, tweak); */
+/*   if(results == 0){ */
+/*     return NanThrowError("invalid key"); */
+/*   }else{ */
+/*     NanReturnValue(NanNewBufferHandle((char *)sk, 32)); */
+/*   } */
+/* } */
+
+/* NAN_METHOD(Privkey_Tweak_Mul){ */
+/*   NanScope(); */
+
+/*   //the first argument should be the private key as a buffer */
+/*   Handle<Object> sk_buf = args[0].As<Object>(); */
+/*   unsigned char *sk = (unsigned char *) node::Buffer::Data(sk_buf); */
+
+/*   Handle<Object> tweak_buf = args[1].As<Object>(); */
+/*   const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */
+
+/*   int results = secp256k1_ec_privkey_tweak_mul(sk, tweak); */
+/*   if(results == 0){ */
+/*     return NanThrowError("invalid key"); */
+/*   }else{ */
+/*     NanReturnValue(NanNewBufferHandle((char *)sk, 32)); */
+/*   } */
+/* } */
+
+/* NAN_METHOD(Pubkey_Tweak_Add){ */
+/*   NanScope(); */
+
+/*   //the first argument should be the private key as a buffer */
+/*   Handle<Object> pk_buf = args[0].As<Object>(); */
+/*   unsigned char *pk = (unsigned char *) node::Buffer::Data(pk_buf); */
+/*   int pk_len = node::Buffer::Length(args[0]); */
+
+/*   Handle<Object> tweak_buf = args[1].As<Object>(); */
+/*   const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */
+
+/*   int results = secp256k1_ec_pubkey_tweak_add(pk, pk_len, tweak); */
+/*   if(results == 0){ */
+/*     return NanThrowError("invalid key"); */
+/*   }else{ */
+/*     NanReturnValue(NanNewBufferHandle((char *)pk, pk_len)); */
+/*   } */
+/* } */
+
+/* NAN_METHOD(Pubkey_Tweak_Mul){ */
+/*   NanScope(); */
+
+/*   //the first argument should be the private key as a buffer */
+/*   Handle<Object> pk_buf = args[0].As<Object>(); */
+/*   unsigned char *pk = (unsigned char *) node::Buffer::Data(pk_buf); */
+/*   int pk_len = node::Buffer::Length(args[0]); */
+
+/*   Handle<Object> tweak_buf = args[1].As<Object>(); */
+/*   const unsigned char *tweak= (unsigned char *) node::Buffer::Data(tweak_buf); */
+
+/*   int results = secp256k1_ec_pubkey_tweak_mul(pk, pk_len, tweak); */
+/*   if(results == 0){ */
+/*     return NanThrowError("invalid key"); */
+/*   }else{ */
+/*     NanReturnValue(NanNewBufferHandle((char *)pk, pk_len)); */
+/*   } */
+/* } */
+
+void Init(Handle<Object> exports) {
+
+  /* secp256k1_start(SECP256K1_START_SIGN | SECP256K1_START_VERIFY); */
+  /* exports->Set(NanNew("seckeyVerify"), NanNew<FunctionTemplate>(Seckey_Verify)->GetFunction()); */
+  /* exports->Set(NanNew("sign"), NanNew<FunctionTemplate>(Sign)->GetFunction()); */
+  /* exports->Set(NanNew("signAsync"), NanNew<FunctionTemplate>(Sign_Async)->GetFunction()); */
+  /* exports->Set(NanNew("signCompact"), NanNew<FunctionTemplate>(Sign_Compact)->GetFunction()); */
+  /* exports->Set(NanNew("signCompactAsync"), NanNew<FunctionTemplate>(Sign_Compact_Async)->GetFunction()); */
+  /* exports->Set(NanNew("recoverCompact"), NanNew<FunctionTemplate>(Recover_Compact)->GetFunction()); */
+  /* exports->Set(NanNew("recoverCompactAsync"), NanNew<FunctionTemplate>(Recover_Compact_Async)->GetFunction()); */
+  /* exports->Set(NanNew("verify"), NanNew<FunctionTemplate>(Verify)->GetFunction()); */
+  /* exports->Set(NanNew("verifyAsync"), NanNew<FunctionTemplate>(Verify_Async)->GetFunction()); */
+  /* exports->Set(NanNew("secKeyVerify"), NanNew<FunctionTemplate>(Seckey_Verify)->GetFunction()); */
+  /* exports->Set(NanNew("pubKeyVerify"), NanNew<FunctionTemplate>(Pubkey_Verify)->GetFunction()); */
+  /* exports->Set(NanNew("pubKeyCreate"), NanNew<FunctionTemplate>(Pubkey_Create)->GetFunction()); */
+  /* exports->Set(NanNew("pubKeyDecompress"), NanNew<FunctionTemplate>(Pubkey_Decompress)->GetFunction()); */
+  /* exports->Set(NanNew("privKeyExport"), NanNew<FunctionTemplate>(Privkey_Export)->GetFunction()); */
+  /* exports->Set(NanNew("privKeyImport"), NanNew<FunctionTemplate>(Privkey_Import)->GetFunction()); */
+  /* exports->Set(NanNew("privKeyTweakAdd"), NanNew<FunctionTemplate>(Privkey_Tweak_Add)->GetFunction()); */
+  /* exports->Set(NanNew("privKeyTweakMul"), NanNew<FunctionTemplate>(Privkey_Tweak_Mul)->GetFunction()); */
+  /* exports->Set(NanNew("pubKeyTweakAdd"), NanNew<FunctionTemplate>(Privkey_Tweak_Add)->GetFunction()); */
+  /* exports->Set(NanNew("pubKeyTweakMul"), NanNew<FunctionTemplate>(Privkey_Tweak_Mul)->GetFunction()); */
+}
+
+NODE_MODULE(secp256k1, Init)
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json
new file mode 100644
index 0000000000..690ea32634
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/package.json
@@ -0,0 +1,13 @@
+{
+  "name": "node-ethash",
+  "version": "1.0.0",
+  "description": "",
+  "main": "index.js",
+  "scripts": {
+    "test": "echo \"Error: no test specified\" && exit 1",
+    "install": "node-gyp rebuild"
+  },
+  "author": "",
+  "license": "ISC",
+  "gypfile": true
+}
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md
new file mode 100644
index 0000000000..bb46cdd6e5
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/node-ethash/readme.md
@@ -0,0 +1,12 @@
+# To Develop
+`npm install -g node-gyp`  
+`npm install .`  
+
+
+# To rebuild 
+`node-gyp rebuild`  
+
+
+# notes
+
+nan is good https://github.com/rvagg/nan
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt b/Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt
new file mode 100644
index 0000000000..431af6a997
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/CMakeLists.txt
@@ -0,0 +1,30 @@
+IF( NOT Boost_FOUND )
+    find_package(Boost COMPONENTS unit_test_framework)
+ENDIF()
+
+IF( Boost_FOUND )
+    include_directories( ${Boost_INCLUDE_DIR} )
+    include_directories(..)
+
+    link_directories ( ${Boost_LIBRARY_DIRS} )
+    file(GLOB HEADERS "*.h")
+    ADD_DEFINITIONS(-DBOOST_TEST_DYN_LINK)
+
+    if (NOT CRYPTOPP_FOUND)
+	    find_package (CryptoPP)
+    endif()
+
+    if (CRYPTOPP_FOUND)
+	    add_definitions(-DWITH_CRYPTOPP)
+    endif()
+
+    add_executable (Test test.cpp ${HEADERS})
+    target_link_libraries (Test ${Boost_UNIT_TEST_FRAMEWORK_LIBRARY} ${ETHHASH_LIBS})
+
+    if (CRYPTOPP_FOUND)
+	    TARGET_LINK_LIBRARIES(Test ${CRYPTOPP_LIBRARIES})
+    endif()
+
+    enable_testing ()
+    add_test(NAME ethash COMMAND Test)
+ENDIF()
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go b/Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go
new file mode 100644
index 0000000000..d734954e1b
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/go/ethash_test.go
@@ -0,0 +1,54 @@
+package ethashTest
+
+import (
+	"bytes"
+	"crypto/rand"
+	"log"
+	"math/big"
+	"testing"
+
+	"github.com/ethereum/ethash"
+	"github.com/ethereum/go-ethereum/core"
+	"github.com/ethereum/go-ethereum/ethdb"
+)
+
+func TestEthash(t *testing.T) {
+	seedHash := make([]byte, 32)
+	_, err := rand.Read(seedHash)
+	if err != nil {
+		panic(err)
+	}
+
+	db, err := ethdb.NewMemDatabase()
+	if err != nil {
+		panic(err)
+	}
+
+	blockProcessor, err := core.NewCanonical(5, db)
+	if err != nil {
+		panic(err)
+	}
+
+	log.Println("Block Number: ", blockProcessor.ChainManager().CurrentBlock().Number())
+
+	e := ethash.New(blockProcessor.ChainManager())
+
+	miningHash := make([]byte, 32)
+	if _, err := rand.Read(miningHash); err != nil {
+		panic(err)
+	}
+	diff := big.NewInt(10000)
+	log.Println("difficulty", diff)
+
+	nonce := uint64(0)
+
+	ghash_full := e.FullHash(nonce, miningHash)
+	log.Printf("ethash full (on nonce): %x %x\n", ghash_full, nonce)
+
+	ghash_light := e.LightHash(nonce, miningHash)
+	log.Printf("ethash light (on nonce): %x %x\n", ghash_light, nonce)
+
+	if bytes.Compare(ghash_full, ghash_light) != 0 {
+		t.Errorf("full: %x, light: %x", ghash_full, ghash_light)
+	}
+}
diff --git a/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp b/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp
new file mode 100644
index 0000000000..bec44e1385
--- /dev/null
+++ b/Godeps/_workspace/src/github.com/ethereum/ethash/test/test.cpp
@@ -0,0 +1,233 @@
+#include <iomanip>
+#include <libethash/fnv.h>
+#include <libethash/ethash.h>
+#include <libethash/internal.h>
+
+#ifdef WITH_CRYPTOPP
+#include <libethash/sha3_cryptopp.h>
+#else
+#include <libethash/sha3.h>
+#endif // WITH_CRYPTOPP
+
+#define BOOST_TEST_MODULE Daggerhashimoto
+#define BOOST_TEST_MAIN
+
+#include <boost/test/unit_test.hpp>
+#include <libethash/ethash.h>
+#include <iostream>
+
+std::string bytesToHexString(const uint8_t *str, const size_t s) {
+    std::ostringstream ret;
+
+    for (int i = 0; i < s; ++i)
+        ret << std::hex << std::setfill('0') << std::setw(2) << std::nouppercase << (int) str[i];
+
+    return ret.str();
+}
+
+BOOST_AUTO_TEST_CASE(fnv_hash_check) {
+    uint32_t x = 1235U;
+    const uint32_t
+            y = 9999999U,
+            expected = (FNV_PRIME * x) ^ y;
+
+    x = fnv_hash(x, y);
+
+    BOOST_REQUIRE_MESSAGE(x == expected,
+            "\nexpected: " << expected << "\n"
+                    << "actual: " << x << "\n");
+
+}
+
+BOOST_AUTO_TEST_CASE(SHA256_check) {
+    uint8_t input[32], out[32];
+    memcpy(input, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32);
+    SHA3_256(out, input, 32);
+    const std::string
+            expected = "2b5ddf6f4d21c23de216f44d5e4bdc68e044b71897837ea74c83908be7037cd7",
+            actual = bytesToHexString(out, 32);
+    BOOST_REQUIRE_MESSAGE(expected == actual,
+            "\nexpected: " << expected.c_str() << "\n"
+                    << "actual: " << actual.c_str() << "\n");
+}
+
+BOOST_AUTO_TEST_CASE(SHA512_check) {
+    uint8_t input[64], out[64];
+    memcpy(input, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 64);
+    SHA3_512(out, input, 64);
+    const std::string
+            expected = "0be8a1d334b4655fe58c6b38789f984bb13225684e86b20517a55ab2386c7b61c306f25e0627c60064cecd6d80cd67a82b3890bd1289b7ceb473aad56a359405",
+            actual = bytesToHexString(out, 64);
+    BOOST_REQUIRE_MESSAGE(expected == actual,
+            "\nexpected: " << expected.c_str() << "\n"
+                    << "actual: " << actual.c_str() << "\n");
+}
+
+BOOST_AUTO_TEST_CASE(ethash_params_init_genesis_check) {
+    ethash_params params;
+    ethash_params_init(&params, 0);
+    BOOST_REQUIRE_MESSAGE(params.full_size  < DAGSIZE_BYTES_INIT,
+            "\nfull size: " << params.full_size << "\n"
+                    << "should be less than or equal to: " << DAGSIZE_BYTES_INIT << "\n");
+    BOOST_REQUIRE_MESSAGE(params.full_size + 20*MIX_BYTES >= DAGSIZE_BYTES_INIT,
+            "\nfull size + 20*MIX_BYTES: " << params.full_size + 20*MIX_BYTES << "\n"
+                    << "should be greater than or equal to: " << DAGSIZE_BYTES_INIT << "\n");
+    BOOST_REQUIRE_MESSAGE(params.cache_size < DAGSIZE_BYTES_INIT / 32,
+            "\ncache size: " << params.cache_size << "\n"
+                    << "should be less than or equal to: " << DAGSIZE_BYTES_INIT / 32 << "\n");
+}
+
+BOOST_AUTO_TEST_CASE(ethash_params_init_genesis_calcifide_check) {
+    ethash_params params;
+    ethash_params_init(&params, 0);
+    const uint32_t expected_full_size = 1073739904;
+    const uint32_t expected_cache_size = 1048384;
+    BOOST_REQUIRE_MESSAGE(params.full_size  == expected_full_size,
+            "\nexpected: " << expected_cache_size << "\n"
+                    << "actual: " << params.full_size << "\n");
+    BOOST_REQUIRE_MESSAGE(params.cache_size  == expected_cache_size,
+            "\nexpected: " << expected_cache_size << "\n"
+                    << "actual: " << params.cache_size << "\n");
+}
+
+BOOST_AUTO_TEST_CASE(ethash_params_init_check) {
+    ethash_params params;
+    ethash_params_init(&params, 1971000);
+    const uint64_t nine_month_size = (uint64_t) 8*DAGSIZE_BYTES_INIT;
+    BOOST_REQUIRE_MESSAGE(params.full_size  < nine_month_size,
+            "\nfull size: " << params.full_size << "\n"
+                    << "should be less than or equal to: " << nine_month_size << "\n");
+    BOOST_REQUIRE_MESSAGE(params.full_size + DAGSIZE_BYTES_INIT / 4 > nine_month_size,
+            "\nfull size + DAGSIZE_BYTES_INIT / 4: " << params.full_size + DAGSIZE_BYTES_INIT / 4 << "\n"
+                    << "should be greater than or equal to: " << nine_month_size << "\n");
+    BOOST_REQUIRE_MESSAGE(params.cache_size < nine_month_size / 1024,
+            "\nactual cache size: " << params.cache_size << "\n"
+                    << "expected: " << nine_month_size / 1024 << "\n");
+    BOOST_REQUIRE_MESSAGE(params.cache_size + DAGSIZE_BYTES_INIT / 4 / 1024 > nine_month_size / 1024 ,
+            "\ncache size + DAGSIZE_BYTES_INIT / 4 / 1024: " << params.cache_size + DAGSIZE_BYTES_INIT / 4 / 1024 << "\n"
+                    << "actual: " << nine_month_size / 32 << "\n");
+}
+
+BOOST_AUTO_TEST_CASE(light_and_full_client_checks) {
+    ethash_params params;
+    uint8_t seed[32], hash[32];
+    ethash_return_value light_out, full_out;
+    memcpy(seed, "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32);
+    memcpy(hash, "~~~X~~~~~~~~~~~~~~~~~~~~~~~~~~~~", 32);
+    ethash_params_init(&params, 0);
+    params.cache_size = 1024;
+    params.full_size = 1024 * 32;
+    ethash_cache cache;
+    cache.mem = alloca(params.cache_size);
+    ethash_mkcache(&cache, &params, seed);
+    node * full_mem = (node *) alloca(params.full_size);
+    ethash_compute_full_data(full_mem, &params, &cache);
+
+    {
+        const std::string
+                expected = "2da2b506f21070e1143d908e867962486d6b0a02e31d468fd5e3a7143aafa76a14201f63374314e2a6aaf84ad2eb57105dea3378378965a1b3873453bb2b78f9a8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995ca8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995ca8620b2ebeca41fbc773bb837b5e724d6eb2de570d99858df0d7d97067fb8103b21757873b735097b35d3bea8fd1c359a9e8a63c1540c76c9784cf8d975e995c259440b89fa3481c2c33171477c305c8e1e421f8d8f6d59585449d0034f3e421808d8da6bbd0b6378f567647cc6c4ba6c434592b198ad444e7284905b7c6adaf70bf43ec2daa7bd5e8951aa609ab472c124cf9eba3d38cff5091dc3f58409edcc386c743c3bd66f92408796ee1e82dd149eaefbf52b00ce33014a6eb3e50625413b072a58bc01da28262f42cbe4f87d4abc2bf287d15618405a1fe4e386fcdafbb171064bd99901d8f81dd6789396ce5e364ac944bbbd75a7827291c70b42d26385910cd53ca535ab29433dd5c5714d26e0dce95514c5ef866329c12e958097e84462197c2b32087849dab33e88b11da61d52f9dbc0b92cc61f742c07dbbf751c49d7678624ee60dfbe62e5e8c47a03d8247643f3d16ad8c8e663953bcda1f59d7e2d4a9bf0768e789432212621967a8f41121ad1df6ae1fa78782530695414c6213942865b2730375019105cae91a4c17a558d4b63059661d9f108362143107babe0b848de412e4da59168cce82bfbff3c99e022dd6ac1e559db991f2e3f7bb910cefd173e65ed00a8d5d416534e2c8416ff23977dbf3eb7180b75c71580d08ce95efeb9b0afe904ea12285a392aff0c8561ff79fca67f694a62b9e52377485c57cc3598d84cac0a9d27960de0cc31ff9bbfe455acaa62c8aa5d2cce96f345da9afe843d258a99c4eaf3650fc62efd81c7b81cd0d534d2d71eeda7a6e315d540b4473c80f8730037dc2ae3e47b986240cfc65ccc565f0d8cde0bc68a57e39a271dda57440b3598bee19f799611d25731a96b5dbbbefdff6f4f656161462633030d62560ea4e9c161cf78fc96a2ca5aaa32453a6c5dea206f766244e8c9d9a8dc61185ce37f1fc804459c5f07434f8ecb34141b8dcae7eae704c950b55556c5f40140c3714b45eddb02637513268778cbf937a33e4e33183685f9deb31ef54e90161e76d969587dd782eaa94e289420e7c2ee908517f5893a26fdb5873d68f92d118d4bcf98d7a4916794d6ab290045e30f9ea00ca547c584b8482b0331ba1539a0f2714fddc3a0b06b0cfbb6a607b8339c39bcfd6640b1f653e9d70ef6c985b",
+                actual = bytesToHexString((uint8_t const *) cache.mem, params.cache_size);
+
+        BOOST_REQUIRE_MESSAGE(expected == actual,
+                "\nexpected: " << expected.c_str() << "\n"
+                        << "actual: " << actual.c_str() << "\n");
+    }
+
+
+
+    {
+        node node;
+        ethash_calculate_dag_item(&node, 0, &params, &cache);
+        const std::string
+                actual = bytesToHexString((uint8_t const *) &node, sizeof(node)),
+                expected = "b1698f829f90b35455804e5185d78f549fcb1bdce2bee006d4d7e68eb154b596be1427769eb1c3c3e93180c760af75f81d1023da6a0ffbe321c153a7c0103597";
+        BOOST_REQUIRE_MESSAGE(actual == expected,
+                "\n" << "expected: " << expected.c_str() << "\n"
+                        << "actual: " << actual.c_str() << "\n");
+    }
+
+    {
+        for (int i = 0 ; i < params.full_size / sizeof(node) ; ++i ) {
+            for (uint32_t j = 0; j < 32; ++j) {
+                node expected_node;
+                ethash_calculate_dag_item(&expected_node, j, &params, &cache);
+                const std::string
+                        actual = bytesToHexString((uint8_t const *) &(full_mem[j]), sizeof(node)),
+                        expected = bytesToHexString((uint8_t const *) &expected_node, sizeof(node));
+                BOOST_REQUIRE_MESSAGE(actual == expected,
+                        "\ni: " << j << "\n"
+                                << "expected: " << expected.c_str() << "\n"
+                                << "actual: " << actual.c_str() << "\n");
+            }
+        }
+    }
+
+    {
+        uint64_t nonce = 0x7c7c597c;
+        ethash_full(&full_out, full_mem, &params, hash, nonce);
+        ethash_light(&light_out, &cache, &params, hash, nonce);
+        const std::string
+                light_result_string = bytesToHexString(light_out.result, 32),
+                full_result_string = bytesToHexString(full_out.result, 32);
+        BOOST_REQUIRE_MESSAGE(light_result_string == full_result_string,
+                "\nlight result: " << light_result_string.c_str() << "\n"
+                        << "full result: " << full_result_string.c_str() << "\n");
+        const std::string
+                light_mix_hash_string = bytesToHexString(light_out.mix_hash, 32),
+                full_mix_hash_string = bytesToHexString(full_out.mix_hash, 32);
+        BOOST_REQUIRE_MESSAGE(full_mix_hash_string == light_mix_hash_string,
+                "\nlight mix hash: " << light_mix_hash_string.c_str() << "\n"
+                        << "full mix hash: " << full_mix_hash_string.c_str() << "\n");
+        uint8_t check_hash[32];
+        ethash_quick_hash(check_hash, hash, nonce, full_out.mix_hash);
+        const std::string check_hash_string = bytesToHexString(check_hash, 32);
+        BOOST_REQUIRE_MESSAGE(check_hash_string == full_result_string,
+                "\ncheck hash string: " << check_hash_string.c_str() << "\n"
+                        << "full result: " << full_result_string.c_str() << "\n");
+    }
+    {
+        ethash_full(&full_out, full_mem, &params, hash, 5);
+        std::string
+                light_result_string = bytesToHexString(light_out.result, 32),
+                full_result_string = bytesToHexString(full_out.result, 32);
+
+        BOOST_REQUIRE_MESSAGE(light_result_string != full_result_string,
+                "\nlight result and full result should differ: " << light_result_string.c_str() << "\n");
+
+        ethash_light(&light_out, &cache, &params, hash, 5);
+        light_result_string = bytesToHexString(light_out.result, 32);
+        BOOST_REQUIRE_MESSAGE(light_result_string == full_result_string,
+                "\nlight result and full result should be the same\n"
+                        << "light result: " << light_result_string.c_str() << "\n"
+                        << "full result: " << full_result_string.c_str() << "\n");
+        std::string
+                light_mix_hash_string = bytesToHexString(light_out.mix_hash, 32),
+                full_mix_hash_string = bytesToHexString(full_out.mix_hash, 32);
+        BOOST_REQUIRE_MESSAGE(full_mix_hash_string == light_mix_hash_string,
+                "\nlight mix hash: " << light_mix_hash_string.c_str() << "\n"
+                        << "full mix hash: " << full_mix_hash_string.c_str() << "\n");
+    }
+}
+
+BOOST_AUTO_TEST_CASE(ethash_check_difficulty_check) {
+    uint8_t hash[32], target[32];
+    memset(hash, 0, 32);
+    memset(target, 0, 32);
+
+    memcpy(hash, "11111111111111111111111111111111", 32);
+    memcpy(target, "22222222222222222222222222222222", 32);
+    BOOST_REQUIRE_MESSAGE(
+            ethash_check_difficulty(hash, target),
+            "\nexpected \"" << hash << "\" to have less difficulty than \"" << target << "\"\n");
+    BOOST_REQUIRE_MESSAGE(
+            !ethash_check_difficulty(hash, hash),
+            "\nexpected \"" << hash << "\" to have the same difficulty as \"" << hash << "\"\n");
+    memcpy(target, "11111111111111111111111111111112", 32);
+    BOOST_REQUIRE_MESSAGE(
+            ethash_check_difficulty(hash, target),
+            "\nexpected \"" << hash << "\" to have less difficulty than \"" << target << "\"\n");
+    memcpy(target, "11111111111111111111111111111110", 32);
+    BOOST_REQUIRE_MESSAGE(
+            !ethash_check_difficulty(hash, target),
+            "\nexpected \"" << hash << "\" to have more difficulty than \"" << target << "\"\n");
+}
\ No newline at end of file
diff --git a/core/block_processor.go b/core/block_processor.go
index aef53c9404..3123511f9e 100644
--- a/core/block_processor.go
+++ b/core/block_processor.go
@@ -7,12 +7,12 @@ import (
 	"sync"
 	"time"
 
+	"github.com/ethereum/ethash"
 	"github.com/ethereum/go-ethereum/core/types"
 	"github.com/ethereum/go-ethereum/ethutil"
 	"github.com/ethereum/go-ethereum/event"
 	"github.com/ethereum/go-ethereum/logger"
 	"github.com/ethereum/go-ethereum/pow"
-	"github.com/ethereum/go-ethereum/pow/ezp"
 	"github.com/ethereum/go-ethereum/state"
 	"gopkg.in/fatih/set.v0"
 )
@@ -50,7 +50,7 @@ func NewBlockProcessor(db ethutil.Database, txpool *TxPool, chainManager *ChainM
 	sm := &BlockProcessor{
 		db:       db,
 		mem:      make(map[string]*big.Int),
-		Pow:      ezp.New(),
+		Pow:      ethash.New(chainManager),
 		bc:       chainManager,
 		eventMux: eventMux,
 		txpool:   txpool,
@@ -255,6 +255,7 @@ func (sm *BlockProcessor) ValidateBlock(block, parent *types.Block) error {
 		return fmt.Errorf("GasLimit check failed for block %v, %v", block.Header().GasLimit, expl)
 	}
 
+	// There can be at most one uncle
 	if len(block.Uncles()) > 1 {
 		return ValidationError("Block can only contain one uncle (contained %v)", len(block.Uncles()))
 	}
diff --git a/core/chain_makers.go b/core/chain_makers.go
index eb43f8aa08..2c36b892e5 100644
--- a/core/chain_makers.go
+++ b/core/chain_makers.go
@@ -13,21 +13,31 @@ import (
 // So we can generate blocks easily
 type FakePow struct{}
 
-func (f FakePow) Search(block pow.Block, stop <-chan struct{}) []byte { return nil }
-func (f FakePow) Verify(block pow.Block) bool                         { return true }
-func (f FakePow) GetHashrate() int64                                  { return 0 }
-func (f FakePow) Turbo(bool)                                          {}
+func (f FakePow) Search(block pow.Block, stop <-chan struct{}) ([]byte, []byte, []byte) {
+	return nil, nil, nil
+}
+func (f FakePow) Verify(block pow.Block) bool { return true }
+func (f FakePow) GetHashrate() int64          { return 0 }
+func (f FakePow) Turbo(bool)                  {}
 
+// So we can deterministically seed different blockchains
+var (
+	CanonicalSeed = 1
+	ForkSeed      = 2
+)
+
+// Utility functions for making chains on the fly
+// Exposed for sake of testing from other packages (eg. go-ethash)
 func NewBlockFromParent(addr []byte, parent *types.Block) *types.Block {
 	return newBlockFromParent(addr, parent)
 }
 
-func MakeBlock(bman *BlockProcessor, parent *types.Block, i int, db ethutil.Database) *types.Block {
-	return makeBlock(bman, parent, i, db)
+func MakeBlock(bman *BlockProcessor, parent *types.Block, i int, db ethutil.Database, seed int) *types.Block {
+	return makeBlock(bman, parent, i, db, seed)
 }
 
-func MakeChain(bman *BlockProcessor, parent *types.Block, max int, db ethutil.Database) types.Blocks {
-	return makeChain(bman, parent, max, db)
+func MakeChain(bman *BlockProcessor, parent *types.Block, max int, db ethutil.Database, seed int) types.Blocks {
+	return makeChain(bman, parent, max, db, seed)
 }
 
 func NewChainMan(block *types.Block, eventMux *event.TypeMux, db ethutil.Database) *ChainManager {
@@ -42,9 +52,9 @@ func NewCanonical(n int, db ethutil.Database) (*BlockProcessor, error) {
 	return newCanonical(n, db)
 }
 
+// block time is fixed at 10 seconds
 func newBlockFromParent(addr []byte, parent *types.Block) *types.Block {
 	block := types.NewBlock(parent.Hash(), addr, parent.Root(), ethutil.BigPow(2, 32), nil, "")
-
 	block.SetUncles(nil)
 	block.SetTransactions(nil)
 	block.SetReceipts(nil)
@@ -52,6 +62,7 @@ func newBlockFromParent(addr []byte, parent *types.Block) *types.Block {
 	header := block.Header()
 	header.Difficulty = CalcDifficulty(block, parent)
 	header.Number = new(big.Int).Add(parent.Header().Number, ethutil.Big1)
+	header.Time = parent.Header().Time + 10
 	header.GasLimit = CalcGasLimit(parent, block)
 
 	block.Td = parent.Td
@@ -60,8 +71,10 @@ func newBlockFromParent(addr []byte, parent *types.Block) *types.Block {
 }
 
 // Actually make a block by simulating what miner would do
-func makeBlock(bman *BlockProcessor, parent *types.Block, i int, db ethutil.Database) *types.Block {
+// we seed chains by the first byte of the coinbase
+func makeBlock(bman *BlockProcessor, parent *types.Block, i int, db ethutil.Database, seed int) *types.Block {
 	addr := ethutil.LeftPadBytes([]byte{byte(i)}, 20)
+	addr[0] = byte(seed)
 	block := newBlockFromParent(addr, parent)
 	state := state.New(block.Root(), db)
 	cbase := state.GetOrNewStateObject(addr)
@@ -74,11 +87,11 @@ func makeBlock(bman *BlockProcessor, parent *types.Block, i int, db ethutil.Data
 
 // Make a chain with real blocks
 // Runs ProcessWithParent to get proper state roots
-func makeChain(bman *BlockProcessor, parent *types.Block, max int, db ethutil.Database) types.Blocks {
+func makeChain(bman *BlockProcessor, parent *types.Block, max int, db ethutil.Database, seed int) types.Blocks {
 	bman.bc.currentBlock = parent
 	blocks := make(types.Blocks, max)
 	for i := 0; i < max; i++ {
-		block := makeBlock(bman, parent, i, db)
+		block := makeBlock(bman, parent, i, db, seed)
 		td, err := bman.processWithParent(block, parent)
 		if err != nil {
 			fmt.Println("process with parent failed", err)
@@ -87,9 +100,7 @@ func makeChain(bman *BlockProcessor, parent *types.Block, max int, db ethutil.Da
 		block.Td = td
 		blocks[i] = block
 		parent = block
-		fmt.Printf("New Block: %x\n", block.Hash())
 	}
-	fmt.Println("Done making chain")
 	return blocks
 }
 
@@ -113,7 +124,7 @@ func newBlockProcessor(db ethutil.Database, txpool *TxPool, cman *ChainManager,
 	return bman
 }
 
-// Make a new canonical chain by running InsertChain
+// Make a new, deterministic canonical chain by running InsertChain
 // on result of makeChain
 func newCanonical(n int, db ethutil.Database) (*BlockProcessor, error) {
 	eventMux := &event.TypeMux{}
@@ -125,7 +136,7 @@ func newCanonical(n int, db ethutil.Database) (*BlockProcessor, error) {
 	if n == 0 {
 		return bman, nil
 	}
-	lchain := makeChain(bman, parent, n, db)
-	bman.bc.InsertChain(lchain)
-	return bman, nil
+	lchain := makeChain(bman, parent, n, db, CanonicalSeed)
+	err := bman.bc.InsertChain(lchain)
+	return bman, err
 }
diff --git a/core/chain_manager_test.go b/core/chain_manager_test.go
index e6614212f2..b562b677d2 100644
--- a/core/chain_manager_test.go
+++ b/core/chain_manager_test.go
@@ -24,12 +24,6 @@ func init() {
 
 // Test fork of length N starting from block i
 func testFork(t *testing.T, bman *BlockProcessor, i, N int, f func(td1, td2 *big.Int)) {
-	fmt.Println("Testing Fork!")
-	var b *types.Block = nil
-	if i > 0 {
-		b = bman.bc.GetBlockByNumber(uint64(i))
-	}
-	_ = b
 	// switch databases to process the new chain
 	db, err := ethdb.NewMemDatabase()
 	if err != nil {
@@ -40,13 +34,25 @@ func testFork(t *testing.T, bman *BlockProcessor, i, N int, f func(td1, td2 *big
 	if err != nil {
 		t.Fatal("could not make new canonical in testFork", err)
 	}
+	// asert the bmans have the same block at i
+	bi1 := bman.bc.GetBlockByNumber(uint64(i)).Hash()
+	bi2 := bman2.bc.GetBlockByNumber(uint64(i)).Hash()
+	if bytes.Compare(bi1, bi2) != 0 {
+		t.Fatal("chains do not have the same hash at height", i)
+	}
+
 	bman2.bc.SetProcessor(bman2)
 
+	// extend the fork
 	parent := bman2.bc.CurrentBlock()
-	chainB := makeChain(bman2, parent, N, db)
-	bman2.bc.InsertChain(chainB)
+	chainB := makeChain(bman2, parent, N, db, ForkSeed)
+	err = bman2.bc.InsertChain(chainB)
+	if err != nil {
+		t.Fatal("Insert chain error for fork:", err)
+	}
 
 	tdpre := bman.bc.Td()
+	// Test the fork's blocks on the original chain
 	td, err := testChain(chainB, bman)
 	if err != nil {
 		t.Fatal("expected chainB not to give errors:", err)
@@ -55,6 +61,14 @@ func testFork(t *testing.T, bman *BlockProcessor, i, N int, f func(td1, td2 *big
 	f(tdpre, td)
 }
 
+func printChain(bc *ChainManager) {
+	for i := bc.CurrentBlock().Number().Uint64(); i > 0; i-- {
+		b := bc.GetBlockByNumber(uint64(i))
+		fmt.Printf("\t%x\n", b.Hash())
+	}
+}
+
+// process blocks against a chain
 func testChain(chainB types.Blocks, bman *BlockProcessor) (*big.Int, error) {
 	td := new(big.Int)
 	for _, block := range chainB {
@@ -102,12 +116,13 @@ func insertChain(done chan bool, chainMan *ChainManager, chain types.Blocks, t *
 }
 
 func TestExtendCanonical(t *testing.T) {
+	CanonicalLength := 5
 	db, err := ethdb.NewMemDatabase()
 	if err != nil {
 		t.Fatal("Failed to create db:", err)
 	}
 	// make first chain starting from genesis
-	bman, err := newCanonical(5, db)
+	bman, err := newCanonical(CanonicalLength, db)
 	if err != nil {
 		t.Fatal("Could not make new canonical chain:", err)
 	}
@@ -116,11 +131,11 @@ func TestExtendCanonical(t *testing.T) {
 			t.Error("expected chainB to have higher difficulty. Got", td2, "expected more than", td1)
 		}
 	}
-	// Start fork from current height (5)
-	testFork(t, bman, 5, 1, f)
-	testFork(t, bman, 5, 2, f)
-	testFork(t, bman, 5, 5, f)
-	testFork(t, bman, 5, 10, f)
+	// Start fork from current height (CanonicalLength)
+	testFork(t, bman, CanonicalLength, 1, f)
+	testFork(t, bman, CanonicalLength, 2, f)
+	testFork(t, bman, CanonicalLength, 5, f)
+	testFork(t, bman, CanonicalLength, 10, f)
 }
 
 func TestShorterFork(t *testing.T) {
@@ -189,6 +204,7 @@ func TestEqualFork(t *testing.T) {
 	}
 	// Sum of numbers must be equal to 10
 	// for this to be an equal fork
+	testFork(t, bman, 0, 10, f)
 	testFork(t, bman, 1, 9, f)
 	testFork(t, bman, 2, 8, f)
 	testFork(t, bman, 5, 5, f)
@@ -215,7 +231,7 @@ func TestBrokenChain(t *testing.T) {
 	}
 	bman2.bc.SetProcessor(bman2)
 	parent := bman2.bc.CurrentBlock()
-	chainB := makeChain(bman2, parent, 5, db2)
+	chainB := makeChain(bman2, parent, 5, db2, ForkSeed)
 	chainB = chainB[1:]
 	_, err = testChain(chainB, bman)
 	if err == nil {
diff --git a/core/transaction_pool.go b/core/transaction_pool.go
index 860f57dc30..bd377f6790 100644
--- a/core/transaction_pool.go
+++ b/core/transaction_pool.go
@@ -121,7 +121,7 @@ func (self *TxPool) add(tx *types.Transaction) error {
 	if len(tx.From()) > 0 {
 		from = ethutil.Bytes2Hex(tx.From()[:4])
 	} else {
-		from = "INVALID"
+		return errors.New(fmt.Sprintf("FROM ADDRESS MUST BE POSITIVE (was %v)", tx.From()))
 	}
 	txplogger.Debugf("(t) %x => %s (%v) %x\n", from, to, tx.Value, tx.Hash())
 
diff --git a/core/types/block.go b/core/types/block.go
index f637b5c3b8..a37038f739 100644
--- a/core/types/block.go
+++ b/core/types/block.go
@@ -48,7 +48,20 @@ type Header struct {
 }
 
 func (self *Header) rlpData(withNonce bool) []interface{} {
-	fields := []interface{}{self.ParentHash, self.UncleHash, self.Coinbase, self.Root, self.TxHash, self.ReceiptHash, self.Bloom, self.Difficulty, self.Number, self.GasLimit, self.GasUsed, self.Time, self.Extra}
+	fields := []interface{}{
+		self.ParentHash,
+		self.UncleHash,
+		self.Coinbase,
+		self.Root,
+		self.TxHash,
+		self.ReceiptHash,
+		self.Bloom,
+		self.Difficulty,
+		self.Number,
+		self.GasLimit,
+		self.GasUsed,
+		self.Time,
+		self.Extra}
 	if withNonce {
 		fields = append(fields, self.Nonce, self.MixDigest, self.SeedHash)
 	}
diff --git a/miner/agent.go b/miner/agent.go
index 9046f5d5ab..5b2bfdb06c 100644
--- a/miner/agent.go
+++ b/miner/agent.go
@@ -69,8 +69,8 @@ done:
 
 func (self *CpuMiner) mine(block *types.Block) {
 	minerlogger.Infof("(re)started agent[%d]. mining...\n", self.index)
-	nonce := self.pow.Search(block, self.quitCurrentOp)
+	nonce, mixDigest, seedHash := self.pow.Search(block, self.quitCurrentOp)
 	if nonce != nil {
-		self.returnCh <- Work{block.Number().Uint64(), nonce}
+		self.returnCh <- Work{block.Number().Uint64(), nonce, mixDigest, seedHash}
 	}
 }
diff --git a/miner/worker.go b/miner/worker.go
index 4f0909302c..201367fdf9 100644
--- a/miner/worker.go
+++ b/miner/worker.go
@@ -42,8 +42,10 @@ func env(block *types.Block, eth core.Backend) *environment {
 }
 
 type Work struct {
-	Number uint64
-	Nonce  []byte
+	Number    uint64
+	Nonce     []byte
+	MixDigest []byte
+	SeedHash  []byte
 }
 
 type Agent interface {
@@ -138,9 +140,12 @@ out:
 func (self *worker) wait() {
 	for {
 		for work := range self.recv {
+			// Someone Successfully Mined!
 			block := self.current.block
 			if block.Number().Uint64() == work.Number && block.Nonce() == nil {
 				self.current.block.Header().Nonce = work.Nonce
+				self.current.block.Header().MixDigest = work.MixDigest
+				self.current.block.Header().SeedHash = work.SeedHash
 
 				if err := self.chain.InsertChain(types.Blocks{self.current.block}); err == nil {
 					self.mux.Post(core.NewMinedBlockEvent{self.current.block})
diff --git a/pow/block.go b/pow/block.go
index 129f96fd32..eb07bc86e7 100644
--- a/pow/block.go
+++ b/pow/block.go
@@ -1,12 +1,20 @@
 package pow
 
-import "math/big"
+import (
+	"github.com/ethereum/go-ethereum/core/types"
+	"math/big"
+)
 
 type Block interface {
 	Difficulty() *big.Int
 	HashNoNonce() []byte
 	Nonce() []byte
-	Number() *big.Int
 	MixDigest() []byte
 	SeedHash() []byte
+	NumberU64() uint64
+}
+
+type ChainManager interface {
+	GetBlockByNumber(uint64) *types.Block
+	CurrentBlock() *types.Block
 }
diff --git a/pow/dagger/dagger.go b/pow/dagger/dagger.go
index 310f8abddd..3da7683d56 100644
--- a/pow/dagger/dagger.go
+++ b/pow/dagger/dagger.go
@@ -44,7 +44,7 @@ func (dag *Dagger) Find(obj *big.Int, resChan chan int64) {
 	resChan <- 0
 }
 
-func (dag *Dagger) Search(hash, diff *big.Int) *big.Int {
+func (dag *Dagger) Search(hash, diff *big.Int) ([]byte, []byte, []byte) {
 	// TODO fix multi threading. Somehow it results in the wrong nonce
 	amountOfRoutines := 1
 
@@ -69,7 +69,7 @@ func (dag *Dagger) Search(hash, diff *big.Int) *big.Int {
 		}
 	}
 
-	return big.NewInt(res)
+	return big.NewInt(res).Bytes(), nil, nil
 }
 
 func (dag *Dagger) Verify(hash, diff, nonce *big.Int) bool {
diff --git a/pow/dash/crypto.c b/pow/dash/crypto.c
deleted file mode 100644
index 9c5a62d16a..0000000000
--- a/pow/dash/crypto.c
+++ /dev/null
@@ -1,5 +0,0 @@
-extern char *Sha3(char *, int);
-char *sha3_cgo(char *data, int l)
-{
-	return Sha3(data, l);
-}
diff --git a/pow/dash/crypto.go b/pow/dash/crypto.go
deleted file mode 100644
index 0644a54ae5..0000000000
--- a/pow/dash/crypto.go
+++ /dev/null
@@ -1,14 +0,0 @@
-package dash
-
-/*
-char *sha3_cgo(char *, int); // Forward declaration
-*/
-import "C"
-import (
-	"github.com/ethereum/go-ethereum/crypto"
-)
-
-//export Sha3
-func Sha3(data []byte, l int) []byte {
-	return crypto.Sha3(data)
-}
diff --git a/pow/ezp/pow.go b/pow/ezp/pow.go
index 8808d7ce09..49854c3d02 100644
--- a/pow/ezp/pow.go
+++ b/pow/ezp/pow.go
@@ -32,7 +32,7 @@ func (pow *EasyPow) Turbo(on bool) {
 	pow.turbo = on
 }
 
-func (pow *EasyPow) Search(block pow.Block, stop <-chan struct{}) []byte {
+func (pow *EasyPow) Search(block pow.Block, stop <-chan struct{}) ([]byte, []byte, []byte) {
 	r := rand.New(rand.NewSource(time.Now().UnixNano()))
 	hash := block.HashNoNonce()
 	diff := block.Difficulty()
@@ -57,7 +57,7 @@ empty:
 	for {
 		select {
 		case <-stop:
-			return nil
+			return nil, nil, nil
 		default:
 			i++
 
@@ -67,7 +67,7 @@ empty:
 
 			sha := crypto.Sha3(big.NewInt(r.Int63()).Bytes())
 			if verify(hash, diff, sha) {
-				return sha
+				return sha, nil, nil
 			}
 		}
 
@@ -75,8 +75,6 @@ empty:
 			time.Sleep(20 * time.Microsecond)
 		}
 	}
-
-	return nil
 }
 
 func (pow *EasyPow) Verify(block pow.Block) bool {
diff --git a/pow/pow.go b/pow/pow.go
index c94ee40ba4..11aecbd6bc 100644
--- a/pow/pow.go
+++ b/pow/pow.go
@@ -1,7 +1,7 @@
 package pow
 
 type PoW interface {
-	Search(block Block, stop <-chan struct{}) []byte
+	Search(block Block, stop <-chan struct{}) ([]byte, []byte, []byte)
 	Verify(block Block) bool
 	GetHashrate() int64
 	Turbo(bool)