From 929e457b174fde48914715e1464626bfaed001e1 Mon Sep 17 00:00:00 2001 From: Robert Haase Date: Sat, 31 Jul 2021 15:56:35 +0200 Subject: [PATCH 1/6] basic multi-GPU support works with push, execute, execute_separable_kernel, copy and gaussian_blur only so far --- pyclesperanto_prototype/_tier0/__init__.py | 2 +- pyclesperanto_prototype/_tier0/_create.py | 12 +++--- pyclesperanto_prototype/_tier0/_device.py | 8 ++++ pyclesperanto_prototype/_tier0/_execute.py | 1 - pyclesperanto_prototype/_tier0/_push.py | 8 ++-- pyclesperanto_prototype/_tier0/_pycl.py | 3 +- pyclesperanto_prototype/_tier0/_utils.py | 3 ++ pyclesperanto_prototype/_tier1/_copy.py | 8 ++-- .../_tier1/_execute_separable_kernel.py | 26 +++++++------ .../_tier1/_gaussian_blur.py | 9 ++--- tests/test_multi_gpu_support.py | 38 +++++++++++++++++++ 11 files changed, 86 insertions(+), 32 deletions(-) create mode 100644 pyclesperanto_prototype/_tier0/_utils.py create mode 100644 tests/test_multi_gpu_support.py diff --git a/pyclesperanto_prototype/_tier0/__init__.py b/pyclesperanto_prototype/_tier0/__init__.py index df04a6fb..c074d101 100644 --- a/pyclesperanto_prototype/_tier0/__init__.py +++ b/pyclesperanto_prototype/_tier0/__init__.py @@ -35,7 +35,7 @@ from ._plugin_function import plugin_function from ._types import Image from ._cl_info import cl_info -from ._device import get_device, select_device, set_device_scoring_key +from ._device import get_device, select_device, set_device_scoring_key, new_device, Device from ._cl_image import create_image, empty_image_like, empty_image from ._available_device_names import available_device_names from ._set_wait_for_kernel_finish import set_wait_for_kernel_finish \ No newline at end of file diff --git a/pyclesperanto_prototype/_tier0/_create.py b/pyclesperanto_prototype/_tier0/_create.py index 01c2da40..05af7f8f 100644 --- a/pyclesperanto_prototype/_tier0/_create.py +++ b/pyclesperanto_prototype/_tier0/_create.py @@ -1,8 +1,8 @@ from ._pycl import OCLArray import numpy as np +from ._device import Device, get_device - -def create(dimensions, dtype=np.float32): +def create(dimensions, dtype=np.float32, device:Device = None): """ Convenience method for creating images on the GPU. This method basically does the same as in CLIJ: @@ -18,18 +18,20 @@ def create(dimensions, dtype=np.float32): if isinstance(dimensions, OCLArray) else tuple(dimensions) # reverses a list/tuple ) - return OCLArray.empty(dimensions, dtype) + if device is None: + device = get_device() + return device.empty(dimensions, dtype) def create_zyx(dimensions): return create(dimensions[::-1]) -def create_like(*args): +def create_like(*args, device:Device = None): dimensions = args[0] if isinstance(dimensions, OCLArray): dimensions = dimensions.shape elif isinstance(dimensions, np.ndarray): dimensions = dimensions.shape[::-1] - return create(dimensions) + return create(dimensions, device=device) def create_binary_like(*args): dimensions = args[0] diff --git a/pyclesperanto_prototype/_tier0/_device.py b/pyclesperanto_prototype/_tier0/_device.py index dc34ffdc..27d7c415 100644 --- a/pyclesperanto_prototype/_tier0/_device.py +++ b/pyclesperanto_prototype/_tier0/_device.py @@ -1,6 +1,9 @@ import pyopencl as cl +from pyopencl import array from typing import Callable, List, Optional from functools import lru_cache +from ._utils import prepare +import numpy as np # TODO: we should discuss whether this collection is actually the best thing to pass # around. might be better to work lower level with contexts... @@ -25,6 +28,11 @@ def program_from_source(self, source): from ._program import OCLProgram return OCLProgram(src_str=source, dev=self) + def from_array(self, arr, *args, **kwargs): + return array.to_device(self.queue, prepare(arr), *args, **kwargs) + + def empty(self, shape, dtype=np.float32): + return array.empty(self.queue, shape, dtype) def score_device(dev: cl.Device) -> float: score = 4e12 if dev.type == cl.device_type.GPU else 2e12 diff --git a/pyclesperanto_prototype/_tier0/_execute.py b/pyclesperanto_prototype/_tier0/_execute.py index 0573dddc..e5ddd395 100644 --- a/pyclesperanto_prototype/_tier0/_execute.py +++ b/pyclesperanto_prototype/_tier0/_execute.py @@ -98,7 +98,6 @@ def execute(anchor, opencl_kernel_filename, kernel_name, global_size, parameters # time_stamp = time.time() defines = ["#define MAX_ARRAY_SIZE 1000"] - if image_size_independent_kernel_compilation: defines.extend([ "#define GET_IMAGE_WIDTH(image_key) image_size_ ## image_key ## _width", diff --git a/pyclesperanto_prototype/_tier0/_push.py b/pyclesperanto_prototype/_tier0/_push.py index 1847ad5a..69855a59 100644 --- a/pyclesperanto_prototype/_tier0/_push.py +++ b/pyclesperanto_prototype/_tier0/_push.py @@ -1,8 +1,8 @@ import numpy as np from ._pycl import OCLArray +from ._device import Device, get_device - -def push(any_array): +def push(any_array, device : Device = None): """Copies an image to GPU memory and returns its handle .. deprecated:: 0.6.0 @@ -36,7 +36,9 @@ def push(any_array): any_array = np.asarray(any_array.get()) float_arr = any_array.astype(np.float32) - return OCLArray.from_array(float_arr) + if device is None: + device = get_device() + return device.from_array(float_arr) def push_zyx(any_array): diff --git a/pyclesperanto_prototype/_tier0/_pycl.py b/pyclesperanto_prototype/_tier0/_pycl.py index f7f9ef09..d47e1dc9 100644 --- a/pyclesperanto_prototype/_tier0/_pycl.py +++ b/pyclesperanto_prototype/_tier0/_pycl.py @@ -6,6 +6,7 @@ from pyopencl import characterize from pyopencl import array from ._device import get_device +from ._utils import prepare """ Below here, vendored from GPUtools Copyright (c) 2016, Martin Weigert @@ -105,8 +106,6 @@ def _wrap_OCLArray(cls): WRAPPER """ - def prepare(arr): - return np.require(arr, None, "C") @classmethod def from_array(cls, arr, *args, **kwargs): diff --git a/pyclesperanto_prototype/_tier0/_utils.py b/pyclesperanto_prototype/_tier0/_utils.py new file mode 100644 index 00000000..56a7d2c9 --- /dev/null +++ b/pyclesperanto_prototype/_tier0/_utils.py @@ -0,0 +1,3 @@ +import numpy as np +def prepare(arr): + return np.require(arr, None, "C") diff --git a/pyclesperanto_prototype/_tier1/_copy.py b/pyclesperanto_prototype/_tier1/_copy.py index 6dfcaf16..72094be6 100644 --- a/pyclesperanto_prototype/_tier1/_copy.py +++ b/pyclesperanto_prototype/_tier1/_copy.py @@ -1,9 +1,9 @@ from .._tier0 import execute from .._tier0 import plugin_function -from .._tier0 import Image +from .._tier0 import Image, Device @plugin_function -def copy(source : Image, destination : Image = None): +def copy(source : Image, destination : Image = None, device: Device = None): """Copies an image.
f(x) = x
@@ -12,6 +12,8 @@ def copy(source : Image, destination : Image = None): ---------- source : Image destination : Image + device : Device, optional + OpenCL-device to operate on Returns ------- @@ -33,5 +35,5 @@ def copy(source : Image, destination : Image = None): "src":source } - execute(__file__, '../clij-opencl-kernels/kernels/copy_' + str(len(destination.shape)) + 'd_x.cl', 'copy_' + str(len(destination.shape)) + 'd', destination.shape, parameters) + execute(__file__, '../clij-opencl-kernels/kernels/copy_' + str(len(destination.shape)) + 'd_x.cl', 'copy_' + str(len(destination.shape)) + 'd', destination.shape, parameters, device=device) return destination diff --git a/pyclesperanto_prototype/_tier1/_execute_separable_kernel.py b/pyclesperanto_prototype/_tier1/_execute_separable_kernel.py index 17d6cdb8..d28a313b 100644 --- a/pyclesperanto_prototype/_tier1/_execute_separable_kernel.py +++ b/pyclesperanto_prototype/_tier1/_execute_separable_kernel.py @@ -1,19 +1,21 @@ from pyclesperanto_prototype._tier0._create import create_like from pyclesperanto_prototype._tier1 import copy from pyclesperanto_prototype._tier0._execute import execute +from pyclesperanto_prototype._tier0._device import Device + from ._set import set -def execute_separable_kernel(src, dst, anchor, opencl_kernel_filename, kernel_name, kernel_size_x, kernel_size_y, kernel_size_z, sigma_x, sigma_y, sigma_z, dimensions) : +def execute_separable_kernel(src, dst, anchor, opencl_kernel_filename, kernel_name, kernel_size_x, kernel_size_y, kernel_size_z, sigma_x, sigma_y, sigma_z, dimensions, device : Device = None) : n = [kernel_size_x, kernel_size_y, kernel_size_z] sigma = [sigma_x, sigma_y, sigma_z] # todo: ensure that temp1 and temp2 become of type float - temp1 = create_like(src); - temp2 = create_like(src); + temp1 = create_like(src, device=device) + temp2 = create_like(src, device=device) if (sigma[0] > 0) : - param_src = src; + param_src = src if (dimensions == 2): param_dst = temp1 else : @@ -27,13 +29,13 @@ def execute_separable_kernel(src, dst, anchor, opencl_kernel_filename, kernel_na "s": float(sigma[0]) } - execute(anchor, opencl_kernel_filename, kernel_name, src.shape, parameters) + execute(anchor, opencl_kernel_filename, kernel_name, src.shape, parameters, device=device) else : if (dimensions == 2): - copy(src, temp1) + copy(src, temp1, device=device) else : - copy(src, temp2) + copy(src, temp2, device=device) if (sigma[1] > 0) : if (dimensions == 2): @@ -51,12 +53,12 @@ def execute_separable_kernel(src, dst, anchor, opencl_kernel_filename, kernel_na "s": float(sigma[1]) } - execute(anchor, opencl_kernel_filename, kernel_name, src.shape, parameters) + execute(anchor, opencl_kernel_filename, kernel_name, src.shape, parameters, device=device) else : if (dimensions == 2): - copy(temp1, dst) + copy(temp1, dst, device=device) else : - copy(temp2, temp1) + copy(temp2, temp1, device=device) if (dimensions == 3): if (sigma[2] > 0): @@ -68,8 +70,8 @@ def execute_separable_kernel(src, dst, anchor, opencl_kernel_filename, kernel_na "N": int(n[2]), "s": float(sigma[2]) } - execute(anchor, opencl_kernel_filename, kernel_name, src.shape, parameters) + execute(anchor, opencl_kernel_filename, kernel_name, src.shape, parameters, device=device) else: - copy(temp1, dst) + copy(temp1, dst, device=device) return dst \ No newline at end of file diff --git a/pyclesperanto_prototype/_tier1/_gaussian_blur.py b/pyclesperanto_prototype/_tier1/_gaussian_blur.py index 50b4156f..6cfebede 100644 --- a/pyclesperanto_prototype/_tier1/_gaussian_blur.py +++ b/pyclesperanto_prototype/_tier1/_gaussian_blur.py @@ -1,10 +1,10 @@ from .._tier0 import sigma_to_kernel_size from .._tier0 import plugin_function -from .._tier0 import Image +from .._tier0 import Image, Device from ._execute_separable_kernel import execute_separable_kernel @plugin_function(categories=['filter', 'denoise', 'in assistant'], priority=1) -def gaussian_blur(source : Image, destination : Image = None, sigma_x : float = 0, sigma_y : float = 0, sigma_z : float = 0): +def gaussian_blur(source : Image, destination : Image = None, sigma_x : float = 0, sigma_y : float = 0, sigma_z : float = 0, device : Device = None): """Computes the Gaussian blurred image of an image given sigma values in X, Y and Z. @@ -34,8 +34,6 @@ def gaussian_blur(source : Image, destination : Image = None, sigma_x : float = ---------- .. [1] https://clij.github.io/clij2-docs/reference_gaussianBlur3D """ - - kernel_size_x = sigma_to_kernel_size(sigma_x) kernel_size_y = sigma_to_kernel_size(sigma_y) kernel_size_z = sigma_to_kernel_size(sigma_z) @@ -52,7 +50,8 @@ def gaussian_blur(source : Image, destination : Image = None, sigma_x : float = sigma_x, sigma_y, sigma_z, - len(destination.shape) + len(destination.shape), + device=device ) return destination diff --git a/tests/test_multi_gpu_support.py b/tests/test_multi_gpu_support.py new file mode 100644 index 00000000..4d302da8 --- /dev/null +++ b/tests/test_multi_gpu_support.py @@ -0,0 +1,38 @@ +import numpy as np +import pyclesperanto_prototype as cle +def test_single_gpu_support(): + dev1 = cle.new_device("RTX") + + print(dev1) + + image = np.random.random((2048,2048,10)) + gpu_input1 = cle.push(image, device=dev1) + + gpu_output1 = cle.create_like(gpu_input1, device=dev1) + + cle.gaussian_blur(gpu_input1, gpu_output1, sigma_x=1, sigma_y=1, device=dev1) + + output1 = cle.pull(gpu_output1) + assert not np.allclose(output1, image) + +def test_multi_gpu_support(): + dev1 = cle.new_device("RTX") + dev2 = cle.new_device("gfx") + + print(dev1) + print(dev2) + + image = np.random.random((2048,2048,10)) + gpu_input1 = cle.push(image, device=dev1) + gpu_input2 = cle.push(image, device=dev2) + + gpu_output1 = cle.create_like(gpu_input1, device=dev1) + gpu_output2 = cle.create_like(gpu_input2, device=dev2) + + cle.gaussian_blur(gpu_input1, gpu_output1, sigma_x=1, sigma_y=1, device=dev1) + cle.gaussian_blur(gpu_input2, gpu_output2, sigma_x=1, sigma_y=1, device=dev2) + + output1 = cle.pull(gpu_output1) + output2 = cle.pull(gpu_output2) + + assert(np.allclose(output1, output2)) From 358f84762965c78edc584da82df6d83524610dc3 Mon Sep 17 00:00:00 2001 From: Robert Haase Date: Sat, 31 Jul 2021 16:16:22 +0200 Subject: [PATCH 2/6] output_creators accept a device now --- pyclesperanto_prototype/_tier0/_create.py | 84 +++++++++---------- pyclesperanto_prototype/_tier0/_device.py | 1 - .../_tier0/_plugin_function.py | 14 +++- tests/test_multi_gpu_support.py | 26 +++++- 4 files changed, 78 insertions(+), 47 deletions(-) diff --git a/pyclesperanto_prototype/_tier0/_create.py b/pyclesperanto_prototype/_tier0/_create.py index 05af7f8f..24ed7f38 100644 --- a/pyclesperanto_prototype/_tier0/_create.py +++ b/pyclesperanto_prototype/_tier0/_create.py @@ -22,8 +22,8 @@ def create(dimensions, dtype=np.float32, device:Device = None): device = get_device() return device.empty(dimensions, dtype) -def create_zyx(dimensions): - return create(dimensions[::-1]) +def create_zyx(dimensions, device:Device = None): + return create(dimensions[::-1], device=device) def create_like(*args, device:Device = None): dimensions = args[0] @@ -33,42 +33,42 @@ def create_like(*args, device:Device = None): dimensions = dimensions.shape[::-1] return create(dimensions, device=device) -def create_binary_like(*args): +def create_binary_like(*args, device:Device = None): dimensions = args[0] if isinstance(dimensions, OCLArray): dimensions = dimensions.shape elif isinstance(dimensions, np.ndarray): dimensions = dimensions.shape[::-1] - return create(dimensions, np.uint8) + return create(dimensions, np.uint8, device=device) -def create_labels_like(*args): +def create_labels_like(*args, device:Device = None): dimensions = args[0] if isinstance(dimensions, OCLArray): dimensions = dimensions.shape elif isinstance(dimensions, np.ndarray): dimensions = dimensions.shape[::-1] - return create(dimensions, np.uint32) + return create(dimensions, np.uint32, device=device) -def create_pointlist_from_labelmap(input:OCLArray, *args): +def create_pointlist_from_labelmap(input:OCLArray, *args, device:Device = None): from .._tier2 import maximum_of_all_pixels number_of_labels = int(maximum_of_all_pixels(input)) number_of_dimensions = len(input.shape) - return create([number_of_dimensions, number_of_labels]) + return create([number_of_dimensions, number_of_labels], device=device) -def create_vector_from_labelmap(input: OCLArray, *args): +def create_vector_from_labelmap(input: OCLArray, *args, device:Device = None): from .._tier2 import maximum_of_all_pixels number_of_labels = int(maximum_of_all_pixels(input)) + 1 - return create([1, number_of_labels]) + return create([1, number_of_labels], device=device) -def create_matrix_from_pointlists(pointlist1:OCLArray, pointlist2:OCLArray): +def create_matrix_from_pointlists(pointlist1:OCLArray, pointlist2:OCLArray, device:Device = None): width = pointlist1.shape[1] + 1 height = pointlist2.shape[1] + 1 - return create([width, height]) + return create([width, height], device=device) -def create_from_pointlist(pointlist: OCLArray, *args): +def create_from_pointlist(pointlist: OCLArray, *args, device:Device = None): from .._tier1 import maximum_x_projection from .._tier0 import pull @@ -76,73 +76,73 @@ def create_from_pointlist(pointlist: OCLArray, *args): max_pos = max_pos[0] if len(max_pos) == 3: # 3D image requested - destination = create([max_pos[2] + 1, max_pos[1] + 1, max_pos[0] + 1]) + destination = create([max_pos[2] + 1, max_pos[1] + 1, max_pos[0] + 1], device=device) elif len(max_pos) == 2: # 2D image requested - destination = create([max_pos[1] + 1, max_pos[0] + 1]) + destination = create([max_pos[1] + 1, max_pos[0] + 1], device=device) else: raise Exception("Size not supported: " + str(max_pos)) return destination -def create_square_matrix_from_pointlist(pointlist1:OCLArray): +def create_square_matrix_from_pointlist(pointlist1:OCLArray, device:Device = None): width = pointlist1.shape[1] + 1 - return create([width, width]) + return create([width, width], device=device) -def create_square_matrix_from_labelmap(labelmap: OCLArray): +def create_square_matrix_from_labelmap(labelmap: OCLArray, device:Device = None): from .._tier2 import maximum_of_all_pixels width = int(maximum_of_all_pixels(labelmap) + 1) - return create([width, width]) + return create([width, width], device=device) -def create_square_matrix_from_two_labelmaps(labelmap1: OCLArray, labelmap2: OCLArray): +def create_square_matrix_from_two_labelmaps(labelmap1: OCLArray, labelmap2: OCLArray, device:Device = None): from .._tier2 import maximum_of_all_pixels width = int(maximum_of_all_pixels(labelmap1) + 1) height = int(maximum_of_all_pixels(labelmap2) + 1) - return create([height, width]) + return create([height, width],device=device) -def create_vector_from_square_matrix(square_matrix : OCLArray, *args): - return create([1, square_matrix.shape[0]]) +def create_vector_from_square_matrix(square_matrix : OCLArray, *args, device:Device = None): + return create([1, square_matrix.shape[0]], device=device) -def create_2d_xy(input): +def create_2d_xy(input, device:Device = None): if len(input.shape) == 3: - return create([input.shape[2], input.shape[1]]) + return create([input.shape[2], input.shape[1]], device=device) else: - return create([input.shape[1], input.shape[0]]) + return create([input.shape[1], input.shape[0]], device=device) -def create_2d_yx(input): +def create_2d_yx(input, device:Device = None): if len(input.shape) == 3: - return create([input.shape[1], input.shape[2]]) + return create([input.shape[1], input.shape[2]], device=device) else: - return create([input.shape[0], 1]) + return create([input.shape[0], 1], device=device) -def create_2d_zy(input): +def create_2d_zy(input, device:Device = None): if len(input.shape) == 3: - return create([input.shape[0], input.shape[1]]) + return create([input.shape[0], input.shape[1]], device=device) else: - return create([1, input.shape[0]]) + return create([1, input.shape[0]], device=device) -def create_2d_yz(input): +def create_2d_yz(input, device:Device = None): if len(input.shape) == 3: - return create([input.shape[1], input.shape[0]]) + return create([input.shape[1], input.shape[0]], device=device) else: - return create([input.shape[0], 1]) + return create([input.shape[0], 1], device=device) -def create_2d_zx(input): +def create_2d_zx(input, device:Device = None): if len(input.shape) == 3: - return create([input.shape[0], input.shape[2]]) + return create([input.shape[0], input.shape[2]], device=device) else: - return create([1, input.shape[1]]) + return create([1, input.shape[1]], device=device) -def create_2d_xz(input): +def create_2d_xz(input, device:Device = None): if len(input.shape) == 3: - return create([input.shape[2], input.shape[0]]) + return create([input.shape[2], input.shape[0]], device=device) else: - return create([input.shape[1], 1]) + return create([input.shape[1], 1], device=device) -def create_none(*args): +def create_none(*args, device:Device = None): return None \ No newline at end of file diff --git a/pyclesperanto_prototype/_tier0/_device.py b/pyclesperanto_prototype/_tier0/_device.py index 27d7c415..167317f8 100644 --- a/pyclesperanto_prototype/_tier0/_device.py +++ b/pyclesperanto_prototype/_tier0/_device.py @@ -118,4 +118,3 @@ def set_device_scoring_key(func: Callable[[cl.Device], int]) -> None: except Exception as e: raise ValueError(f"Scoring algorithm invalid: {e}") _current_device.score_key = func - diff --git a/pyclesperanto_prototype/_tier0/_plugin_function.py b/pyclesperanto_prototype/_tier0/_plugin_function.py index 921c0b77..84cf2e24 100644 --- a/pyclesperanto_prototype/_tier0/_plugin_function.py +++ b/pyclesperanto_prototype/_tier0/_plugin_function.py @@ -7,6 +7,7 @@ from ._create import create_like from ._types import Image, is_image from ._push import push +from ._device import get_device @curry @@ -48,6 +49,8 @@ def plugin_function( function.categories = categories function.priority = priority + + @wraps(function) def worker_function(*args, **kwargs): sig = inspect.signature(function) @@ -59,13 +62,20 @@ def worker_function(*args, **kwargs): # https://docs.python.org/3/library/inspect.html#inspect.BoundArguments.apply_defaults bound.apply_defaults() + # determine on which GPU the operation should be executed and + # potentially, output images should be created on + if 'device' in kwargs.keys(): + device = kwargs['device'] + else: + device = get_device() + # copy images to GPU, and create output array if necessary for key, value in bound.arguments.items(): if is_image(value): bound.arguments[key] = push(value) if key in sig.parameters and sig.parameters[key].annotation is Image and value is None: - sig = inspect.signature(output_creator) - bound.arguments[key] = output_creator(*bound.args[:len(sig.parameters)]) + sig = inspect.signature(output_creator) # -1 because we add device by hand + bound.arguments[key] = output_creator(*bound.args[:len(sig.parameters) - 1], device=device) # call the decorated function return function(*bound.args, **bound.kwargs) diff --git a/tests/test_multi_gpu_support.py b/tests/test_multi_gpu_support.py index 4d302da8..d0d1ed40 100644 --- a/tests/test_multi_gpu_support.py +++ b/tests/test_multi_gpu_support.py @@ -15,6 +15,7 @@ def test_single_gpu_support(): output1 = cle.pull(gpu_output1) assert not np.allclose(output1, image) + def test_multi_gpu_support(): dev1 = cle.new_device("RTX") dev2 = cle.new_device("gfx") @@ -22,7 +23,7 @@ def test_multi_gpu_support(): print(dev1) print(dev2) - image = np.random.random((2048,2048,10)) + image = np.random.random((2048, 2048, 10)) gpu_input1 = cle.push(image, device=dev1) gpu_input2 = cle.push(image, device=dev2) @@ -35,4 +36,25 @@ def test_multi_gpu_support(): output1 = cle.pull(gpu_output1) output2 = cle.pull(gpu_output2) - assert(np.allclose(output1, output2)) + assert (np.allclose(output1, output2)) + + +def test_multi_gpu_support_output_creators(): + dev1 = cle.new_device("RTX") + dev2 = cle.new_device("gfx") + + print(dev1) + print(dev2) + + image = np.random.random((2048, 2048, 10)) + gpu_input1 = cle.push(image, device=dev1) + gpu_input2 = cle.push(image, device=dev2) + + gpu_output1 = cle.gaussian_blur(gpu_input1, sigma_x=1, sigma_y=1, device=dev1) + gpu_output2 = cle.gaussian_blur(gpu_input2, sigma_x=1, sigma_y=1, device=dev2) + + output1 = cle.pull(gpu_output1) + output2 = cle.pull(gpu_output2) + + assert (np.allclose(output1, output2)) + From 5296717f9db2c454b5097cac0b3c4ce4c440da35 Mon Sep 17 00:00:00 2001 From: Robert Haase Date: Sat, 31 Jul 2021 16:46:02 +0200 Subject: [PATCH 3/6] push must also send data to the same device by default --- pyclesperanto_prototype/_tier0/_plugin_function.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyclesperanto_prototype/_tier0/_plugin_function.py b/pyclesperanto_prototype/_tier0/_plugin_function.py index 84cf2e24..c2d21ea2 100644 --- a/pyclesperanto_prototype/_tier0/_plugin_function.py +++ b/pyclesperanto_prototype/_tier0/_plugin_function.py @@ -72,7 +72,7 @@ def worker_function(*args, **kwargs): # copy images to GPU, and create output array if necessary for key, value in bound.arguments.items(): if is_image(value): - bound.arguments[key] = push(value) + bound.arguments[key] = push(value, device=device) if key in sig.parameters and sig.parameters[key].annotation is Image and value is None: sig = inspect.signature(output_creator) # -1 because we add device by hand bound.arguments[key] = output_creator(*bound.args[:len(sig.parameters) - 1], device=device) From 1fd4e7bdf52bad2bf192cf9533c7761741c15941 Mon Sep 17 00:00:00 2001 From: Robert Haase Date: Sat, 31 Jul 2021 18:00:48 +0200 Subject: [PATCH 4/6] multi-threading technically works; not sure about performance --- tests/test_multi_threading.py | 48 +++++++++++++++++++++++++++++++++++ 1 file changed, 48 insertions(+) create mode 100644 tests/test_multi_threading.py diff --git a/tests/test_multi_threading.py b/tests/test_multi_threading.py new file mode 100644 index 00000000..b022f40d --- /dev/null +++ b/tests/test_multi_threading.py @@ -0,0 +1,48 @@ +import pyclesperanto_prototype as cle +import numpy as np +import time + +def test_multi_gpu_threading(): + import threading + import time + + class myThread(threading.Thread): + def __init__(self, threadID, name, counter, image): + threading.Thread.__init__(self) + self.threadID = threadID + self.name = name + self.counter = counter + self.device = cle.new_device(name) + self.image = cle.push(image, device=self.device) + + def run(self): + print("Starting " + self.name) + self.print_time() + print("Exiting " + self.name) + + def print_time(self): + while self.counter: + self.image = cle.gaussian_blur(self.image, sigma_x=15, sigma_y=15, sigma_z=15, device=self.device) + time.sleep(0.1) + print("%s: %s gaussian blur" % (self.name, time.ctime(time.time()))) + self.counter -= 1 + + image = np.random.random((100, 100, 10)) + + # Create new threads + thread1 = myThread(1, "RTX", 15, image) # RTX gpu + thread2 = myThread(2, "gfx", 12, image) # AMD gpu + thread3 = myThread(3, "Intel", 11, image) # Intel gpu + + # Start new Threads + thread1.start() + thread2.start() + thread3.start() + + print("Exiting Main Thread") + thread1.join() + thread2.join() + thread3.join() + + assert False + From 2ce44985a6adc711f0bba5a78880fb0f49758f79 Mon Sep 17 00:00:00 2001 From: Robert Haase Date: Sat, 31 Jul 2021 18:43:36 +0200 Subject: [PATCH 5/6] demo showing 2x speedup with parallelization on a RTX 3050 Ti mobile --- tests/test_multi_threading.py | 61 ++++++++++++++++++++++++++++++++++- 1 file changed, 60 insertions(+), 1 deletion(-) diff --git a/tests/test_multi_threading.py b/tests/test_multi_threading.py index b022f40d..2a7572d7 100644 --- a/tests/test_multi_threading.py +++ b/tests/test_multi_threading.py @@ -2,6 +2,8 @@ import numpy as np import time + + def test_multi_gpu_threading(): import threading import time @@ -39,10 +41,67 @@ def print_time(self): thread2.start() thread3.start() - print("Exiting Main Thread") thread1.join() thread2.join() thread3.join() + print("Exiting Main Thread") + +def test_slinge_gpu_multi_threading(): + import threading + import time + + class myThread(threading.Thread): + def __init__(self, threadID, name, counter, image): + threading.Thread.__init__(self) + self.threadID = threadID + self.name = "T" + str(threadID) + self.counter = counter + self.device = cle.new_device(name) + self.image = cle.push(image, device=self.device) + + def run(self): + #print("Starting " + self.name) + self.print_time() + #print("Exiting " + self.name) + + def print_time(self): + while self.counter: + self.image = cle.gaussian_blur(self.image, sigma_x=25, sigma_y=25, sigma_z=25, device=self.device) + time.sleep(0.1) # this is important; other threads can use this time better + #print("%s: %s gaussian blur" % (self.name, time.ctime(time.time()))) + self.counter -= 1 + + image = np.random.random((100, 100, 10)) + + num_tasks = 10 + gpu_name = "RTX" + + start_time = time.time() + # Create new threads + threads = [myThread(i + 1, gpu_name, 15, image) for i in range(num_tasks)] + + # Start new Threads + for thread in threads: + thread.start() + + # wait for finish + for thread in threads: + thread.join() + print("Parallel all done after ", time.time() - start_time, "s") + + # Create new threads + start_time = time.time() + threads = [myThread(i + 1, gpu_name, 15, image) for i in range(num_tasks)] + + for thread in threads: + # Start new Threads + thread.start() + # wait for finish + thread.join() + + print("Sequential all done after ", time.time() - start_time, "s") + + assert False From 9450673a2a42838069ca7af8328c972ce597213e Mon Sep 17 00:00:00 2001 From: Robert Haase Date: Sat, 31 Jul 2021 18:53:19 +0200 Subject: [PATCH 6/6] make test pass --- tests/test_multi_threading.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_multi_threading.py b/tests/test_multi_threading.py index 2a7572d7..d4206908 100644 --- a/tests/test_multi_threading.py +++ b/tests/test_multi_threading.py @@ -103,5 +103,5 @@ def print_time(self): print("Sequential all done after ", time.time() - start_time, "s") - assert False + #assert False