nScales = 6
showDebugPlots = True
# Load image
#image_input = np.asarray(Image.open("imagetest01.jpg").convert('RGBA'), dtype = np.uint8)
image_input = np.asarray(Image.open("butterfly_external_testimage.jpg").convert('RGBA'), dtype = np.uint8)
h, w, c = image_input.shape[0], image_input.shape[1], image_input.shape[2]
print(f"Source image has dimensions {h} x {w} x {c}")
# Compile our kernel
clkernel_Grayscale = '''
#define IMAGEHEIGHT {0}
#define IMAGEWIDTH {1}
'''.format(image_input.shape[1], image_input.shape[0]) + clkernel_Grayscale_Tpl
clProg_Grayscale = cl.Program(clContext, clkernel_Grayscale).build()
krnlGray = clProg_Grayscale.grayscale_RGBA2Pyrabuffer
# Image input buffer
clBufferIn = cl.Buffer(clContext, cl.mem_flags.READ_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf = image_input)
# Pyramid output buffer
npPyramidBuffer = np.empty((image_input.shape[1] * 2, image_input.shape[0] * 2), dtype = np.uint8)
clPyramidBuffer = cl.Buffer(clContext, cl.mem_flags.WRITE_ONLY | cl.mem_flags.COPY_HOST_PTR, hostbuf = npPyramidBuffer)
# Difference Kernel
clkernel_Difference = '''
#define IMAGEHEIGHT_DIFFPYRAMID {0}
#define IMAGEWIDTH_DIFFPYRAMID {1}
'''.format(int(image_input.shape[1] * 2), int(image_input.shape[0] * 2)) + clkernel_DifferencePyramidDifference
clProg_Difference = cl.Program(clContext, clkernel_Difference).build()
krnlDifference = clProg_Difference.grayscalePyramidDifference
# Would require / 4 for the ARGB -> Grayscale and * 4 for the 4 images so the size stays the same ...
curSize = clBufferIn.get_info(cl.mem_info.SIZE)
shapeX = image_input.shape[1]
shapeY = image_input.shape[0]
clBuffer_Scales = []
npBuffer_Scales = []
clProgH = []
clProgV = []
clKrnlH = []
clKrnlV = []
clProgSubsample = []
clKrnlSubsample = []
npTempBuffer = []
clTempBuffer = []
for iScale in range(nScales):
print(f"Scale {iScale}: {shapeX} x {shapeY} -> {curSize}")
npBuffer_Scales.append(np.empty(curSize, dtype=np.uint8))
clBuffer_Scales.append(cl.Buffer(clContext, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf = npBuffer_Scales[-1]))
# Blurring kernels
clkernel = '''
#define KERNELHALFWIDTH {0}
#define IMAGEHEIGHT {1}
#define IMAGEWIDTH {2}
#define CONVOLVE_HORIZONTAL 1
'''.format(int(len(imagekernel_GaussianBlurGrayscalePyramid)/2), shapeX, shapeY) + clkernel_GaussianBlurGrayscalePyramid
clProgH.append(cl.Program(clContext, clkernel).build())
clkernel = '''
#define KERNELHALFWIDTH {0}
#define IMAGEHEIGHT {1}
#define IMAGEWIDTH {2}
'''.format(int(len(imagekernel_GaussianBlurGrayscalePyramid)/2), shapeX, shapeY) + clkernel_GaussianBlurGrayscalePyramid
clProgV.append(cl.Program(clContext, clkernel).build())
clKrnlH.append(clProgH[-1].convolveGrayscalePyramid)
clKrnlV.append(clProgV[-1].convolveGrayscalePyramid)
# Temporary buffers
npTempBuffer.append(np.empty((shapeY, shapeX), dtype = np.uint8))
clTempBuffer.append(cl.Buffer(clContext, cl.mem_flags.READ_WRITE | cl.mem_flags.COPY_HOST_PTR, hostbuf = npTempBuffer[-1]))
# Subsampling kernels
clkernel = '''
#define IMAGEHEIGHT {0}
#define IMAGEWIDTH {1}
'''.format(shapeX, shapeY) + clkernel_SubsampleGrayscalePyramid
clProgSubsample.append(cl.Program(clContext, clkernel).build())
clKrnlSubsample.append(clProgSubsample[-1].subsampleGrayscalePyramid)
if shapeX % 2 == 0:
shapeX = shapeX + 1
if shapeY % 2 == 0:
shapeY = shapeY + 1
shapeX = int(shapeX / 2)
shapeY = int(shapeY / 2)
# curSize = int(curSize / 4)
curSize = (shapeX * shapeY) * 4
# Now execute the kernels on a single input image ...
durations = []
showDebugPlots = False
targetOffset_X = 0
targetOffset_Y = 0
for iIteration in range(500):
print(f"{iIteration} ... ", end = "")
stime = time()
# Grayscale kernel is only required once. Also grayscaling only runs once ...
krnlGray(clQueue, (w, h,), None , clBufferIn, clBuffer_Scales[0])
if showDebugPlots:
cl.enqueue_copy(clQueue, npBuffer_Scales[0], clBuffer_Scales[0]) #, origin=(0,0), region=(w,h))
plt.imshow(npBuffer_Scales[0].reshape((h*2, w*2)))
plt.show()
for iScale in range(nScales):
clKrnlH[iScale](clQueue, (w, h,), None , clBuffer_Scales[iScale], clTempBuffer[iScale], clBufferKernel, np.int32(0), np.int32(0), np.int32(0), np.int32(1))
clKrnlV[iScale](clQueue, (w, h,), None , clBuffer_Scales[iScale], clTempBuffer[iScale], clBufferKernel, np.int32(0), np.int32(0), np.int32(0), np.int32(1))
clKrnlH[iScale](clQueue, (w, h,), None , clBuffer_Scales[iScale], clTempBuffer[iScale], clBufferKernel, np.int32(0), np.int32(1), np.int32(1), np.int32(0))
clKrnlV[iScale](clQueue, (w, h,), None , clBuffer_Scales[iScale], clTempBuffer[iScale], clBufferKernel, np.int32(0), np.int32(1), np.int32(1), np.int32(0))
clKrnlH[iScale](clQueue, (w, h,), None , clBuffer_Scales[iScale], clTempBuffer[iScale], clBufferKernel, np.int32(1), np.int32(0), np.int32(1), np.int32(1))
clKrnlV[iScale](clQueue, (w, h,), None , clBuffer_Scales[iScale], clTempBuffer[iScale], clBufferKernel, np.int32(1), np.int32(0), np.int32(1), np.int32(1))
cl.enqueue_copy(clQueue, npBuffer_Scales[iScale], clBuffer_Scales[iScale]) #, origin=(0,0), region=(w,h))
if showDebugPlots:
plt.imshow(npBuffer_Scales[iScale].reshape((h*2, w*2)))
plt.show()
if iScale != (nScales - 1):
clKrnlSubsample[iScale](clQueue, (int(w / 2), int(h / 2),), None, clBuffer_Scales[iScale], clBuffer_Scales[iScale + 1])
# if showDebugPlots:
# cl.enqueue_copy(clQueue, npBuffer_Scales[iScale + 1], clBuffer_Scales[iScale + 1])
# plt.imshow(npBuffer_Scales[iScale + 1].reshape((h, w)))
# Perform difference calculation
krnlDifference(clQueue, (w, h,), None, clBuffer_Scales[iScale], clPyramidBuffer, np.int32(targetOffset_X), np.int32(targetOffset_Y), np.int32(2*h), np.int32(2*w))
targetOffset_X = targetOffset_X + h
targetOffset_Y = targetOffset_Y + w
w = int(w / 2)
h = int(h / 2)
etime = time()
durations.append((etime - stime) * 1000)
# Fetch LAST Pyramid into host buffer ...
cl.enqueue_copy(clQueue, npPyramidBuffer, clPyramidBuffer)
fig, ax = plt.subplots(figsize = (6.4 * 4, 4.8 * 4))
npPyramidBuffer = (((npPyramidBuffer - np.min(npPyramidBuffer)) / np.max(npPyramidBuffer)) * 255.0).astype(np.uint8)
ax.imshow(npPyramidBuffer.reshape((image_input.shape[0] * 2, image_input.shape[1] * 2)))
#plt.savefig("imagepyramids2_initialpyramid.png")
plt.show()
fig, ax = plt.subplots()
ax.set_xlabel("Iteration")
ax.set_ylabel("Duration [ms]")
ax.plot(durations)
ax.grid()
ax.set_title(f"DoG pyramid, {nScales} scales (average {np.mean(durations)} ms)")
#plt.savefig("imagepyramids2_initialpyramid_timing.png")
#plt.savefig("imagepyramids2_initialpyramid_timing.svg")
plt.show()