import pyopencl as cl
import numpy as np
from PIL import Image

def main(path='my_image.png', target_rgb=(255, 0, 255)):
    """Count the pixels of an image that exactly match an RGB colour.

    The image is loaded with PIL, converted to RGB if necessary and uploaded
    to the first device of the first OpenCL platform.  Two kernels are run:

    * ``has_rgb_value`` -- one work-item per pixel; writes 1 into a
      ``width * height`` flag buffer where the pixel equals the target.
    * ``reduce`` -- one work-item per block of ``height`` flags; sums the
      block into its first element.

    The per-block partial sums are then added on the host and the total is
    printed to stdout.

    Args:
        path: Image file to open.  Defaults to ``'my_image.png'``.
        target_rgb: ``(red, green, blue)`` integer triple to look for.
            Defaults to ``(255, 0, 255)``.

    Returns:
        None.  The count is printed.
    """
    image = Image.open(path)
    width, height = image.size

    if image.mode != 'RGB':
        image = image.convert('RGB')

    # The kernels declare their buffers as ``int``, which is 32 bits wide in
    # OpenCL C.  The host arrays must use the same width; a 64-bit host dtype
    # (what ``np.int`` resolved to on most platforms) makes the kernel read
    # and write at the wrong element stride.
    int_type = np.dtype(np.int32)

    imarr = np.array(image, dtype=int_type)

    platform = cl.get_platforms()[0]
    device = platform.get_devices()[0]

    ctx = cl.Context(devices=[device])
    queue = cl.CommandQueue(ctx)

    mf = cl.mem_flags

    imarr_d = cl.Buffer(ctx, mf.READ_ONLY, imarr.nbytes)
    cl.enqueue_copy(queue, imarr_d, imarr, is_blocking=True)

    # has_rgb_value only ever stores 1, so the flag buffer has to start out
    # zeroed on the device.
    result = np.zeros((width, height), dtype=int_type)
    result_d = cl.Buffer(ctx, mf.READ_WRITE, result.nbytes)
    cl.enqueue_copy(queue, result_d, result, is_blocking=True)

    red, green, blue = target_rgb

    # NOTE: the kernel indexes the flat pixel buffer as x * height + y with
    # x in [0, width) and y in [0, height).  That visits every pixel index in
    # [0, width * height) exactly once, which is all a total count needs,
    # even though (x, y) are not the image's actual column/row coordinates.
    code = '''

    __kernel void has_rgb_value(
        __global int * imarr_d,
        __global int * result_d)
    {
        int x = get_global_id(0);
        int y = get_global_id(1);

        int i = x * %(height)d * 3 + y * 3;

        if (imarr_d[i + 0] == %(red)d &&
            imarr_d[i + 1] == %(green)d &&
            imarr_d[i + 2] == %(blue)d)
        {
            result_d[x * %(height)d + y] = 1;
        }
    }

    __kernel void reduce(
        __global int * result_d)
    {
        int i = get_global_id(0);

        for (int j = 1; j < %(height)d; j ++)
        {
            result_d[i * %(height)d + 0] += result_d[i * %(height)d + j];
        }
    }

    ''' % {
        'red'    : red,
        'green'  : green,
        'blue'   : blue,
        'height' : height
    }

    program = cl.Program(ctx, code).build()

    program.has_rgb_value(
        queue, (width, height), None,
        imarr_d, result_d
    ).wait()

    program.reduce(
        queue, (width, ), None,
        result_d
    ).wait()

    cl.enqueue_copy(queue, result, result_d, is_blocking=True)

    # After reduce, column 0 holds one partial sum per block; add them up on
    # the host.
    total = int(result[:, 0].sum())

    # Parenthesised single-argument form prints identically on Python 2 and 3.
    print(total)

# Run main() only when this file is executed as a script, not when it is
# imported as a module.
if __name__ == '__main__':
    main()