/*!
[config]
name: Vector store private ushort2,3,4,8,16
clc_version_min: 11

dimensions: 1
global_size: 1 0 0

[test]
name: vector store private ushort2
kernel_name: vstore2_private
arg_out: 0 buffer ushort[4] 0 28 46 0xdead
arg_in: 0 buffer ushort[4] 0 0 0  0xdead
arg_in:  1 buffer ushort2[1] 28 46

[test]
name: vector store private offset ushort2
kernel_name: vstore2_private_offset
arg_out: 0 buffer ushort[6] 0  28 46  28 46 0xdead
arg_in: 0 buffer ushort[6] 0 0 0 0 0  0xdead
arg_in: 1 buffer ushort2[1] 28 46

[test]
name: vector store private ushort3
kernel_name: vstore3_private
arg_out: 0 buffer ushort[5] 0 121 66 189 0xdead
arg_in: 0 buffer ushort[5] 0 0 0 0  0xdead
arg_in:  1 buffer ushort3[1] 121 66 189

[test]
name: vector store private offset ushort3
kernel_name: vstore3_private_offset
arg_out: 0 buffer ushort[8] 0  121 66 189  121 66 189 0xdead
arg_in: 0 buffer ushort[8] 0 0 0 0 0 0 0  0xdead
arg_in: 1 buffer ushort3[1] 121 66 189

[test]
name: vector store private ushort4
kernel_name: vstore4_private
arg_out: 0 buffer ushort[6] 0 120 155 52 202 0xdead
arg_in: 0 buffer ushort[6] 0 0 0 0 0  0xdead
arg_in:  1 buffer ushort4[1] 120 155 52 202

[test]
name: vector store private offset ushort4
kernel_name: vstore4_private_offset
arg_out: 0 buffer ushort[10] 0  120 155 52 202  120 155 52 202 0xdead
arg_in: 0 buffer ushort[10] 0 0 0 0 0 0 0 0 0  0xdead
arg_in: 1 buffer ushort4[1] 120 155 52 202

[test]
name: vector store private ushort8
kernel_name: vstore8_private
arg_out: 0 buffer ushort[10] 0 116 189 192 64 98 22 43 70 0xdead
arg_in: 0 buffer ushort[10] 0 0 0 0 0 0 0 0 0  0xdead
arg_in:  1 buffer ushort8[1] 116 189 192 64 98 22 43 70

[test]
name: vector store private offset ushort8
kernel_name: vstore8_private_offset
arg_out: 0 buffer ushort[18] 0  116 189 192 64 98 22 43 70  116 189 192 64 98 22 43 70 0xdead
arg_in: 0 buffer ushort[18] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  0xdead
arg_in: 1 buffer ushort8[1] 116 189 192 64 98 22 43 70

[test]
name: vector store private ushort16
kernel_name: vstore16_private
arg_out: 0 buffer ushort[18] 0 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171 0xdead
arg_in: 0 buffer ushort[18] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  0xdead
arg_in:  1 buffer ushort16[1] 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171

[test]
name: vector store private offset ushort16
kernel_name: vstore16_private_offset
arg_out: 0 buffer ushort[34] 0  185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171  185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171 0xdead
arg_in: 0 buffer ushort[34] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0  0xdead
arg_in: 1 buffer ushort16[1] 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171

!*/

/*
 * Stores in[0] with vstore2 into a zero-filled 3-element private array,
 * starting at element 1, then copies the whole array to out[0..2].
 * Element 0 is not written by the store and is copied out as 0.
 */
kernel void vstore2_private(global ushort *out,
                             global ushort2 *in) {
    volatile private ushort scratch[3];
    const ushort2 value = in[0];

    for (int idx = 0; idx < 3; idx++) {
        scratch[idx] = (ushort)0;
    }

    /* The cast drops the volatile qualifier before the pointer is handed
     * to vstore2; the destination begins one element into the array. */
    private ushort *dst = (private ushort *)scratch + 1;
    vstore2(value, 0, dst);

    for (int idx = 0; idx < 3; idx++) {
        out[idx] = scratch[idx];
    }
}

/*
 * Stores in[0] twice with vstore2 into a zero-filled 5-element private
 * array: once at vector offset 0 and once at vector offset 1, both relative
 * to element 1. The whole array is then copied to out[0..4].
 * Element 0 is not written by either store and is copied out as 0.
 */
kernel void vstore2_private_offset(global ushort *out,
                                    global ushort2 *in) {
    volatile private ushort scratch[5];
    const ushort2 value = in[0];

    for (int idx = 0; idx < 5; idx++) {
        scratch[idx] = (ushort)0;
    }

    /* The cast drops the volatile qualifier before the pointer is handed
     * to vstore2; the destination begins one element into the array. */
    private ushort *dst = (private ushort *)scratch + 1;
    vstore2(value, 0, dst); /* elements 1..2 */
    vstore2(value, 1, dst); /* elements 3..4 */

    for (int idx = 0; idx < 5; idx++) {
        out[idx] = scratch[idx];
    }
}

/*
 * Stores in[0] with vstore3 into a zero-filled 4-element private array,
 * starting at element 1, then copies the whole array to out[0..3].
 * Element 0 is not written by the store and is copied out as 0.
 */
kernel void vstore3_private(global ushort *out,
                             global ushort3 *in) {
    volatile private ushort scratch[4];
    const ushort3 value = in[0];

    for (int idx = 0; idx < 4; idx++) {
        scratch[idx] = (ushort)0;
    }

    /* The cast drops the volatile qualifier before the pointer is handed
     * to vstore3; the destination begins one element into the array. */
    private ushort *dst = (private ushort *)scratch + 1;
    vstore3(value, 0, dst);

    for (int idx = 0; idx < 4; idx++) {
        out[idx] = scratch[idx];
    }
}

/*
 * Stores in[0] twice with vstore3 into a zero-filled 7-element private
 * array: once at vector offset 0 and once at vector offset 1, both relative
 * to element 1. The whole array is then copied to out[0..6].
 * Element 0 is not written by either store and is copied out as 0.
 */
kernel void vstore3_private_offset(global ushort *out,
                                    global ushort3 *in) {
    volatile private ushort scratch[7];
    const ushort3 value = in[0];

    for (int idx = 0; idx < 7; idx++) {
        scratch[idx] = (ushort)0;
    }

    /* The cast drops the volatile qualifier before the pointer is handed
     * to vstore3; the destination begins one element into the array. */
    private ushort *dst = (private ushort *)scratch + 1;
    vstore3(value, 0, dst); /* elements 1..3 */
    vstore3(value, 1, dst); /* elements 4..6 */

    for (int idx = 0; idx < 7; idx++) {
        out[idx] = scratch[idx];
    }
}

/*
 * Stores in[0] with vstore4 into a zero-filled 5-element private array,
 * starting at element 1, then copies the whole array to out[0..4].
 * Element 0 is not written by the store and is copied out as 0.
 */
kernel void vstore4_private(global ushort *out,
                             global ushort4 *in) {
    volatile private ushort scratch[5];
    const ushort4 value = in[0];

    for (int idx = 0; idx < 5; idx++) {
        scratch[idx] = (ushort)0;
    }

    /* The cast drops the volatile qualifier before the pointer is handed
     * to vstore4; the destination begins one element into the array. */
    private ushort *dst = (private ushort *)scratch + 1;
    vstore4(value, 0, dst);

    for (int idx = 0; idx < 5; idx++) {
        out[idx] = scratch[idx];
    }
}

/*
 * Stores in[0] twice with vstore4 into a zero-filled 9-element private
 * array: once at vector offset 0 and once at vector offset 1, both relative
 * to element 1. The whole array is then copied to out[0..8].
 * Element 0 is not written by either store and is copied out as 0.
 */
kernel void vstore4_private_offset(global ushort *out,
                                    global ushort4 *in) {
    volatile private ushort scratch[9];
    const ushort4 value = in[0];

    for (int idx = 0; idx < 9; idx++) {
        scratch[idx] = (ushort)0;
    }

    /* The cast drops the volatile qualifier before the pointer is handed
     * to vstore4; the destination begins one element into the array. */
    private ushort *dst = (private ushort *)scratch + 1;
    vstore4(value, 0, dst); /* elements 1..4 */
    vstore4(value, 1, dst); /* elements 5..8 */

    for (int idx = 0; idx < 9; idx++) {
        out[idx] = scratch[idx];
    }
}

/*
 * Stores in[0] with vstore8 into a zero-filled 9-element private array,
 * starting at element 1, then copies the whole array to out[0..8].
 * Element 0 is not written by the store and is copied out as 0.
 */
kernel void vstore8_private(global ushort *out,
                             global ushort8 *in) {
    volatile private ushort scratch[9];
    const ushort8 value = in[0];

    for (int idx = 0; idx < 9; idx++) {
        scratch[idx] = (ushort)0;
    }

    /* The cast drops the volatile qualifier before the pointer is handed
     * to vstore8; the destination begins one element into the array. */
    private ushort *dst = (private ushort *)scratch + 1;
    vstore8(value, 0, dst);

    for (int idx = 0; idx < 9; idx++) {
        out[idx] = scratch[idx];
    }
}

/*
 * Stores in[0] twice with vstore8 into a zero-filled 17-element private
 * array: once at vector offset 0 and once at vector offset 1, both relative
 * to element 1. The whole array is then copied to out[0..16].
 * Element 0 is not written by either store and is copied out as 0.
 */
kernel void vstore8_private_offset(global ushort *out,
                                    global ushort8 *in) {
    volatile private ushort scratch[17];
    const ushort8 value = in[0];

    for (int idx = 0; idx < 17; idx++) {
        scratch[idx] = (ushort)0;
    }

    /* The cast drops the volatile qualifier before the pointer is handed
     * to vstore8; the destination begins one element into the array. */
    private ushort *dst = (private ushort *)scratch + 1;
    vstore8(value, 0, dst); /* elements 1..8 */
    vstore8(value, 1, dst); /* elements 9..16 */

    for (int idx = 0; idx < 17; idx++) {
        out[idx] = scratch[idx];
    }
}

/*
 * Stores in[0] with vstore16 into a zero-filled 17-element private array,
 * starting at element 1, then copies the whole array to out[0..16].
 * Element 0 is not written by the store and is copied out as 0.
 */
kernel void vstore16_private(global ushort *out,
                             global ushort16 *in) {
    volatile private ushort scratch[17];
    const ushort16 value = in[0];

    for (int idx = 0; idx < 17; idx++) {
        scratch[idx] = (ushort)0;
    }

    /* The cast drops the volatile qualifier before the pointer is handed
     * to vstore16; the destination begins one element into the array. */
    private ushort *dst = (private ushort *)scratch + 1;
    vstore16(value, 0, dst);

    for (int idx = 0; idx < 17; idx++) {
        out[idx] = scratch[idx];
    }
}

/*
 * Stores in[0] twice with vstore16 into a zero-filled 33-element private
 * array: once at vector offset 0 and once at vector offset 1, both relative
 * to element 1. The whole array is then copied to out[0..32].
 * Element 0 is not written by either store and is copied out as 0.
 */
kernel void vstore16_private_offset(global ushort *out,
                                    global ushort16 *in) {
    volatile private ushort scratch[33];
    const ushort16 value = in[0];

    for (int idx = 0; idx < 33; idx++) {
        scratch[idx] = (ushort)0;
    }

    /* The cast drops the volatile qualifier before the pointer is handed
     * to vstore16; the destination begins one element into the array. */
    private ushort *dst = (private ushort *)scratch + 1;
    vstore16(value, 0, dst); /* elements 1..16 */
    vstore16(value, 1, dst); /* elements 17..32 */

    for (int idx = 0; idx < 33; idx++) {
        out[idx] = scratch[idx];
    }
}
