/*!
[config]
name: Vector load local int2,3,4,8,16
clc_version_min: 11

dimensions: 1
global_size: 1 0 0

[test]
name: vector load local int2
kernel_name: vload2_local
arg_in:  0 buffer int[3] 0 28 46
arg_out: 1 buffer int2[2] 0 28 28 46

[test]
name: vector load local offset int2
kernel_name: vload2_local_offset
arg_in:  0 buffer int[5] 0 0 0 28 46
arg_out: 1 buffer int2[2] 0 28 28 46

[test]
name: vector load local int3
kernel_name: vload3_local
arg_in:  0 buffer int[4] 0 121 66 189
arg_out: 1 buffer int3[2] 0 121 66 121 66 189

[test]
name: vector load local offset int3
kernel_name: vload3_local_offset
arg_in:  0 buffer int[7] 0 0 0 0 121 66 189
arg_out: 1 buffer int3[2] 0 121 66 121 66 189

[test]
name: vector load local int4
kernel_name: vload4_local
arg_in:  0 buffer int[5] 0 120 155 52 202
arg_out: 1 buffer int4[2] 0 120 155 52 120 155 52 202

[test]
name: vector load local offset int4
kernel_name: vload4_local_offset
arg_in:  0 buffer int[9] 0 0 0 0 0 120 155 52 202
arg_out: 1 buffer int4[2] 0 120 155 52 120 155 52 202

[test]
name: vector load local int8
kernel_name: vload8_local
arg_in:  0 buffer int[9] 0 116 189 192 64 98 22 43 70
arg_out: 1 buffer int8[2] 0 116 189 192 64 98 22 43 116 189 192 64 98 22 43 70

[test]
name: vector load local offset int8
kernel_name: vload8_local_offset
arg_in:  0 buffer int[17] 0 0 0 0 0 0 0 0 0 116 189 192 64 98 22 43 70
arg_out: 1 buffer int8[2] 0 116 189 192 64 98 22 43 116 189 192 64 98 22 43 70

[test]
name: vector load local int16
kernel_name: vload16_local
arg_in:  0 buffer int[17] 0 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171
arg_out: 1 buffer int16[2] 0 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171

[test]
name: vector load local offset int16
kernel_name: vload16_local_offset
arg_in:  0 buffer int[33] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171
arg_out: 1 buffer int16[2] 0 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 185 240 246 145 213 116 228 2 209 132 121 113 5 151 154 171

!*/

/*
 * Stages three ints from `in` into a local array, then stores two int2
 * vectors to `out`: one loaded starting at element 0 of the local array
 * and one starting at element 1.
 */
kernel void vload2_local(global int *in,
                             global int2 *out) {
    volatile local int loc[3];
    /* vload2 takes a non-volatile pointer, so the qualifier is cast away. */
    local int *base = (local int *)loc;
    int idx = 0;

    /* Copy the input element by element into local memory. */
    while (idx < 3) {
        loc[idx] = in[idx];
        idx++;
    }

    out[0] = vload2(0, base);
    out[1] = vload2(0, base + 1);
}

/*
 * Stages five ints from `in` into a local array, then stores two int2
 * vectors to `out` using a vload2 offset of 1 (i.e. two ints past the
 * given pointer): first from the array base, then from base + 1.
 */
kernel void vload2_local_offset(global int *in,
                                    global int2 *out) {
    volatile local int loc[5];
    /* vload2 takes a non-volatile pointer, so the qualifier is cast away. */
    local int *base = (local int *)loc;
    int idx = 0;

    /* Copy the input element by element into local memory. */
    while (idx < 5) {
        loc[idx] = in[idx];
        idx++;
    }

    out[0] = vload2(1, base);
    out[1] = vload2(1, base + 1);
}

/*
 * Stages four ints from `in` into a local array, then stores two int3
 * vectors to `out`: one loaded starting at element 0 of the local array
 * and one starting at element 1.
 */
kernel void vload3_local(global int *in,
                             global int3 *out) {
    volatile local int loc[4];
    /* vload3 takes a non-volatile pointer, so the qualifier is cast away. */
    local int *base = (local int *)loc;
    int idx = 0;

    /* Copy the input element by element into local memory. */
    while (idx < 4) {
        loc[idx] = in[idx];
        idx++;
    }

    out[0] = vload3(0, base);
    out[1] = vload3(0, base + 1);
}

/*
 * Stages seven ints from `in` into a local array, then stores two int3
 * vectors to `out` using a vload3 offset of 1 (i.e. three ints past the
 * given pointer): first from the array base, then from base + 1.
 *
 * vload3(1, loc) reads loc[3..5] and vload3(1, loc + 1) reads loc[4..6],
 * so seven elements are sufficient. The test configuration supplies
 * exactly seven input ints; copying an eighth would read past the end of
 * the input buffer.
 */
kernel void vload3_local_offset(global int *in,
                                    global int3 *out) {
    volatile local int loc[7];
    for (int i = 0; i < 7; ++i)
        loc[i] = in[i];

    /* vload3 takes a non-volatile pointer, so the qualifier is cast away. */
    out[0] = vload3(1, (local int*)loc);
    out[1] = vload3(1, (local int*)loc + 1);
}

/*
 * Stages five ints from `in` into a local array, then stores two int4
 * vectors to `out`: one loaded starting at element 0 of the local array
 * and one starting at element 1.
 */
kernel void vload4_local(global int *in,
                             global int4 *out) {
    volatile local int loc[5];
    /* vload4 takes a non-volatile pointer, so the qualifier is cast away. */
    local int *base = (local int *)loc;
    int idx = 0;

    /* Copy the input element by element into local memory. */
    while (idx < 5) {
        loc[idx] = in[idx];
        idx++;
    }

    out[0] = vload4(0, base);
    out[1] = vload4(0, base + 1);
}

/*
 * Stages nine ints from `in` into a local array, then stores two int4
 * vectors to `out` using a vload4 offset of 1 (i.e. four ints past the
 * given pointer): first from the array base, then from base + 1.
 */
kernel void vload4_local_offset(global int *in,
                                    global int4 *out) {
    volatile local int loc[9];
    /* vload4 takes a non-volatile pointer, so the qualifier is cast away. */
    local int *base = (local int *)loc;
    int idx = 0;

    /* Copy the input element by element into local memory. */
    while (idx < 9) {
        loc[idx] = in[idx];
        idx++;
    }

    out[0] = vload4(1, base);
    out[1] = vload4(1, base + 1);
}

/*
 * Stages nine ints from `in` into a local array, then stores two int8
 * vectors to `out`: one loaded starting at element 0 of the local array
 * and one starting at element 1.
 */
kernel void vload8_local(global int *in,
                             global int8 *out) {
    volatile local int loc[9];
    /* vload8 takes a non-volatile pointer, so the qualifier is cast away. */
    local int *base = (local int *)loc;
    int idx = 0;

    /* Copy the input element by element into local memory. */
    while (idx < 9) {
        loc[idx] = in[idx];
        idx++;
    }

    out[0] = vload8(0, base);
    out[1] = vload8(0, base + 1);
}

/*
 * Stages seventeen ints from `in` into a local array, then stores two
 * int8 vectors to `out` using a vload8 offset of 1 (i.e. eight ints past
 * the given pointer): first from the array base, then from base + 1.
 */
kernel void vload8_local_offset(global int *in,
                                    global int8 *out) {
    volatile local int loc[17];
    /* vload8 takes a non-volatile pointer, so the qualifier is cast away. */
    local int *base = (local int *)loc;
    int idx = 0;

    /* Copy the input element by element into local memory. */
    while (idx < 17) {
        loc[idx] = in[idx];
        idx++;
    }

    out[0] = vload8(1, base);
    out[1] = vload8(1, base + 1);
}

/*
 * Stages seventeen ints from `in` into a local array, then stores two
 * int16 vectors to `out`: one loaded starting at element 0 of the local
 * array and one starting at element 1.
 */
kernel void vload16_local(global int *in,
                             global int16 *out) {
    volatile local int loc[17];
    /* vload16 takes a non-volatile pointer, so the qualifier is cast away. */
    local int *base = (local int *)loc;
    int idx = 0;

    /* Copy the input element by element into local memory. */
    while (idx < 17) {
        loc[idx] = in[idx];
        idx++;
    }

    out[0] = vload16(0, base);
    out[1] = vload16(0, base + 1);
}

/*
 * Stages thirty-three ints from `in` into a local array, then stores two
 * int16 vectors to `out` using a vload16 offset of 1 (i.e. sixteen ints
 * past the given pointer): first from the array base, then from base + 1.
 */
kernel void vload16_local_offset(global int *in,
                                    global int16 *out) {
    volatile local int loc[33];
    /* vload16 takes a non-volatile pointer, so the qualifier is cast away. */
    local int *base = (local int *)loc;
    int idx = 0;

    /* Copy the input element by element into local memory. */
    while (idx < 33) {
        loc[idx] = in[idx];
        idx++;
    }

    out[0] = vload16(1, base);
    out[1] = vload16(1, base + 1);
}
