#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <chrono>
#include <xmmintrin.h>
#include <emmintrin.h>
#include <immintrin.h>
// Forward declaration so the callback type below can refer to the struct.
struct audio_repack;
// Signature shared by every repack routine in this file:
// (repack state, source bytes, number of frames) -> int status.
// All implementations visible in this file return 0.
typedef int (*audio_repack_func_t)(struct audio_repack *,
const uint8_t *, uint32_t);
// State handed to a repack routine.
struct audio_repack {
// Destination buffer the repack routines write converted frames into.
uint8_t *packet_buffer;
// Not read by the routines in this file; main() sets it to base_dst_size.
// NOTE(review): presumably the byte size of packet_buffer's payload -- confirm.
uint32_t packet_size;
// Bytes per source frame; the scalar routines multiply it by frame_count
// to find the end of the input.
uint32_t base_src_size;
// Not read by the routines in this file.
// NOTE(review): presumably bytes per destination frame -- confirm.
uint32_t base_dst_size;
// Not used in this file.
// NOTE(review): presumably the routine selected for this state -- confirm.
audio_repack_func_t repack_func;
};
#ifdef AVX
/**
 * Copy frames of eight 16-bit samples (16 bytes each) from bsrc into
 * repack->packet_buffer, exchanging samples 2 and 3 of every frame.
 * AVX2 variant: one 256-bit vector handles two frames at a time.
 *
 * @param repack      state whose packet_buffer receives the output; it must
 *                    have room for frame_count * 16 bytes
 * @param bsrc        source frames, frame_count * 16 bytes, any alignment
 * @param frame_count number of frames to convert
 * @return always 0
 */
int repack_8ch_swap23_avx2(struct audio_repack *repack,
        const uint8_t *bsrc, uint32_t frame_count)
{
    // Two 16-byte frames fit into one 256-bit register.
    const uint32_t pair_count = frame_count / 2;
    const __m256i *src = reinterpret_cast<const __m256i *>(bsrc);
    const __m256i *const esrc = src + pair_count;
    __m256i *dst = reinterpret_cast<__m256i *>(repack->packet_buffer);

    for (; src != esrc; ++src, ++dst) {
        // Unaligned accesses: the interface only promises byte pointers.
        const __m256i frames = _mm256_loadu_si256(src);
        // 0xB4 selects words {0, 1, 3, 2} from the low half of each 128-bit
        // lane, i.e. it swaps samples 2 and 3 of both frames.
        _mm256_storeu_si256(dst, _mm256_shufflelo_epi16(frames, 0xB4));
    }

    if (frame_count % 2 == 1) {
        // One frame is left over. It must be indexed as 16-bit samples:
        // indexing through the __m256i pointers would address whole 32-byte
        // vectors far beyond this frame.
        const uint16_t *tail_src = reinterpret_cast<const uint16_t *>(src);
        uint16_t *tail_dst = reinterpret_cast<uint16_t *>(dst);
        memcpy(tail_dst, tail_src, 16);
        tail_dst[2] = tail_src[3];
        tail_dst[3] = tail_src[2];
    }
    return 0;
}
#endif
/**
 * Copy frames of eight 16-bit samples (16 bytes each) from bsrc into
 * repack->packet_buffer, exchanging samples 2 and 3 of every frame.
 * SSE2 variant: one 128-bit vector holds exactly one frame.
 *
 * @param repack      state whose packet_buffer receives the output; it must
 *                    have room for frame_count * 16 bytes
 * @param bsrc        source frames, frame_count * 16 bytes, any alignment
 * @param frame_count number of frames to convert
 * @return always 0
 */
int repack_8ch_swap23_sse2(struct audio_repack *repack,
        const uint8_t *bsrc, uint32_t frame_count)
{
    const __m128i *src = reinterpret_cast<const __m128i *>(bsrc);
    const __m128i *const esrc = src + frame_count;
    __m128i *dst = reinterpret_cast<__m128i *>(repack->packet_buffer);

    for (; src != esrc; ++src, ++dst) {
        // Unaligned accesses: callers pass plain byte pointers, so 16-byte
        // alignment cannot be assumed here.
        const __m128i frame = _mm_loadu_si128(src);
        // 0xB4 selects words {0, 1, 3, 2} of the low 64 bits: swap 2 <-> 3.
        _mm_storeu_si128(dst, _mm_shufflelo_epi16(frame, 0xB4));
    }
    return 0;
}
/**
 * Scalar variant: copy each 16-byte frame in one memcpy, then overwrite
 * samples 2 and 3 of the copy with the source's samples 3 and 2.
 *
 * The input length is frame_count * repack->base_src_size bytes and is
 * walked in steps of eight 16-bit samples.
 *
 * @return always 0
 */
int repack_8ch_swap23_3(struct audio_repack *repack,
        const uint8_t *bsrc, uint32_t frame_count)
{
    const uint32_t total_bytes = frame_count * repack->base_src_size;
    const uint16_t *in = reinterpret_cast<const uint16_t *>(bsrc);
    const uint16_t *const in_end =
        reinterpret_cast<const uint16_t *>(bsrc + total_bytes);
    uint16_t *out = reinterpret_cast<uint16_t *>(repack->packet_buffer);

    while (in != in_end) {
        // Whole frame first, then fix up the two exchanged samples.
        memcpy(out, in, 8 * sizeof(uint16_t));
        out[2] = in[3];
        out[3] = in[2];
        in += 8;
        out += 8;
    }
    return 0;
}
/**
 * Scalar variant: each frame is written in three pieces -- samples 0-1 are
 * copied, samples 2 and 3 are stored exchanged, samples 4-7 are copied.
 *
 * The input length is frame_count * repack->base_src_size bytes and is
 * walked in steps of eight 16-bit samples.
 *
 * @return always 0
 */
int repack_8ch_swap23_2(struct audio_repack *repack,
        const uint8_t *bsrc, uint32_t frame_count)
{
    const uint32_t total_bytes = frame_count * repack->base_src_size;
    const uint16_t *in = reinterpret_cast<const uint16_t *>(bsrc);
    const uint16_t *const in_end =
        reinterpret_cast<const uint16_t *>(bsrc + total_bytes);
    uint16_t *out = reinterpret_cast<uint16_t *>(repack->packet_buffer);

    while (in != in_end) {
        memcpy(out, in, 2 * sizeof(uint16_t));          // samples 0-1
        out[2] = in[3];                                 // exchanged pair
        out[3] = in[2];
        memcpy(out + 4, in + 4, 4 * sizeof(uint16_t));  // samples 4-7
        in += 8;
        out += 8;
    }
    return 0;
}
/**
 * Baseline scalar variant: copy the whole input with a single memcpy, then
 * make a second pass that stores samples 2 and 3 of every frame exchanged.
 *
 * The input length is frame_count * repack->base_src_size bytes and the
 * second pass walks it in steps of eight 16-bit samples.
 *
 * @return always 0
 */
int repack_8ch_swap23(struct audio_repack *repack,
        const uint8_t *bsrc, uint32_t frame_count)
{
    const uint32_t total_bytes = frame_count * repack->base_src_size;

    // Pass 1: bulk copy of every sample.
    memcpy(repack->packet_buffer, bsrc, total_bytes);

    // Pass 2: patch the exchanged pair in each frame.
    const uint16_t *in = reinterpret_cast<const uint16_t *>(bsrc);
    const uint16_t *const in_end =
        reinterpret_cast<const uint16_t *>(bsrc + total_bytes);
    uint16_t *out = reinterpret_cast<uint16_t *>(repack->packet_buffer);
    while (in != in_end) {
        out[2] = in[3];
        out[3] = in[2];
        in += 8;
        out += 8;
    }
    return 0;
}
/**
 * Print the eight 16-bit samples of frame number idx as hexadecimal values.
 *
 * @param buf buffer of consecutive frames, eight 16-bit samples each
 * @param idx zero-based frame index to print
 * @param s   label printed in front of the line
 * @return 0 on success, -1 if printf reported an output error
 */
int print(const uint8_t *buf, int idx, const char* s) {
    // View the selected frame as eight 16-bit samples.
    const uint16_t *frame = reinterpret_cast<const uint16_t *>(buf) + 8 * idx;
    const int written = printf("%6s %4d: %02X %02X %02X %02X %02X %02X %02X %02X\n",
        s, idx,
        frame[0], frame[1], frame[2], frame[3],
        frame[4], frame[5], frame[6], frame[7]);
    // The function is declared int, so it must return a value on every path.
    return written < 0 ? -1 : 0;
}
/**
 * Run one repack routine, time it, and print a few output frames.
 *
 * @param repack      state passed through to func; its packet_buffer is
 *                    printed after the call
 * @param data        source frames handed to func
 * @param func        repack routine under test
 * @param frame_count number of frames in data
 * @param s           label used for every printed line
 * @return the status returned by func
 */
int test(struct audio_repack *repack, const uint8_t *data, audio_repack_func_t func, int frame_count, const char* s) {
    // steady_clock is monotonic, so the interval cannot be distorted by
    // wall-clock adjustments.
    const auto start = std::chrono::steady_clock::now();
    const int status = (*func)(repack, data, static_cast<uint32_t>(frame_count));
    const auto end = std::chrono::steady_clock::now();

    // Spot-check frames near both ends of the buffer; skip any index the
    // caller's frame_count does not cover so nothing is read out of range.
    static const int sample_frames[] = { 0, 2, 798, 800 };
    for (const int idx : sample_frames) {
        if (idx < frame_count) {
            print(repack->packet_buffer, idx, s);
        }
    }

    // count() is a 64-bit value; print it with a matching conversion.
    const long long elapsed_ns = static_cast<long long>(
        std::chrono::duration_cast<std::chrono::nanoseconds>(end - start).count());
    printf("%6s: %lld ns\n", s, elapsed_ns);
    return status;
}
/**
 * Benchmark driver: fills an input buffer with identical frames, runs every
 * repack variant over it and prints sample frames plus the elapsed time.
 */
int main() {
    // Samples 2 and 3 are stored exchanged, so a correct repack routine
    // produces 0 1 2 3 4 5 6 7 for every frame.
    const uint16_t base[8] = { 0, 1, 3, 2, 4, 5, 6, 7 };

    const int frame = 801;
    // Zeroed slack after the last frame, so a vector routine that touches a
    // full register beyond the tail still stays inside the buffer.
    const int pad_frames = 8;

    // Plain uint16_t arrays keep the frames contiguous (16 bytes apart);
    // alignas(32) provides the alignment needed by aligned SSE/AVX accesses.
    alignas(32) uint16_t input[8 * (frame + pad_frames)] = { 0 };
    for (int i = 0; i < frame; ++i) {
        memcpy(input + 8 * i, base, sizeof(base));
    }
    const uint8_t *const src = reinterpret_cast<const uint8_t *>(input);
    print(src, 0, "input");
    print(src, 2, "input");
    print(src, 798, "input");
    print(src, 800, "input");

    struct audio_repack repack = {};
    repack.base_src_size = 8 * (16 / 8);  // 8 samples of 16 bits each
    repack.base_dst_size = 8 * (16 / 8);
    repack.packet_size = repack.base_dst_size;

    alignas(32) uint16_t output[8 * (frame + pad_frames)] = { 0 };
    repack.packet_buffer = reinterpret_cast<uint8_t *>(output);

    test(&repack, src, repack_8ch_swap23, frame, "test1");
    test(&repack, src, repack_8ch_swap23_2, frame, "test2");
    test(&repack, src, repack_8ch_swap23_3, frame, "test3");
    test(&repack, src, repack_8ch_swap23_sse2, frame, "sse2");
#ifdef AVX
    test(&repack, src, repack_8ch_swap23_avx2, frame, "avx2");
#endif
    return 0;
}
I2luY2x1ZGUgPHN0ZGlvLmg+CiNpbmNsdWRlIDxzdGRpbnQuaD4KI2luY2x1ZGUgPHN0cmluZy5oPgoKI2luY2x1ZGUgPGNocm9ubz4KCiNpbmNsdWRlIDx4bW1pbnRyaW4uaD4KI2luY2x1ZGUgPGVtbWludHJpbi5oPgojaW5jbHVkZSA8aW1taW50cmluLmg+CgpzdHJ1Y3QgYXVkaW9fcmVwYWNrOwoKdHlwZWRlZiBpbnQgKCphdWRpb19yZXBhY2tfZnVuY190KShzdHJ1Y3QgYXVkaW9fcmVwYWNrICosCgkJY29uc3QgdWludDhfdCAqLCB1aW50MzJfdCk7CgpzdHJ1Y3QgYXVkaW9fcmVwYWNrIHsKCXVpbnQ4X3QgICAgICAgICAgICAgKnBhY2tldF9idWZmZXI7Cgl1aW50MzJfdCAgICAgICAgICAgIHBhY2tldF9zaXplOwoKCXVpbnQzMl90ICAgICAgICAgICAgYmFzZV9zcmNfc2l6ZTsKCXVpbnQzMl90ICAgICAgICAgICAgYmFzZV9kc3Rfc2l6ZTsKCglhdWRpb19yZXBhY2tfZnVuY190IHJlcGFja19mdW5jOwp9OwoKCiNpZmRlZiBBVlgKaW50IHJlcGFja184Y2hfc3dhcDIzX2F2eDIoc3RydWN0IGF1ZGlvX3JlcGFjayAqcmVwYWNrLAoJCWNvbnN0IHVpbnQ4X3QgKmJzcmMsIHVpbnQzMl90IGZyYW1lX2NvdW50KQp7Cgljb25zdCB1aW50MzJfdCBzaXplID0gZnJhbWVfY291bnQgKiByZXBhY2stPmJhc2Vfc3JjX3NpemU7CgoJY29uc3QgdWludDMyX3QgaGFsZl9mcmFtZV9jb3VudCA9IGZyYW1lX2NvdW50IC8gMjsKCWNvbnN0IF9fbTI1NmkgKnNyYyA9IChfX20yNTZpICopYnNyYzsKCWNvbnN0IF9fbTI1NmkgKmVzcmMgPSBzcmMgKyBoYWxmX2ZyYW1lX2NvdW50OwoJX19tMjU2aSAqZHN0ID0gKF9fbTI1NmkgKilyZXBhY2stPnBhY2tldF9idWZmZXI7Cglmb3IgKDsgc3JjICE9IGVzcmM7ICsrc3JjLCArK2RzdCkgewoJCV9fbTI1NmkgdGFyZ2V0ID0gX21tMjU2X2xvYWRfc2kyNTYoc3JjKTsKCQlfX20yNTZpIGJ1ZiA9IF9tbTI1Nl9zaHVmZmxlbG9fZXBpMTYodGFyZ2V0LCAweEI0KTsKCQlfbW0yNTZfc3RvcmVfc2kyNTYoZHN0LCBidWYpOwoJfQoJaWYgKGZyYW1lX2NvdW50ICUgMiA9PSAxKSB7CgkJbWVtY3B5KGRzdCwgc3JjLCAxNik7CgkJZHN0WzJdID0gc3JjWzNdOwoJCWRzdFszXSA9IHNyY1syXTsKCX0KCglyZXR1cm4gMDsKfQojZW5kaWYKCmludCByZXBhY2tfOGNoX3N3YXAyM19zc2UyKHN0cnVjdCBhdWRpb19yZXBhY2sgKnJlcGFjaywKCQljb25zdCB1aW50OF90ICpic3JjLCB1aW50MzJfdCBmcmFtZV9jb3VudCkKewoJY29uc3QgdWludDMyX3Qgc2l6ZSA9IGZyYW1lX2NvdW50ICogcmVwYWNrLT5iYXNlX3NyY19zaXplOwoKCWNvbnN0IF9fbTEyOGkgKnNyYyA9IChfX20xMjhpICopYnNyYzsKCWNvbnN0IF9fbTEyOGkgKmVzcmMgPSBzcmMgKyBmcmFtZV9jb3VudDsKCV9fbTEyOGkgKmRzdCA9IChfX20xMjhpICopcmVwYWNrLT5wYWNrZXRfYnVmZmVyOwoJZm9yICg7IHNyYyAhPSBlc3JjOyArK3NyYywgKytkc3QpIHsKCQlfX20xMjhpIHRhcmdldCA9IF9tbV9sb2FkX3NpMTI4KHNyYyk7CgkJX19tMTI4aSBidWYg
PSBfbW1fc2h1ZmZsZWxvX2VwaTE2KHRhcmdldCwgMHhCNCk7CgkJX21tX3N0b3JlX3NpMTI4KGRzdCwgYnVmKTsKCX0KCglyZXR1cm4gMDsKfQoKaW50IHJlcGFja184Y2hfc3dhcDIzXzMoc3RydWN0IGF1ZGlvX3JlcGFjayAqcmVwYWNrLAoJCWNvbnN0IHVpbnQ4X3QgKmJzcmMsIHVpbnQzMl90IGZyYW1lX2NvdW50KQp7Cgljb25zdCB1aW50MzJfdCBzaXplID0gZnJhbWVfY291bnQgKiByZXBhY2stPmJhc2Vfc3JjX3NpemU7CgoJY29uc3QgdWludDE2X3QgKmVzcmMgPSAodWludDE2X3QgKikoYnNyYyArIHNpemUpOwoJdWludDE2X3QgKmRzdCA9ICh1aW50MTZfdCAqKXJlcGFjay0+cGFja2V0X2J1ZmZlcjsKCWZvciAoY29uc3QgdWludDE2X3QgKnNyYyA9ICh1aW50MTZfdCAqKWJzcmM7IHNyYyAhPSBlc3JjOyBzcmMgKz0gOCwgZHN0ICs9IDgpIHsKCQltZW1jcHkoZHN0LCBzcmMsIDE2KTsKCQlkc3RbMl0gPSBzcmNbM107CgkJZHN0WzNdID0gc3JjWzJdOwoJfQoKCXJldHVybiAwOwp9CgppbnQgcmVwYWNrXzhjaF9zd2FwMjNfMihzdHJ1Y3QgYXVkaW9fcmVwYWNrICpyZXBhY2ssCgkJY29uc3QgdWludDhfdCAqYnNyYywgdWludDMyX3QgZnJhbWVfY291bnQpCnsKCWNvbnN0IHVpbnQzMl90IHNpemUgPSBmcmFtZV9jb3VudCAqIHJlcGFjay0+YmFzZV9zcmNfc2l6ZTsKCgljb25zdCB1aW50MTZfdCAqZXNyYyA9ICh1aW50MTZfdCAqKShic3JjICsgc2l6ZSk7Cgl1aW50MTZfdCAqZHN0ID0gKHVpbnQxNl90ICopcmVwYWNrLT5wYWNrZXRfYnVmZmVyOwoJZm9yIChjb25zdCB1aW50MTZfdCAqc3JjID0gKHVpbnQxNl90ICopYnNyYzsgc3JjICE9IGVzcmM7IHNyYyArPSA4LCBkc3QgKz0gOCkgewoJCW1lbWNweShkc3QsIHNyYywgNCk7CgkJZHN0WzJdID0gc3JjWzNdOwoJCWRzdFszXSA9IHNyY1syXTsKCQltZW1jcHkoZHN0ICsgNCwgc3JjICsgNCwgOCk7Cgl9CgoJcmV0dXJuIDA7Cn0KCmludCByZXBhY2tfOGNoX3N3YXAyMyhzdHJ1Y3QgYXVkaW9fcmVwYWNrICpyZXBhY2ssCgkJY29uc3QgdWludDhfdCAqYnNyYywgdWludDMyX3QgZnJhbWVfY291bnQpCnsKCWNvbnN0IHVpbnQzMl90IHNpemUgPSBmcmFtZV9jb3VudCAqIHJlcGFjay0+YmFzZV9zcmNfc2l6ZTsKCgltZW1jcHkocmVwYWNrLT5wYWNrZXRfYnVmZmVyLCBic3JjLCBzaXplKTsKCgljb25zdCB1aW50MTZfdCAqZXNyYyA9ICh1aW50MTZfdCAqKShic3JjICsgc2l6ZSk7Cgl1aW50MTZfdCAqZHN0ID0gKHVpbnQxNl90ICopcmVwYWNrLT5wYWNrZXRfYnVmZmVyOwoJZm9yIChjb25zdCB1aW50MTZfdCAqc3JjID0gKHVpbnQxNl90ICopYnNyYzsgc3JjICE9IGVzcmM7IHNyYyArPSA4LCBkc3QgKz0gOCkgewoJCWRzdFsyXSA9IHNyY1szXTsKCQlkc3RbM10gPSBzcmNbMl07Cgl9CgoJcmV0dXJuIDA7Cn0KCmludCBwcmludChjb25zdCB1aW50OF90ICpidWYsIGludCBpZHgsIGNvbnN0IGNoYXIqIHMpIHsKCWNvbnN0IHVpbnQxNl90ICpkYXRhID0gKHVpbnQxNl90ICop
YnVmOwoJcHJpbnRmKCIlNnMgJTRkOiAlMDJYICUwMlggJTAyWCAlMDJYICUwMlggJTAyWCAlMDJYICUwMlhcbiIsCgkJcywgaWR4LAoJCWRhdGFbOCAqIGlkeCArIDBdLCBkYXRhWzggKiBpZHggKyAxXSwgZGF0YVs4ICogaWR4ICsgMl0sIGRhdGFbOCAqIGlkeCArIDNdLAoJCWRhdGFbOCAqIGlkeCArIDRdLCBkYXRhWzggKiBpZHggKyA1XSwgZGF0YVs4ICogaWR4ICsgNl0sIGRhdGFbOCAqIGlkeCArIDddKTsKfQoKaW50IHRlc3Qoc3RydWN0IGF1ZGlvX3JlcGFjayAqcmVwYWNrLCBjb25zdCB1aW50OF90ICpkYXRhLCBhdWRpb19yZXBhY2tfZnVuY190IGZ1bmMsIGludCBmcmFtZV9jb3VudCwgY29uc3QgY2hhciogcykgewoJYXV0byBzdGFydCA9IHN0ZDo6Y2hyb25vOjpzeXN0ZW1fY2xvY2s6Om5vdygpOwoJKCpmdW5jKShyZXBhY2ssIGRhdGEsIGZyYW1lX2NvdW50KTsKCWF1dG8gZW5kID0gc3RkOjpjaHJvbm86OnN5c3RlbV9jbG9jazo6bm93KCk7CgoJcHJpbnQocmVwYWNrLT5wYWNrZXRfYnVmZmVyLCAwLCBzKTsKCXByaW50KHJlcGFjay0+cGFja2V0X2J1ZmZlciwgMiwgcyk7CglwcmludChyZXBhY2stPnBhY2tldF9idWZmZXIsIDc5OCwgcyk7CglwcmludChyZXBhY2stPnBhY2tldF9idWZmZXIsIDgwMCwgcyk7CgoJcHJpbnRmKCIlNnM6ICVkIG5zXG4iLCBzLCBzdGQ6OmNocm9ubzo6ZHVyYXRpb25fY2FzdDxzdGQ6OmNocm9ubzo6bmFub3NlY29uZHM+KGVuZCAtIHN0YXJ0KS5jb3VudCgpKTsKfQoKaW50IG1haW4oKSB7Cgl1aW50MTZfdCBiYXNlWzhdID0geyAwLCAxLCAzLCAyLCA0LCA1LCA2LCA3IH07CgkKCWNvbnN0IGludCBmcmFtZSA9IDgwMTsKI2lmZGVmIEFWWAoJdWludDE2X3QgX19hdHRyaWJ1dGVfXygodmVjdG9yX3NpemUoMzIpKSkgaW5wdXRbOCAqIGZyYW1lXSA9IHsgMCB9OwojZWxzZQoJdWludDE2X3QgaW5wdXRbOCAqIGZyYW1lXSA9IHsgMCB9OwojZW5kaWYKCWZvciAoaW50IGkgPSAwOyBpIDwgZnJhbWU7ICsraSkgewoJCW1lbWNweShpbnB1dCArIDggKiBpLCBiYXNlLCAxNik7Cgl9CglwcmludCgodWludDhfdCAqKWlucHV0LCAwLCAiaW5wdXQiKTsKCXByaW50KCh1aW50OF90ICopaW5wdXQsIDIsICJpbnB1dCIpOwoJcHJpbnQoKHVpbnQ4X3QgKilpbnB1dCwgNzk4LCAiaW5wdXQiKTsKCXByaW50KCh1aW50OF90ICopaW5wdXQsIDgwMCwgImlucHV0Iik7CgkKCXN0cnVjdCBhdWRpb19yZXBhY2sgcmVwYWNrID0geyAwIH07CglyZXBhY2suYmFzZV9zcmNfc2l6ZSA9IDggKiAoMTYgLyA4KTsKCXJlcGFjay5iYXNlX2RzdF9zaXplID0gOCAqICgxNiAvIDgpOwoJcmVwYWNrLnBhY2tldF9zaXplID0gcmVwYWNrLmJhc2VfZHN0X3NpemU7CgojaWZkZWYgQVZYCgl1aW50MTZfdCBfX2F0dHJpYnV0ZV9fKCh2ZWN0b3Jfc2l6ZSgzMikpKSBvdXRwdXRbOCAqIGZyYW1lXSA9IHsgMCB9OwojZWxzZQoJdWludDE2X3Qgb3V0cHV0WzggKiBmcmFtZV0gPSB7IDAgfTsKI2VuZGlmCglyZXBhY2sucGFja2V0X2J1
ZmZlciA9ICh1aW50OF90ICopb3V0cHV0OwoKCXRlc3QoJnJlcGFjaywgKHVpbnQ4X3QgKilpbnB1dCwgcmVwYWNrXzhjaF9zd2FwMjMsIGZyYW1lLCAidGVzdDEiKTsKCXRlc3QoJnJlcGFjaywgKHVpbnQ4X3QgKilpbnB1dCwgcmVwYWNrXzhjaF9zd2FwMjNfMiwgZnJhbWUsICJ0ZXN0MiIpOwoJdGVzdCgmcmVwYWNrLCAodWludDhfdCAqKWlucHV0LCByZXBhY2tfOGNoX3N3YXAyM18zLCBmcmFtZSwgInRlc3QzIik7Cgl0ZXN0KCZyZXBhY2ssICh1aW50OF90ICopaW5wdXQsIHJlcGFja184Y2hfc3dhcDIzX3NzZTIsIGZyYW1lLCAic3NlMiIpOwojaWZkZWYgQVZYCgl0ZXN0KCZyZXBhY2ssICh1aW50OF90ICopaW5wdXQsIHJlcGFja184Y2hfc3dhcDIzX2F2eDIsIGZyYW1lLCAiYXZ4MiIpOwojZW5kaWYKCglyZXR1cm4gMDsKfQ==