#include <algorithm>
#include <cassert>
#include <chrono>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <map>
#include <random>
#include <unordered_map>
#include <vector>
#define BENCHMARK 1
template<class PairMap>
inline float DotPairsMapped(const PairMap& lhs, const PairMap& rhs) {
float dot = 0;
for(auto& pair : lhs) {
auto pos = rhs.find(pair.first);
if(pos != rhs.end())
dot += pair.second * pos->second;
}
return dot;
}
template<class PairContainerSorted>
inline float DotPairsSorted(const PairContainerSorted& lhs, const PairContainerSorted& rhs) {
float dot = 0;
for(auto pLhs = lhs.begin(), pRhs = rhs.begin(), endLhs = lhs.end(), endRhs = rhs.end(); pRhs != endRhs;) {
for(; pLhs != endLhs && pLhs->first < pRhs->first; ++pLhs);
if(pLhs == endLhs)
break;
for(; pRhs != endRhs && pRhs->first < pLhs->first; ++pRhs);
if(pRhs == endRhs)
break;
if(pLhs->first == pRhs->first) {
dot += pLhs->second * pRhs->second;
++pLhs;
++pRhs;
}
}
return dot;
}
template<class PairContainer>
inline float LenSqrPairs(const PairContainer& vec) {
float dot = 0;
for(auto& pair : vec)
dot += pair.second * pair.second;
return dot;
}
struct SparseVector {
explicit SparseVector(size_t d, const std::pair<size_t, float>* begin, const std::pair<size_t, float>* end) : d(d) {
size_t k(end - begin);
idx.resize(k);
val.resize(k);
for(size_t i = 0; begin != end; ++i, ++begin) {
idx[i] = begin->first;
val[i] = begin->second;
}
}
bool IsValid() const {
if(idx.size() != val.size())
return false;
for(size_t i = 1; i < idx.size(); ++i) {
if(idx[i - 1] >= idx[i])
return false;
}
return true;
}
static float CosineDistance(const SparseVector& lhs, const SparseVector& rhs);
size_t d;
std::vector<size_t> idx;
std::vector<float> val;
};
inline float Dot(const std::map<size_t, float>& lhs, const std::map<size_t, float>& rhs) { return DotPairsSorted(lhs, rhs); }
inline float LenSqr(const std::map<size_t, float>& vec) { return LenSqrPairs(vec); }
inline float Dot(const std::unordered_map<size_t, float>& lhs, const std::unordered_map<size_t, float>& rhs) { return DotPairsMapped(lhs, rhs); }
inline float LenSqr(const std::unordered_map<size_t, float>& vec) { return LenSqrPairs(vec); }
inline float Dot(const std::vector<std::pair<size_t, float>>& lhs, const std::vector<std::pair<size_t, float>>& rhs) { return DotPairsSorted(lhs, rhs); }
inline float LenSqr(const std::vector<std::pair<size_t, float>>& vec) { return LenSqrPairs(vec); }
inline float Dot(const SparseVector& lhs, const SparseVector& rhs) {
float dot = 0;
if(!lhs.idx.empty() && !rhs.idx.empty()) {
const size_t *itIdxLhs = &lhs.idx[0], *endIdxLhs = &lhs.idx[0] + lhs.idx.size();
const float *itValLhs = &lhs.val[0], *endValLhs = &lhs.val[0] + lhs.val.size();
const size_t *itIdxRhs = &rhs.idx[0], *endIdxRhs = &rhs.idx[0] + rhs.idx.size();
const float *itValRhs = &rhs.val[0], *endValRhs = &rhs.val[0] + rhs.val.size();
while(itIdxRhs != endIdxRhs) {
for(; itIdxLhs < endIdxLhs && *itIdxLhs < *itIdxRhs; ++itIdxLhs, ++itValLhs);
if(itIdxLhs == endIdxLhs)
break;
for(; itIdxRhs < endIdxRhs && *itIdxRhs < *itIdxLhs; ++itIdxRhs, ++itValRhs);
if(itIdxRhs == endIdxRhs)
break;
if(*itIdxLhs == *itIdxRhs) {
dot += (*itValLhs) * (*itValRhs);
++itIdxLhs;
++itValLhs;
++itIdxRhs;
++itValRhs;
}
}
}
return dot;
}
inline float LenSqr(const SparseVector& vec) {
float dot = 0;
for(float v : vec.val)
dot += v * v;
return dot;
}
template<class Vector>
inline float CosineDistance(const Vector& lhs, const Vector& rhs) {
return Dot(lhs, rhs) / std::sqrt(LenSqr(lhs) * LenSqr(rhs));
}
inline float SparseVector::CosineDistance(const SparseVector& lhs, const SparseVector& rhs) {
float dotLR = 0, dotLL = 0, dotRR = 0;
if(!lhs.idx.empty() && !rhs.idx.empty()) {
const size_t *itIdxLhs = &lhs.idx[0], *endIdxLhs = &lhs.idx[0] + lhs.idx.size();
const float *itValLhs = &lhs.val[0], *endValLhs = &lhs.val[0] + lhs.val.size();
const size_t *itIdxRhs = &rhs.idx[0], *endIdxRhs = &rhs.idx[0] + rhs.idx.size();
const float *itValRhs = &rhs.val[0], *endValRhs = &rhs.val[0] + rhs.val.size();
while(itIdxRhs != endIdxRhs) {
for(; itIdxLhs < endIdxLhs && *itIdxLhs < *itIdxRhs; ++itIdxLhs, ++itValLhs)
dotLL += (*itValLhs) * (*itValLhs);
if(itIdxLhs == endIdxLhs) {
for(; itIdxRhs < endIdxRhs; ++itIdxRhs, ++itValRhs)
dotRR += (*itValRhs) * (*itValRhs);
break;
}
for(; itIdxRhs < endIdxRhs && *itIdxRhs < *itIdxLhs; ++itIdxRhs, ++itValRhs)
dotRR += (*itValRhs) * (*itValRhs);
if(itIdxRhs == endIdxRhs) {
for(; itIdxLhs < endIdxLhs; ++itIdxLhs, ++itValLhs)
dotLL += (*itValLhs) * (*itValLhs);
break;
}
if(*itIdxLhs == *itIdxRhs) {
dotLR += (*itValLhs) * (*itValRhs);
dotLL += (*itValLhs) * (*itValLhs);
dotRR += (*itValRhs) * (*itValRhs);
++itIdxLhs;
++itValLhs;
++itIdxRhs;
++itValRhs;
}
}
}
float lenSqrL = LenSqr(lhs), lenSqrR = LenSqr(rhs);
return dotLR / std::sqrt(dotLL * dotRR);
}
template<class RandomIt, class UniformRandomNumberGenerator>
void shuffleK(RandomIt first, RandomIt last, typename std::iterator_traits<RandomIt>::difference_type k, UniformRandomNumberGenerator&& g) {
typedef typename std::iterator_traits<RandomIt>::difference_type diff_t;
typedef typename std::make_unsigned<diff_t>::type udiff_t;
typedef typename std::uniform_int_distribution<udiff_t> distr_t;
typedef typename distr_t::param_type param_t;
distr_t D;
diff_t n = std::min(k, last - first);
for(diff_t i = n - 1; i > 0; --i) {
using std::swap;
swap(first[i], first[D(g, param_t(0, i))]);
}
}
#if BENCHMARK
struct Benchmark {
double vectorDot, vectorLen, vectorCos;
double mapDot, mapLen, mapCos;
double unorderedDot, unorderedLen, unorderedCos;
double classDot, classLen, classCos;
void min(Benchmark other) {
vectorDot = std::min(vectorDot, other.vectorDot);
vectorLen = std::min(vectorLen, other.vectorLen);
vectorCos = std::min(vectorCos, other.vectorCos);
mapDot = std::min(mapDot, other.mapDot);
mapLen = std::min(mapLen, other.mapLen);
mapCos = std::min(mapCos, other.mapCos);
unorderedDot = std::min(unorderedDot, other.unorderedDot);
unorderedLen = std::min(unorderedLen, other.unorderedLen);
unorderedCos = std::min(unorderedCos, other.unorderedCos);
classDot = std::min(classDot, other.classDot);
classLen = std::min(classLen, other.classLen);
classCos = std::min(classCos, other.classCos);
}
void max(Benchmark other) {
vectorDot = std::max(vectorDot, other.vectorDot);
vectorLen = std::max(vectorLen, other.vectorLen);
vectorCos = std::max(vectorCos, other.vectorCos);
mapDot = std::max(mapDot, other.mapDot);
mapLen = std::max(mapLen, other.mapLen);
mapCos = std::max(mapCos, other.mapCos);
unorderedDot = std::max(unorderedDot, other.unorderedDot);
unorderedLen = std::max(unorderedLen, other.unorderedLen);
unorderedCos = std::max(unorderedCos, other.unorderedCos);
classDot = std::max(classDot, other.classDot);
classLen = std::max(classLen, other.classLen);
classCos = std::max(classCos, other.classCos);
}
void add(Benchmark other) {
vectorDot += other.vectorDot;
vectorLen += other.vectorLen;
vectorCos += other.vectorCos;
mapDot += other.mapDot;
mapLen += other.mapLen;
mapCos += other.mapCos;
unorderedDot += other.unorderedDot;
unorderedLen += other.unorderedLen;
unorderedCos += other.unorderedCos;
classDot += other.classDot;
classLen += other.classLen;
classCos += other.classCos;
}
void sub(Benchmark other) {
vectorDot -= other.vectorDot;
vectorLen -= other.vectorLen;
vectorCos -= other.vectorCos;
mapDot -= other.mapDot;
mapLen -= other.mapLen;
mapCos -= other.mapCos;
unorderedDot -= other.unorderedDot;
unorderedLen -= other.unorderedLen;
unorderedCos -= other.unorderedCos;
classDot -= other.classDot;
classLen -= other.classLen;
classCos -= other.classCos;
}
void mul(double factor) {
vectorDot *= factor;
vectorLen *= factor;
vectorCos *= factor;
mapDot *= factor;
mapLen *= factor;
mapCos *= factor;
unorderedDot *= factor;
unorderedLen *= factor;
unorderedCos *= factor;
classDot *= factor;
classLen *= factor;
classCos *= factor;
}
template<class OStream>
friend OStream& operator<< (OStream& os, const Benchmark& o) {
os << " pairs t(dot): " << o.vectorDot << ", t(len2): " << o.vectorLen << ", t(cos): " << o.vectorCos << std::endl;
os << " map'd / pairs dot: " << o.mapDot / o.vectorDot << ", len2: " << o.mapLen / o.vectorLen << ", cos: " << o.mapCos / o.vectorCos << std::endl;
os << " hashm / pairs dot: " << o.unorderedDot / o.vectorDot << ", len2: " << o.unorderedLen / o.vectorLen << ", cos: " << o.unorderedCos / o.vectorCos << std::endl;
os << " class / pairs dot: " << o.classDot / o.vectorDot << ", len2: " << o.classLen / o.vectorLen << ", cos: " << o.classCos / o.vectorCos << std::endl;
return os;
}
};
#endif
void RunBenchmark(size_t D, float Ratio, size_t RunCount, std::mt19937& g);
int main() {
const size_t D = 169647;
const float Ratio = 0.05f;
const size_t RunCount = 10;
//std::random_device rd;
//std::mt19937 g(rd());
std::mt19937 g(0xC0FFEE);
for(int last = 1, cur = 1; cur <= 5; last += cur, std::swap(last, cur))
RunBenchmark(D, Ratio * cur, RunCount, g);
return 0;
}
volatile float sideEffectOnly = 0;
void RunBenchmark(size_t D, float Ratio, size_t RunCount, std::mt19937& g) {
std::vector<size_t> indices;
{
indices.resize(D);
for(size_t i = 0; i < D; ++i)
indices[i] = i;
}
// Generates two sparse random vectors with on average ratio * D entries
auto GenerateInput = [&indices, &g](float ratio, std::vector<std::pair<size_t, float>>& a, std::vector<std::pair<size_t, float>>& b) {
if(indices.size() == 0) {
a.resize(0);
b.resize(0);
return;
}
// determine number of entries
typedef std::normal_distribution<float> norm_t;
norm_t nrand;
norm_t::param_type nparam(ratio * (float)indices.size(), 3.0f);
float sizeA = std::min((float)indices.size(), std::max(1.0f, nrand(g, nparam)));
float sizeB = std::min((float)indices.size(), std::max(1.0f, nrand(g, nparam)));
a.resize((size_t)sizeA);
b.resize((size_t)sizeB);
// determine values of entries
typedef std::uniform_real_distribution<float> uniform_t;
uniform_t urand;
shuffleK(&indices[0], &indices[0] + indices.size(), indices.size(), g);
for(size_t i = 0; i < a.size(); ++i)
a[i] = std::make_pair(indices[i], urand(g, uniform_t::param_type(-13.37f, 13.37f)));
shuffleK(&indices[0], &indices[0] + indices.size(), indices.size(), g);
for(size_t i = 0; i < b.size(); ++i)
b[i] = std::make_pair(indices[i], urand(g, uniform_t::param_type(-13.37f, 13.37f)));
auto pred = [](std::pair<size_t, float> lhs, std::pair<size_t, float> rhs) -> bool { return lhs.first < rhs.first; };
std::sort(&a[0], &a[0] + a.size(), pred);
std::sort(&b[0], &b[0] + b.size(), pred);
};
#if BENCHMARK
const size_t BenchmarkDot = 3, BenchmarkLen = 10, BenchmarkCos = 3;
std::vector<Benchmark> benchmarks;
typedef std::chrono::high_resolution_clock Clock;
auto intervalAsSeconds = [](Clock::time_point t0, Clock::time_point t1) -> double { return 1e-9 * std::chrono::duration_cast<std::chrono::nanoseconds>(t1 - t0).count(); };
double tMapDotSpecialMin = 0, tMapDotSpecialAvg = 0, tMapDotSpecialMax = 0;
double tClassCosSpecialMin = 0, tClassCosSpecialAvg = 0, tClassCosSpecialMax = 0;
#endif
std::vector<std::pair<size_t, float>> a, b;
a.reserve(indices.size());
b.reserve(indices.size());
for(size_t run = 0; run < RunCount; ++run) {
GenerateInput(Ratio, a, b);
// ============= MAP ==============
std::map<size_t, float> mA, mB;
for(auto pair : a) mA.insert(pair);
for(auto pair : b) mB.insert(pair);
assert(mA.size() == a.size() && mB.size() == b.size());
// warm-up
const float mapDot = Dot(mA, mB);
const float mapCos = CosineDistance(mA, mB);
#if BENCHMARK
std::chrono::time_point<Clock> t0, t1;
Benchmark benchmark = {};
t0 = Clock::now();
for(size_t i = 0; i < BenchmarkDot; ++i)
sideEffectOnly += Dot(mA, mB);
t1 = Clock::now();
benchmark.mapDot = intervalAsSeconds(t0, t1);
t0 = Clock::now();
for(size_t i = 0; i < BenchmarkLen; ++i) {
sideEffectOnly += LenSqr(mA);
sideEffectOnly += LenSqr(mB);
}
t1 = Clock::now();
benchmark.mapLen = intervalAsSeconds(t0, t1);
t0 = Clock::now();
for(size_t i = 0; i < BenchmarkCos; ++i)
sideEffectOnly += CosineDistance(mA, mB);
t1 = Clock::now();
benchmark.mapCos = intervalAsSeconds(t0, t1);
#endif
// ============= UNORDERED MAP ==============
std::unordered_map<size_t, float> uA, uB;
for(auto pair : a) uA.insert(pair);
for(auto pair : b) uB.insert(pair);
assert(uA.size() == a.size() && uB.size() == b.size());
const float unorderedDot = Dot(uA, uB);
const float unorderedCos = CosineDistance(uA, uB);
#if BENCHMARK
t0 = Clock::now();
for(size_t i = 0; i < BenchmarkDot; ++i)
sideEffectOnly += Dot(uA, uB);
t1 = Clock::now();
benchmark.unorderedDot = intervalAsSeconds(t0, t1);
t0 = Clock::now();
for(size_t i = 0; i < BenchmarkLen; ++i) {
sideEffectOnly += LenSqr(uA);
sideEffectOnly += LenSqr(uB);
}
t1 = Clock::now();
benchmark.unorderedLen = intervalAsSeconds(t0, t1);
t0 = Clock::now();
for(size_t i = 0; i < BenchmarkCos; ++i)
sideEffectOnly += CosineDistance(uA, uB);
t1 = Clock::now();
benchmark.unorderedCos = intervalAsSeconds(t0, t1);
#endif
// ============= VECTOR OF PAIRS ==============
const float vectorDot = Dot(a, b);
const float vectorCos = CosineDistance(a, b);
#if BENCHMARK
t0 = Clock::now();
for(size_t i = 0; i < BenchmarkDot; ++i)
sideEffectOnly += Dot(a, b);
t1 = Clock::now();
benchmark.vectorDot = intervalAsSeconds(t0, t1);
t0 = Clock::now();
for(size_t i = 0; i < BenchmarkLen; ++i) {
sideEffectOnly += LenSqr(a);
sideEffectOnly += LenSqr(b);
}
t1 = Clock::now();
benchmark.vectorLen = intervalAsSeconds(t0, t1);
t0 = Clock::now();
for(size_t i = 0; i < BenchmarkCos; ++i)
sideEffectOnly += CosineDistance(a, b);
t1 = Clock::now();
benchmark.vectorCos = intervalAsSeconds(t0, t1);
#endif
// ============= PAIRS OF VECTORS ==============
SparseVector vA(indices.size(), &a[0], &a[0] + a.size());
SparseVector vB(indices.size(), &b[0], &b[0] + b.size());
const float classDot = Dot(vA, vB);
const float classCos = CosineDistance(vA, vB);
#if BENCHMARK
t0 = Clock::now();
for(size_t i = 0; i < BenchmarkDot; ++i)
sideEffectOnly += Dot(vA, vB);
t1 = Clock::now();
benchmark.classDot = intervalAsSeconds(t0, t1);
t0 = Clock::now();
for(size_t i = 0; i < BenchmarkLen; ++i) {
sideEffectOnly += LenSqr(vA);
sideEffectOnly += LenSqr(vB);
}
t1 = Clock::now();
benchmark.classLen = intervalAsSeconds(t0, t1);
t0 = Clock::now();
for(size_t i = 0; i < BenchmarkCos; ++i)
sideEffectOnly += CosineDistance(vA, vB);
t1 = Clock::now();
benchmark.classCos = intervalAsSeconds(t0, t1);
#endif
// ============= NAIVE MAP ==============
const float mapDotSpecial = DotPairsMapped(mA, mB);
// ======== DON'T TOUCH IT TWICE ========
const float classCosSpecial = SparseVector::CosineDistance(vA, vB);
#if BENCHMARK
// naive dot product with map
t0 = Clock::now();
for(size_t i = 0; i < BenchmarkDot; ++i)
sideEffectOnly += DotPairsMapped(mA, mB);
t1 = Clock::now();
double tMapDotSpecial = intervalAsSeconds(t0, t1);
// cosine distance computation touching each element just once
t0 = Clock::now();
for(size_t i = 0; i < BenchmarkCos; ++i)
sideEffectOnly += SparseVector::CosineDistance(vA, vB);
t1 = Clock::now();
double tClassCosSpecial = intervalAsSeconds(t0, t1);
// accumulate specials
if(run == 0) {
tMapDotSpecialMin = tMapDotSpecial;
tMapDotSpecialAvg = tMapDotSpecial;
tMapDotSpecialMax = tMapDotSpecial;
tClassCosSpecialMin = tClassCosSpecial;
tClassCosSpecialAvg = tClassCosSpecial;
tClassCosSpecialMax = tClassCosSpecial;
} else {
tMapDotSpecialMin = std::min(tMapDotSpecialMin, tMapDotSpecial);
tMapDotSpecialAvg += tMapDotSpecial;
tMapDotSpecialMax = std::max(tMapDotSpecialMax, tMapDotSpecial);
tClassCosSpecialMin = std::min(tClassCosSpecialMin, tClassCosSpecial);
tClassCosSpecialAvg += tClassCosSpecial;
tClassCosSpecialMax = std::max(tClassCosSpecialMax, tClassCosSpecial);
}
#endif
auto equal = [](float lhs, float rhs) -> bool { return (rhs - lhs) * (rhs - lhs) < 1e-4f * std::max(std::abs(lhs), std::abs(rhs)); };
if(!equal(vectorDot, mapDot) || !equal(vectorCos, mapCos) ||
!equal(vectorDot, unorderedDot) || !equal(vectorCos, unorderedCos) ||
!equal(vectorDot, classDot) || !equal(vectorCos, classCos) ||
!equal(vectorDot, mapDotSpecial) ||
!equal(vectorCos, classCosSpecial)) {
std::cout << "Validation failure on run " << run << std::endl;
std::cout << " pairs: dot(a, b): " << Dot(a, b) << ", cos(a, b): " << CosineDistance(a, b) << std::endl;
std::cout << " map'd: dot(a, b): " << Dot(mA, mB) << ", cos(a, b): " << CosineDistance(mA, mB) << std::endl;
std::cout << " hashm: dot(a, b): " << Dot(uA, uB) << ", cos(a, b): " << CosineDistance(uA, uB) << std::endl;
std::cout << " class: dot(a, b): " << Dot(vA, vB) << ", cos(a, b): " << CosineDistance(vA, vB) << std::endl;
std::cout << " specl map'd: dot(a, b): " << mapDotSpecial << std::endl;
std::cout << " specl class: cos(a, b): " << classCosSpecial << std::endl;
#if BENCHMARK
} else {
benchmarks.push_back(benchmark);
#endif
}
}
#if BENCHMARK
if(RunCount > 0 && !benchmarks.empty()) {
Benchmark min = benchmarks[0], avg = min, max = min;
for(size_t i = 1; i < benchmarks.size(); ++i) {
min.min(benchmarks[i]);
avg.add(benchmarks[i]);
max.max(benchmarks[i]);
}
if(RunCount <= 2) {
avg.mul(1.0 / RunCount);
tClassCosSpecialAvg /= RunCount;
} else {
avg.sub(min);
avg.sub(max);
avg.mul(1.0 / (RunCount - 2));
tMapDotSpecialAvg = (tMapDotSpecialAvg - tMapDotSpecialMin - tMapDotSpecialMax) / (RunCount - 2);
tClassCosSpecialAvg = (tClassCosSpecialAvg - tClassCosSpecialMin - tClassCosSpecialMax) / (RunCount - 2);
}
std::cout << " *** BENCHMARK *** " << std::endl;
std::cout << "number of runs: " << RunCount << ", dimensions: " << D << " and non-zero ratio: " << Ratio << std::endl;
std::cout << "number of dot products per run: " << BenchmarkDot << std::endl;
std::cout << "number of lengthsqares per run: " << BenchmarkLen << std::endl;
std::cout << "number of cosine dists per run: " << BenchmarkCos << std::endl;
std::cout << std::endl;
#if 0
// min(x) / min(base) is not a good measure...
std::cout << "min" << std::endl;
std::cout << min;
std::cout << " specl / pairs dot (naive map): " << (tMapDotSpecialMin / min.vectorDot) << std::endl;
std::cout << " specl / pairs cos (optimised): " << (tClassCosSpecialMin / min.vectorCos) << std::endl;
std::cout << std::endl;
#endif
// avg(x) / avg(base)
std::cout << "avg" << std::endl;
std::cout << avg;
std::cout << " specl / pairs dot (naive map): " << (tMapDotSpecialAvg / avg.vectorDot) << std::endl;
std::cout << " specl / pairs cos (optimised): " << (tClassCosSpecialAvg / avg.vectorCos) << std::endl;
std::cout << std::endl;
#if 0
// max(x) / max(base) is not a good measure...
std::cout << "max" << std::endl;
std::cout << max;
std::cout << " specl / pairs dot (naive map): " << (tMapDotSpecialMax / max.vectorDot) << std::endl;
std::cout << " specl / pairs cos (optimised): " << (tClassCosSpecialMax / max.vectorCos) << std::endl;
std::cout << std::endl;
#endif
}
#endif
}