#include <cstdint>
#include <iostream>
#include <tuple>

// Overflow-free modular multiplication of 64-bit unsigned integers.
//
// The product a*b is expanded over 32-bit halves,
//     a*b = lo*lo + 2^32 * (hi_a*lo_b + lo_a*hi_b) + 2^64 * hi*hi,
// and every partial term is reduced modulo c before it is combined, so no
// intermediate value ever needs more than 64 bits.

// Low and high 32-bit halves of a 64-bit value, in that order.
using split_t = std::tuple<uint32_t, uint32_t>;

// Splits `a` into (low 32 bits, high 32 bits).
split_t split(uint64_t a) {
    constexpr uint64_t low_mask = 0xFFFFFFFFu;
    return split_t(static_cast<uint32_t>(a & low_mask),
                   static_cast<uint32_t>(a >> 32));
}

// The four pairwise results of combining the halves of two split values.
using cross_t = std::tuple<uint64_t, uint64_t, uint64_t, uint64_t>;

// Applies `op` to every (half of lhs, half of rhs) pair.
//
// Returns, in order:
//   op(lo_lhs, lo_rhs), op(hi_lhs, lo_rhs), op(lo_lhs, hi_rhs), op(hi_lhs, hi_rhs)
template <typename Lambda>
cross_t cross(split_t lhs, split_t rhs, Lambda&& op) {
    const uint32_t lhs_lo = std::get<0>(lhs);
    const uint32_t lhs_hi = std::get<1>(lhs);
    const uint32_t rhs_lo = std::get<0>(rhs);
    const uint32_t rhs_hi = std::get<1>(rhs);
    return std::make_tuple(op(lhs_lo, rhs_lo),
                           op(lhs_hi, rhs_lo),
                           op(lhs_lo, rhs_hi),
                           op(lhs_hi, rhs_hi));
}

// Returns (a + b) mod c without overflowing.
// Preconditions: c != 0, a < c and b < c.
static uint64_t a_plus_b_mod_c(uint64_t a, uint64_t b, uint64_t c) {
    // a + b >= c  <=>  a >= c - b; comparing against the gap avoids ever
    // forming a + b when it would not fit in 64 bits.
    const uint64_t gap = c - b;
    return (a >= gap) ? a - gap : a + b;
}

// Returns (a * 2^k) mod c by doubling k times.
// Precondition: c != 0. Unlike a plain `a <<= 1; a %= c;` loop this is valid
// for every 64-bit a and c, including values with the top bit set.
uint64_t a_times_2_k_mod_c(uint64_t a, unsigned k, uint64_t c) {
    a %= c;
    for (unsigned i = 0; i < k; ++i) {
        a = a_plus_b_mod_c(a, a, c);
    }
    return a;
}

// Returns (a * b) mod c for arbitrary 64-bit a, b and c.
// Precondition: c != 0 (the reduction divides by c).
uint64_t a_times_b_mod_c(uint64_t a, uint64_t b, uint64_t c) {
    // Reduce the operands first; the identity holds either way, but this
    // keeps the partial products as small as possible.
    a %= c;
    b %= c;

    // Each 32x32-bit product fits exactly in 64 bits.
    const cross_t partial = cross(
        split(a), split(b),
        [](uint32_t lhs, uint32_t rhs) -> uint64_t {
            return static_cast<uint64_t>(lhs) * static_cast<uint64_t>(rhs);
        });

    const uint64_t weight_0 = std::get<0>(partial) % c;
    // The two middle terms share the weight 2^32, so add them (mod c) first.
    const uint64_t weight_32_sum =
        a_plus_b_mod_c(std::get<1>(partial) % c, std::get<2>(partial) % c, c);
    const uint64_t weight_32 = a_times_2_k_mod_c(weight_32_sum, 32, c);
    const uint64_t weight_64 = a_times_2_k_mod_c(std::get<3>(partial), 64, c);

    // Every term is now < c, so the modular adds below cannot overflow.
    return a_plus_b_mod_c(a_plus_b_mod_c(weight_0, weight_32, c), weight_64, c);
}

// Define MODMUL_OMIT_MAIN to build only the arithmetic helpers, e.g. when this
// file is pulled into a test translation unit that supplies its own main().
#ifndef MODMUL_OMIT_MAIN
int main() {
    // The first operand used to be written as 19010000000000000000, which is
    // larger than UINT64_MAX and therefore not a valid 64-bit literal; one
    // trailing zero has been dropped so that the value is representable.
    const uint64_t retval = a_times_b_mod_c(1901000000000000000ULL,
                                            1011000000000000ULL,
                                            1231231231231211ULL);
    std::cout << retval << "\n";
}
#endif