#include <stdint.h>
#include <tuple>
#include <iostream>

// Two 32-bit halves of a 64-bit value: element 0 holds the low word,
// element 1 holds the high word.
using split_t = std::tuple< uint32_t, uint32_t >;

// Breaks a 64-bit value into its low and high 32-bit words.
split_t split( uint64_t a )
{
    const uint32_t low_word  = static_cast<uint32_t>( a );         // bits 0..31
    const uint32_t high_word = static_cast<uint32_t>( a >> 32 );   // bits 32..63
    return split_t{ low_word, high_word };
}

// Results of applying an operation to every pairing of the halves of two
// split values, in the order:
//   (lhs[0], rhs[0]), (lhs[1], rhs[0]), (lhs[0], rhs[1]), (lhs[1], rhs[1]).
using cross_t = std::tuple< uint64_t, uint64_t, uint64_t, uint64_t >;

// Applies `op` to all four combinations of one element from `lhs` and one
// element from `rhs`, and returns the four results as a cross_t.
template<typename Lambda>
cross_t cross( split_t lhs, split_t rhs, Lambda&& op )
{
    uint32_t& lhs_first  = std::get<0>(lhs);
    uint32_t& lhs_second = std::get<1>(lhs);
    uint32_t& rhs_first  = std::get<0>(rhs);
    uint32_t& rhs_second = std::get<1>(rhs);

    const uint64_t first_first   = op(lhs_first,  rhs_first);
    const uint64_t second_first  = op(lhs_second, rhs_first);
    const uint64_t first_second  = op(lhs_first,  rhs_second);
    const uint64_t second_second = op(lhs_second, rhs_second);

    return cross_t{ first_first, second_first, first_second, second_second };
}

// Returns (a * 2^k) mod c by doubling `a` modulo `c`, k times.
//
// Works for any non-zero modulus, including values of `c` with the high bit
// set: the doubling step never forms 2*a directly, so it cannot wrap around
// 64 bits. `c` must not be zero (the initial reduction divides by it).
uint64_t a_times_2_k_mod_c( uint64_t a, unsigned k, uint64_t c )
{
    a %= c;
    for (unsigned i = 0; i < k; ++i)
    {
        // Invariant: a < c, hence gap >= 1.
        // 2*a >= c  <=>  a >= c - a; in that case 2*a - c == a - (c - a),
        // which is computed without overflow. Otherwise 2*a < c fits as-is.
        const uint64_t gap = c - a;
        a = ( a >= gap ) ? a - gap : a + a;
    }
    return a;
}

// Returns (a * b) mod c without needing an integer wider than 64 bits.
//
// Both factors are reduced mod c and split into 32-bit halves, so that
//   a * b = lo*lo + 2^32 * (hi*lo + lo*hi) + 2^64 * (hi*hi)
// where every partial product fits in a uint64_t. Each term is reduced mod c
// (a_times_2_k_mod_c applies the power-of-two factor) and the terms are then
// combined with a modular addition that cannot wrap around 64 bits.
//
// `c` must not be zero (the reductions divide by it). The power-of-two scaling
// is delegated to a_times_2_k_mod_c, so any restriction that helper places on
// `c` applies here as well.
uint64_t a_times_b_mod_c( uint64_t a, uint64_t b, uint64_t c )
{
    // ensure a and b are < c:
    a %= c;
    b %= c;

    auto Z = cross( split(a), split(b), [](uint32_t lhs, uint32_t rhs)->uint64_t {
        return static_cast<uint64_t>(lhs) * static_cast<uint64_t>(rhs);
    } );

    uint64_t to_the_0;
    uint64_t to_the_32_a;
    uint64_t to_the_32_b;
    uint64_t to_the_64;
    std::tie( to_the_0, to_the_32_a, to_the_32_b, to_the_64 ) = Z;

    // (x + y) mod c for x, y < c. A plain x + y can exceed 64 bits (for
    // example to_the_0 alone can be close to 2^64), so compare against the
    // remaining headroom c - y instead of forming the sum first.
    auto add_mod = [c]( uint64_t x, uint64_t y )->uint64_t {
        const uint64_t headroom = c - y;   // >= 1 because y < c
        return ( x >= headroom ) ? x - headroom : x + y;
    };

    const uint64_t low_term  = to_the_0 % c;
    const uint64_t mid_term  = a_times_2_k_mod_c( add_mod( to_the_32_a % c, to_the_32_b % c ), 32, c );
    const uint64_t high_term = a_times_2_k_mod_c( to_the_64, 64, c );

    return add_mod( add_mod( low_term, mid_term ), high_term );
}

// Demo: prints (19010000000000000000 * 1011000000000000) mod 1231231231231211.
int main()
{
    const uint64_t modulus = 1231231231231211ULL;

    // 19010000000000000000 is larger than the maximum uint64_t value
    // (18446744073709551615), so it cannot be written as a 64-bit literal:
    // depending on the compiler it is rejected or silently truncated.
    // a_times_b_mod_c reduces its factors mod c first, so passing the residue
    // gives the same mathematical result:
    //   19010000000000000000 = 15439 * 1231231231231211 + 1021021021333371
    const uint64_t first_factor_residue = 1021021021333371ULL;
    const uint64_t second_factor = 1011000000000000ULL;

    uint64_t retval = a_times_b_mod_c( first_factor_residue, second_factor, modulus );
    std::cout << retval << "\n";
}