fork download
  1.  
  2. #define _USE_MATH_DEFINES
  3. #include <math.h>
  4. #include <stdint.h>
  5. #include <stdio.h>
  6. #include <string>
  7. #include <complex>
  8. #include <algorithm>
  9. #include <memory>
  10. #include <iostream>
  11. using std::cout;
  12. using std::endl;
  13. using std::complex;
  14.  
  15. #if USE_CHRONO
  16. #include <chrono>
  17. #else
  18. #include <time.h>
  19. #endif
  20.  
  21. #include <sstream>
  22.  
  23. namespace patch
  24. {
  25. template < typename T > std::string to_string( const T& n )
  26. {
  27. std::ostringstream stm ;
  28. stm << n ;
  29. return stm.str() ;
  30. }
  31. }
  32.  
  33. namespace Mini_Pi{
  34. ////////////////////////////////////////////////////////////////////////////////
  35. ////////////////////////////////////////////////////////////////////////////////
  36. // Debug Printing
  37. uint32_t rand_word(){
  38. return (uint32_t)(
  39. (rand() & 0xf) << 0 |
  40. (rand() & 0xf) << 8 |
  41. (rand() & 0xf) << 16 |
  42. (rand() & 0xf) << 24
  43. ) % 1000000000;
  44. }
  45. complex<double> rand_complex(){
  46. double r = (double)(rand() % 1000);
  47. double i = (double)(rand() % 1000);
  48. return complex<double>(r, i);
  49. }
  50. void print_fft(complex<double> *T, int k){
  51. int length = 1 << k;
  52. for (int c = 0; c < length; c++){
  53. std::cout << T[c].real() << " + " << T[c].imag() << "i" << " , ";
  54. }
  55. std::cout << std::endl;
  56. }
  57. void print_word(uint32_t word){
  58. char str[] = "012345678";
  59. for (int c = 8; c >= 0; c--){
  60. str[c] = word % 10 + '0';
  61. word /= 10;
  62. }
  63. std::cout << str;
  64. }
  65. void print_words(uint32_t *T, size_t L){
  66. while (L-- > 0){
  67. print_word(T[L]);
  68. }
  69. std::cout << std::endl;
  70. }
  71. ////////////////////////////////////////////////////////////////////////////////
  72. ////////////////////////////////////////////////////////////////////////////////
  73. // Helpers
  74. double wall_clock(){
  75. // Get the clock in seconds.
  76. #if USE_CHRONO
  77. auto ratio_object = std::chrono::high_resolution_clock::period();
  78. double ratio = (double)ratio_object.num / ratio_object.den;
  79. return std::chrono::high_resolution_clock::now().time_since_epoch().count() * ratio;
  80. #else
  81. return (double)clock() / CLOCKS_PER_SEC;
  82. #endif
  83. }
  84. void dump_to_file(const char *path, const std::string &str){
  85. // Dump a string to a file.
  86.  
  87. FILE *file = fopen(path, "wb");
  88. if (file == NULL)
  89. throw "Cannot Create File";
  90.  
  91. fwrite(str.c_str(), 1, str.size(), file);
  92. fclose(file);
  93. }
  94. ////////////////////////////////////////////////////////////////////////////////
  95. ////////////////////////////////////////////////////////////////////////////////
  96. // Fast Fourier Transform
  97. #ifndef M_PI
  98. #define M_PI 3.14159265358979323846
  99. #endif
  100. void fft_forward(complex<double> *T, int k){
  101. // Fast Fourier Transform
  102. // This function performs a forward FFT of length 2^k.
  103.  
  104. // This is a Decimation-in-Frequency (DIF) FFT.
  105. // The frequency domain output is in bit-reversed order.
  106.  
  107. //Parameters:
  108. // - T - Pointer to array.
  109. // - k - 2^k is the size of the transform
  110.  
  111. if (k == 0)
  112. return;
  113.  
  114. size_t length = (size_t)1 << k;
  115. size_t half_length = length / 2;
  116.  
  117. double omega = 2 * M_PI / length;
  118.  
  119. // Perform FFT reduction into two halves.
  120. for (size_t c = 0; c < half_length; c++){
  121. // Generate Twiddle Factor
  122. double angle = omega * c;
  123. auto twiddle_factor = complex<double>(cos(angle), sin(angle));
  124.  
  125. // Grab elements
  126. complex<double> a = T[c];
  127. complex<double> b = T[c + half_length];
  128.  
  129. // Perform butterfly
  130. T[c ] = a + b;
  131. T[c + half_length] = (a - b) * twiddle_factor;
  132. }
  133.  
  134. // Recursively perform FFT on lower elements.
  135. fft_forward(T, k - 1);
  136.  
  137. // Recursively perform FFT on upper elements.
  138. fft_forward(T + half_length, k - 1);
  139. }
  140. void fft_inverse(complex<double> *T, int k){
  141. // Fast Fourier Transform
  142. // This function performs an inverse FFT of length 2^k.
  143.  
  144. // This is a Decimation-in-Time (DIT) FFT.
  145. // The frequency domain input must be in bit-reversed order.
  146.  
  147. //Parameters:
  148. // - T - Pointer to array.
  149. // - k - 2^k is the size of the transform
  150.  
  151. if (k == 0)
  152. return;
  153.  
  154. size_t length = (size_t)1 << k;
  155. size_t half_length = length / 2;
  156.  
  157. double omega = -2 * M_PI / length;
  158.  
  159. // Recursively perform FFT on lower elements.
  160. fft_inverse(T, k - 1);
  161.  
  162. // Recursively perform FFT on upper elements.
  163. fft_inverse(T + half_length, k - 1);
  164.  
  165. // Perform FFT reduction into two halves.
  166. for (size_t c = 0; c < half_length; c++){
  167. // Generate Twiddle Factor
  168. double angle = omega * c;
  169. auto twiddle_factor = complex<double>(cos(angle), sin(angle));
  170.  
  171. // Grab elements
  172. complex<double> a = T[c];
  173. complex<double> b = T[c + half_length] * twiddle_factor;
  174.  
  175. // Perform butterfly
  176. T[c ] = a + b;
  177. T[c + half_length] = a - b;
  178. }
  179. }
  180. void fft_pointwise(complex<double> *T, const complex<double> *A, int k){
  181. // Performs pointwise multiplications of two FFT arrays.
  182.  
  183. //Parameters:
  184. // - T - Pointer to array.
  185. // - k - 2^k is the size of the transform
  186.  
  187. size_t length = (size_t)1 << k;
  188. for (size_t c = 0; c < length; c++){
  189. T[c] = T[c] * A[c];
  190. }
  191. }
  192. void int_to_fft(complex<double> *T, int k, const uint32_t *A, size_t AL){
  193. // Convert word array into FFT array. Put 3 decimal digits per complex point.
  194.  
  195. //Parameters:
  196. // - T - FFT array
  197. // - k - 2^k is the size of the transform
  198. // - A - word array
  199. // - AL - length of word array
  200.  
  201. size_t fft_length = (size_t)1 << k;
  202. complex<double> *Tstop = T + fft_length;
  203.  
  204. // Since there are 9 digits per word and we want to put 3 digits per
  205. // point, the length of the transform must be at least 3 times the word
  206. // length of the input.
  207. if (fft_length < 3*AL)
  208. throw "FFT length is too small.";
  209.  
  210. // Convert
  211. for (size_t c = 0; c < AL; c++){
  212. uint32_t word = A[c];
  213.  
  214. *T++ = word % 1000;
  215. word /= 1000;
  216. *T++ = word % 1000;
  217. word /= 1000;
  218. *T++ = word;
  219. }
  220.  
  221. // Pad the rest with zeros.
  222. while (T < Tstop)
  223. *T++ = complex<double>(0, 0);
  224. }
  225. void fft_to_int(const complex<double> *T, int k, uint32_t *A, size_t AL){
  226. // Convert FFT array back to word array. Perform rounding and carryout.
  227.  
  228. //Parameters:
  229. // - T - FFT array
  230. // - A - word array
  231. // - AL - length of word array
  232.  
  233. // Compute Scaling Factor
  234. size_t fft_length = (size_t)1 << k;
  235. double scale = 1. / fft_length;
  236.  
  237. // Since there are 9 digits per word and we want to put 3 digits per
  238. // point, the length of the transform must be at least 3 times the word
  239. // length of the input.
  240. if (fft_length < 3*AL)
  241. throw "FFT length is too small.";
  242.  
  243. // Round and carry out.
  244. uint64_t carry = 0;
  245. for (size_t c = 0; c < AL; c++){
  246. double f_point;
  247. uint64_t i_point;
  248. uint32_t word;
  249.  
  250. f_point = (*T++).real() * scale; // Load and scale
  251. i_point = (uint64_t)(f_point + 0.5); // Round
  252. carry += i_point; // Add to carry
  253. word = carry % 1000; // Get 3 digits.
  254. carry /= 1000;
  255.  
  256. f_point = (*T++).real() * scale; // Load and scale
  257. i_point = (uint64_t)(f_point + 0.5); // Round
  258. carry += i_point; // Add to carry
  259. word += (carry % 1000) * 1000; // Get 3 digits.
  260. carry /= 1000;
  261.  
  262. f_point = (*T++).real() * scale; // Load and scale
  263. i_point = (uint64_t)(f_point + 0.5); // Round
  264. carry += i_point; // Add to carry
  265. word += (carry % 1000) * 1000000; // Get 3 digits.
  266. carry /= 1000;
  267.  
  268. A[c] = word;
  269. }
  270. }
  271. ////////////////////////////////////////////////////////////////////////////////
  272. ////////////////////////////////////////////////////////////////////////////////
  273. ////////////////////////////////////////////////////////////////////////////////
  274. ////////////////////////////////////////////////////////////////////////////////
  275. // BigFloat object
  276. /* This is the big floating-point object. It represents an arbitrary precision
  277.  * floating-point number.
  278.  *
  279.  * Its numerical value is equal to:
  280.  *
  281.  * word = 10^9
  282.  * word^exp * (T[0] + T[1]*word + T[2]*word^2 + ... + T[L - 1]*word^(L - 1))
  283.  *
  284.  * T is an array of 32-bit integers. Each integer stores 9 decimal digits
  285.  * and must always have a value in the range [0, 999999999].
  286.  *
  287.  * T[L - 1] must never be zero.
  288.  *
  289.  * The number is positive when (sign = true) and negative when (sign = false).
  290.  * Zero is represented as (sign = true) and (L = 0).
  291.  *
  292.  */
  293. #define YCL_BIGFLOAT_EXTRA_PRECISION 2
  294. class BigFloat{
  295. public:
  296. BigFloat(BigFloat &&x);
  297. BigFloat& operator=(BigFloat &&x);
  298.  
  299. BigFloat();
  300. BigFloat(uint32_t x, bool sign = true);
  301.  
  302. std::string to_string (size_t digits = 0) const;
  303. std::string to_string_sci(size_t digits = 0) const;
  304. size_t get_precision() const;
  305. int64_t get_exponent() const;
  306. uint32_t word_at(int64_t mag) const;
  307.  
  308. void negate();
  309. BigFloat mul(uint32_t x) const;
  310. BigFloat add(const BigFloat &x, size_t p = 0) const;
  311. BigFloat sub(const BigFloat &x, size_t p = 0) const;
  312. BigFloat mul(const BigFloat &x, size_t p = 0) const;
  313. BigFloat rcp(size_t p) const;
  314. BigFloat div(const BigFloat &x, size_t p) const;
  315.  
  316. private:
  317. bool sign; // true = positive or zero, false = negative
  318. int64_t exp; // Exponent
  319. size_t L; // Length
  320. std::unique_ptr<uint32_t[]> T;
  321.  
  322. // Internal helpers
  323. int64_t to_string_trimmed(size_t digits, std::string &str) const;
  324. int ucmp(const BigFloat &x) const;
  325. BigFloat uadd(const BigFloat &x, size_t p) const;
  326. BigFloat usub(const BigFloat &x, size_t p) const;
  327.  
  328. friend BigFloat invsqrt(uint32_t x, size_t p);
  329. };
  330. BigFloat invsqrt(uint32_t x, size_t p);
  331. ////////////////////////////////////////////////////////////////////////////////
  332. // Move operators
  333. BigFloat::BigFloat(BigFloat &&x)
  334. : sign(x.sign)
  335. , exp(x.exp)
  336. , L(x.L)
  337. , T(std::move(x.T))
  338. {
  339. x.sign = true;
  340. x.exp = 0;
  341. x.L = 0;
  342. }
  343. BigFloat& BigFloat::operator=(BigFloat &&x){
  344. sign = x.sign;
  345. exp = x.exp;
  346. L = x.L;
  347. T = std::move(x.T);
  348.  
  349. x.sign = true;
  350. x.exp = 0;
  351. x.L = 0;
  352.  
  353. return *this;
  354. }
  355. ////////////////////////////////////////////////////////////////////////////////
  356. // Constructors
  357. BigFloat::BigFloat()
  358. : sign(true)
  359. , exp(0)
  360. , L(0)
  361. {}
  362. BigFloat::BigFloat(uint32_t x, bool sign_)
  363. : sign(true)
  364. , exp(0)
  365. , L(1)
  366. {
  367. // Construct a BigFloat with a value of x and the specified sign.
  368.  
  369. if (x == 0){
  370. L = 0;
  371. return;
  372. }
  373. sign = sign_;
  374.  
  375. T = std::unique_ptr<uint32_t[]>(new uint32_t[1]);
  376. T[0] = x;
  377. }
  378. ////////////////////////////////////////////////////////////////////////////////
  379. // String Conversion
  380. int64_t BigFloat::to_string_trimmed(size_t digits, std::string &str) const{
  381. // Converts this object to a string with "digits" significant figures.
  382.  
  383. // After calling this function, the following expression is equal to the
  384. // numeric value of this object. (after truncation of precision)
  385. // str + " * 10^" + (return value)
  386.  
  387. if (L == 0){
  388. str = "0";
  389. return 0;
  390. }
  391.  
  392. // Collect operands
  393. int64_t exponent = exp;
  394. size_t length = L;
  395. uint32_t *ptr = T.get();
  396.  
  397. if (digits == 0){
  398. // Use all digits.
  399. digits = length * 9;
  400. }else{
  401. // Truncate precision
  402. size_t words = (digits + 17) / 9;
  403. if (words < length){
  404. size_t chop = length - words;
  405. exponent += chop;
  406. length = words;
  407. ptr += chop;
  408. }
  409. }
  410. exponent *= 9;
  411.  
  412. // Build string
  413. char buffer[] = "012345678";
  414. str.clear();
  415. size_t c = length;
  416. while (c-- > 0){
  417. uint32_t word = ptr[c];
  418. for (int i = 8; i >= 0; i--){
  419. buffer[i] = word % 10 + '0';
  420. word /= 10;
  421. }
  422. str += buffer;
  423. }
  424.  
  425. // Count leading zeros
  426. size_t leading_zeros = 0;
  427. while (str[leading_zeros] == '0')
  428. leading_zeros++;
  429. digits += leading_zeros;
  430.  
  431. // Truncate
  432. if (digits < str.size()){
  433. exponent += str.size() - digits;
  434. str.resize(digits);
  435. }
  436.  
  437. return exponent;
  438. }
  439. std::string BigFloat::to_string(size_t digits) const{
  440. // Convert this number to a string. Auto-select format type.
  441. if (L == 0)
  442. return "0.";
  443.  
  444. int64_t mag = exp + L;
  445.  
  446. // Use scientific notation if out of range.
  447. if (mag > 1 || mag < 0)
  448. return to_string_sci();
  449.  
  450. // Convert
  451. std::string str;
  452. int64_t exponent = to_string_trimmed(digits, str);
  453.  
  454. // Less than 1
  455. if (mag == 0){
  456. if (sign)
  457. return std::string("0.") + str;
  458. else
  459. return std::string("-0.") + str;
  460. }
  461.  
  462. // Get a string with the digits before the decimal place.
  463. std::string before_decimal = patch::to_string((long long)T[L - 1]);
  464.  
  465. // Nothing after the decimal place.
  466. if (exponent >= 0){
  467. if (sign){
  468. return before_decimal + ".";
  469. }else{
  470. return std::string("-") + before_decimal + ".";
  471. }
  472. }
  473.  
  474. // Get digits after the decimal place.
  475. std::string after_decimal = str.substr((size_t)(str.size() + exponent), (size_t)-exponent);
  476.  
  477. if (sign){
  478. return before_decimal + "." + after_decimal;
  479. }else{
  480. return std::string("-") + before_decimal + "." + after_decimal;
  481. }
  482. }
  483. std::string BigFloat::to_string_sci(size_t digits) const{
  484. // Convert to string in scientific notation.
  485. if (L == 0)
  486. return "0.";
  487.  
  488. // Convert
  489. std::string str;
  490. int64_t exponent = to_string_trimmed(digits, str);
  491.  
  492. // Strip leading zeros.
  493. {
  494. size_t leading_zeros = 0;
  495. while (str[leading_zeros] == '0')
  496. leading_zeros++;
  497. str = &str[leading_zeros];
  498. }
  499.  
  500. // Insert decimal place
  501. exponent += str.size() - 1;
  502. str = str.substr(0, 1) + "." + &str[1];
  503.  
  504. // Add exponent
  505. if (exponent != 0){
  506. str += " * 10^";
  507. str += patch::to_string(exponent);
  508. }
  509.  
  510. // Add sign
  511. if (!sign)
  512. str = std::string("-") + str;
  513.  
  514. return str;
  515. }
  516. ////////////////////////////////////////////////////////////////////////////////
  517. // Getters
  518. size_t BigFloat::get_precision() const{
  519. // Returns the precision of the number in words.
  520. // Note that each word is 9 decimal digits.
  521. return L;
  522. }
  523. int64_t BigFloat::get_exponent() const{
  524. // Returns the exponent of the number in words.
  525. // Note that each word is 9 decimal digits.
  526. return exp;
  527. }
  528. uint32_t BigFloat::word_at(int64_t mag) const{
  529. // Returns the word at the mag'th digit place.
  530. // This is useful for additions where you need to access a specific "digit place"
  531. // of the operand without having to worry if it's out-of-bounds.
  532.  
  533. // This function is mathematically equal to:
  534. // (return value) = floor(this * (10^9)^-mag) % 10^9
  535.  
  536. if (mag < exp)
  537. return 0;
  538. if (mag >= exp + (int64_t)L)
  539. return 0;
  540. return T[(size_t)(mag - exp)];
  541. }
  542. int BigFloat::ucmp(const BigFloat &x) const{
  543. // Compare function that ignores the sign.
  544. // This is needed to determine which direction subtractions will go.
  545.  
  546. // Magnitude
  547. int64_t magA = exp + L;
  548. int64_t magB = x.exp + x.L;
  549. if (magA > magB)
  550. return 1;
  551. if (magA < magB)
  552. return -1;
  553.  
  554. // Compare
  555. int64_t mag = magA;
  556. while (mag >= exp || mag >= x.exp){
  557. uint32_t wordA = word_at(mag);
  558. uint32_t wordB = x.word_at(mag);
  559. if (wordA < wordB)
  560. return -1;
  561. if (wordA > wordB)
  562. return 1;
  563. mag--;
  564. }
  565. return 0;
  566. }
  567. ////////////////////////////////////////////////////////////////////////////////
  568. // Arithmetic
  569. void BigFloat::negate(){
  570. // Negate this number.
  571. if (L == 0)
  572. return;
  573. sign = !sign;
  574. }
  575. BigFloat BigFloat::mul(uint32_t x) const{
  576. // Multiply by a 32-bit unsigned integer.
  577.  
  578. if (L == 0 || x == 0)
  579. return BigFloat();
  580.  
  581. // Compute basic fields.
  582. BigFloat z;
  583. z.sign = sign;
  584. z.exp = exp;
  585. z.L = L;
  586.  
  587. // Allocate mantissa
  588. z.T = std::unique_ptr<uint32_t[]>(new uint32_t[z.L + 1]);
  589.  
  590. uint64_t carry = 0;
  591. for (size_t c = 0; c < L; c++){
  592. carry += (uint64_t)T[c] * x; // Multiply and add to carry
  593. z.T[c] = (uint32_t)(carry % 1000000000); // Store bottom 9 digits
  594. carry /= 1000000000; // Shift down the carry
  595. }
  596.  
  597. // Carry out
  598. if (carry != 0)
  599. z.T[z.L++] = (uint32_t)carry;
  600.  
  601. return z;
  602. }
  603. BigFloat BigFloat::uadd(const BigFloat &x, size_t p) const{
  604. // Perform addition ignoring the sign of the two operands.
  605.  
  606. // Magnitude
  607. int64_t magA = exp + L;
  608. int64_t magB = x.exp + x.L;
  609. int64_t top = std::max(magA, magB);
  610. int64_t bot = std::min(exp, x.exp);
  611.  
  612. // Target length
  613. int64_t TL = top - bot;
  614.  
  615. if (p == 0){
  616. // Default value. No trunction.
  617. p = (size_t)TL;
  618. }else{
  619. // Increase precision
  620. p += YCL_BIGFLOAT_EXTRA_PRECISION;
  621. }
  622.  
  623. // Perform precision truncation.
  624. if (TL > (int64_t)p){
  625. bot = top - p;
  626. TL = p;
  627. }
  628.  
  629. // Compute basic fields.
  630. BigFloat z;
  631. z.sign = sign;
  632. z.exp = bot;
  633. z.L = (uint32_t)TL;
  634.  
  635. // Allocate mantissa
  636. z.T = std::unique_ptr<uint32_t[]>(new uint32_t[z.L + 1]);
  637.  
  638. // Add
  639. uint32_t carry = 0;
  640. for (size_t c = 0; bot < top; bot++, c++){
  641. uint32_t word = word_at(bot) + x.word_at(bot) + carry;
  642. carry = 0;
  643. if (word >= 1000000000){
  644. word -= 1000000000;
  645. carry = 1;
  646. }
  647. z.T[c] = word;
  648. }
  649.  
  650. // Carry out
  651. if (carry != 0){
  652. z.T[z.L++] = 1;
  653. }
  654.  
  655. return z;
  656. }
  657. BigFloat BigFloat::usub(const BigFloat &x, size_t p) const{
  658. // Perform subtraction ignoring the sign of the two operands.
  659.  
  660. // "this" must be greater than or equal to x. Otherwise, the behavior
  661. // is undefined.
  662.  
  663. // Magnitude
  664. int64_t magA = exp + L;
  665. int64_t magB = x.exp + x.L;
  666. int64_t top = std::max(magA, magB);
  667. int64_t bot = std::min(exp, x.exp);
  668.  
  669. // Truncate precision
  670. int64_t TL = top - bot;
  671.  
  672. if (p == 0){
  673. // Default value. No trunction.
  674. p = (size_t)TL;
  675. }else{
  676. // Increase precision
  677. p += YCL_BIGFLOAT_EXTRA_PRECISION;
  678. }
  679.  
  680. if (TL > (int64_t)p){
  681. bot = top - p;
  682. TL = p;
  683. }
  684.  
  685. // Compute basic fields.
  686. BigFloat z;
  687. z.sign = sign;
  688. z.exp = bot;
  689. z.L = (uint32_t)TL;
  690.  
  691. // Allocate mantissa
  692. z.T = std::unique_ptr<uint32_t[]>(new uint32_t[z.L]);
  693.  
  694. // Subtract
  695. int32_t carry = 0;
  696. for (size_t c = 0; bot < top; bot++, c++){
  697. int32_t word = (int32_t)word_at(bot) - (int32_t)x.word_at(bot) - carry;
  698. carry = 0;
  699. if (word < 0){
  700. word += 1000000000;
  701. carry = 1;
  702. }
  703. z.T[c] = word;
  704. }
  705.  
  706. // Strip leading zeros
  707. while (z.L > 0 && z.T[z.L - 1] == 0)
  708. z.L--;
  709. if (z.L == 0){
  710. z.exp = 0;
  711. z.sign = true;
  712. z.T.reset();
  713. }
  714.  
  715. return z;
  716. }
  717. BigFloat BigFloat::add(const BigFloat &x, size_t p) const{
  718. // Addition
  719.  
  720. // The target precision is p.
  721. // If (p = 0), then no truncation is done. The entire operation is done
  722. // at maximum precision with no data loss.
  723.  
  724. // Same sign. Add.
  725. if (sign == x.sign)
  726. return uadd(x, p);
  727.  
  728. // this > x
  729. if (ucmp(x) > 0)
  730. return usub(x, p);
  731.  
  732. // this < x
  733. return x.usub(*this, p);
  734. }
  735. BigFloat BigFloat::sub(const BigFloat &x, size_t p) const{
  736. // Subtraction
  737.  
  738. // The target precision is p.
  739. // If (p = 0), then no truncation is done. The entire operation is done
  740. // at maximum precision with no data loss.
  741.  
  742. // Different sign. Add.
  743. if (sign != x.sign)
  744. return uadd(x, p);
  745.  
  746. // this > x
  747. if (ucmp(x) > 0)
  748. return usub(x, p);
  749.  
  750. // this < x
  751. BigFloat z = x.usub(*this, p);
  752. z.negate();
  753. return z;
  754. }
  755. BigFloat BigFloat::mul(const BigFloat &x, size_t p) const{
  756. // Multiplication
  757.  
  758. // The target precision is p.
  759. // If (p = 0), then no truncation is done. The entire operation is done
  760. // at maximum precision with no data loss.
  761.  
  762. // Either operand is zero.
  763. if (L == 0 || x.L == 0)
  764. return BigFloat();
  765.  
  766. if (p == 0){
  767. // Default value. No trunction.
  768. p = L + x.L;
  769. }else{
  770. // Increase precision
  771. p += YCL_BIGFLOAT_EXTRA_PRECISION;
  772. }
  773.  
  774. // Collect operands.
  775. int64_t Aexp = exp;
  776. int64_t Bexp = x.exp;
  777. size_t AL = L;
  778. size_t BL = x.L;
  779. uint32_t *AT = T.get();
  780. uint32_t *BT = x.T.get();
  781.  
  782. // Perform precision truncation.
  783. if (AL > p){
  784. size_t chop = AL - p;
  785. AL = p;
  786. Aexp += chop;
  787. AT += chop;
  788. }
  789. if (BL > p){
  790. size_t chop = BL - p;
  791. BL = p;
  792. Bexp += chop;
  793. BT += chop;
  794. }
  795.  
  796. // Compute basic fields.
  797. BigFloat z;
  798. z.sign = sign == z.sign; // Sign is positive if signs are equal.
  799. z.exp = Aexp + Bexp; // Add the exponents.
  800. z.L = AL + BL; // Add the lenghts for now. May need to correct later.
  801.  
  802. // Allocate mantissa
  803. z.T = std::unique_ptr<uint32_t[]>(new uint32_t[z.L]);
  804.  
  805. // Perform multiplication.
  806.  
  807. // Determine minimum FFT size.
  808. int k = 0;
  809. size_t length = 1;
  810. while (length < 3*z.L){
  811. length <<= 1;
  812. k++;
  813. }
  814.  
  815. // Allocate FFT arrays
  816. auto Ta = std::unique_ptr<complex<double>[]>(new complex<double>[length]);
  817. auto Tb = std::unique_ptr<complex<double>[]>(new complex<double>[length]);
  818.  
  819. // Perform a convolution using FFT.
  820. // Yeah, this is slow for small sizes, but it's asympotically optimal.
  821.  
  822. // 3 digits per point is small enough to not encounter round-off error
  823. // until a transform size of 2^30.
  824. // A transform length of 2^29 allows for the maximum product size to be
  825. // 2^29 * 3 = 1,610,612,736 decimal digits.
  826. if (k > 29)
  827. throw "FFT size limit exceeded.";
  828.  
  829. int_to_fft(Ta.get(), k, AT, AL); // Convert 1st operand
  830. int_to_fft(Tb.get(), k, BT, BL); // Convert 2nd operand
  831. fft_forward(Ta.get(), k); // Transform 1st operand
  832. fft_forward(Tb.get(), k); // Transform 2nd operand
  833. fft_pointwise(Ta.get(), Tb.get(), k); // Pointwise multiply
  834. fft_inverse(Ta.get(), k); // Perform inverse transform.
  835. fft_to_int(Ta.get(), k, z.T.get(), z.L); // Convert back to word array.
  836.  
  837. // Check top word and correct length.
  838. if (z.T[z.L - 1] == 0)
  839. z.L--;
  840.  
  841. return z;
  842. }
  843. BigFloat BigFloat::rcp(size_t p) const{
  844. // Compute reciprocal using Newton's Method.
  845.  
  846. // r1 = r0 - (r0 * x - 1) * r0
  847.  
  848. if (L == 0)
  849. throw "Divide by Zero";
  850.  
  851. // Collect operand
  852. int64_t Aexp = exp;
  853. size_t AL = L;
  854. uint32_t *AT = T.get();
  855.  
  856. // End of recursion. Generate starting point.
  857. if (p == 0){
  858. // Truncate precision to 3.
  859. p = 3;
  860. if (AL > p){
  861. size_t chop = AL - p;
  862. AL = p;
  863. Aexp += chop;
  864. AT += chop;
  865. }
  866.  
  867. // Convert number to floating-point.
  868. double val = AT[0];
  869. if (AL >= 2)
  870. val += AT[1] * 1000000000.;
  871. if (AL >= 3)
  872. val += AT[2] * 1000000000000000000.;
  873.  
  874. // Compute reciprocal.
  875. val = 1. / val;
  876. Aexp = -Aexp;
  877.  
  878. // Scale
  879. while (val < 1000000000.){
  880. val *= 1000000000.;
  881. Aexp--;
  882. }
  883.  
  884. // Rebuild a BigFloat.
  885. uint64_t val64 = (uint64_t)val;
  886.  
  887. BigFloat out;
  888. out.sign = sign;
  889.  
  890. out.T = std::unique_ptr<uint32_t[]>(new uint32_t[2]);
  891. out.T[0] = (uint32_t)(val64 % 1000000000);
  892. out.T[1] = (uint32_t)(val64 / 1000000000);
  893. out.L = 2;
  894. out.exp = Aexp;
  895.  
  896. return out;
  897. }
  898.  
  899. // Half the precision
  900. size_t s = p / 2 + 1;
  901. if (p == 1) s = 0;
  902. if (p == 2) s = 1;
  903.  
  904. // Recurse at half the precision
  905. BigFloat T = rcp(s);
  906.  
  907. // r1 = r0 - (r0 * x - 1) * r0
  908. return T.sub(this->mul(T, p).sub(BigFloat(1), p).mul(T, p), p);
  909. }
  910. BigFloat BigFloat::div(const BigFloat &x, size_t p) const{
  911. // Division
  912. return this->mul(x.rcp(p), p);
  913. }
  914. BigFloat invsqrt(uint32_t x, size_t p){
  915. // Compute inverse square root using Newton's Method.
  916.  
  917. // ( r0^2 * x - 1 )
  918. // r1 = r0 - (----------------) * r0
  919. // ( 2 )
  920.  
  921. if (x == 0)
  922. throw "Divide by Zero";
  923.  
  924. // End of recursion. Generate starting point.
  925. if (p == 0){
  926. double val = 1. / sqrt((double)x);
  927.  
  928. int64_t exponent = 0;
  929.  
  930. // Scale
  931. while (val < 1000000000.){
  932. val *= 1000000000.;
  933. exponent--;
  934. }
  935.  
  936. // Rebuild a BigFloat.
  937. uint64_t val64 = (uint64_t)val;
  938.  
  939. BigFloat out;
  940. out.sign = true;
  941.  
  942. out.T = std::unique_ptr<uint32_t[]>(new uint32_t[2]);
  943. out.T[0] = (uint32_t)(val64 % 1000000000);
  944. out.T[1] = (uint32_t)(val64 / 1000000000);
  945. out.L = 2;
  946. out.exp = exponent;
  947.  
  948. return out;
  949. }
  950.  
  951. // Half the precision
  952. size_t s = p / 2 + 1;
  953. if (p == 1) s = 0;
  954. if (p == 2) s = 1;
  955.  
  956. // Recurse at half the precision
  957. BigFloat T = invsqrt(x, s);
  958.  
  959. BigFloat temp = T.mul(T, p); // r0^2
  960. temp = temp.mul(x); // r0^2 * x
  961. temp = temp.sub(BigFloat(1), p); // r0^2 * x - 1
  962. temp = temp.mul(500000000); // (r0^2 * x - 1) / 2
  963. temp.exp--;
  964. temp = temp.mul(T, p); // (r0^2 * x - 1) / 2 * r0
  965. return T.sub(temp, p); // r0 - (r0^2 * x - 1) / 2 * r0
  966. }
  967. ////////////////////////////////////////////////////////////////////////////////
  968. ////////////////////////////////////////////////////////////////////////////////
  969. ////////////////////////////////////////////////////////////////////////////////
  970. ////////////////////////////////////////////////////////////////////////////////
  971. // e
  972. double logf_approx(double x){
  973. // Returns a very good approximation to log(x!).
  974. // log(x!) ~ (x + 1/2) * (log(x) - 1) + (log(2*pi) + 1) / 2
  975. // This approximation gets better as x is larger.
  976. if (x <= 1) return 0;
  977. return (x + .5) * (log(x) - 1) + (1.4189385332046727417803297364056176398613974736378);
  978. }
  979. size_t e_terms(size_t p){
  980. // Returns the # of terms needed to reach a precision of p.
  981.  
  982. // The taylor series converges to log(x!) / log(10) decimal digits after
  983. // x terms. So to find the number of terms needed to reach a precision of p
  984. // we need to solve this question for x:
  985. // p = log(x!) / log(1000000000)
  986.  
  987. // This function solves this equation via binary search.
  988.  
  989. double sizeL = (double)p * 20.723265836946411156161923092159277868409913397659 + 1;
  990.  
  991. size_t a = 0;
  992. size_t b = 1;
  993.  
  994. // Double up
  995. while (logf_approx((double)b) < sizeL)
  996. b <<= 1;
  997.  
  998. // Binary search
  999. while (b - a > 1){
  1000. size_t m = (a + b) >> 1;
  1001.  
  1002. if (logf_approx((double)m) < sizeL)
  1003. a = m;
  1004. else
  1005. b = m;
  1006. }
  1007.  
  1008. return b + 2;
  1009. }
  1010. void e_BSR(BigFloat &P, BigFloat &Q, uint32_t a, uint32_t b){
  1011. // Binary Splitting recursion for exp(1).
  1012.  
  1013. if (b - a == 1){
  1014. P = BigFloat(1);
  1015. Q = BigFloat(b);
  1016. return;
  1017. }
  1018.  
  1019. uint32_t m = (a + b) / 2;
  1020.  
  1021. BigFloat P0, Q0, P1, Q1;
  1022. e_BSR(P0, Q0, a, m);
  1023. e_BSR(P1, Q1, m, b);
  1024.  
  1025. P = P0.mul(Q1).add(P1);
  1026. Q = Q0.mul(Q1);
  1027. }
  1028. void e(size_t digits){
  1029. // The leading 2 doesn't count.
  1030. digits++;
  1031.  
  1032. size_t p = (digits + 8) / 9;
  1033. size_t terms = e_terms(p);
  1034.  
  1035. // Limit Exceeded
  1036. if ((uint32_t)terms != terms)
  1037. throw "Limit Exceeded";
  1038.  
  1039. cout << "Computing e..." << endl;
  1040. cout << "Algorithm: Taylor Series of exp(1)" << endl << endl;
  1041.  
  1042. double time0 = wall_clock();
  1043.  
  1044. cout << "Summing Series... " << terms << " terms" << endl;
  1045. BigFloat P, Q;
  1046. e_BSR(P, Q, 0, (uint32_t)terms);
  1047. double time1 = wall_clock();
  1048. cout << "Time: " << time1 - time0 << endl;
  1049.  
  1050. cout << "Division... " << endl;
  1051. P = P.div(Q, p).add(BigFloat(1), p);
  1052. double time2 = wall_clock();
  1053. cout << "Time: " << time2 - time1 << endl;
  1054.  
  1055. cout << "Total Time = " << time2 - time0 << endl << endl;
  1056.  
  1057. //dump_to_file("e.txt", P.to_string(digits));
  1058. cout << P.to_string(digits).c_str();
  1059. }
  1060. ////////////////////////////////////////////////////////////////////////////////
  1061. ////////////////////////////////////////////////////////////////////////////////
  1062. // Pi
  1063. void Pi_BSR(BigFloat &P, BigFloat &Q, BigFloat &R, uint32_t a, uint32_t b, size_t p){
  1064. // Binary Splitting recursion for the Chudnovsky Formula.
  1065.  
  1066. if (b - a == 1){
  1067. // P = (13591409 + 545140134 b)(2b-1)(6b-5)(6b-1) (-1)^b
  1068. P = BigFloat(b).mul(545140134);
  1069. P = P.add(BigFloat(13591409));
  1070. P = P.mul(2*b - 1);
  1071. P = P.mul(6*b - 5);
  1072. P = P.mul(6*b - 1);
  1073. if (b % 2 == 1)
  1074. P.negate();
  1075.  
  1076. // Q = 10939058860032000 * b^3
  1077. Q = BigFloat(b);
  1078. Q = Q.mul(Q).mul(Q).mul(26726400).mul(409297880);
  1079.  
  1080. // R = (2b-1)(6b-5)(6b-1)
  1081. R = BigFloat(2*b - 1);
  1082. R = R.mul(6*b - 5);
  1083. R = R.mul(6*b - 1);
  1084.  
  1085. return;
  1086. }
  1087.  
  1088. uint32_t m = (a + b) / 2;
  1089.  
  1090. BigFloat P0, Q0, R0, P1, Q1, R1;
  1091. Pi_BSR(P0, Q0, R0, a, m, p);
  1092. Pi_BSR(P1, Q1, R1, m, b, p);
  1093.  
  1094. P = P0.mul(Q1, p).add(P1.mul(R0, p), p);
  1095. Q = Q0.mul(Q1, p);
  1096. R = R0.mul(R1, p);
  1097. }
  1098. void Pi(size_t digits){
  1099. // The leading 3 doesn't count.
  1100. digits++;
  1101.  
  1102. size_t p = (digits + 8) / 9;
  1103. size_t terms = (size_t)(p * 0.6346230241342037371474889163921741077188431452678) + 1;
  1104.  
  1105. // Limit Exceeded
  1106. if ((uint32_t)terms != terms)
  1107. throw "Limit Exceeded";
  1108.  
  1109. cout << "Computing Pi..." << endl;
  1110. cout << "Algorithm: Chudnovsky Formula" << endl << endl;
  1111.  
  1112. double time0 = wall_clock();
  1113.  
  1114. cout << "Summing Series... " << terms << " terms" << endl;
  1115. BigFloat P, Q, R;
  1116. Pi_BSR(P, Q, R, 0, (uint32_t)terms, p);
  1117. P = Q.mul(13591409).add(P, p);
  1118. Q = Q.mul(4270934400);
  1119. double time1 = wall_clock();
  1120. cout << "Time: " << time1 - time0 << endl;
  1121.  
  1122. cout << "Division... " << endl;
  1123. P = Q.div(P, p);
  1124. double time2 = wall_clock();
  1125. cout << "Time: " << time2 - time1 << endl;
  1126.  
  1127. cout << "InvSqrt... " << endl;
  1128. Q = invsqrt(10005, p);
  1129. double time3 = wall_clock();
  1130. cout << "Time: " << time3 - time2 << endl;
  1131.  
  1132. cout << "Final Multiply... " << endl;
  1133. P = P.mul(Q, p);
  1134. double time4 = wall_clock();
  1135. cout << "Time: " << time4 - time3 << endl;
  1136.  
  1137. cout << "Total Time = " << time4 - time0 << endl << endl;
  1138.  
  1139. dump_to_file("pi.txt", P.to_string(digits));
  1140. }
  1141. ////////////////////////////////////////////////////////////////////////////////
  1142. ////////////////////////////////////////////////////////////////////////////////
  1143. } // Namespace: Mini_Pi
  1144. ////////////////////////////////////////////////////////////////////////////////
  1145. ////////////////////////////////////////////////////////////////////////////////
  1146. int main(){
  1147.  
  1148. size_t digits = 10000;
  1149.  
  1150. //Mini_Pi::e (digits);
  1151. Mini_Pi::Pi(digits);
  1152.  
  1153. return 0;
  1154. }
  1155.  
Runtime error #stdin #stdout #stderr 0.28s 16096KB
stdin
Standard input is empty
stdout
Computing Pi...
Algorithm: Chudnovsky Formula

Summing Series... 706 terms
Time: 0.21399
Division... 
Time: 0.034311
InvSqrt... 
Time: 0.016372
Final Multiply... 
Time: 0.009099
Total Time = 0.273772

stderr
terminate called after throwing an instance of 'char const*'