1 /++ 2 Generic utilities. 3 4 $(BOOKTABLE Cheat Sheet, 5 $(TR $(TH Function Name) $(TH Description)) 6 $(T2 swap, Swaps two values.) 7 $(T2 extMul, Extended unsigned multiplications.) 8 $(T2 min, Minimum value.) 9 $(T2 max, Maximum value.) 10 ) 11 12 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0) 13 Authors: Ilya Yaroshenko, $(HTTP erdani.com, Andrei Alexandrescu) (original std.* modules), 14 Macros: 15 T2=$(TR $(TDNW $(LREF $1)) $(TD $+)) 16 +/ 17 module mir.utility; 18 19 import std.traits; 20 21 import mir.math.common: optmath; 22 23 version(LDC) 24 pragma(LDC_inline_ir) R inlineIR(string s, R, P...)(P) @safe pure nothrow @nogc; 25 26 @optmath: 27 28 version(LDC) 29 { 30 /// 31 public import ldc.intrinsics: _expect = llvm_expect; 32 } 33 else version(GNU) 34 { 35 import gcc.builtins: __builtin_expect, __builtin_clong; 36 37 /// 38 T _expect(T)(in T val, in T expected_val) if (__traits(isIntegral, T)) 39 { 40 static if (T.sizeof <= __builtin_clong.sizeof) 41 return cast(T) __builtin_expect(val, expected_val); 42 else 43 return val; 44 } 45 } 46 else 47 { 48 /// 49 T _expect(T)(in T val, in T expected_val) if (__traits(isIntegral, T)) 50 { 51 return val; 52 } 53 } 54 55 public import std.algorithm.mutation: swap; 56 57 void swapStars(I1, I2)(auto ref I1 i1, auto ref I2 i2) 58 { 59 static if (__traits(compiles, swap(*i1, *i2))) 60 { 61 swap(*i1, *i2); 62 } 63 else 64 { 65 import mir.functional: unref; 66 auto e = unref(*i1); 67 i1[0] = *i2; 68 i2[0] = e; 69 } 70 } 71 72 /++ 73 Iterates the passed arguments and returns the minimum value. 74 Params: args = The values to select the minimum from. At least two arguments 75 must be passed, and they must be comparable with `<`. 76 Returns: The minimum of the passed-in values. 77 +/ 78 auto min(T...)(T args) 79 if (T.length >= 2) 80 { 81 //Get "a" 82 static if (T.length <= 2) 83 alias a = args[0]; 84 else 85 auto a = min(args[0 .. ($+1)/2]); 86 alias T0 = typeof(a); 87 88 //Get "b" 89 static if (T.length <= 3) 90 alias b = args[$-1]; 91 else 92 auto b = min(args[($+1)/2 .. $]); 93 alias T1 = typeof(b); 94 95 static assert (is(typeof(a < b)), "Invalid arguments: Cannot compare types " ~ T0.stringof ~ " and " ~ T1.stringof ~ "."); 96 97 static if ((isFloatingPoint!T0 && isNumeric!T1) || (isFloatingPoint!T1 && isNumeric!T0)) 98 { 99 import mir.math.common: fmin; 100 return fmin(a, b); 101 } 102 else 103 { 104 static if (isIntegral!T0 && isIntegral!T1) 105 static assert(isSigned!T0 == isSigned!T1, 106 "mir.utility.min is not defined for signed + unsigned pairs because of security reasons." 107 ~ "Please unify type or use a Phobos analog."); 108 //Do the "min" proper with a and b 109 return a < b ? a : b; 110 } 111 } 112 113 @safe version(mir_core_test) unittest 114 { 115 int a = 5; 116 short b = 6; 117 double c = 2; 118 auto d = min(a, b); 119 static assert(is(typeof(d) == int)); 120 assert(d == 5); 121 auto e = min(a, b, c); 122 static assert(is(typeof(e) == double)); 123 assert(e == 2); 124 } 125 126 /++ 127 `min` is not defined for arguments of mixed signedness because of security reasons. 128 Please unify type or use a Phobos analog. 129 +/ 130 version(mir_core_test) unittest 131 { 132 int a = -10; 133 uint b = 10; 134 static assert(!is(typeof(min(a, b)))); 135 } 136 137 138 /++ 139 Iterates the passed arguments and returns the minimum value. 140 Params: args = The values to select the minimum from. At least two arguments 141 must be passed, and they must be comparable with `<`. 142 Returns: The minimum of the passed-in values. 143 +/ 144 auto max(T...)(T args) 145 if (T.length >= 2) 146 { 147 //Get "a" 148 static if (T.length <= 2) 149 alias a = args[0]; 150 else 151 auto a = max(args[0 .. ($+1)/2]); 152 alias T0 = typeof(a); 153 154 //Get "b" 155 static if (T.length <= 3) 156 alias b = args[$-1]; 157 else 158 auto b = max(args[($+1)/2 .. $]); 159 alias T1 = typeof(b); 160 161 static assert (is(typeof(a < b)), "Invalid arguments: Cannot compare types " ~ T0.stringof ~ " and " ~ T1.stringof ~ "."); 162 163 static if ((isFloatingPoint!T0 && isNumeric!T1) || (isFloatingPoint!T1 && isNumeric!T0)) 164 { 165 import mir.math.common: fmax; 166 return fmax(a, b); 167 } 168 else 169 { 170 static if (isIntegral!T0 && isIntegral!T1) 171 static assert(isSigned!T0 == isSigned!T1, 172 "mir.utility.max is not defined for signed + unsigned pairs because of security reasons." 173 ~ "Please unify type or use a Phobos analog."); 174 //Do the "max" proper with a and b 175 return a > b ? a : b; 176 } 177 } 178 179 /// 180 @safe version(mir_core_test) unittest 181 { 182 int a = 5; 183 short b = 6; 184 double c = 2; 185 auto d = max(a, b); 186 static assert(is(typeof(d) == int)); 187 assert(d == 6); 188 auto e = min(a, b, c); 189 static assert(is(typeof(e) == double)); 190 assert(e == 2); 191 } 192 193 /++ 194 `max` is not defined for arguments of mixed signedness because of security reasons. 195 Please unify type or use a Phobos analog. 196 +/ 197 version(mir_core_test) unittest 198 { 199 int a = -10; 200 uint b = 10; 201 static assert(!is(typeof(max(a, b)))); 202 } 203 204 /++ 205 Return type for $(LREF extMul); 206 207 The payload order of `low` and `high` parts depends on the endianness. 208 +/ 209 struct ExtMulResult(I) 210 if (isUnsigned!I) 211 { 212 version (LittleEndian) 213 { 214 /// Lower I.sizeof * 8 bits 215 I low; 216 /// Higher I.sizeof * 8 bits 217 I high; 218 } 219 else 220 { 221 /// Higher I.sizeof * 8 bits 222 I high; 223 /// Lower I.sizeof * 8 bits 224 I low; 225 } 226 227 T opCast(T : ulong)() 228 { 229 static if (is(I == ulong)) 230 { 231 return cast(T)low; 232 } 233 else 234 { 235 return cast(T)(low | (ulong(high) << (I.sizeof * 8))); 236 } 237 } 238 } 239 240 private struct ExtDivResult(I) 241 if (isUnsigned!I) 242 { 243 version (LittleEndian) 244 { 245 /// Quotient 246 I quotient; 247 /// Remainder 248 I remainder; 249 } 250 else 251 { 252 /// Remainder 253 I remainder; 254 /// Quotient 255 I quotient; 256 } 257 } 258 259 /++ 260 Extended unsigned multiplications. 261 Performs U x U multiplication and returns $(LREF ExtMulResult)!U that contains extended result. 262 Params: 263 a = unsigned integer 264 b = unsigned integer 265 Returns: 266 128bit result if U is ulong or 256bit result if U is ucent. 267 Optimization: 268 Algorithm is optimized for LDC (LLVM IR, any target) and for DMD (X86_64). 269 +/ 270 ExtMulResult!U extMul(U)(in U a, in U b) @nogc nothrow pure @trusted 271 if(isUnsigned!U) 272 { 273 static if (is(U == ulong)) 274 alias H = uint; 275 else // ucent 276 alias H = ulong; 277 278 enum hbc = H.sizeof * 8; 279 280 static if (U.sizeof < 4) 281 { 282 auto ret = uint(a) * b; 283 version (LittleEndian) 284 return typeof(return)(cast(U) ret, cast(U)(ret >>> (U.sizeof * 8))); 285 else 286 return typeof(return)(cast(U)(ret >>> (U.sizeof * 8)), cast(U) ret); 287 } 288 else 289 static if (is(U == uint)) 290 { 291 auto ret = ulong(a) * b; 292 version (LittleEndian) 293 return typeof(return)(cast(uint) ret, cast(uint)(ret >>> 32)); 294 else 295 return typeof(return)(cast(uint)(ret >>> 32), cast(uint) ret); 296 } 297 else 298 static if (is(U == ulong) && __traits(compiles, ucent.init)) 299 { 300 auto ret = ucent(a) * b; 301 version (LittleEndian) 302 return typeof(return)(cast(ulong) ret, cast(ulong)(ret >>> 64)); 303 else 304 return typeof(return)(cast(ulong)(ret >>> 64), cast(ulong) ret); 305 } 306 else 307 { 308 if (!__ctfe) 309 { 310 static if (size_t.sizeof == 4) 311 { 312 // https://github.com/ldc-developers/ldc/issues/2391 313 } 314 else 315 version(LDC) 316 { 317 // LLVM IR by n8sh 318 pragma(inline, true); 319 static if (is(U == ulong)) 320 { 321 auto r = inlineIR!(` 322 %a = zext i64 %0 to i128 323 %b = zext i64 %1 to i128 324 %m = mul i128 %a, %b 325 %n = lshr i128 %m, 64 326 %h = trunc i128 %n to i64 327 %l = trunc i128 %m to i64 328 %agg1 = insertvalue [2 x i64] undef, i64 %l, 0 329 %agg2 = insertvalue [2 x i64] %agg1, i64 %h, 1 330 ret [2 x i64] %agg2`, ulong[2])(a, b); 331 version (LittleEndian) 332 return ExtMulResult!U(r[0], r[1]); 333 else 334 return ExtMulResult!U(r[1], r[0]); 335 } 336 else 337 static if (false) 338 { 339 auto r = inlineIR!(` 340 %a = zext i128 %0 to i256 341 %b = zext i128 %1 to i256 342 %m = mul i256 %a, %b 343 %n = lshr i256 %m, 128 344 %h = trunc i256 %n to i128 345 %l = trunc i256 %m to i128 346 %agg1 = insertvalue [2 x i128] undef, i128 %l, 0 347 %agg2 = insertvalue [2 x i128] %agg1, i128 %h, 1 348 ret [2 x i128] %agg2`, ucent[2])(a, b); 349 version (LittleEndian) 350 return ExtMulResult!U(r[0], r[1]); 351 else 352 return ExtMulResult!U(r[1], r[0]); 353 } 354 } 355 else 356 version(D_InlineAsm_X86_64) 357 { 358 static if (is(U == ulong)) 359 { 360 return extMul_X86_64(a, b); 361 } 362 } 363 } 364 365 U al = cast(H)a; 366 U ah = a >>> hbc; 367 U bl = cast(H)b; 368 U bh = b >>> hbc; 369 370 U p0 = al * bl; 371 U p1 = al * bh; 372 U p2 = ah * bl; 373 U p3 = ah * bh; 374 375 H cy = cast(H)(((p0 >>> hbc) + cast(H)p1 + cast(H)p2) >>> hbc); 376 U lo = p0 + (p1 << hbc) + (p2 << hbc); 377 U hi = p3 + (p1 >>> hbc) + (p2 >>> hbc) + cy; 378 379 version(LittleEndian) 380 return typeof(return)(lo, hi); 381 else 382 return typeof(return)(hi, lo); 383 } 384 } 385 386 /// 64bit x 64bit -> 128bit 387 version(mir_core_test) unittest 388 { 389 immutable a = 0x93_8d_28_00_0f_50_a5_56; 390 immutable b = 0x54_c3_2f_e8_cc_a5_97_10; 391 enum c = extMul(a, b); // Compile time algorithm 392 assert(extMul(a, b) == c); // Fast runtime algorithm 393 static assert(c.high == 0x30_da_d1_42_95_4a_50_78); 394 static assert(c.low == 0x27_9b_4b_b4_9e_fe_0f_60); 395 } 396 397 /// 32bit x 32bit -> 64bit 398 version(mir_core_test) unittest 399 { 400 immutable a = 0x0f_50_a5_56; 401 immutable b = 0xcc_a5_97_10; 402 static assert(cast(ulong)extMul(a, b) == ulong(a) * b); 403 } 404 405 /// 406 version(mir_core_test) unittest 407 { 408 immutable ushort a = 0xa5_56; 409 immutable ushort b = 0x97_10; 410 static assert(cast(uint)extMul(a, b) == a * b); 411 } 412 413 /// 414 version(mir_core_test) unittest 415 { 416 immutable ubyte a = 0x56; 417 immutable ubyte b = 0x10; 418 static assert(cast(ushort)extMul(a, b) == a * b); 419 } 420 421 version(D_InlineAsm_X86_64) 422 { 423 version(Windows) 424 { 425 private ulong[2] extMul_X86_64_impl()(ulong a, ulong b) 426 { 427 asm @safe pure nothrow @nogc 428 { 429 naked; 430 mov RAX, RCX; 431 mul RDX; 432 ret; 433 } 434 } 435 436 private ExtMulResult!ulong extMul_X86_64()(ulong a, ulong b) 437 { 438 auto res = extMul_X86_64_impl(a, b); 439 return ExtMulResult!ulong(res[0], res[1]); 440 } 441 } 442 else 443 private ExtMulResult!ulong extMul_X86_64()(ulong a, ulong b) 444 { 445 asm @safe pure nothrow @nogc 446 { 447 naked; 448 mov RAX, RDI; 449 mul RSI; 450 ret; 451 } 452 } 453 454 version(Windows) 455 { 456 private ulong[2] extDiv_X86_64_impl()(ulong high, ulong low, ulong d) 457 { 458 asm @safe pure nothrow @nogc 459 { 460 naked; 461 mov RAX, RCX; 462 div RDX; 463 ret; 464 } 465 } 466 467 private ExtDivResult!ulong extDiv_X86_64()(ExtMulResult!ulong pair, ulong d) 468 { 469 auto res = extDiv_X86_64_impl(pair.high, pair.low); 470 return ExtDivResult!ulong(res[0], res[1]); 471 } 472 } 473 else 474 private ExtDivResult!ulong extDiv_X86_64()(ExtMulResult!ulong pair, ulong d) 475 { 476 asm @safe pure nothrow @nogc 477 { 478 naked; 479 mov RAX, RDI; 480 div RSI; 481 ret; 482 } 483 } 484 } 485 486 version(LDC) {} else version(D_InlineAsm_X86_64) 487 @nogc nothrow pure @safe version(mir_core_test) unittest 488 { 489 immutable a = 0x93_8d_28_00_0f_50_a5_56; 490 immutable b = 0x54_c3_2f_e8_cc_a5_97_10; 491 492 immutable ExtMulResult!ulong c = extMul_X86_64(a, b); 493 494 assert(c.high == 0x30_da_d1_42_95_4a_50_78); 495 assert(c.low == 0x27_9b_4b_b4_9e_fe_0f_60); 496 } 497 498 // draft 499 // https://www.codeproject.com/Tips/785014/UInt-Division-Modulus 500 private ulong divmod128by64(const ulong u1, const ulong u0, ulong v, out ulong r) 501 { 502 const ulong b = 1L << 32; 503 ulong un1, un0, vn1, vn0, q1, q0, un32, un21, un10, rhat, left, right; 504 505 import mir.bitop; 506 507 auto s = ctlz(v); 508 v <<= s; 509 vn1 = v >> 32; 510 vn0 = v & 0xffffffff; 511 512 un32 = (u1 << s) | (u0 >> (64 - s)); 513 un10 = u0 << s; 514 515 un1 = un10 >> 32; 516 un0 = un10 & 0xffffffff; 517 518 q1 = un32 / vn1; 519 rhat = un32 % vn1; 520 521 left = q1 * vn0; 522 right = (rhat << 32) + un1; 523 524 while ((q1 >= b) || (left > right)) 525 { 526 --q1; 527 rhat += vn1; 528 if (rhat >= b) 529 break; 530 left -= vn0; 531 right = (rhat << 32) | un1; 532 } 533 534 un21 = (un32 << 32) + (un1 - (q1 * v)); 535 536 q0 = un21 / vn1; 537 rhat = un21 % vn1; 538 539 left = q0 * vn0; 540 right = (rhat << 32) | un0; 541 542 while ((q0 >= b) || (left > right)) 543 { 544 --q0; 545 rhat += vn1; 546 if (rhat >= b) 547 break; 548 left -= vn0; 549 right = (rhat << 32) | un0; 550 } 551 552 r = ((un21 << 32) + (un0 - (q0 * v))) >> s; 553 return (q1 << 32) | q0; 554 } 555 556 /++ 557 Simple sort algorithm usefull for CTFE code. 558 +/ 559 template simpleSort(alias cmp = "a < b") 560 { 561 /// 562 T[] simpleSort(T)(return T[] array) 563 { 564 size_t i = 1; 565 while (i < array.length) 566 { 567 size_t j = i; 568 import mir.functional: naryFun; 569 while (j > 0 && !naryFun!cmp(array[j - 1], array[j])) 570 { 571 swap(array[j - 1], array[j]); 572 j--; 573 } 574 i++; 575 } 576 return array; 577 } 578 }