1 /++
2 Generic utilities.
3 
4 $(BOOKTABLE Cheat Sheet,
5 $(TR $(TH Function Name) $(TH Description))
6 $(T2 swap, Swaps two values.)
7 $(T2 extMul, Extended unsigned multiplications.)
8 $(T2 min, Minimum value.)
9 $(T2 max, Maximum value.)
10 )
11 
12 License: $(HTTP www.apache.org/licenses/LICENSE-2.0, Apache-2.0)
13 Authors: Ilya Yaroshenko, $(HTTP erdani.com, Andrei Alexandrescu) (original std.* modules), 
14 Macros:
15 T2=$(TR $(TDNW $(LREF $1)) $(TD $+))
16 +/
17 module mir.utility;
18 
19 import std.traits;
20 
21 import mir.math.common: optmath;
22 
23 version(LDC)
24 pragma(LDC_inline_ir) R inlineIR(string s, R, P...)(P) @safe pure nothrow @nogc;
25 
26 @optmath:
27 
28 version(LDC)
29 {
30     ///
31     public import ldc.intrinsics: _expect = llvm_expect;
32 }
33 else version(GNU)
34 {
35     import gcc.builtins: __builtin_expect, __builtin_clong;
36 
37     ///
38     T _expect(T)(in T val, in T expected_val) if (__traits(isIntegral, T))
39     {
40         static if (T.sizeof <= __builtin_clong.sizeof)
41             return cast(T) __builtin_expect(val, expected_val);
42         else
43             return val;
44     }
45 }
46 else
47 {
48     ///
49     T _expect(T)(in T val, in T expected_val) if (__traits(isIntegral, T))
50     {
51         return val;
52     }
53 }
54 
55 public import std.algorithm.mutation: swap;
56 
57 void swapStars(I1, I2)(auto ref I1 i1, auto ref I2 i2)
58 {
59     static if (__traits(compiles, swap(*i1, *i2)))
60     {
61         swap(*i1, *i2);
62     }
63     else
64     {
65         import mir.functional: unref;
66         auto e = unref(*i1);
67         i1[0] = *i2;
68         i2[0] = e;
69     }
70 }
71 
72 /++
73 Iterates the passed arguments and returns the minimum value.
74 Params: args = The values to select the minimum from. At least two arguments
75     must be passed, and they must be comparable with `<`.
76 Returns: The minimum of the passed-in values.
77 +/
78 auto min(T...)(T args)
79     if (T.length >= 2)
80 {
81     //Get "a"
82     static if (T.length <= 2)
83         alias a = args[0];
84     else
85         auto a = min(args[0 .. ($+1)/2]);
86     alias T0 = typeof(a);
87 
88     //Get "b"
89     static if (T.length <= 3)
90         alias b = args[$-1];
91     else
92         auto b = min(args[($+1)/2 .. $]);
93     alias T1 = typeof(b);
94 
95     static assert (is(typeof(a < b)), "Invalid arguments: Cannot compare types " ~ T0.stringof ~ " and " ~ T1.stringof ~ ".");
96 
97     static if ((isFloatingPoint!T0 && isNumeric!T1) || (isFloatingPoint!T1 && isNumeric!T0))
98     {
99         import mir.math.common: fmin;
100         return fmin(a, b);
101     }
102     else
103     {
104         static if (isIntegral!T0 && isIntegral!T1)
105             static assert(isSigned!T0 == isSigned!T1, 
106                 "mir.utility.min is not defined for signed + unsigned pairs because of security reasons."
107                 ~ "Please unify type or use a Phobos analog.");
108         //Do the "min" proper with a and b
109         return a < b ? a : b;
110     }
111 }
112 
113 @safe version(mir_core_test) unittest
114 {
115     int a = 5;
116     short b = 6;
117     double c = 2;
118     auto d = min(a, b);
119     static assert(is(typeof(d) == int));
120     assert(d == 5);
121     auto e = min(a, b, c);
122     static assert(is(typeof(e) == double));
123     assert(e == 2);
124 }
125 
126 /++
127 `min` is not defined for arguments of mixed signedness because of security reasons.
128 Please unify type or use a Phobos analog.
129 +/
130 version(mir_core_test) unittest
131 {
132     int a = -10;
133     uint b = 10;
134     static assert(!is(typeof(min(a, b))));
135 }
136 
137 
138 /++
139 Iterates the passed arguments and returns the minimum value.
140 Params: args = The values to select the minimum from. At least two arguments
141     must be passed, and they must be comparable with `<`.
142 Returns: The minimum of the passed-in values.
143 +/
144 auto max(T...)(T args)
145     if (T.length >= 2)
146 {
147     //Get "a"
148     static if (T.length <= 2)
149         alias a = args[0];
150     else
151         auto a = max(args[0 .. ($+1)/2]);
152     alias T0 = typeof(a);
153 
154     //Get "b"
155     static if (T.length <= 3)
156         alias b = args[$-1];
157     else
158         auto b = max(args[($+1)/2 .. $]);
159     alias T1 = typeof(b);
160 
161     static assert (is(typeof(a < b)), "Invalid arguments: Cannot compare types " ~ T0.stringof ~ " and " ~ T1.stringof ~ ".");
162 
163     static if ((isFloatingPoint!T0 && isNumeric!T1) || (isFloatingPoint!T1 && isNumeric!T0))
164     {
165         import mir.math.common: fmax;
166         return fmax(a, b);
167     }
168     else
169     {
170         static if (isIntegral!T0 && isIntegral!T1)
171             static assert(isSigned!T0 == isSigned!T1, 
172                 "mir.utility.max is not defined for signed + unsigned pairs because of security reasons."
173                 ~ "Please unify type or use a Phobos analog.");
174         //Do the "max" proper with a and b
175         return a > b ? a : b;
176     }
177 }
178 
179 ///
180 @safe version(mir_core_test) unittest
181 {
182     int a = 5;
183     short b = 6;
184     double c = 2;
185     auto d = max(a, b);
186     static assert(is(typeof(d) == int));
187     assert(d == 6);
188     auto e = min(a, b, c);
189     static assert(is(typeof(e) == double));
190     assert(e == 2);
191 }
192 
193 /++
194 `max` is not defined for arguments of mixed signedness because of security reasons.
195 Please unify type or use a Phobos analog.
196 +/
197 version(mir_core_test) unittest
198 {
199     int a = -10;
200     uint b = 10;
201     static assert(!is(typeof(max(a, b))));
202 }
203 
204 /++
205 Return type for $(LREF extMul);
206 
207 The payload order of `low` and `high` parts depends on the endianness.
208 +/
209 struct ExtMulResult(I)
210     if (isUnsigned!I)
211 {
212     version (LittleEndian)
213     {
214         /// Lower I.sizeof * 8 bits
215         I low;
216         /// Higher I.sizeof * 8 bits
217         I high;
218     }
219     else
220     {
221         /// Higher I.sizeof * 8 bits
222         I high;
223         /// Lower I.sizeof * 8 bits
224         I low;
225     }
226 
227     T opCast(T : ulong)()
228     {
229         static if (is(I == ulong))
230         {
231             return cast(T)low;
232         }
233         else
234         {
235             return cast(T)(low | (ulong(high) << (I.sizeof * 8)));
236         }
237     }
238 }
239 
240 private struct ExtDivResult(I)
241     if (isUnsigned!I)
242 {
243     version (LittleEndian)
244     {
245         /// Quotient
246         I quotient;
247         /// Remainder
248         I remainder;
249     }
250     else
251     {
252         /// Remainder
253         I remainder;
254         /// Quotient
255         I quotient;
256     }
257 }
258 
259 /++
260 Extended unsigned multiplications.
261 Performs U x U multiplication and returns $(LREF ExtMulResult)!U that contains extended result.
262 Params:
263     a = unsigned integer
264     b = unsigned integer
265 Returns:
266     128bit result if U is ulong or 256bit result if U is ucent.
267 Optimization:
268     Algorithm is optimized for LDC (LLVM IR, any target) and for DMD (X86_64).
269 +/
270 ExtMulResult!U extMul(U)(in U a, in U b) @nogc nothrow pure @trusted
271     if(isUnsigned!U)
272 {
273     static if (is(U == ulong))
274         alias H = uint;
275     else // ucent
276         alias H = ulong;
277 
278     enum hbc = H.sizeof * 8;
279 
280     static if (U.sizeof < 4)
281     {
282         auto ret = uint(a) * b;
283         version (LittleEndian)
284             return typeof(return)(cast(U) ret, cast(U)(ret >>> (U.sizeof * 8)));
285         else
286             return typeof(return)(cast(U)(ret >>> (U.sizeof * 8)), cast(U) ret);
287     }
288     else
289     static if (is(U == uint))
290     {
291         auto ret = ulong(a) * b;
292         version (LittleEndian)
293             return typeof(return)(cast(uint) ret, cast(uint)(ret >>> 32));
294         else
295             return typeof(return)(cast(uint)(ret >>> 32), cast(uint) ret);
296     }
297     else
298     static if (is(U == ulong) && __traits(compiles, ucent.init))
299     {
300         auto ret = ucent(a) * b;
301         version (LittleEndian)
302             return typeof(return)(cast(ulong) ret, cast(ulong)(ret >>> 64));
303         else
304             return typeof(return)(cast(ulong)(ret >>> 64), cast(ulong) ret);
305     }
306     else
307     {
308         if (!__ctfe)
309         {
310             static if (size_t.sizeof == 4)
311             {
312                 // https://github.com/ldc-developers/ldc/issues/2391
313             }
314             else
315             version(LDC)
316             {
317                 // LLVM IR by n8sh
318                 pragma(inline, true);
319                 static if (is(U == ulong))
320                 {
321                     auto r = inlineIR!(`
322                     %a = zext i64 %0 to i128
323                     %b = zext i64 %1 to i128
324                     %m = mul i128 %a, %b
325                     %n = lshr i128 %m, 64
326                     %h = trunc i128 %n to i64
327                     %l = trunc i128 %m to i64
328                     %agg1 = insertvalue [2 x i64] undef, i64 %l, 0
329                     %agg2 = insertvalue [2 x i64] %agg1, i64 %h, 1
330                     ret [2 x i64] %agg2`, ulong[2])(a, b);
331                     version (LittleEndian)
332                         return ExtMulResult!U(r[0], r[1]);
333                     else
334                         return ExtMulResult!U(r[1], r[0]);
335                 }
336                 else
337                 static if (false)
338                 {
339                     auto r = inlineIR!(`
340                     %a = zext i128 %0 to i256
341                     %b = zext i128 %1 to i256
342                     %m = mul i256 %a, %b
343                     %n = lshr i256 %m, 128
344                     %h = trunc i256 %n to i128
345                     %l = trunc i256 %m to i128
346                     %agg1 = insertvalue [2 x i128] undef, i128 %l, 0
347                     %agg2 = insertvalue [2 x i128] %agg1, i128 %h, 1
348                     ret [2 x i128] %agg2`, ucent[2])(a, b);
349                     version (LittleEndian)
350                         return ExtMulResult!U(r[0], r[1]);
351                     else
352                         return ExtMulResult!U(r[1], r[0]);
353                 }
354             }
355             else
356             version(D_InlineAsm_X86_64)
357             {
358                 static if (is(U == ulong))
359                 {
360                     return extMul_X86_64(a, b);
361                 }
362             }
363         }
364 
365         U al = cast(H)a;
366         U ah = a >>> hbc;
367         U bl = cast(H)b;
368         U bh = b >>> hbc;
369 
370         U p0 = al * bl;
371         U p1 = al * bh;
372         U p2 = ah * bl;
373         U p3 = ah * bh;
374 
375         H cy = cast(H)(((p0 >>> hbc) + cast(H)p1 + cast(H)p2) >>> hbc);
376         U lo = p0 + (p1 << hbc) + (p2 << hbc);
377         U hi = p3 + (p1 >>> hbc) + (p2 >>> hbc) + cy;
378 
379         version(LittleEndian)
380             return typeof(return)(lo, hi);
381         else
382             return typeof(return)(hi, lo);
383     }
384 }
385 
386 /// 64bit x 64bit -> 128bit
387 version(mir_core_test) unittest
388 {
389     immutable a = 0x93_8d_28_00_0f_50_a5_56;
390     immutable b = 0x54_c3_2f_e8_cc_a5_97_10;
391     enum c = extMul(a, b);     // Compile time algorithm
392     assert(extMul(a, b) == c); // Fast runtime algorithm
393     static assert(c.high == 0x30_da_d1_42_95_4a_50_78);
394     static assert(c.low == 0x27_9b_4b_b4_9e_fe_0f_60);
395 }
396 
397 /// 32bit x 32bit -> 64bit
398 version(mir_core_test) unittest
399 {
400     immutable a = 0x0f_50_a5_56;
401     immutable b = 0xcc_a5_97_10;
402     static assert(cast(ulong)extMul(a, b) == ulong(a) * b);
403 }
404 
405 ///
406 version(mir_core_test) unittest
407 {
408     immutable ushort a = 0xa5_56;
409     immutable ushort b = 0x97_10;
410     static assert(cast(uint)extMul(a, b) == a * b);
411 }
412 
413 ///
414 version(mir_core_test) unittest
415 {
416     immutable ubyte a = 0x56;
417     immutable ubyte b = 0x10;
418     static assert(cast(ushort)extMul(a, b) == a * b);
419 }
420 
421 version(D_InlineAsm_X86_64)
422 {
423     version(Windows)
424     {
425         private ulong[2] extMul_X86_64_impl()(ulong a, ulong b)
426         {
427             asm @safe pure nothrow @nogc
428             {
429                 naked;
430                 mov RAX, RCX;
431                 mul RDX;
432                 ret;
433             }
434         }
435 
436         private ExtMulResult!ulong extMul_X86_64()(ulong a, ulong b)
437         {
438             auto res = extMul_X86_64_impl(a, b);
439             return ExtMulResult!ulong(res[0], res[1]);
440         }
441     }
442     else
443     private ExtMulResult!ulong extMul_X86_64()(ulong a, ulong b)   
444     {  
445         asm @safe pure nothrow @nogc
446         {
447             naked;
448             mov RAX, RDI;
449             mul RSI;
450             ret;
451         }
452     }
453 
454     version(Windows)
455     {
456         private ulong[2] extDiv_X86_64_impl()(ulong high, ulong low, ulong d)
457         {
458             asm @safe pure nothrow @nogc
459             {
460                 naked;
461                 mov RAX, RCX;
462                 div RDX;
463                 ret;
464             }
465         }
466 
467         private ExtDivResult!ulong extDiv_X86_64()(ExtMulResult!ulong pair, ulong d)
468         {
469             auto res = extDiv_X86_64_impl(pair.high, pair.low);
470             return ExtDivResult!ulong(res[0], res[1]);
471         }
472     }
473     else
474     private ExtDivResult!ulong extDiv_X86_64()(ExtMulResult!ulong pair, ulong d)   
475     {  
476         asm @safe pure nothrow @nogc
477         {
478             naked;
479             mov RAX, RDI;
480             div RSI;
481             ret;
482         }
483     }
484 }
485 
486 version(LDC) {} else version(D_InlineAsm_X86_64)
487 @nogc nothrow pure @safe version(mir_core_test) unittest
488 {
489     immutable a = 0x93_8d_28_00_0f_50_a5_56;
490     immutable b = 0x54_c3_2f_e8_cc_a5_97_10;
491 
492     immutable ExtMulResult!ulong c = extMul_X86_64(a, b);
493 
494     assert(c.high == 0x30_da_d1_42_95_4a_50_78);
495     assert(c.low == 0x27_9b_4b_b4_9e_fe_0f_60);
496 }
497 
498 // draft
499 // https://www.codeproject.com/Tips/785014/UInt-Division-Modulus
500 private ulong divmod128by64(const ulong u1, const ulong u0, ulong v, out ulong r)
501 {
502     const ulong b = 1L << 32;
503     ulong un1, un0, vn1, vn0, q1, q0, un32, un21, un10, rhat, left, right;
504 
505     import mir.bitop;
506 
507     auto s = ctlz(v);
508     v <<= s;
509     vn1 = v >> 32;
510     vn0 = v & 0xffffffff;
511 
512     un32 = (u1 << s) | (u0 >> (64 - s));
513     un10 = u0 << s;
514 
515     un1 = un10 >> 32;
516     un0 = un10 & 0xffffffff;
517 
518     q1 = un32 / vn1;
519     rhat = un32 % vn1;
520 
521     left = q1 * vn0;
522     right = (rhat << 32) + un1;
523 
524     while ((q1 >= b) || (left > right))
525     {
526         --q1;
527         rhat += vn1;
528         if (rhat >= b)
529             break;
530         left -= vn0;
531         right = (rhat << 32) | un1;
532     }
533 
534     un21 = (un32 << 32) + (un1 - (q1 * v));
535 
536     q0 = un21 / vn1;
537     rhat = un21 % vn1;
538 
539     left = q0 * vn0;
540     right = (rhat << 32) | un0;
541 
542     while ((q0 >= b) || (left > right))
543     {
544         --q0;
545         rhat += vn1;
546         if (rhat >= b)
547             break;
548         left -= vn0;
549         right = (rhat << 32) | un0;
550     }
551 
552     r = ((un21 << 32) + (un0 - (q0 * v))) >> s;
553     return (q1 << 32) | q0;
554 }
555 
556 /++
557 Simple sort algorithm usefull for CTFE code.
558 +/
559 template simpleSort(alias cmp = "a < b")
560 {
561     ///
562     T[] simpleSort(T)(return T[] array)
563     {
564         size_t i = 1;
565         while (i < array.length)
566         {
567             size_t j = i;
568             import mir.functional: naryFun;
569             while (j > 0 && !naryFun!cmp(array[j - 1], array[j]))
570             {
571                 swap(array[j - 1], array[j]);
572                 j--;
573             }
574             i++;
575         }
576         return array;
577     }
578 }