1 /++
2    New Associative array. Much of this code is lifted from the rt/aaA.d module
3    in druntime. And so the author is repeated here, along with the copyright.
4    Copyright: Copyright Digital Mars 2000 - 2015, Steven Schveighoffer 2022.
5    License:   $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
6    Authors:   Martin Nowak, Steven Schveighoffer
7 +/
8 module schlib.newaa;
9 import core.memory;
10 
11 import std.algorithm : min, max;
12 
13 // grow threshold
14 private enum GROW_NUM = 4;
15 private enum GROW_DEN = 5;
16 // shrink threshold
17 private enum SHRINK_NUM = 1;
18 private enum SHRINK_DEN = 8;
19 // grow factor
20 private enum GROW_FAC = 4;
21 // growing the AA doubles it's size, so the shrink threshold must be
22 // smaller than half the grow threshold to have a hysteresis
23 static assert(GROW_FAC * SHRINK_NUM * GROW_DEN < GROW_NUM * SHRINK_DEN);
24 // initial load factor (for literals), mean of both thresholds
25 private enum INIT_NUM = (GROW_DEN * SHRINK_NUM + GROW_NUM * SHRINK_DEN) / 2;
26 private enum INIT_DEN = SHRINK_DEN * GROW_DEN;
27 
28 private enum INIT_NUM_BUCKETS = 8;
29 // magic hash constants to distinguish empty, deleted, and filled buckets
30 private enum HASH_EMPTY = 0;
31 private enum HASH_DELETED = 0x1;
32 private enum HASH_FILLED_MARK = size_t(1) << 8 * size_t.sizeof - 1;
33 
34 struct Hash(K, V)
35 {
36     private struct Entry
37     {
38         /*const*/ K key; // this really should be const, but legacy issues.
39         V value;
40     }
41 
42     private struct Bucket
43     {
44         private pure nothrow @nogc:
45         size_t hash;
46         Entry *entry;
47 
48         @property bool empty() const
49         {
50             return hash == HASH_EMPTY;
51         }
52 
53         @property bool deleted() const
54         {
55             return hash == HASH_DELETED;
56         }
57 
58         @property bool filled() const @safe
59         {
60             return cast(ptrdiff_t) hash < 0;
61         }
62     }
63 
64     private struct Impl
65     {
66         this(size_t initialSize)
67         {
68             import std.traits : hasIndirections;
69 
70             // these are only for compatibility with druntime AA
71             keysz = cast(uint) K.sizeof;
72             valsz = cast(uint) V.sizeof;
73             buckets = allocBuckets(initialSize);
74             firstUsed = cast(uint) buckets.length;
75             valoff = cast(uint) Entry.value.offsetof;
76 
77             static if (__traits(hasPostblit, K))
78                 flags |= Flags.keyHasPostblit;
79 
80             static if(hasIndirections!Entry)
81                 flags |= Flags.hasPointers;
82 
83             if(!__ctfe)
84                 entryTI = typeid(Entry);
85         }
86 
87         Bucket[] buckets;
88         uint used;
89         uint deleted;
90         // these are not used in this implementation, but put here so we can
91         // keep the same layout if converted to an AA.
92         TypeInfo_Struct entryTI;
93         uint firstUsed;
94         immutable uint keysz;
95         immutable uint valsz;
96         immutable uint valoff;
97         Flags flags;
98 
99         enum Flags : ubyte
100         {
101             none = 0x0,
102             keyHasPostblit = 0x1,
103             hasPointers = 0x2,
104         }
105 
106         @property size_t length() const pure nothrow @nogc
107         {
108             assert(used >= deleted);
109             return used - deleted;
110         }
111 
112         @property size_t dim() const pure nothrow @nogc @safe
113         {
114             return buckets.length;
115         }
116 
117         @property size_t mask() const pure nothrow @nogc
118         {
119             return dim - 1;
120         }
121 
122         // find the first slot to insert a value with hash
123         inout(Bucket)* findSlotInsert(size_t hash) inout pure nothrow @nogc
124         {
125             for (size_t i = hash & mask, j = 1;; ++j)
126             {
127                 if (!buckets[i].filled)
128                     return &buckets[i];
129                 i = (i + j) & mask;
130             }
131         }
132 
133         // lookup a key
134         inout(Bucket)* findSlotLookupOrInsert(size_t hash, in K key) inout
135         {
136             for (size_t i = hash & mask, j = 1;; ++j)
137             {
138                 if ((buckets[i].hash == hash && buckets[i].entry.key == key) ||
139                     buckets[i].empty)
140                     return &buckets[i];
141                 i = (i + j) & mask;
142             }
143         }
144 
145         void grow()
146         {
147             // If there are so many deleted entries, that growing would push us
148             // below the shrink threshold, we just purge deleted entries instead.
149             if (length * SHRINK_DEN < GROW_FAC * dim * SHRINK_NUM)
150                 resize(dim);
151             else
152                 resize(GROW_FAC * dim);
153         }
154 
155         void shrink()
156         {
157             if (dim > INIT_NUM_BUCKETS)
158                 resize(dim / GROW_FAC);
159         }
160 
161         void resize(size_t ndim) pure nothrow
162         {
163             auto obuckets = buckets;
164             buckets = allocBuckets(ndim);
165 
166             foreach (ref b; obuckets[firstUsed .. $])
167                 if (b.filled)
168                     *findSlotInsert(b.hash) = b;
169 
170             firstUsed = 0;
171             used -= deleted;
172             deleted = 0;
173             if(!__ctfe)
174                 GC.free(obuckets.ptr); // safe to free b/c impossible to reference
175         }
176 
177         void clear() pure nothrow
178         {
179             import core.stdc.string : memset;
180             // clear all data, but don't change bucket array length
181             memset(&buckets[firstUsed], 0, (buckets.length - firstUsed) * Bucket.sizeof);
182             deleted = used = 0;
183             firstUsed = cast(uint) dim;
184         }
185 
186         static Bucket[] allocBuckets(size_t dim) @trusted pure nothrow
187         {
188             enum attr = GC.BlkAttr.NO_INTERIOR;
189             immutable sz = dim * Bucket.sizeof;
190             if(__ctfe)
191                 return new Bucket[sz];
192             else
193                 return (cast(Bucket*) GC.calloc(sz, attr))[0 .. dim];
194         }
195     }
196 
197     private Impl* aa;
198 
199     size_t length() const pure nothrow @nogc {
200         return aa ? aa.length : 0;
201     }
202 
203     ref V opIndexAssign(V value, const K key)
204     {
205         if(!aa)
206             aa = new Impl(INIT_NUM_BUCKETS);
207         auto h = calcHash(key);
208         auto location = aa.findSlotLookupOrInsert(h, key);
209         assert(location !is null);
210         if(location.empty)
211         {
212             if(location.deleted)
213                 --aa.deleted;
214             else if(++aa.used * GROW_DEN > aa.dim * GROW_NUM)
215             {
216                 aa.grow();
217                 location = aa.findSlotInsert(h);
218             }
219 
220             aa.firstUsed = min(aa.firstUsed, cast(uint)(location - aa.buckets.ptr));
221             location.hash = h;
222             location.entry = new Entry(key, value);
223         }
224         else
225         {
226             location.entry.value = value;
227         }
228         return location.entry.value;
229     }
230 
231     ref V opIndex(const K key, string file = __FILE__, size_t line = __LINE__) @safe
232     {
233         import core.exception;
234         // todo, throw range error
235         if(auto v = key in this)
236             return *v;
237         throw new RangeError(file, line);
238     }
239 
240     V* opBinaryRight(string s : "in")(const K key) @safe
241     {
242         if(!aa)
243             return null;
244         auto h = calcHash(key);
245         auto loc = aa.findSlotLookupOrInsert(h, key);
246         if(loc.empty)
247             return null;
248         return &loc.entry.value;
249     }
250 
251     size_t toHash() scope const nothrow
252     {
253         if(length == 0)
254             return 0;
255 
256         size_t h;
257         foreach(b; aa.buckets)
258         {
259             if(b.filled)
260                 h += hashOf(hashOf(b.entry.value), hashOf(b.entry.key));
261         }
262 
263         return h;
264     }
265 
266     private struct KVRange
267     {
268         Impl *impl;
269         size_t idx;
270 
271         this(Impl *impl, size_t idx)
272         {
273             this.impl = impl;
274             this.idx = idx;
275             if(impl && impl.buckets[idx].empty)
276                 popFront();
277         }
278 
279         pure nothrow @nogc @safe:
280         @property bool empty() { return impl is null || idx >= impl.dim; }
281         @property ref Entry front()
282         {
283             assert(!empty);
284             return *impl.buckets[idx].entry;
285         }
286         void popFront()
287         {
288             assert(!empty);
289             for(++idx; idx < impl.dim; ++idx)
290             {
291                 if(impl.buckets[idx].filled)
292                     break;
293             }
294         }
295         auto save() { return this; }
296     }
297 
298     auto opAssign(OK, OV)(OV[OK] other) {
299         void buildManually()
300         {
301             aa = null;
302             foreach(k, v; other)
303                 this[k] = v;
304         }
305         static if(is(OK == K) && is(OV == V))
306         {
307             if(__ctfe)
308             {
309                 // build it manually
310                 buildManually();
311             }
312             else
313             {
314                 // we can get away with a reinterpret cast
315                 aa = (() @trusted => *cast(Impl**)&other)();
316             }
317         }
318         else
319         {
320             buildManually();
321         }
322         return this;
323     }
324 
325     auto opAssign(OK, OV)(Hash!(K, V) other)
326     {
327         static if(is(OK == K) && is(OV == V))
328         {
329             aa = other.aa;
330         }
331         else
332         {
333             // build manually
334             aa = null;
335             foreach(e; other[])
336                 this[e.key] = e.value;
337         }
338     }
339 
340     @property auto byKeyValue() pure nothrow @nogc
341     {
342         return KVRange(aa, 0);
343     }
344 
345     @property auto byValue() pure nothrow @nogc
346     {
347         import std.algorithm : map;
348         return KVRange(aa, 0).map!(x => x.value);
349     }
350 
351     @property auto byKey() pure nothrow @nogc
352     {
353         import std.algorithm : map;
354         return KVRange(aa, 0).map!(x => x.key);
355     }
356 
357     alias opSlice = byKeyValue;
358 
359     this(OK, OV)(OV[OK] other)
360     {
361         opAssign(other);
362     }
363 
364     this(OK, OV)(Hash!(OK, OV) other)
365     {
366         opAssign(other);
367     }
368 
369     @property K[] keys()
370     {
371         import std.array;
372         return this.byKey.array;
373     }
374 
375     @property V[] values()
376     {
377         import std.array;
378         return this.byValue.array;
379     }
380 
381     void clear() pure nothrow
382     {
383         if(length > 0)
384             aa.clear();
385     }
386 
387     bool remove(const K key)
388     {
389         if(!aa)
390             return false;
391         auto h = calcHash(key);
392         auto loc = aa.findSlotLookupOrInsert(h, key);
393         if(!loc.empty)
394         {
395             loc.hash = HASH_DELETED;
396             loc.entry = null;
397             ++aa.deleted;
398 
399             if (aa.length * SHRINK_DEN < aa.dim * SHRINK_NUM && (__ctfe || !GC.inFinalizer()))
400                 aa.shrink();
401             return true;
402         }
403         return false;
404     }
405 }
406 
407 private size_t mix(size_t h) @safe pure nothrow @nogc
408 {
409     // final mix function of MurmurHash2
410     enum m = 0x5bd1e995;
411     h ^= h >> 13;
412     h *= m;
413     h ^= h >> 15;
414     return h;
415 }
416 
417 private size_t calcHash(K)(in K pkey)
418 {
419     immutable hash = hashOf(pkey);
420     // highest bit is set to distinguish empty/deleted from filled buckets
421     return mix(hash) | HASH_FILLED_MARK;
422 }
423 
424 auto asAA(K, V)(Hash!(K, V) hash) @trusted
425 {
426     if(hash.aa)
427     {
428         // shore up any differences in implementation, unless we are running at
429         // compile time.
430         if(__ctfe)
431         {
432             // need to build the AA from the hash
433             V[K] result;
434             foreach(e; hash[])
435             {
436                 result[e.key] = e.value;
437             }
438             return result;
439         }
440         if(hash.aa.entryTI is null)
441             // needs to be set 
442             hash.aa.entryTI = typeid(hash.Entry);
443 
444         // use a reinterpret cast.
445         return *cast(V[K]*)&hash;
446     }
447     return V[K].init;
448 }
449 
450 unittest {
451     import std.stdio;
452     auto buildAAAtCompiletime()
453     {
454         Hash!(string, int) h = ["hello": 5];
455         //h["hello"] = 5;
456         return h;
457     }
458     static h = buildAAAtCompiletime();
459     auto aa = h.asAA;
460     writeln(aa);
461     h["there"] = 4;
462     aa["D is the best"] = 3;
463     writeln(aa);
464 
465     aa = null;
466     aa["one"] = 1;
467     aa["two"] = 2;
468     aa["three"] = 3;
469     h = aa;
470     writeln(h[]);
471     writeln(aa);
472     foreach(k; h.byKey)
473         assert(h[k] == aa[k]);
474 
475     h = h; // ensure assignment works.
476     Hash!(string, int) h2 = h; // ensure construction works;
477     h.remove("one");
478     writeln(aa);
479     writeln(h[]);
480     import std.exception;
481     import core.exception;
482     assertThrown!RangeError(h2["four"]);
483 }