1 /++ 2 New Associative array. Much of this code is lifted from the rt/aaA.d module 3 in druntime. And so the author is repeated here, along with the copyright. 4 Copyright: Copyright Digital Mars 2000 - 2015, Steven Schveighoffer 2022. 5 License: $(HTTP www.boost.org/LICENSE_1_0.txt, Boost License 1.0). 6 Authors: Martin Nowak, Steven Schveighoffer 7 +/ 8 module schlib.newaa; 9 import core.memory; 10 11 import std.algorithm : min, max; 12 13 // grow threshold 14 private enum GROW_NUM = 4; 15 private enum GROW_DEN = 5; 16 // shrink threshold 17 private enum SHRINK_NUM = 1; 18 private enum SHRINK_DEN = 8; 19 // grow factor 20 private enum GROW_FAC = 4; 21 // growing the AA doubles it's size, so the shrink threshold must be 22 // smaller than half the grow threshold to have a hysteresis 23 static assert(GROW_FAC * SHRINK_NUM * GROW_DEN < GROW_NUM * SHRINK_DEN); 24 // initial load factor (for literals), mean of both thresholds 25 private enum INIT_NUM = (GROW_DEN * SHRINK_NUM + GROW_NUM * SHRINK_DEN) / 2; 26 private enum INIT_DEN = SHRINK_DEN * GROW_DEN; 27 28 private enum INIT_NUM_BUCKETS = 8; 29 // magic hash constants to distinguish empty, deleted, and filled buckets 30 private enum HASH_EMPTY = 0; 31 private enum HASH_DELETED = 0x1; 32 private enum HASH_FILLED_MARK = size_t(1) << 8 * size_t.sizeof - 1; 33 34 struct Hash(K, V) 35 { 36 private struct Entry 37 { 38 /*const*/ K key; // this really should be const, but legacy issues. 39 V value; 40 } 41 42 private struct Bucket 43 { 44 private pure nothrow @nogc: 45 size_t hash; 46 Entry *entry; 47 48 @property bool empty() const 49 { 50 return hash == HASH_EMPTY; 51 } 52 53 @property bool deleted() const 54 { 55 return hash == HASH_DELETED; 56 } 57 58 @property bool filled() const @safe 59 { 60 return cast(ptrdiff_t) hash < 0; 61 } 62 } 63 64 private struct Impl 65 { 66 this(size_t initialSize) 67 { 68 import std.traits : hasIndirections; 69 70 // these are only for compatibility with druntime AA 71 keysz = cast(uint) K.sizeof; 72 valsz = cast(uint) V.sizeof; 73 buckets = allocBuckets(initialSize); 74 firstUsed = cast(uint) buckets.length; 75 valoff = cast(uint) Entry.value.offsetof; 76 77 static if (__traits(hasPostblit, K)) 78 flags |= Flags.keyHasPostblit; 79 80 static if(hasIndirections!Entry) 81 flags |= Flags.hasPointers; 82 83 if(!__ctfe) 84 entryTI = typeid(Entry); 85 } 86 87 Bucket[] buckets; 88 uint used; 89 uint deleted; 90 // these are not used in this implementation, but put here so we can 91 // keep the same layout if converted to an AA. 92 TypeInfo_Struct entryTI; 93 uint firstUsed; 94 immutable uint keysz; 95 immutable uint valsz; 96 immutable uint valoff; 97 Flags flags; 98 99 enum Flags : ubyte 100 { 101 none = 0x0, 102 keyHasPostblit = 0x1, 103 hasPointers = 0x2, 104 } 105 106 @property size_t length() const pure nothrow @nogc 107 { 108 assert(used >= deleted); 109 return used - deleted; 110 } 111 112 @property size_t dim() const pure nothrow @nogc @safe 113 { 114 return buckets.length; 115 } 116 117 @property size_t mask() const pure nothrow @nogc 118 { 119 return dim - 1; 120 } 121 122 // find the first slot to insert a value with hash 123 inout(Bucket)* findSlotInsert(size_t hash) inout pure nothrow @nogc 124 { 125 for (size_t i = hash & mask, j = 1;; ++j) 126 { 127 if (!buckets[i].filled) 128 return &buckets[i]; 129 i = (i + j) & mask; 130 } 131 } 132 133 // lookup a key 134 inout(Bucket)* findSlotLookupOrInsert(size_t hash, in K key) inout 135 { 136 for (size_t i = hash & mask, j = 1;; ++j) 137 { 138 if ((buckets[i].hash == hash && buckets[i].entry.key == key) || 139 buckets[i].empty) 140 return &buckets[i]; 141 i = (i + j) & mask; 142 } 143 } 144 145 void grow() 146 { 147 // If there are so many deleted entries, that growing would push us 148 // below the shrink threshold, we just purge deleted entries instead. 149 if (length * SHRINK_DEN < GROW_FAC * dim * SHRINK_NUM) 150 resize(dim); 151 else 152 resize(GROW_FAC * dim); 153 } 154 155 void shrink() 156 { 157 if (dim > INIT_NUM_BUCKETS) 158 resize(dim / GROW_FAC); 159 } 160 161 void resize(size_t ndim) pure nothrow 162 { 163 auto obuckets = buckets; 164 buckets = allocBuckets(ndim); 165 166 foreach (ref b; obuckets[firstUsed .. $]) 167 if (b.filled) 168 *findSlotInsert(b.hash) = b; 169 170 firstUsed = 0; 171 used -= deleted; 172 deleted = 0; 173 if(!__ctfe) 174 GC.free(obuckets.ptr); // safe to free b/c impossible to reference 175 } 176 177 void clear() pure nothrow 178 { 179 import core.stdc.string : memset; 180 // clear all data, but don't change bucket array length 181 memset(&buckets[firstUsed], 0, (buckets.length - firstUsed) * Bucket.sizeof); 182 deleted = used = 0; 183 firstUsed = cast(uint) dim; 184 } 185 186 static Bucket[] allocBuckets(size_t dim) @trusted pure nothrow 187 { 188 enum attr = GC.BlkAttr.NO_INTERIOR; 189 immutable sz = dim * Bucket.sizeof; 190 if(__ctfe) 191 return new Bucket[sz]; 192 else 193 return (cast(Bucket*) GC.calloc(sz, attr))[0 .. dim]; 194 } 195 } 196 197 private Impl* aa; 198 199 size_t length() const pure nothrow @nogc { 200 return aa ? aa.length : 0; 201 } 202 203 ref V opIndexAssign(V value, const K key) 204 { 205 if(!aa) 206 aa = new Impl(INIT_NUM_BUCKETS); 207 auto h = calcHash(key); 208 auto location = aa.findSlotLookupOrInsert(h, key); 209 assert(location !is null); 210 if(location.empty) 211 { 212 if(location.deleted) 213 --aa.deleted; 214 else if(++aa.used * GROW_DEN > aa.dim * GROW_NUM) 215 { 216 aa.grow(); 217 location = aa.findSlotInsert(h); 218 } 219 220 aa.firstUsed = min(aa.firstUsed, cast(uint)(location - aa.buckets.ptr)); 221 location.hash = h; 222 location.entry = new Entry(key, value); 223 } 224 else 225 { 226 location.entry.value = value; 227 } 228 return location.entry.value; 229 } 230 231 ref V opIndex(const K key, string file = __FILE__, size_t line = __LINE__) @safe 232 { 233 import core.exception; 234 // todo, throw range error 235 if(auto v = key in this) 236 return *v; 237 throw new RangeError(file, line); 238 } 239 240 V* opBinaryRight(string s : "in")(const K key) @safe 241 { 242 if(!aa) 243 return null; 244 auto h = calcHash(key); 245 auto loc = aa.findSlotLookupOrInsert(h, key); 246 if(loc.empty) 247 return null; 248 return &loc.entry.value; 249 } 250 251 size_t toHash() scope const nothrow 252 { 253 if(length == 0) 254 return 0; 255 256 size_t h; 257 foreach(b; aa.buckets) 258 { 259 if(b.filled) 260 h += hashOf(hashOf(b.entry.value), hashOf(b.entry.key)); 261 } 262 263 return h; 264 } 265 266 private struct KVRange 267 { 268 Impl *impl; 269 size_t idx; 270 271 this(Impl *impl, size_t idx) 272 { 273 this.impl = impl; 274 this.idx = idx; 275 if(impl && impl.buckets[idx].empty) 276 popFront(); 277 } 278 279 pure nothrow @nogc @safe: 280 @property bool empty() { return impl is null || idx >= impl.dim; } 281 @property ref Entry front() 282 { 283 assert(!empty); 284 return *impl.buckets[idx].entry; 285 } 286 void popFront() 287 { 288 assert(!empty); 289 for(++idx; idx < impl.dim; ++idx) 290 { 291 if(impl.buckets[idx].filled) 292 break; 293 } 294 } 295 auto save() { return this; } 296 } 297 298 auto opAssign(OK, OV)(OV[OK] other) { 299 void buildManually() 300 { 301 aa = null; 302 foreach(k, v; other) 303 this[k] = v; 304 } 305 static if(is(OK == K) && is(OV == V)) 306 { 307 if(__ctfe) 308 { 309 // build it manually 310 buildManually(); 311 } 312 else 313 { 314 // we can get away with a reinterpret cast 315 aa = (() @trusted => *cast(Impl**)&other)(); 316 } 317 } 318 else 319 { 320 buildManually(); 321 } 322 return this; 323 } 324 325 auto opAssign(OK, OV)(Hash!(K, V) other) 326 { 327 static if(is(OK == K) && is(OV == V)) 328 { 329 aa = other.aa; 330 } 331 else 332 { 333 // build manually 334 aa = null; 335 foreach(e; other[]) 336 this[e.key] = e.value; 337 } 338 } 339 340 @property auto byKeyValue() pure nothrow @nogc 341 { 342 return KVRange(aa, 0); 343 } 344 345 @property auto byValue() pure nothrow @nogc 346 { 347 import std.algorithm : map; 348 return KVRange(aa, 0).map!(x => x.value); 349 } 350 351 @property auto byKey() pure nothrow @nogc 352 { 353 import std.algorithm : map; 354 return KVRange(aa, 0).map!(x => x.key); 355 } 356 357 alias opSlice = byKeyValue; 358 359 this(OK, OV)(OV[OK] other) 360 { 361 opAssign(other); 362 } 363 364 this(OK, OV)(Hash!(OK, OV) other) 365 { 366 opAssign(other); 367 } 368 369 @property K[] keys() 370 { 371 import std.array; 372 return this.byKey.array; 373 } 374 375 @property V[] values() 376 { 377 import std.array; 378 return this.byValue.array; 379 } 380 381 void clear() pure nothrow 382 { 383 if(length > 0) 384 aa.clear(); 385 } 386 387 bool remove(const K key) 388 { 389 if(!aa) 390 return false; 391 auto h = calcHash(key); 392 auto loc = aa.findSlotLookupOrInsert(h, key); 393 if(!loc.empty) 394 { 395 loc.hash = HASH_DELETED; 396 loc.entry = null; 397 ++aa.deleted; 398 399 if (aa.length * SHRINK_DEN < aa.dim * SHRINK_NUM && (__ctfe || !GC.inFinalizer())) 400 aa.shrink(); 401 return true; 402 } 403 return false; 404 } 405 } 406 407 private size_t mix(size_t h) @safe pure nothrow @nogc 408 { 409 // final mix function of MurmurHash2 410 enum m = 0x5bd1e995; 411 h ^= h >> 13; 412 h *= m; 413 h ^= h >> 15; 414 return h; 415 } 416 417 private size_t calcHash(K)(in K pkey) 418 { 419 immutable hash = hashOf(pkey); 420 // highest bit is set to distinguish empty/deleted from filled buckets 421 return mix(hash) | HASH_FILLED_MARK; 422 } 423 424 auto asAA(K, V)(Hash!(K, V) hash) @trusted 425 { 426 if(hash.aa) 427 { 428 // shore up any differences in implementation, unless we are running at 429 // compile time. 430 if(__ctfe) 431 { 432 // need to build the AA from the hash 433 V[K] result; 434 foreach(e; hash[]) 435 { 436 result[e.key] = e.value; 437 } 438 return result; 439 } 440 if(hash.aa.entryTI is null) 441 // needs to be set 442 hash.aa.entryTI = typeid(hash.Entry); 443 444 // use a reinterpret cast. 445 return *cast(V[K]*)&hash; 446 } 447 return V[K].init; 448 } 449 450 unittest { 451 import std.stdio; 452 auto buildAAAtCompiletime() 453 { 454 Hash!(string, int) h = ["hello": 5]; 455 //h["hello"] = 5; 456 return h; 457 } 458 static h = buildAAAtCompiletime(); 459 auto aa = h.asAA; 460 writeln(aa); 461 h["there"] = 4; 462 aa["D is the best"] = 3; 463 writeln(aa); 464 465 aa = null; 466 aa["one"] = 1; 467 aa["two"] = 2; 468 aa["three"] = 3; 469 h = aa; 470 writeln(h[]); 471 writeln(aa); 472 foreach(k; h.byKey) 473 assert(h[k] == aa[k]); 474 475 h = h; // ensure assignment works. 476 Hash!(string, int) h2 = h; // ensure construction works; 477 h.remove("one"); 478 writeln(aa); 479 writeln(h[]); 480 import std.exception; 481 import core.exception; 482 assertThrown!RangeError(h2["four"]); 483 }