buildbuck.c (2837B)
1 #include "stdinc.h" 2 #include "dat.h" 3 #include "fns.h" 4 5 /* 6 * An IEStream is a sorted list of index entries. 7 */ 8 struct IEStream 9 { 10 Part *part; 11 u64int off; /* read position within part */ 12 u64int n; /* number of valid ientries left to read */ 13 u32int size; /* allocated space in buffer */ 14 u8int *buf; 15 u8int *pos; /* current place in buffer */ 16 u8int *epos; /* end of valid buffer contents */ 17 }; 18 19 IEStream* 20 initiestream(Part *part, u64int off, u64int clumps, u32int size) 21 { 22 IEStream *ies; 23 24 /* out of memory? */ 25 ies = MKZ(IEStream); 26 ies->buf = MKN(u8int, size); 27 ies->epos = ies->buf; 28 ies->pos = ies->epos; 29 ies->off = off; 30 ies->n = clumps; 31 ies->size = size; 32 ies->part = part; 33 return ies; 34 } 35 36 void 37 freeiestream(IEStream *ies) 38 { 39 if(ies == nil) 40 return; 41 free(ies->buf); 42 free(ies); 43 } 44 45 /* 46 * Return the next IEntry (still packed) in the stream. 47 */ 48 static u8int* 49 peekientry(IEStream *ies) 50 { 51 u32int n, nn; 52 53 n = ies->epos - ies->pos; 54 if(n < IEntrySize){ 55 memmove(ies->buf, ies->pos, n); 56 ies->epos = &ies->buf[n]; 57 ies->pos = ies->buf; 58 nn = ies->size; 59 if(nn > ies->n * IEntrySize) 60 nn = ies->n * IEntrySize; 61 nn -= n; 62 if(nn == 0) 63 return nil; 64 //fprint(2, "peek %d from %llud into %p\n", nn, ies->off, ies->epos); 65 if(readpart(ies->part, ies->off, ies->epos, nn) < 0){ 66 seterr(EOk, "can't read sorted index entries: %r"); 67 return nil; 68 } 69 ies->epos += nn; 70 ies->off += nn; 71 } 72 return ies->pos; 73 } 74 75 /* 76 * Compute the bucket number for the given IEntry. 77 * Knows that the score is the first thing in the packed 78 * representation. 79 */ 80 static u32int 81 iebuck(Index *ix, u8int *b, IBucket *ib, IEStream *ies) 82 { 83 USED(ies); 84 USED(ib); 85 return hashbits(b, 32) / ix->div; 86 } 87 88 /* 89 * Fill ib with the next bucket in the stream. 90 */ 91 u32int 92 buildbucket(Index *ix, IEStream *ies, IBucket *ib, uint maxdata) 93 { 94 IEntry ie1, ie2; 95 u8int *b; 96 u32int buck; 97 98 buck = TWID32; 99 ib->n = 0; 100 while(ies->n){ 101 b = peekientry(ies); 102 if(b == nil) 103 return TWID32; 104 /* fprint(2, "b=%p ies->n=%lld ib.n=%d buck=%d score=%V\n", b, ies->n, ib->n, iebuck(ix, b, ib, ies), b); */ 105 if(ib->n == 0) 106 buck = iebuck(ix, b, ib, ies); 107 else{ 108 if(buck != iebuck(ix, b, ib, ies)) 109 break; 110 if(ientrycmp(&ib->data[(ib->n - 1)* IEntrySize], b) == 0){ 111 /* 112 * guess that the larger address is the correct one to use 113 */ 114 unpackientry(&ie1, &ib->data[(ib->n - 1)* IEntrySize]); 115 unpackientry(&ie2, b); 116 seterr(EOk, "duplicate index entry for score=%V type=%d", ie1.score, ie1.ia.type); 117 ib->n--; 118 if(ie1.ia.addr > ie2.ia.addr) 119 memmove(b, &ib->data[ib->n * IEntrySize], IEntrySize); 120 } 121 } 122 if((ib->n+1)*IEntrySize > maxdata){ 123 seterr(EOk, "bucket overflow"); 124 return TWID32; 125 } 126 memmove(&ib->data[ib->n * IEntrySize], b, IEntrySize); 127 ib->n++; 128 ies->n--; 129 ies->pos += IEntrySize; 130 } 131 return buck; 132 }