Back to home page

Redis cross reference

 
 

    


0001 /*
0002  * Copyright (c) 2009-2012, Pieter Noordhuis <pcnoordhuis at gmail dot com>
0003  * Copyright (c) 2009-2012, Salvatore Sanfilippo <antirez at gmail dot com>
0004  * All rights reserved.
0005  *
0006  * Redistribution and use in source and binary forms, with or without
0007  * modification, are permitted provided that the following conditions are met:
0008  *
0009  *   * Redistributions of source code must retain the above copyright notice,
0010  *     this list of conditions and the following disclaimer.
0011  *   * Redistributions in binary form must reproduce the above copyright
0012  *     notice, this list of conditions and the following disclaimer in the
0013  *     documentation and/or other materials provided with the distribution.
0014  *   * Neither the name of Redis nor the names of its contributors may be used
0015  *     to endorse or promote products derived from this software without
0016  *     specific prior written permission.
0017  *
0018  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
0019  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
0020  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
0021  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
0022  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
0023  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
0024  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
0025  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
0026  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
0027  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
0028  * POSSIBILITY OF SUCH DAMAGE.
0029  */
0030 
0031 #include <stdio.h>
0032 #include <stdlib.h>
0033 #include <string.h>
0034 #include "intset.h"
0035 #include "zmalloc.h"
0036 #include "endianconv.h"
0037 
0038 /* Note that these encodings are ordered, so:
0039  * INTSET_ENC_INT16 < INTSET_ENC_INT32 < INTSET_ENC_INT64. */
0040 #define INTSET_ENC_INT16 (sizeof(int16_t))
0041 #define INTSET_ENC_INT32 (sizeof(int32_t))
0042 #define INTSET_ENC_INT64 (sizeof(int64_t))
0043 
0044 /* Return the required encoding for the provided value. */
0045 static uint8_t _intsetValueEncoding(int64_t v) {
0046     if (v < INT32_MIN || v > INT32_MAX)
0047         return INTSET_ENC_INT64;
0048     else if (v < INT16_MIN || v > INT16_MAX)
0049         return INTSET_ENC_INT32;
0050     else
0051         return INTSET_ENC_INT16;
0052 }
0053 
0054 /* Return the value at pos, given an encoding. */
0055 static int64_t _intsetGetEncoded(intset *is, int pos, uint8_t enc) {
0056     int64_t v64;
0057     int32_t v32;
0058     int16_t v16;
0059 
0060     if (enc == INTSET_ENC_INT64) {
0061         memcpy(&v64,((int64_t*)is->contents)+pos,sizeof(v64));
0062         memrev64ifbe(&v64);
0063         return v64;
0064     } else if (enc == INTSET_ENC_INT32) {
0065         memcpy(&v32,((int32_t*)is->contents)+pos,sizeof(v32));
0066         memrev32ifbe(&v32);
0067         return v32;
0068     } else {
0069         memcpy(&v16,((int16_t*)is->contents)+pos,sizeof(v16));
0070         memrev16ifbe(&v16);
0071         return v16;
0072     }
0073 }
0074 
0075 /* Return the value at pos, using the configured encoding. */
0076 static int64_t _intsetGet(intset *is, int pos) {
0077     return _intsetGetEncoded(is,pos,intrev32ifbe(is->encoding));
0078 }
0079 
0080 /* Set the value at pos, using the configured encoding. */
0081 static void _intsetSet(intset *is, int pos, int64_t value) {
0082     uint32_t encoding = intrev32ifbe(is->encoding);
0083 
0084     if (encoding == INTSET_ENC_INT64) {
0085         ((int64_t*)is->contents)[pos] = value;
0086         memrev64ifbe(((int64_t*)is->contents)+pos);
0087     } else if (encoding == INTSET_ENC_INT32) {
0088         ((int32_t*)is->contents)[pos] = value;
0089         memrev32ifbe(((int32_t*)is->contents)+pos);
0090     } else {
0091         ((int16_t*)is->contents)[pos] = value;
0092         memrev16ifbe(((int16_t*)is->contents)+pos);
0093     }
0094 }
0095 
0096 /* Create an empty intset. */
0097 intset *intsetNew(void) {
0098     intset *is = zmalloc(sizeof(intset));
0099     is->encoding = intrev32ifbe(INTSET_ENC_INT16);
0100     is->length = 0;
0101     return is;
0102 }
0103 
0104 /* Resize the intset */
0105 static intset *intsetResize(intset *is, uint32_t len) {
0106     uint32_t size = len*intrev32ifbe(is->encoding);
0107     is = zrealloc(is,sizeof(intset)+size);
0108     return is;
0109 }
0110 
0111 /* Search for the position of "value". Return 1 when the value was found and
0112  * sets "pos" to the position of the value within the intset. Return 0 when
0113  * the value is not present in the intset and sets "pos" to the position
0114  * where "value" can be inserted. */
0115 static uint8_t intsetSearch(intset *is, int64_t value, uint32_t *pos) {
0116     int min = 0, max = intrev32ifbe(is->length)-1, mid = -1;
0117     int64_t cur = -1;
0118 
0119     /* The value can never be found when the set is empty */
0120     if (intrev32ifbe(is->length) == 0) {
0121         if (pos) *pos = 0;
0122         return 0;
0123     } else {
0124         /* Check for the case where we know we cannot find the value,
0125          * but do know the insert position. */
0126         if (value > _intsetGet(is,intrev32ifbe(is->length)-1)) {
0127             if (pos) *pos = intrev32ifbe(is->length);
0128             return 0;
0129         } else if (value < _intsetGet(is,0)) {
0130             if (pos) *pos = 0;
0131             return 0;
0132         }
0133     }
0134 
0135     while(max >= min) {
0136         mid = (min+max)/2;
0137         cur = _intsetGet(is,mid);
0138         if (value > cur) {
0139             min = mid+1;
0140         } else if (value < cur) {
0141             max = mid-1;
0142         } else {
0143             break;
0144         }
0145     }
0146 
0147     if (value == cur) {
0148         if (pos) *pos = mid;
0149         return 1;
0150     } else {
0151         if (pos) *pos = min;
0152         return 0;
0153     }
0154 }
0155 
0156 /* Upgrades the intset to a larger encoding and inserts the given integer. */
0157 static intset *intsetUpgradeAndAdd(intset *is, int64_t value) {
0158     uint8_t curenc = intrev32ifbe(is->encoding);
0159     uint8_t newenc = _intsetValueEncoding(value);
0160     int length = intrev32ifbe(is->length);
0161     int prepend = value < 0 ? 1 : 0;
0162 
0163     /* First set new encoding and resize */
0164     is->encoding = intrev32ifbe(newenc);
0165     is = intsetResize(is,intrev32ifbe(is->length)+1);
0166 
0167     /* Upgrade back-to-front so we don't overwrite values.
0168      * Note that the "prepend" variable is used to make sure we have an empty
0169      * space at either the beginning or the end of the intset. */
0170     while(length--)
0171         _intsetSet(is,length+prepend,_intsetGetEncoded(is,length,curenc));
0172 
0173     /* Set the value at the beginning or the end. */
0174     if (prepend)
0175         _intsetSet(is,0,value);
0176     else
0177         _intsetSet(is,intrev32ifbe(is->length),value);
0178     is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
0179     return is;
0180 }
0181 
0182 static void intsetMoveTail(intset *is, uint32_t from, uint32_t to) {
0183     void *src, *dst;
0184     uint32_t bytes = intrev32ifbe(is->length)-from;
0185     uint32_t encoding = intrev32ifbe(is->encoding);
0186 
0187     if (encoding == INTSET_ENC_INT64) {
0188         src = (int64_t*)is->contents+from;
0189         dst = (int64_t*)is->contents+to;
0190         bytes *= sizeof(int64_t);
0191     } else if (encoding == INTSET_ENC_INT32) {
0192         src = (int32_t*)is->contents+from;
0193         dst = (int32_t*)is->contents+to;
0194         bytes *= sizeof(int32_t);
0195     } else {
0196         src = (int16_t*)is->contents+from;
0197         dst = (int16_t*)is->contents+to;
0198         bytes *= sizeof(int16_t);
0199     }
0200     memmove(dst,src,bytes);
0201 }
0202 
0203 /* Insert an integer in the intset */
0204 intset *intsetAdd(intset *is, int64_t value, uint8_t *success) {
0205     uint8_t valenc = _intsetValueEncoding(value);
0206     uint32_t pos;
0207     if (success) *success = 1;
0208 
0209     /* Upgrade encoding if necessary. If we need to upgrade, we know that
0210      * this value should be either appended (if > 0) or prepended (if < 0),
0211      * because it lies outside the range of existing values. */
0212     if (valenc > intrev32ifbe(is->encoding)) {
0213         /* This always succeeds, so we don't need to curry *success. */
0214         return intsetUpgradeAndAdd(is,value);
0215     } else {
0216         /* Abort if the value is already present in the set.
0217          * This call will populate "pos" with the right position to insert
0218          * the value when it cannot be found. */
0219         if (intsetSearch(is,value,&pos)) {
0220             if (success) *success = 0;
0221             return is;
0222         }
0223 
0224         is = intsetResize(is,intrev32ifbe(is->length)+1);
0225         if (pos < intrev32ifbe(is->length)) intsetMoveTail(is,pos,pos+1);
0226     }
0227 
0228     _intsetSet(is,pos,value);
0229     is->length = intrev32ifbe(intrev32ifbe(is->length)+1);
0230     return is;
0231 }
0232 
0233 /* Delete integer from intset */
0234 intset *intsetRemove(intset *is, int64_t value, int *success) {
0235     uint8_t valenc = _intsetValueEncoding(value);
0236     uint32_t pos;
0237     if (success) *success = 0;
0238 
0239     if (valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,&pos)) {
0240         uint32_t len = intrev32ifbe(is->length);
0241 
0242         /* We know we can delete */
0243         if (success) *success = 1;
0244 
0245         /* Overwrite value with tail and update length */
0246         if (pos < (len-1)) intsetMoveTail(is,pos+1,pos);
0247         is = intsetResize(is,len-1);
0248         is->length = intrev32ifbe(len-1);
0249     }
0250     return is;
0251 }
0252 
0253 /* Determine whether a value belongs to this set */
0254 uint8_t intsetFind(intset *is, int64_t value) {
0255     uint8_t valenc = _intsetValueEncoding(value);
0256     return valenc <= intrev32ifbe(is->encoding) && intsetSearch(is,value,NULL);
0257 }
0258 
0259 /* Return random member */
0260 int64_t intsetRandom(intset *is) {
0261     return _intsetGet(is,rand()%intrev32ifbe(is->length));
0262 }
0263 
0264 /* Sets the value to the value at the given position. When this position is
0265  * out of range the function returns 0, when in range it returns 1. */
0266 uint8_t intsetGet(intset *is, uint32_t pos, int64_t *value) {
0267     if (pos < intrev32ifbe(is->length)) {
0268         *value = _intsetGet(is,pos);
0269         return 1;
0270     }
0271     return 0;
0272 }
0273 
0274 /* Return intset length */
0275 uint32_t intsetLen(intset *is) {
0276     return intrev32ifbe(is->length);
0277 }
0278 
0279 /* Return intset blob size in bytes. */
0280 size_t intsetBlobLen(intset *is) {
0281     return sizeof(intset)+intrev32ifbe(is->length)*intrev32ifbe(is->encoding);
0282 }
0283 
0284 #ifdef INTSET_TEST_MAIN
0285 #include <sys/time.h>
0286 
0287 void intsetRepr(intset *is) {
0288     int i;
0289     for (i = 0; i < intrev32ifbe(is->length); i++) {
0290         printf("%lld\n", (uint64_t)_intsetGet(is,i));
0291     }
0292     printf("\n");
0293 }
0294 
0295 void error(char *err) {
0296     printf("%s\n", err);
0297     exit(1);
0298 }
0299 
0300 void ok(void) {
0301     printf("OK\n");
0302 }
0303 
0304 long long usec(void) {
0305     struct timeval tv;
0306     gettimeofday(&tv,NULL);
0307     return (((long long)tv.tv_sec)*1000000)+tv.tv_usec;
0308 }
0309 
0310 #define assert(_e) ((_e)?(void)0:(_assert(#_e,__FILE__,__LINE__),exit(1)))
0311 void _assert(char *estr, char *file, int line) {
0312     printf("\n\n=== ASSERTION FAILED ===\n");
0313     printf("==> %s:%d '%s' is not true\n",file,line,estr);
0314 }
0315 
0316 intset *createSet(int bits, int size) {
0317     uint64_t mask = (1<<bits)-1;
0318     uint64_t i, value;
0319     intset *is = intsetNew();
0320 
0321     for (i = 0; i < size; i++) {
0322         if (bits > 32) {
0323             value = (rand()*rand()) & mask;
0324         } else {
0325             value = rand() & mask;
0326         }
0327         is = intsetAdd(is,value,NULL);
0328     }
0329     return is;
0330 }
0331 
0332 void checkConsistency(intset *is) {
0333     int i;
0334 
0335     for (i = 0; i < (intrev32ifbe(is->length)-1); i++) {
0336         uint32_t encoding = intrev32ifbe(is->encoding);
0337 
0338         if (encoding == INTSET_ENC_INT16) {
0339             int16_t *i16 = (int16_t*)is->contents;
0340             assert(i16[i] < i16[i+1]);
0341         } else if (encoding == INTSET_ENC_INT32) {
0342             int32_t *i32 = (int32_t*)is->contents;
0343             assert(i32[i] < i32[i+1]);
0344         } else {
0345             int64_t *i64 = (int64_t*)is->contents;
0346             assert(i64[i] < i64[i+1]);
0347         }
0348     }
0349 }
0350 
0351 int main(int argc, char **argv) {
0352     uint8_t success;
0353     int i;
0354     intset *is;
0355     sranddev();
0356 
0357     printf("Value encodings: "); {
0358         assert(_intsetValueEncoding(-32768) == INTSET_ENC_INT16);
0359         assert(_intsetValueEncoding(+32767) == INTSET_ENC_INT16);
0360         assert(_intsetValueEncoding(-32769) == INTSET_ENC_INT32);
0361         assert(_intsetValueEncoding(+32768) == INTSET_ENC_INT32);
0362         assert(_intsetValueEncoding(-2147483648) == INTSET_ENC_INT32);
0363         assert(_intsetValueEncoding(+2147483647) == INTSET_ENC_INT32);
0364         assert(_intsetValueEncoding(-2147483649) == INTSET_ENC_INT64);
0365         assert(_intsetValueEncoding(+2147483648) == INTSET_ENC_INT64);
0366         assert(_intsetValueEncoding(-9223372036854775808ull) == INTSET_ENC_INT64);
0367         assert(_intsetValueEncoding(+9223372036854775807ull) == INTSET_ENC_INT64);
0368         ok();
0369     }
0370 
0371     printf("Basic adding: "); {
0372         is = intsetNew();
0373         is = intsetAdd(is,5,&success); assert(success);
0374         is = intsetAdd(is,6,&success); assert(success);
0375         is = intsetAdd(is,4,&success); assert(success);
0376         is = intsetAdd(is,4,&success); assert(!success);
0377         ok();
0378     }
0379 
0380     printf("Large number of random adds: "); {
0381         int inserts = 0;
0382         is = intsetNew();
0383         for (i = 0; i < 1024; i++) {
0384             is = intsetAdd(is,rand()%0x800,&success);
0385             if (success) inserts++;
0386         }
0387         assert(intrev32ifbe(is->length) == inserts);
0388         checkConsistency(is);
0389         ok();
0390     }
0391 
0392     printf("Upgrade from int16 to int32: "); {
0393         is = intsetNew();
0394         is = intsetAdd(is,32,NULL);
0395         assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
0396         is = intsetAdd(is,65535,NULL);
0397         assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
0398         assert(intsetFind(is,32));
0399         assert(intsetFind(is,65535));
0400         checkConsistency(is);
0401 
0402         is = intsetNew();
0403         is = intsetAdd(is,32,NULL);
0404         assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
0405         is = intsetAdd(is,-65535,NULL);
0406         assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
0407         assert(intsetFind(is,32));
0408         assert(intsetFind(is,-65535));
0409         checkConsistency(is);
0410         ok();
0411     }
0412 
0413     printf("Upgrade from int16 to int64: "); {
0414         is = intsetNew();
0415         is = intsetAdd(is,32,NULL);
0416         assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
0417         is = intsetAdd(is,4294967295,NULL);
0418         assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
0419         assert(intsetFind(is,32));
0420         assert(intsetFind(is,4294967295));
0421         checkConsistency(is);
0422 
0423         is = intsetNew();
0424         is = intsetAdd(is,32,NULL);
0425         assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT16);
0426         is = intsetAdd(is,-4294967295,NULL);
0427         assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
0428         assert(intsetFind(is,32));
0429         assert(intsetFind(is,-4294967295));
0430         checkConsistency(is);
0431         ok();
0432     }
0433 
0434     printf("Upgrade from int32 to int64: "); {
0435         is = intsetNew();
0436         is = intsetAdd(is,65535,NULL);
0437         assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
0438         is = intsetAdd(is,4294967295,NULL);
0439         assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
0440         assert(intsetFind(is,65535));
0441         assert(intsetFind(is,4294967295));
0442         checkConsistency(is);
0443 
0444         is = intsetNew();
0445         is = intsetAdd(is,65535,NULL);
0446         assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT32);
0447         is = intsetAdd(is,-4294967295,NULL);
0448         assert(intrev32ifbe(is->encoding) == INTSET_ENC_INT64);
0449         assert(intsetFind(is,65535));
0450         assert(intsetFind(is,-4294967295));
0451         checkConsistency(is);
0452         ok();
0453     }
0454 
0455     printf("Stress lookups: "); {
0456         long num = 100000, size = 10000;
0457         int i, bits = 20;
0458         long long start;
0459         is = createSet(bits,size);
0460         checkConsistency(is);
0461 
0462         start = usec();
0463         for (i = 0; i < num; i++) intsetSearch(is,rand() % ((1<<bits)-1),NULL);
0464         printf("%ld lookups, %ld element set, %lldusec\n",num,size,usec()-start);
0465     }
0466 
0467     printf("Stress add+delete: "); {
0468         int i, v1, v2;
0469         is = intsetNew();
0470         for (i = 0; i < 0xffff; i++) {
0471             v1 = rand() % 0xfff;
0472             is = intsetAdd(is,v1,NULL);
0473             assert(intsetFind(is,v1));
0474 
0475             v2 = rand() % 0xfff;
0476             is = intsetRemove(is,v2,NULL);
0477             assert(!intsetFind(is,v2));
0478         }
0479         checkConsistency(is);
0480         ok();
0481     }
0482 }
0483 #endif