Back to home page

Redis cross reference

 
 

    


0001 /*
0002  * Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
0003  * 
0004  * Redistribution and use in source and binary forms, with or without modifica-
0005  * tion, are permitted provided that the following conditions are met:
0006  * 
0007  *   1.  Redistributions of source code must retain the above copyright notice,
0008  *       this list of conditions and the following disclaimer.
0009  * 
0010  *   2.  Redistributions in binary form must reproduce the above copyright
0011  *       notice, this list of conditions and the following disclaimer in the
0012  *       documentation and/or other materials provided with the distribution.
0013  * 
0014  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
0015  * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
0016  * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO
0017  * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
0018  * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
0019  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
0020  * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
0021  * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
0022  * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
0023  * OF THE POSSIBILITY OF SUCH DAMAGE.
0024  *
0025  * Alternatively, the contents of this file may be used under the terms of
0026  * the GNU General Public License ("GPL") version 2 or any later version,
0027  * in which case the provisions of the GPL are applicable instead of
0028  * the above. If you wish to allow the use of your version of this file
0029  * only under the terms of the GPL and not to allow others to use your
0030  * version of this file under the BSD license, indicate your decision
0031  * by deleting the provisions above and replace them with the notice
0032  * and other provisions required by the GPL. If you do not delete the
0033  * provisions above, a recipient may use your version of this file under
0034  * either the BSD or the GPL.
0035  */
0036 
0037 #include "lzfP.h"
0038 
0039 #define HSIZE (1 << (HLOG))
0040 
0041 /*
0042  * don't play with this unless you benchmark!
0043  * decompression is not dependent on the hash function
0044  * the hashing function might seem strange, just believe me
0045  * it works ;)
0046  */
0047 #ifndef FRST
0048 # define FRST(p) (((p[0]) << 8) | p[1])
0049 # define NEXT(v,p) (((v) << 8) | p[2])
0050 # if ULTRA_FAST
0051 #  define IDX(h) ((( h             >> (3*8 - HLOG)) - h  ) & (HSIZE - 1))
0052 # elif VERY_FAST
0053 #  define IDX(h) ((( h             >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
0054 # else
0055 #  define IDX(h) ((((h ^ (h << 5)) >> (3*8 - HLOG)) - h*5) & (HSIZE - 1))
0056 # endif
0057 #endif
0058 /*
0059  * IDX works because it is very similar to a multiplicative hash, e.g.
0060  * ((h * 57321 >> (3*8 - HLOG)) & (HSIZE - 1))
0061  * the latter is also quite fast on newer CPUs, and compresses similarly.
0062  *
0063  * the next one is also quite good, albeit slow ;)
0064  * (int)(cos(h & 0xffffff) * 1e6)
0065  */
0066 
0067 #if 0
0068 /* original lzv-like hash function, much worse and thus slower */
0069 # define FRST(p) (p[0] << 5) ^ p[1]
0070 # define NEXT(v,p) ((v) << 5) ^ p[2]
0071 # define IDX(h) ((h) & (HSIZE - 1))
0072 #endif
0073 
0074 #define        MAX_LIT        (1 <<  5)
0075 #define        MAX_OFF        (1 << 13)
0076 #define        MAX_REF        ((1 << 8) + (1 << 3))
0077 
0078 #if __GNUC__ >= 3
0079 # define expect(expr,value)         __builtin_expect ((expr),(value))
0080 # define inline                     inline
0081 #else
0082 # define expect(expr,value)         (expr)
0083 # define inline                     static
0084 #endif
0085 
0086 #define expect_false(expr) expect ((expr) != 0, 0)
0087 #define expect_true(expr)  expect ((expr) != 0, 1)
0088 
0089 /*
0090  * compressed format
0091  *
0092  * 000LLLLL <L+1>    ; literal
0093  * LLLooooo oooooooo ; backref L
0094  * 111ooooo LLLLLLLL oooooooo ; backref L+7
0095  *
0096  */
0097 
0098 unsigned int
0099 lzf_compress (const void *const in_data, unsigned int in_len,
0100           void *out_data, unsigned int out_len
0101 #if LZF_STATE_ARG
0102               , LZF_STATE htab
0103 #endif
0104               )
0105 {
0106 #if !LZF_STATE_ARG
0107   LZF_STATE htab;
0108 #endif
0109   const u8 **hslot;
0110   const u8 *ip = (const u8 *)in_data;
0111         u8 *op = (u8 *)out_data;
0112   const u8 *in_end  = ip + in_len;
0113         u8 *out_end = op + out_len;
0114   const u8 *ref;
0115 
0116   /* off requires a type wide enough to hold a general pointer difference.
0117    * ISO C doesn't have that (size_t might not be enough and ptrdiff_t only
0118    * works for differences within a single object). We also assume that no
0119    * no bit pattern traps. Since the only platform that is both non-POSIX
0120    * and fails to support both assumptions is windows 64 bit, we make a
0121    * special workaround for it.
0122    */
0123 #if defined (WIN32) && defined (_M_X64)
0124   unsigned _int64 off; /* workaround for missing POSIX compliance */
0125 #else
0126   unsigned long off;
0127 #endif
0128   unsigned int hval;
0129   int lit;
0130 
0131   if (!in_len || !out_len)
0132     return 0;
0133 
0134 #if INIT_HTAB
0135   memset (htab, 0, sizeof (htab));
0136 # if 0
0137   for (hslot = htab; hslot < htab + HSIZE; hslot++)
0138     *hslot++ = ip;
0139 # endif
0140 #endif
0141 
0142   lit = 0; op++; /* start run */
0143 
0144   hval = FRST (ip);
0145   while (ip < in_end - 2)
0146     {
0147       hval = NEXT (hval, ip);
0148       hslot = htab + IDX (hval);
0149       ref = *hslot; *hslot = ip;
0150 
0151       if (1
0152 #if INIT_HTAB
0153           && ref < ip /* the next test will actually take care of this, but this is faster */
0154 #endif
0155           && (off = ip - ref - 1) < MAX_OFF
0156           && ip + 4 < in_end
0157           && ref > (u8 *)in_data
0158 #if STRICT_ALIGN
0159           && ref[0] == ip[0]
0160           && ref[1] == ip[1]
0161           && ref[2] == ip[2]
0162 #else
0163           && *(u16 *)ref == *(u16 *)ip
0164           && ref[2] == ip[2]
0165 #endif
0166         )
0167         {
0168           /* match found at *ref++ */
0169           unsigned int len = 2;
0170           unsigned int maxlen = in_end - ip - len;
0171           maxlen = maxlen > MAX_REF ? MAX_REF : maxlen;
0172 
0173           op [- lit - 1] = lit - 1; /* stop run */
0174           op -= !lit; /* undo run if length is zero */
0175 
0176           if (expect_false (op + 3 + 1 >= out_end))
0177             return 0;
0178 
0179           for (;;)
0180             {
0181               if (expect_true (maxlen > 16))
0182                 {
0183                   len++; if (ref [len] != ip [len]) break;
0184                   len++; if (ref [len] != ip [len]) break;
0185                   len++; if (ref [len] != ip [len]) break;
0186                   len++; if (ref [len] != ip [len]) break;
0187 
0188                   len++; if (ref [len] != ip [len]) break;
0189                   len++; if (ref [len] != ip [len]) break;
0190                   len++; if (ref [len] != ip [len]) break;
0191                   len++; if (ref [len] != ip [len]) break;
0192 
0193                   len++; if (ref [len] != ip [len]) break;
0194                   len++; if (ref [len] != ip [len]) break;
0195                   len++; if (ref [len] != ip [len]) break;
0196                   len++; if (ref [len] != ip [len]) break;
0197 
0198                   len++; if (ref [len] != ip [len]) break;
0199                   len++; if (ref [len] != ip [len]) break;
0200                   len++; if (ref [len] != ip [len]) break;
0201                   len++; if (ref [len] != ip [len]) break;
0202                 }
0203 
0204               do
0205                 len++;
0206               while (len < maxlen && ref[len] == ip[len]);
0207 
0208               break;
0209             }
0210 
0211           len -= 2; /* len is now #octets - 1 */
0212           ip++;
0213 
0214           if (len < 7)
0215             {
0216               *op++ = (off >> 8) + (len << 5);
0217             }
0218           else
0219             {
0220               *op++ = (off >> 8) + (  7 << 5);
0221               *op++ = len - 7;
0222             }
0223 
0224           *op++ = off;
0225           lit = 0; op++; /* start run */
0226 
0227           ip += len + 1;
0228 
0229           if (expect_false (ip >= in_end - 2))
0230             break;
0231 
0232 #if ULTRA_FAST || VERY_FAST
0233           --ip;
0234 # if VERY_FAST && !ULTRA_FAST
0235           --ip;
0236 # endif
0237           hval = FRST (ip);
0238 
0239           hval = NEXT (hval, ip);
0240           htab[IDX (hval)] = ip;
0241           ip++;
0242 
0243 # if VERY_FAST && !ULTRA_FAST
0244           hval = NEXT (hval, ip);
0245           htab[IDX (hval)] = ip;
0246           ip++;
0247 # endif
0248 #else
0249           ip -= len + 1;
0250 
0251           do
0252             {
0253               hval = NEXT (hval, ip);
0254               htab[IDX (hval)] = ip;
0255               ip++;
0256             }
0257           while (len--);
0258 #endif
0259         }
0260       else
0261         {
0262           /* one more literal byte we must copy */
0263           if (expect_false (op >= out_end))
0264             return 0;
0265 
0266           lit++; *op++ = *ip++;
0267 
0268           if (expect_false (lit == MAX_LIT))
0269             {
0270               op [- lit - 1] = lit - 1; /* stop run */
0271               lit = 0; op++; /* start run */
0272             }
0273         }
0274     }
0275 
0276   if (op + 3 > out_end) /* at most 3 bytes can be missing here */
0277     return 0;
0278 
0279   while (ip < in_end)
0280     {
0281       lit++; *op++ = *ip++;
0282 
0283       if (expect_false (lit == MAX_LIT))
0284         {
0285           op [- lit - 1] = lit - 1; /* stop run */
0286           lit = 0; op++; /* start run */
0287         }
0288     }
0289 
0290   op [- lit - 1] = lit - 1; /* end run */
0291   op -= !lit; /* undo run if length is zero */
0292 
0293   return op - (u8 *)out_data;
0294 }
0295