=== modified file 'Makefile.in' --- Makefile.in 2011-03-14 01:01:37 +0000 +++ Makefile.in 2011-03-14 02:19:21 +0000 @@ -236,7 +236,8 @@ # DO NOT DELETE THIS LINE -- make depend depends on it. -adler32.o zutil.o: zutil.h zlib.h zconf.h +adler32.o: adler32.c zutil.h zlib.h zconf.h +zutil.o: zutil.h zlib.h zconf.h gzclose.o gzlib.o gzread.o gzwrite.o: zlib.h zconf.h gzguts.h compress.o example.o minigzip.o uncompr.o: zlib.h zconf.h crc32.o: zutil.h zlib.h zconf.h crc32.h @@ -246,7 +247,8 @@ inftrees.o: zutil.h zlib.h zconf.h inftrees.h trees.o: deflate.h zutil.h zlib.h zconf.h trees.h -adler32.lo zutil.lo: zutil.h zlib.h zconf.h +adler32.lo: adler32.c zutil.h zlib.h zconf.h +zutil.lo: zutil.h zlib.h zconf.h gzclose.lo gzlib.lo gzread.lo gzwrite.lo: zlib.h zconf.h gzguts.h compress.lo example.lo minigzip.lo uncompr.lo: zlib.h zconf.h crc32.lo: zutil.h zlib.h zconf.h crc32.h === modified file 'adler32.c' --- adler32.c 2011-03-14 01:01:37 +0000 +++ adler32.c 2011-03-15 00:25:48 +0000 @@ -9,6 +9,24 @@ #define local static +#define GCC_VERSION_GE(x) ((__GNUC__-0) * 100 + __GNUC_MINOR__-0 >= x) + +#if GCC_VERSION_GE(301) +/* a noinline macro sometimes leaks out of old kernel headers; drop it and define our own */ +# undef noinline +# define noinline __attribute__((__noinline__)) +#else +# ifndef noinline +# define noinline +# endif +#endif + +#if GCC_VERSION_GE(301) +# define GCC_ATTR_UNUSED_PARAM __attribute__((__unused__)) +#else +# define GCC_ATTR_UNUSED_PARAM +#endif + local uLong adler32_combine_(uLong adler1, uLong adler2, z_off64_t len2); #define BASE 65521UL /* largest prime smaller than 65536 */ @@ -23,7 +41,9 @@ /* use NO_DIVIDE if your processor does not do division in hardware */ #ifdef NO_DIVIDE -# define MOD(a) \ +/* use NO_SHIFT if your processor implements shifts by more than one bit as a loop */ +# ifdef NO_SHIFT +# define reduce_full(a) \ do { \ if (a >= (BASE << 16)) a -= (BASE << 16); \ if (a >= (BASE << 15)) a -= (BASE << 15); \ @@ -43,7 +63,7 @@ if (a >= (BASE << 1)) a -= (BASE << 1); \ if (a >= BASE) a -= BASE; \ } while 
(0) -# define MOD4(a) \ +# define reduce_4(a) \ do { \ if (a >= (BASE << 4)) a -= (BASE << 4); \ if (a >= (BASE << 3)) a -= (BASE << 3); \ @@ -51,13 +71,90 @@ if (a >= (BASE << 1)) a -= (BASE << 1); \ if (a >= BASE) a -= BASE; \ } while (0) +# define reduce(a) reduce_4(a) +# else /* !NO_SHIFT: 65536 mod BASE == 15, so each pass folds a into 15 * (a >> 16) + (a & 0xffff) */ +# define reduce_full(a) \ + do { \ + unsigned long b = a & 0x0000ffff; \ + a >>= 16; \ + b -= a; \ + a <<= 4; \ + a += b; \ + } while(a >= BASE) +# define reduce_4(a) \ + do { \ + unsigned long b = a & 0x0000ffff; \ + a >>= 16; \ + b -= a; \ + a <<= 4; \ + a += b; \ + a = a >= BASE ? a - BASE : a; \ + } while(0) +# define reduce(a) \ + do { \ + unsigned long b = a & 0x0000ffff; \ + a >>= 16; \ + b -= a; \ + a <<= 4; \ + a += b; \ + } while(0) +# endif /* NO_SHIFT */ #else -# define MOD(a) a %= BASE -# define MOD4(a) a %= BASE -#endif - -/* ========================================================================= */ -uLong ZEXPORT adler32(adler, buf, len) +# define reduce_full(a) a %= BASE +# define reduce_4(a) a %= BASE +# define reduce(a) a %= BASE +#endif /* NO_DIVIDE */ + +#ifndef MIN_WORK /* adler32() sends lengths below MIN_WORK to the plain byte loop in adler32_common() */ +# define MIN_WORK 16 +#endif + +/* ========================================================================= */ +local noinline uLong adler32_1(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len GCC_ATTR_UNUSED_PARAM; /* not read: only buf[0] is consumed */ +{ + unsigned long sum2; + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + adler += buf[0]; + if (adler >= BASE) + adler -= BASE; + sum2 += adler; + if (sum2 >= BASE) + sum2 -= BASE; + return adler | (sum2 << 16); +} + +/* ========================================================================= */ +local noinline uLong adler32_common(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len; +{ + unsigned long sum2; + + /* split Adler-32 into component sums */ + sum2 = (adler >> 16) & 0xffff; + adler &= 0xffff; + + while (len--) { + adler += *buf++; + sum2 += adler; + } + if (adler >= BASE) + adler -= BASE; + reduce_4(sum2); /* only added so many 
BASE's */ + return adler | (sum2 << 16); +} + +/* ========================================================================= */ +#ifndef HAVE_ADLER32_VEC /* NOTE(review): presumably defined when another adler32_vec is supplied elsewhere -- confirm */ +local noinline uLong adler32_vec(adler, buf, len) uLong adler; const Bytef *buf; uInt len; @@ -69,33 +166,6 @@ sum2 = (adler >> 16) & 0xffff; adler &= 0xffff; - /* in case user likes doing a byte at a time, keep it fast */ - if (len == 1) { - adler += buf[0]; - if (adler >= BASE) - adler -= BASE; - sum2 += adler; - if (sum2 >= BASE) - sum2 -= BASE; - return adler | (sum2 << 16); - } - - /* initial Adler-32 value (deferred check for len == 1 speed) */ - if (buf == Z_NULL) - return 1L; - - /* in case short lengths are provided, keep it somewhat fast */ - if (len < 16) { - while (len--) { - adler += *buf++; - sum2 += adler; - } - if (adler >= BASE) - adler -= BASE; - MOD4(sum2); /* only added so many BASE's */ - return adler | (sum2 << 16); - } - /* do length NMAX blocks -- requires just one modulo operation */ while (len >= NMAX) { len -= NMAX; @@ -104,8 +174,8 @@ DO16(buf); /* 16 sums unrolled */ buf += 16; } while (--n); - MOD(adler); - MOD(sum2); + reduce_full(adler); + reduce_full(sum2); } /* do remaining bytes (less than NMAX, still just one modulo) */ @@ -119,13 +189,35 @@ adler += *buf++; sum2 += adler; } - MOD(adler); - MOD(sum2); + reduce_full(adler); + reduce_full(sum2); } /* return recombined sums */ return adler | (sum2 << 16); } +#endif /* !HAVE_ADLER32_VEC */ + +/* ========================================================================= */ +uLong ZEXPORT adler32(adler, buf, len) + uLong adler; + const Bytef *buf; + uInt len; +{ + /* in case user likes doing a byte at a time, keep it fast */ + if (len == 1) + return adler32_1(adler, buf, len); /* should create a fast tailcall */ + + /* initial Adler-32 value (deferred check for len == 1 speed) */ + if (buf == Z_NULL) + return 1L; + + /* in case short lengths are provided, keep it somewhat fast */ + if (len < MIN_WORK) + return adler32_common(adler, buf, len); + + return 
adler32_vec(adler, buf, len); +} /* ========================================================================= */ local uLong adler32_combine_(adler1, adler2, len2) @@ -141,7 +233,7 @@ rem = (unsigned)(len2 % BASE); sum1 = adler1 & 0xffff; sum2 = rem * sum1; - MOD(sum2); + reduce_full(sum2); sum1 += (adler2 & 0xffff) + BASE - 1; sum2 += ((adler1 >> 16) & 0xffff) + ((adler2 >> 16) & 0xffff) + BASE - rem; if (sum1 >= BASE) sum1 -= BASE;