=== modified file 'deflate.c'
--- deflate.c	2011-03-14 01:01:37 +0000
+++ deflate.c	2011-05-08 14:30:48 +0000
@@ -1060,6 +1060,60 @@
 /* For 80x86 and 680x0, an optimized version will be provided in match.asm or
  * match.S. The code will be functionally equivalent.
  */
+#if defined(__x86_64__) && defined(__GNUC__)
+/* SSE2 fast path for the inner comparison loop of longest_match().
+ * LM_LEN_REMAINDER holds while a 16-byte load at scan ends below strend.
+ * LM_INNER_CHECK compares scan and match 16 bytes at a time and leaves
+ * both pointers on the first differing byte, or scan at strend when no
+ * difference is found.
+ */
+# define LM_LEN_REMAINDER (scan < strend-16)
+# define LM_INNER_CHECK \
+    unsigned int t; /* mismatch mask, then offset of first difference */ \
+    asm ("movdqu (%0), %%xmm0\n\t" \
+         "movdqu (%1), %%xmm1\n\t" \
+         "pcmpeqb %%xmm0, %%xmm1\n\t" \
+         "pmovmskb %%xmm1, %2\n\t" \
+         "xor $0x0000ffff, %2\n\t" \
+         "jnz 2f\n" \
+         "1:\n\t" \
+         "add $16, %0\n\t" \
+         "add $16, %1\n\t" \
+         "cmp %3, %0\n\t" \
+         "ja 2f\n\t" \
+         "movdqu (%0), %%xmm0\n\t" \
+         "movdqu (%1), %%xmm1\n\t" \
+         "pcmpeqb %%xmm0, %%xmm1\n\t" \
+         "pmovmskb %%xmm1, %2\n\t" \
+         "xor $0x0000ffff, %2\n\t" \
+         "jz 1b\n\t" \
+         "2:" \
+         : /* %0 */ "=r" (scan), \
+           /* %1 */ "=r" (match), \
+           /* %2 */ "=&r" (t) \
+         : /* %3 */ "r" (strend-16), \
+                    "0" (scan), \
+                    "1" (match) \
+         : "xmm0", "xmm1", "cc", "memory" \
+         ); \
+    if (t) { \
+        asm ("bsf %1, %0" : "=r" (t) : "r" (t)); \
+        scan += t; \
+        match += t; \
+    } else { \
+        /* *scan is not compared yet: finish the tail bytewise. */ \
+        while (scan < strend && *scan == *match) { \
+            scan++, match++; \
+        } \
+    }
+#endif
+
+#ifndef LM_LEN_REMAINDER
+/* No SIMD path on this target: always use the portable loop. */
+# define LM_LEN_REMAINDER 0
+# define LM_INNER_CHECK
+#endif
+
 local uInt longest_match(s, cur_match)
     deflate_state *s;
     IPos cur_match;                             /* current match */
@@ -1168,15 +1222,19 @@
         scan += 2, match++;
         Assert(*scan == *match, "match[2]?");
 
-        /* We check for insufficient lookahead only every 8th comparison;
-         * the 256th check will be made at strstart+258.
-         */
-        do {
-        } while (*++scan == *++match && *++scan == *++match &&
-                 *++scan == *++match && *++scan == *++match &&
-                 *++scan == *++match && *++scan == *++match &&
-                 *++scan == *++match && *++scan == *++match &&
-                 scan < strend);
+        if (LM_LEN_REMAINDER) {
+            LM_INNER_CHECK
+        } else {
+            /* We check for insufficient lookahead only every 8th comparison;
+             * the 256th check will be made at strstart+258.
+             */
+            do {
+            } while (*++scan == *++match && *++scan == *++match &&
+                     *++scan == *++match && *++scan == *++match &&
+                     *++scan == *++match && *++scan == *++match &&
+                     *++scan == *++match && *++scan == *++match &&
+                     scan < strend);
+        }
 
         Assert(scan <= s->window+(unsigned)(s->window_size-1), "wild scan");
 