#include #include #include #include #include #include #include #include #include /* gcc specific */ typedef unsigned int uint128_t __attribute__((__mode__(TI))); #define hashmult 13493690561280548289ULL /* #define hashmult 2654435761 */ struct block { char *addr; size_t len; }; struct block slurp(char *filename) { int fd=open(filename,O_RDONLY); char *s; struct stat statbuf; struct block r; if (fd==-1) err(1, "%s", filename); if (fstat(fd, &statbuf)==-1) err(1, "%s", filename); /* allocates an extra 7 bytes to allow full-word access to the last byte */ s = mmap(NULL,statbuf.st_size+7, PROT_READ|PROT_WRITE,MAP_FILE|MAP_PRIVATE,fd, 0); if (s==MAP_FAILED) err(1, "%s", filename); r.addr = s; r.len = statbuf.st_size; return r; } #define HASHSIZE (1<<21) struct hashnode { int value; char keylen; char key[59]; }; struct hashnode ht[HASHSIZE+10]; unsigned long hash(char *addr, size_t len) { /* assumptions: 1) unaligned accesses work 2) little-endian 3) 7 bytes beyond last byte can be accessed */ uint128_t x=0, w; size_t i, shift; for (i=0; i+7>64); } void insert(char *keyaddr, size_t keylen, int value) { struct hashnode *l=&ht[hash(keyaddr, keylen) & (HASHSIZE-1)]; while (l->keylen != 0) l++; l->value = value; l->keylen = keylen+1; memcpy(l->key, keyaddr, keylen); } int lookup(char *keyaddr, size_t keylen) { struct hashnode *l=&ht[hash(keyaddr, keylen) & (HASHSIZE-1)]; while (l->keylen !=0) { if (keylen == l->keylen-1 && memcmp(keyaddr, l->key, keylen)==0) { /* printf("%d\n",l->value);*/ return l->value; } l++; } return -1; } /* int main() { char a[40]="abcdefghijklmnopqrstuvwxyz1234567890"; char b[40]="abcdefghijklmnopqrstuvwxyz1234567890"; int i; for (i=32; i>=0; i--) { a[i] = '$'; b[i] = '%'; printf("%ld,%ld\n",hash(a,i),hash(b,i)); if (hash(a,i)!=hash(b,i)) { fprintf(stderr, "hash error\n"); exit(1); } } return 0; } */ int main(int argc, char *argv[]) { struct block input1, input2; char *p, *nextp, *endp; unsigned int i; unsigned long r=0; if (argc!=3) { fprintf(stderr, "usage: %s \n", argv[0]); exit(1); } input1 = slurp(argv[1]); input2 = slurp(argv[2]); for (p=input1.addr, endp=input1.addr+input1.len, i=0; pnext) count++; sum += count; sumsq += count*count; } printf("sum=%ld, sumsq=%ld, hashlen=%ld, chisq=%f\n", sum, sumsq, HASHSIZE, ((double)sumsq)*HASHSIZE/sum-sum); /* expected value for chisq is ~HASHSIZE */ #else for (i=0; i<10; i++) { for (p=input2.addr, endp=input2.addr+input2.len; p>32); p = nextp+1; } } printf("%ld, ht=%p\n",r,ht); #endif return 0; }