/* a very simplified version of a Forth text interpreter Input: A sequence of the following: 1) '\n' (line feed) followed by a character identifying a wordlist followed by a name: define the name in the wordlist 2) '\t' (tab) followed by a sequence of characters: set the search order; the bottom of the search order is first, the top last 3) ' ' (space) followed by a name: look up the name in the search order; there may be names that are not in the search order. Names do not contain characters <= ' ', and these characters are also not used for identifying wordlists. To verify that these things work, every defined word gets a serial number (starting with 1) and a hash is computed across all found words */ //unsigned int const hashSize = 6553600; static unsigned int const hashSize = 8192; #include #include #include #include #include #include #include #include #include // // Programm // void initTables(); inline void setHashed(size_t tableIndex, char *key, int len, unsigned long value); inline unsigned long getHashed(size_t tableIndex, char* key, size_t len); /* the search order is a sequence of characters, starting with the bottom, represented with the pointer and length here */ unsigned char *order; size_t order_len; /* insert name starting at s+1 (and ending at the next char <=' ') into the wordlist given at *s, associate serialno with it; return the first character after the name */ inline unsigned char *create(unsigned char *s, unsigned long serialno) { unsigned char w=*s++; // count key length size_t i; for (i=0; s[i]>' '; i++) ; setHashed(w, s, i, serialno); return s+i; } /* set the search order to the one specified by starting at s, and ending at the first character <=' '; return the first character after the search order */ inline unsigned char *set_order(unsigned char *s) { order = s; for (order_len=0; s[order_len]>' '; order_len++) ; // printf("%s; ", strndup(order, order_len)); return order+order_len; } /* look up the name starting at s and ending at the next char <=' ' in the search order, storing the serialno of the word in foundp if successful, otherwise 0; return the first character after the name */ inline unsigned char *find(unsigned char *s, unsigned long *foundp) { size_t i; signed long j; for (i=0; s[i]>' '; i++) // Count key lenght ; for (j=order_len-1; j>=0; j--) { if(j == (order_len-1) || order[j] != order[j+1]) { *foundp = getHashed(order[j], s, i); if (*foundp != 0) return s+i; } } *foundp = 0; return s+i; } static unsigned const long k0=0xb64d532aaaaaaad5; unsigned long serialno = 1; unsigned long hashValue = 0; /* process the input starting at s and ending at the first '\0' */ unsigned long process(unsigned char *s) { unsigned long found; while (1) { switch (*s++) { case '\0': return hashValue; case '\n': s=create(s,serialno++); break; case '\t': s=set_order(s); break; case ' ' : { /* unsigned char *s1=s; */ s=find(s,&found); /* fwrite(s1,1,s-s1,stdout); printf(" = %ld\n",found);} */ if (found!=0) { hashValue=(hashValue^found)*k0; hashValue^= (hashValue>>41); } } break; default: fprintf(stderr,"invalid input"); exit(1); } } return 0; } int main(int argc, char* argv[]) { int fd; struct stat buf; unsigned char *s; initTables(); if (argc!=2) { fprintf(stderr,"Usage: %s \n",argv[0]); exit(1); } fd=open(argv[1], O_RDONLY); if (fd==-1) { perror(argv[1]); exit(1); } if (fstat(fd, &buf) == -1) { perror(argv[1]); exit(1); } s = mmap(NULL, buf.st_size+1, PROT_READ|PROT_WRITE, MAP_PRIVATE, fd, 0); s[buf.st_size] = '\0'; printf("%lx\n",process(s)); return 0; } // // Murmurhash2 // #define ROT32(x, y) ((x << y) | (x >> (32 - y))) // avoid effort inline uint32_t murmur3_32(const char *key, uint32_t len, uint32_t seed) { static const uint32_t c1 = 0xcc9e2d51; static const uint32_t c2 = 0x1b873593; static const uint32_t r1 = 15; static const uint32_t r2 = 13; static const uint32_t m = 5; static const uint32_t n = 0xe6546b64; uint32_t hash = seed; const int nblocks = len / 4; const uint32_t *blocks = (const uint32_t *) key; int i; uint32_t k; for (i = 0; i < nblocks; i++) { k = blocks[i]; k *= c1; k = ROT32(k, r1); k *= c2; hash ^= k; hash = ROT32(hash, r2) * m + n; } const uint8_t *tail = (const uint8_t *) (key + nblocks * 4); uint32_t k1 = 0; switch (len & 3) { case 3: k1 ^= tail[2] << 16; case 2: k1 ^= tail[1] << 8; case 1: k1 ^= tail[0]; k1 *= c1; k1 = ROT32(k1, r1); k1 *= c2; hash ^= k1; } hash ^= len; hash ^= (hash >> 16); hash *= 0x85ebca6b; hash ^= (hash >> 13); hash *= 0xc2b2ae35; hash ^= (hash >> 16); return hash; } /* Hash a string for a particular hash table. */ inline unsigned long int hash(char *key, int len ) { return murmur3_32(key, len, len) % hashSize; } // // New // struct entry_hh { char *key; unsigned long value; size_t len; struct entry_hh *next; }; typedef struct entry_hh entry_ht; entry_ht** tables[256] = {0}; //int* iitables[256]; //int iTables[256]; void initTables() { // size_t i; // for(i = 0; i < 256; i++) // iTables[i] = 0; } inline entry_ht** createTable(size_t index) { if(tables[index] != NULL) return tables[index]; // if(iTables[index] == 1) // return tables[index]; // iTables[index] = 1; // printf("Create table: %d\n", index); tables[index] = malloc(hashSize * sizeof(entry_ht*)); //iitables[index] = malloc(hashSize * sizeof(int*)); //memset(iitables[index], 0, hashSize); memset(tables[index], 0, hashSize * sizeof(entry_ht*)); return tables[index]; } // http://mgronhol.github.io/fast-strcmp/ inline int fast_compare( const char *ptr0, const char *ptr1, int len ){ int fast = len/sizeof(size_t) + 1; int offset = (fast-1)*sizeof(size_t); int current_block = 0; if( len <= sizeof(size_t)){ fast = 0; } size_t *lptr0 = (size_t*)ptr0; size_t *lptr1 = (size_t*)ptr1; while( current_block < fast ){ if( (lptr0[current_block] ^ lptr1[current_block] )){ int pos; for(pos = current_block*sizeof(size_t); pos < len ; ++pos ){ if( (ptr0[pos] ^ ptr1[pos]) || (ptr0[pos] == 0) || (ptr1[pos] == 0) ){ return 1; } } } ++current_block; } while( len > offset ){ if( (ptr0[offset] ^ ptr1[offset] )){ return 1; } ++offset; } return 0; } inline void setHashed(size_t tableIndex, char *key, int len, unsigned long value) { createTable(tableIndex); unsigned long int h = hash(key, len); //if(iitables[tableIndex][h] == 0) if(tables[tableIndex][h] == NULL) { //iitables[tableIndex][h] = 1; tables[tableIndex][h] = malloc(sizeof(entry_ht)); tables[tableIndex][h]->key = key;//strndup(key, len); tables[tableIndex][h]->value = value; tables[tableIndex][h]->len = len; tables[tableIndex][h]->next = NULL; return; } entry_ht* current = tables[tableIndex][h]; entry_ht* next = tables[tableIndex][h]; // char *newKey = strndup(key, len); do { // if(len == next->len) { // if(newKey == NULL) // newKey = strndup(key, len); if(len == next->len) if(fast_compare(key, next->key, len) == 0) // if(strcmp(newKey, next->key) == 0) { next->value = value; return; } } current = next; next = next->next; //if(next != NULL) // printf("Collision %s %s\n", strndup(current->key,5), strndup(newKey,5)); } while(next != NULL); // if(newKey == NULL) // newKey = strndup(key, len); current->next = malloc(sizeof(entry_ht)); current->next->value = value; current->next->key = key; current->next->len = len; current->next->next = NULL; } inline unsigned long getHashed(size_t tableIndex, char* key, size_t len) { createTable(tableIndex); unsigned long int h = hash(key, len); // if(iitables[tableIndex][h] == 0) if(tables[tableIndex][h] == NULL) return 0; entry_ht* next = tables[tableIndex][h]; // char *newKey = strndup(key, len); do { //if(len == next->len) { //if(newKey == NULL) // newKey = strndup(key, len); //if(strcmp(newKey, next->key) == 0) if(len == next->len) if(fast_compare(key, next->key, len) == 0) return next->value; } next = next->next; } while(next != NULL); return 0; }