/* This is a version of generate specialized for ordinary life (i.e., the 23/3 rule). It uses an idea from: "Smalltalk-80, the Language and its Implementation". It adds the cells and its neighbours in parallel (32 at a time), representing the sum in several words, where every word contains one bit of the 32 sums. This addition is performed using bitwise operations (&,|,^,~). The book proposed using bitblt operations on a direct representation. This limits the size and is slow. I implemented this on a C64 (1MHz 6502) and got 1/2 generation/s for 256*200 cells. Fortunately, this idea can also be applied to the data structure of xlife. However, for the highest speed we need to specialize it for every rule. For not implemented rules one can fall back on xlifes original algorithm, which is quite fast in itself. - anton anton@mips.complang.tuwien.ac.at */ #define NDEBUG /* turns off assertions. also turns off visual pollution caused by premature killing */ #include #include "defs.h" #include "cellbox.h" void generate_23_3() { cellbox *cptr,*tptr,*cptrup,*cptrdn,*cptrlf,*cptrrt; #ifdef PROF static int count_pass2=0; static int count_half1=0; static int count_half2=0; static int count_up=0; static int count_lf=0; static int count_rt=0; static int count_dn=0; #endif PROF #ifdef PROF link_called = link_search = 0; create_called = create_null = 0; #endif PROF generations++; numcells=0; for (cptr = head; cptr!=NULL; cptr = cptr->next) { /* first pass: just copy to olive and create neighbours, if necessary */ u_long live1 = cptr->live1; u_long live2 = cptr->live2; cptr->olive1 = live1; cptr->olive2 = live2; cptrup=cptr->up; cptrdn=cptr->dn; cptrlf=cptr->lf; cptrrt=cptr->rt; if (cptrup==NULL && (live1&0xff)!=0) { cptrup=cptr->up=link1(cptr->x,cptr->y - 8); cptrup->dn=cptr; } if (cptrdn==NULL && (live2&0xff000000)!=0) { cptrdn=cptr->dn=link1(cptr->x,cptr->y + 8); cptrdn->up=cptr; } if ((live1&0x01010101)!=0) { if (cptrlf==NULL) { cptrlf=cptr->lf=link1(cptr->x - 8,cptr->y); cptrlf->rt=cptr; } if ((live1&0x00000001)!=0 && cptrlf->up==NULL) { cellbox *cptrul=link1(cptr->x - 8,cptr->y - 8); cptrlf->up=cptrul; cptrul->dn=cptrlf; cptrup->lf=cptrul; cptrul->rt=cptrup; } } if ((live2&0x01010101)!=0) { if (cptrlf==NULL) { cptrlf=cptr->lf=link1(cptr->x - 8,cptr->y); cptrlf->rt=cptr; } if ((live2&0x01000000)!=0 && cptrlf->dn==NULL){ cellbox *cptrdl=link1(cptr->x - 8,cptr->y + 8); cptrlf->dn=cptrdl; cptrdl->up=cptrlf; cptrdn->lf=cptrdl; cptrdl->rt=cptrdn; } } if ((live1&0x80808080)!=0) { if(cptrrt == NULL){ cptrrt=cptr->rt=link1(cptr->x + 8,cptr->y); cptrrt->lf=cptr; } if ((live1&0x00000080)!=0 && cptrrt->up==NULL){ cellbox *cptrur=link1(cptr->x + 8,cptr->y - 8); cptrrt->up=cptrur; cptrur->dn=cptrrt; cptrup->rt=cptrur; cptrur->lf=cptrup; } } if ((live2&0x80808080)!=0) { if(cptrrt == NULL){ cptrrt=cptr->rt=link1(cptr->x + 8,cptr->y); cptrrt->lf=cptr; } if((live2&0x80000000)!=0 && cptrrt->dn==NULL) { cellbox *cptrdr=link1(cptr->x + 8,cptr->y + 8); cptrrt->dn=cptrdr; cptrdr->up=cptrrt; cptrdn->rt=cptrdr; cptrdr->lf=cptrdn; } } } for (cptr = head; cptr!=NULL; cptr=cptr->next) { /* the two half-blocks and their neighbouring half-blocks: */ u_long live1, live2, ul2, u2, ur2, l1, r1, l2, r2, dl1, d1, dr1; /* the cell-neighbours of the current half-block. I.e., the first bit of uln is the upper left neighbour cell of the first bit of the present half-block */ u_long upn, dnn, lfn, rtn, uln, urn, dln, drn; /* the bits of the sum of the neighbours; bits 2 and 3 are ored, since this is all we need for normal life */ u_long bit0, bit1, bit23; /* second pass: now the real fun starts */ /* these assertions just check that the first pass did its job we rely on that in this pass. */ assert(cptr->up!=NULL || (link1(cptr->x,cptr->y - 8)->olive2 & 0xff000000)==0); assert(cptr->dn!=NULL || (link1(cptr->x,cptr->y + 8)->olive1 & 0x000000ff)==0); assert(cptr->lf!=NULL || ((link1(cptr->x - 8,cptr->y)->olive1 & 0x80808080)==0 && (link1(cptr->x - 8,cptr->y)->olive2 & 0x80808080)==0)); assert(cptr->rt!=NULL || ((link1(cptr->x + 8,cptr->y)->olive1 & 0x01010101)==0 && (link1(cptr->x + 8,cptr->y)->olive2 & 0x01010101)==0)); assert((cptr->up!=NULL && cptr->up->lf!=NULL) || (link1(cptr->x-8,cptr->y-8)->olive2 & 0x80000000)==0); assert((cptr->up!=NULL && cptr->up->rt!=NULL) || (link1(cptr->x+8,cptr->y-8)->olive2 & 0x01000000)==0); assert((cptr->dn!=NULL && cptr->dn->lf!=NULL) || (link1(cptr->x-8,cptr->y+8)->olive1 & 0x00000080)==0); assert((cptr->dn!=NULL && cptr->dn->rt!=NULL) || (link1(cptr->x+8,cptr->y+8)->olive1 & 0x00000001)==0); /* some statistical information from runs using breeder, primes, oscspn with maxdead=2: count_up/dn/lf/rt etc. is pretty stable at 60-64% of pass2 each. And half1 and half2 are at 74-79% */ /* compute live1 and live2 of the neighbours; mask everything away that is not needed for computing the current block; maybe even do a shift that may be needed later */ /* the following code is a bit convoluted to help the register allocator */ #ifdef PROF count_pass2++; #endif PROF live1=cptr->olive1; live2=cptr->olive2; if (cptr->lf!=NULL) { #ifdef PROF count_lf++; #endif PROF l1=cptr->lf->olive1 & 0x80808080; l2=cptr->lf->olive2 & 0x80808080; } else { l1=0; l2=0; } if (cptr->rt!=NULL) { #ifdef PROF count_rt++; #endif PROF r1=cptr->rt->olive1 & 0x01010101; r2=cptr->rt->olive2 & 0x01010101; } else { r1=0; r2=0; } /* compute neighbours of live1 */ /* if everything is 0, no need to compute. The corners need not be checked, because they not make a difference if they are the only live thing. This check is only profitable if there is a significant number of useless halfblocks. */ if (live1!=0 || (live2&0xff)!=0 || l1!=0 || r1!=0 || (cptr->up!=NULL && (cptr->up->olive2&0xff000000)!=0)) { #ifdef PROF count_half1++; #endif PROF if (cptr->up!=NULL) { #ifdef PROF count_up++; #endif PROF u2=cptr->up->olive2; ul2= cptr->up->lf!=NULL ? cptr->up->lf->olive2>>31 : 0; ur2= cptr->up->rt!=NULL ? (cptr->up->rt->olive2&0x01000000)>>17 : 0; } else { u2=0; /* the following is not quite obvious. But if the upper left block had its lower right cell set, there would be an up block (also expressed in the assertion for cptr->up->lf). Conversely, since there is no up block, there cannot be a cell set there. */ ul2=0; ur2=0; } /* compute the sum in lockstep with the neighbouring cells. bit23 represents an or of bit 2 and 3 of the sum. For normal life we don't need to know more than that one of these bits is set */ lfn=(l1>>7) | ((live1&0x7f7f7f7f)<<1); uln= ul2 | ((u2&0x7f7f7f7f)>>23) | (lfn<<8); bit1=uln&lfn; bit0=uln^lfn; dln=(lfn>>8) | (l2<<17) | (live2<<25); bit1^=bit0&dln; bit0^=dln; rtn=(r1<<7) | ((live1&0xfefefefe)>>1); bit23=bit1&(bit0&rtn); bit1^=bit0&rtn; bit0^=rtn; urn= ur2 | (u2>>25) | (rtn<<8); bit23|=bit1&(bit0&urn); bit1^=bit0&urn; bit0^=urn; drn=(rtn>>8) | ((live2&0xfefefefe)<<23) | (r2<<31); bit23|=bit1&(bit0&drn); bit1^=bit0&drn; bit0^=drn; upn=(live1<<8)|(u2>>24); bit23|=bit1&(bit0&upn); bit1^=bit0&upn; bit0^=upn; dnn=(live1>>8)|(live2<<24); bit23|=bit1&(bit0&dnn); bit1^=bit0&dnn; bit0^=dnn; /* now compute the new live cells. This is a little tricky, but you see that it works when you build the truth table. */ cptr->live1 = (~bit23)&bit1&(live1|bit0); /* we can look at the whole thing as a boolean function with 9 arguments. Currently it is evaluated using 13 xors, 1 not, 5 ors and (after common subexpression elimination) 13 ands. Can it be done cheaper? */ } /* this is analogous to the first half-box */ if (live2!=0 || (live1>>24)!=0 || l2!=0 || r2!=0 || (cptr->dn!=NULL && (cptr->dn->olive1<<24)!=0)) { #ifdef PROF count_half2++; #endif PROF if (cptr->dn!=NULL) { #ifdef PROF count_dn++; #endif PROF d1=cptr->dn->olive1; dl1=cptr->dn->lf!=NULL ? (cptr->dn->lf->olive1&0x80808080)<<17 : 0; dr1=cptr->dn->rt!=NULL ? cptr->dn->rt->olive1<<31 : 0; } else { d1=0; dl1=0; dr1=0; } lfn=(l2>>7) | ((live2&0x7f7f7f7f)<<1); dln=(lfn>>8) | dl1 | (d1<<25); bit1=dln&lfn; bit0=dln^lfn; uln=(l1>>31) | ((live1&0x7f7f7f7f)>>23) | (lfn<<8); bit1^=bit0&uln; bit0^=uln; rtn=(r2<<7) | ((live2&0xfefefefe)>>1); bit23=bit1&(bit0&rtn); bit1^=bit0&rtn; bit0^=rtn; drn=(rtn>>8) | ((d1&0xfefefefe)<<23) | dr1; bit23|=bit1&(bit0&drn); bit1^=bit0&drn; bit0^=drn; urn=(r1>>17) | (live1>>25) | (rtn<<8); bit23|=bit1&(bit0&urn); bit1^=bit0&urn; bit0^=urn; dnn=(live2>>8)|(d1<<24); bit23|=bit1&(bit0&dnn); bit1^=bit0&dnn; bit0^=dnn; upn=(live2<<8)|(live1>>24); bit23|=bit1&(bit0&upn); bit1^=bit0&upn; bit0^=upn; cptr->live2 = (~bit23)&bit1&(live2|bit0); } if(dispcells){ u_long bits; bits=cptr->live1; bits = (bits & 0x55555555) + ((bits>>1)&0x55555555); bits = (bits & 0x33333333) + ((bits>>2)&0x33333333); bits = (bits & 0x0f0f0f0f) + ((bits>>4)&0x0f0f0f0f); bits = (bits & 0x00ff00ff) + ((bits>>8)&0x00ff00ff); bits = (bits & 0x0000ffff) + (bits>>16); numcells+=bits; bits=cptr->live2; bits = (bits & 0x55555555) + ((bits>>1)&0x55555555); bits = (bits & 0x33333333) + ((bits>>2)&0x33333333); bits = (bits & 0x0f0f0f0f) + ((bits>>4)&0x0f0f0f0f); bits = (bits & 0x00ff00ff) + ((bits>>8)&0x00ff00ff); bits = (bits & 0x0000ffff) + (bits>>16); numcells+=bits; } } /* we cannot do this in the second pass because the kills would destroy the assertions. however, we need not do it every generation (at least the killing) */ for (cptr = head; cptr!=NULL;) { if(cptr->live1 || cptr->live2){ cptr->dead=0; cptr=cptr->next; } else { #ifdef NDEBUG cptr->dead++; if(cptr->dead > maxdead){ tptr=cptr->next; kill(cptr); cptr=tptr; } else{ cptr=cptr->next; } #else /* kill everything dead, otherwise the number of blocks explodes due to the assertions */ tptr=cptr->next; kill(cptr); cptr=tptr; #endif } } #ifdef PROF printf("num=%d ",numboxes); if(link_called){ printf("l_c=%d ",link_called); if(link_search){ printf(" l_ave=%f ",link_search/(float)link_called); } } if(create_called){ printf("c_c=%d ",create_called); if(create_null){ printf(" c_ave=%f ",create_null/(float)create_called); } } printf("pass2: %d, half1: %d, half2: %d, up:%d dn:%d lf:%d rt:%d",count_pass2, count_half1, count_half2, count_up, count_dn, count_lf, count_rt); printf("\n"); #endif PROF }