By Manuel Wiesinger (0825632) and Florian Schweikert (0825224)
| tsc | branch mis. | L2 accesses | u+sys instr | u cycles | sys cycles | |
|---|---|---|---|---|---|---|
| Original | 5959477640 | 8715955 | 31014072 | 2208801396 | 5396134850 | 563609274 |
| Code Motion Out of Loops (cube-only) | 5942767490 | 8714386 | 30964939 | 2208228686 | 5383009066 | 560006140 |
| Precompute Logical Functions | 5966822300 | 8718101 | 31061537 | 2208721466 | 5401281670 | 565803216 |
| Short circuiting Monotone Functions | 5948175840 | 8714332 | 30991731 | 2207619456 | 5388212693 | 560236818 |
| Code Motion Out of Loops (malloc) | 4022532060 | 5714512 | 33334407 | 718810209 | 3569633339 | 453076908 |
| Defer init | 4029017660 | 5716107 | 33383267 | 719205239 | 3575807222 | 453386998 |
| cube to makro | 3751759080 | 4943210 | 33293716 | 680737223 | 3299406269 | 452554596 |
| hash to makro | 3705133350 | 4972240 | 33270328 | 568827250 | 3254863619 | 450430868 |
In detail
68 for (i=0; cube(i)<=n; i++)
69 for (j=i+1; cube(i)+cube(j)<=n; j++) {
70 long sum = cube(i)+cube(j);
71 struct node **sumnodepp = lookup(sum,table,table_size);
72 if (*sumnodepp != NULL) {
73 (*sumnodepp)->count++;
74 if ((*sumnodepp)->count==2) /* don't count additional hits */
75 count++;
76 } else {
77 struct node *new = malloc(sizeof(struct node));
78 new->next = NULL;
79 new->value = sum;
80 new->count = 1;
81 *sumnodepp = new;
82 occupation++;
83 }
84 }
70 for (i=0; cubei<=n; i++) {
71 cubei = cube(i);
72 cubej = cube(i+1);
73
74 sum = cubei + cubej;
75 for (j=i+1; sum<=n; j++) {
76 struct node **sumnodepp = lookup(sum,table,table_size);
77 if (*sumnodepp != NULL) {
78 (*sumnodepp)->count++;
79 if ((*sumnodepp)->count==2) /* don't count additional hits */
80 count++;
81 } else {
82 struct node *new = malloc(sizeof(struct node));
83 new->next = NULL;
84 new->value = sum;
85 new->count = 1;
86 *sumnodepp = new;
87 occupation++;
88 }
89 cubej = cube(j+1);
90 sum = cubei + cubej;
91 }
92 }
15,16d14 < #define factor (2.0/(3.0*log(2.0))) < 32c30 < return 1<<(long)(log((double)n)* factor); --- > return 1<<(long)(log((double)n)*(2.0/(3.0*log(2.0)))); 61,62d58
We analised the inner loop, it is called 1918 times unnecessarly by the outer loop. (for n= 10^11)
72 for (i=0; cubei<=n; i++) {
73 cubei = cube(i);
74 cubej = cube(i+1);
75
76 sum = cubei + cubej;
77 if (sum>n)
78 break;
79
80 for (j=i+1; sum<=n; j++) {
81 struct node **sumnodepp = lookup(sum,table,table_size);
82 if (*sumnodepp != NULL) {
83 (*sumnodepp)->count++;
84 if ((*sumnodepp)->count==2) /* don't count additional hits */
85 count++;
86 } else {
87 struct node *new = malloc(sizeof(struct node));
88 new->next = NULL;
89 new->value = sum;
90 new->count = 1;
91 *sumnodepp = new;
92 occupation++;
93 }
94 cubej = cube(j+1);
95 sum = cubei + cubej;
96 }
97 }
72 int c = 0;
73 for(i=0;cubei <= n; i++) {
74 cubei = cube(i);
75 cubej = cube(i+1);
76 sum = cubei + cubej;
77 if (sum>n)
78 break;
79 for(j=i+1; sum <= n; j++ ) {
80 c++;
81
82 cubej = cube(j+1);
83 sum = cubei + cubej;
84 }
85 }
86
87 struct node *new = malloc(c*sizeof(struct node));
88
89 cubei = 0;
90 cubej = 0;
91 sum = 0;
92
93 for (i=0; cubei<=n; i++) {
94 cubei = cube(i);
95 cubej = cube(i+1);
96
97 sum = cubei + cubej;
98 if (sum>n)
99 break;
100
101 for (j=i+1; sum<=n; j++) {
102 struct node **sumnodepp = lookup(sum,table,table_size);
103 if (*sumnodepp != NULL) {
104 (*sumnodepp)->count++;
105 if ((*sumnodepp)->count==2) /* don't count additional hits */
106 count++;
107 } else {
108 // struct node *new = malloc(sizeof(struct node));
109 new++;
110 new->next = NULL;
111 new->value = sum;
112 new->count = 1;
113 *sumnodepp = new;
114 occupation++;
115 }
116 cubej = cube(j+1);
117 sum = cubei + cubej;
118 }
119 }
-32.5 % compared to the original
-37.8 % compared to the original