/* * translation of travelling_pictures_solver.py * * The path of the input file can be specified as command-line argument. * Output is sent to stdout. * * All error checks are done with assert() such that building with NDEBUG * yields the fastest unchecked version. Defining DEBUG as 1 or 2 enables, * on the other hand, some debug output on stderr. * * -- Rainer Zachmann */ //#define NDEBUG #include //#define DEBUG 1 //#define DEBUG 2 #include "hashset.h" #include #include #include #include #ifdef NDEBUG # define assert_const(eval,c) (eval) #else # define assert_const(eval,c) assert ((eval) == (c)) #endif size_t __attribute__((const)) hash (const char *str) /* D. Bernstein: hash[i] = 33 * hash[i-1] + str[i] */ { size_t hash = 0x1505; unsigned char c; while ((c = (unsigned char) *str++)) hash = (hash << 5) + hash + c; return hash; } static inline unsigned int metric (struct hashset *a, struct hashset *b); int main (int argc, char **argv) { const char *fn = "input_qualification_round_2019/a_example.txt"; if (argc > 1) fn = argv[1]; FILE *input = fopen (fn, "r"); assert (input); unsigned int n; assert_const (fscanf (input, "%u", &n), 1); #ifdef DEBUG fprintf (stderr, "Input %s: size = %u\n", fn, n); #endif struct hashset tags[n]; bool tagv[n]; /* parse input file */ unsigned int tagsum = 0; memset (tags, 0, n * sizeof *tags); for (unsigned int i = 0; i < n; i++) { char c; assert_const (fscanf (input, "\n%c", &c), 1); assert (c == 'H' || c == 'V'); tagv[i] = c == 'V'; unsigned int tagn; assert_const (fscanf (input, "%u", &tagn), 1); tagsum += tagn; for (unsigned int j = 0; j < tagn; j++) { char buf[16]; assert_const (fscanf (input, "%16s", buf), 1); hs_put (&tags[i], buf); } } fclose (input); #ifdef DEBUG fprintf (stderr, "Average tag length = %f\n", (double) tagsum / n); #endif #if DEBUG > 1 fputs ("Number of tags per picture:\n", stderr); for (unsigned int i = 0; i < n; i++) fprintf (stderr, "[%u] %c %zu\n", i, tagv[i] ? 'V' : 'H', tags[i].nmemb); #endif /* prune pics by average/3 minimum tags */ unsigned int thresh = tagsum / (3 * n); unsigned int redn = 0; /* reduced n */ #if DEBUG > 1 fprintf (stderr, "Consider only pictures with more than %u tags.\n", thresh); #endif /* combine vertical pics */ unsigned int vpair[n]; for (unsigned int i = 0; i < n; i++) { if (tagv[i] && tags[i].nmemb > thresh) { unsigned int j; for (j = i + 1; (!tagv[j] || tags[j].nmemb <= thresh) && j < n; j++); if (j == n) { # ifdef DEBUG fprintf (stderr, "V [ %u ] not paired\n", i); # endif tagv[i] = false; } else { # if DEBUG > 1 fprintf (stderr, "V [ %u | %u ]\n", i, j); # endif vpair[i] = j; for (size_t h = 0; h < tags[j].size; h++) for (struct node *ptr = tags[j].tab[h]; ptr; ptr = ptr->next) hs_put (&tags[i], ptr->key); tags[j].nmemb = 0; /* mark as used */ } i = j; } } #if DEBUG > 1 fputs ("After pair building:\n", stderr); for (unsigned int i = 0; i < n; i++) fprintf (stderr, "[%u] %c %zu\n", i, tagv[i] ? 'V' : 'H', tags[i].nmemb); #endif /* set up a graph structure in Yale format * * `grval' contains the matrix values in row-major order * `grcol' stores the corresponding column indices * `grrow' defines the first `grval' index of a matrix row * indices of `grrow' are shifted by one for `grrow[0]' would always be 0 * and the array would be one element longer => `grrow[i - 1]' if `i > 0' */ unsigned int grsiz = 16, grnum = 0, prevrow = 0; unsigned int *grval = malloc (grsiz * sizeof *grval); assert (grval); unsigned int *grcol = malloc (grsiz * sizeof *grcol); assert (grcol); unsigned int *grrow = malloc (grsiz * sizeof *grrow); assert (grrow); for (unsigned int y = 0; y < n; y++) { if (tags[y].nmemb > thresh) { redn++; for (unsigned int x = 0; x < n; x++) if (x != y && tags[x].nmemb > thresh) { unsigned int min; if (x > y) min = metric (&tags[y], &tags[x]); else { /* copy already computed value */ min = 0; for (unsigned int k = x ? grrow[x - 1] : 0; k < (y > prevrow ? grnum : grrow[x]); k++) if (grcol[k] == y) { min = grval[k]; break; } } if (min) { if (grnum == grsiz) { assert (grsiz <= UINT_MAX / 2); grsiz *= 2; grval = realloc (grval, grsiz * sizeof *grval); assert (grval); grcol = realloc (grcol, grsiz * sizeof *grcol); assert (grcol); grrow = realloc (grrow, grsiz * sizeof *grrow); assert (grrow); } grval[grnum] = min; grcol[grnum] = x; if (y > prevrow) { for (unsigned int i = prevrow; i < y; i++) grrow[i] = grnum; prevrow = y; } grnum++; } } } # ifdef DEBUG if (y == n / 4) fputs ("adjacency 25%\n", stderr); else if (y == n / 2) fputs ("adjacency 50%\n", stderr); else if (y == 3 * n / 4) fputs ("adjacency 75%\n", stderr); # endif } for (unsigned int i = prevrow; i < grnum; i++) grrow[i] = grnum; #ifdef DEBUG fprintf (stderr, "Reduced size = %u\n", redn); #endif #if DEBUG > 1 for (unsigned int i = 0; i < grnum; i++) { fprintf (stderr, "row %u", i); for (unsigned int j = i ? grrow[i - 1] : 0; j < grrow[i]; j++) fprintf (stderr, " col %u (%u)", grcol[j], grval[j]); fputc ('\n', stderr); } #endif /* solve TSP */ unsigned int path[redn]; { unsigned int cur; for (cur = 0; tags[cur].nmemb <= thresh; cur++); path[0] = cur; /* uses first instead of random */ for (unsigned int i = 1; i < redn; i++) { unsigned int max, grind; find_max: max = 0; for (unsigned int k = cur ? grrow[cur - 1] : 0; k < grrow[cur]; k++) if (grval[k] > max) { max = grval[k]; grind = k; } if (max) { for (unsigned int p = 0; p < i; p++) if (path[p] == grcol[grind]) { grval[grind] = 0; goto find_max; } cur = grcol[grind]; path[i] = grcol[grind]; } else { unsigned int best, k; for (best = 0, k = cur ? grrow[cur - 1] : 0; best < n; ) { if (tags[best].nmemb <= thresh) { best++; continue; } if (k < grrow[cur]) { if (grcol[k] == best) { best++; k++; continue; } else if (grcol[k] < best) { k++; continue; } } for (unsigned int p = 0; p < i; p++) if (path[p] == best) { best++; goto next_k; } goto found_best; next_k:; } assert (0); found_best: cur = best; path[i] = best; } # ifdef DEBUG if (i == redn / 4) fputs ("TSP solving 25%\n", stderr); else if (i == redn / 2) fputs ("TSP solving 50%\n", stderr); else if (i == 3 * redn / 4) fputs ("TSP solving 75%\n", stderr); # endif } } /* translate the path into output format */ #ifdef DEBUG fputs ("Program output on stdout\n", stderr); #endif printf ("%u\n", redn); for (unsigned int i = 0; i < redn; i++) if (tagv[path[i]]) printf ("%u %u\n", path[i], vpair[path[i]]); else printf ("%u\n", path[i]); #ifdef DEBUG unsigned int score = 0; for (unsigned int i = 0; i < redn - 1; i++) score += metric (&tags[path[i]], &tags[path[i + 1]]); fprintf (stderr, "Score = %u\n", score); #endif /* free */ free (grval); free (grcol); free (grrow); for (unsigned int i = 0; i < n; i++) hs_free (&tags[i]); } static inline unsigned int __attribute__((pure)) metric (struct hashset *a, struct hashset *b) { unsigned int sim = 0, min, tmp; for (size_t h = 0; h < b->size; h++) for (struct node *ptr = b->tab[h]; ptr; ptr = ptr->next) if (hs_has (a, ptr->key)) sim++; min = sim; tmp = (unsigned int) a->nmemb - sim; if (tmp < min) min = tmp; tmp = (unsigned int) b->nmemb - sim; if (tmp < min) min = tmp; return min; }