/* This program checks for duplicate inputs (patterns) 
 *
 * coded by David Calinski
 * 
 * you can use it freely on GNU General Public License
 *
 * */

#include "dchecker.h"


/* Error reporter defined outside this part of the file; takes a message
 * string followed by variadic arguments.
 * NOTE(review): presumably printf-style and presumably does not return --
 * confirm against its definition. */
void panic (char*, ...);


/* Report on stderr when two entries carry the same pattern.
 *
 * The "pattern" of an entry is the text in front of its first "<$=" marker.
 * Nothing is returned; the only effect is the warning line.
 *
 * p0, p1 - NUL-terminated entries to compare; neither is modified.
 *
 * The call is a silent no-op when either pointer is NULL, when either entry
 * has no "<$=" marker, or when the two patterns differ.  When they are equal
 * the pattern is printed (taken from at most the first 511 bytes of p0) with
 * its first '&' shown as a space.
 */
void comparator (char *p0, char *p1) {
	static const char marker[] = "<$=";
	char shown[512];
	char *mark0, *mark1, *hit;
	int len0, len1;

	if (p0 == NULL || p1 == NULL)
		return;

	mark0 = strstr (p0, marker);
	if (mark0 == NULL)
		return;

	mark1 = strstr (p1, marker);
	if (mark1 == NULL)
		return;

	/* Patterns of different length cannot be equal; skip the byte compare. */
	len0 = mark0 - p0;
	len1 = mark1 - p1;
	if (len0 != len1)
		return;

	if (strncmp (p0, p1, len0) != 0)
		return;

	/* Build the printable form in a private, bounded copy so the caller's
	 * strings stay untouched. */
	shown[0] = '\0';
	strncat (shown, p0, sizeof(shown) - 1);

	hit = strstr (shown, marker);
	if (hit != NULL)
		*hit = '\0';

	hit = strchr (shown, '&');
	if (hit != NULL)
		*hit = ' ';

	fprintf (stderr, "\b*Warning*: duplicate pattern found: %s\n", shown);
}

/* Advance the console "busy" spinner by one frame.
 *
 * Writes a backspace followed by the next of the four frames  / - \ |  to
 * stdout and flushes it, so the new frame overwrites the previous one.
 * Because of the leading backspace, the caller must already have printed one
 * spare character on stdout before the first call.
 */
static void show_that_i_work (void) {
	static const char frames[4] = { '|', '/', '-', '\\' };
	/* Unsigned on purpose: a plain char counter turns negative after 127
	 * calls where char is signed, and a negative value % 4 selected no
	 * frame at all, leaving only the backspace on the screen. */
	static unsigned int tick = 0;

	tick++;
	putchar ('\b');
	putchar (frames[tick % 4]);
	fflush (stdout);
}

/* Very distant patterns will never be the same,
 * as patterns are generally sorted by length or by first character.
 * So we only check for duplicates among patterns that are relatively
 * close to each other.
 */
/* Number of most recently read patterns kept around for comparison. */
#define MAX_FOLLOWING_PATTERNS 512
/* Size in bytes of the buffer that holds one pattern entry. */
#define MAX_PATTERN_LEN 128

/* Read one record of the binary pattern format from f.
 *
 * As consumed here, a record is three native ints -- topic number, length of
 * the text in bytes, and a third value this checker does not use -- followed
 * by the text itself.  At most MAX_PATTERN_LEN - 1 bytes of text are stored
 * in buf, which is always NUL-terminated; any excess is skipped so the
 * stream stays positioned on the next record.
 *
 * Returns 1 when a record was read, 0 when no further record could be read.
 * A record that is cut short or has a negative length additionally produces
 * a warning on stderr.
 */
static int read_binary_pattern (FILE *f, int *topic, char *buf) {
	int len, unused;
	size_t keep;

	if (fread (topic, sizeof(int), 1, f) != 1)
		return 0; /* ordinary end of input */

	if (fread (&len, sizeof(int), 1, f) != 1
	    || fread (&unused, sizeof(int), 1, f) != 1
	    || len < 0)
		goto corrupt;

	/* Never trust the stored length: clamp it to what buf can hold. */
	keep = (size_t) len;
	if (keep > MAX_PATTERN_LEN - 1)
		keep = MAX_PATTERN_LEN - 1;

	if (fread (buf, sizeof(char), keep, f) != keep)
		goto corrupt;
	buf[keep] = '\0'; /* drops stale bytes left by a longer earlier record */

	if ((size_t) len > keep
	    && fseek (f, (long) len - (long) keep, SEEK_CUR) != 0)
		goto corrupt;

	return 1;

corrupt:
	fprintf (stderr, "\b*Warning*: truncated or corrupt pattern record, check stopped\n");
	return 0;
}

/* Scan f from its beginning and report duplicate patterns.
 *
 * f      - open, seekable stream; it is rewound before scanning.  A NULL
 *          stream is reported through panic().
 * binary - non-zero: f holds binary records (see read_binary_pattern()) and
 *          only entries with the same topic number are compared.
 *          zero: f holds one entry per line; scanning stops at end of file
 *          or at the first line shorter than two characters.
 *          (Original note: binary is used only in inputs_d_f.)
 *
 * Every entry is compared, via comparator(), against the window of the
 * MAX_FOLLOWING_PATTERNS most recently read entries; comparator() prints the
 * warnings.  "Working..." and a spinner are written to stdout while running.
 */
void duplicate_checker (FILE *f, int binary) {
	char (*words)[MAX_PATTERN_LEN];         /* ring of recent entries */
	int t[MAX_FOLLOWING_PATTERNS] = { 0 };  /* topic number of each slot */
	char act[MAX_PATTERN_LEN] = { 0 };      /* entry being examined */
	int act_t = 0;
	int i = 0;                              /* next slot to overwrite */
	int z;

	if (!f) {
		panic ("DEBUG ERROR: duplicate_checker() got closed file");
		return; /* in case panic() comes back */
	}
	puts ("Working...");

	/* Zero-filled, so unused slots are empty strings that comparator()
	 * rejects for lack of a "<$=" marker. */
	words = calloc (MAX_FOLLOWING_PATTERNS, sizeof *words);
	if (!words) {
		panic ("duplicate_checker(): out of memory");
		return;
	}

	clearerr (f);
	rewind (f);

	putchar ('-'); /* spare character that show_that_i_work() overwrites */
	for (;;) {
		if (binary) {
			if (!read_binary_pattern (f, &act_t, act))
				break;
		}
		else {
			if (!fgets (act, MAX_PATTERN_LEN, f))
				break;
			if (strlen (act) < 2)
				break; /* a (nearly) empty line ends the scan */
			/* Text entries carry no topic; 0 matches every slot. */
			act_t = 0;
		}

		for (z = MAX_FOLLOWING_PATTERNS - 1; z >= 0; z--) {
			if (act_t == t[z])
				comparator (act, words[z]);
		}

		/* act is NUL-terminated within MAX_PATTERN_LEN bytes on both
		 * paths, so it always fits the slot. */
		t[i] = act_t;
		strcpy (words[i], act);
		i++;
		if (i >= MAX_FOLLOWING_PATTERNS) {
			show_that_i_work (); /* one spinner step per full window */
			i = 0;
		}
	}
	putchar ('\r'); /* return to line start, over the spinner character */

	free (words);
}


