/*
 *	recite - english text speech synthesizer
 *	Copyright (C) 1993 Peter Miller.
 *	All rights reserved.
 *
 *	This program is free software; you can redistribute it and/or modify
 *	it under the terms of the GNU General Public License as published by
 *	the Free Software Foundation; either version 1, or (at your option)
 *	any later version.
 *
 *	This program is distributed in the hope that it will be useful,
 *	but WITHOUT ANY WARRANTY; without even the implied warranty of
 *	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *	GNU General Public License for more details.
 *
 *	You should have received a copy of the GNU General Public License
 *	along with this program; if not, write to the Free Software
 *	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * MANIFEST: functions to translate english to phonemes
 */

#include <ctype.h>

#include <english.h>
#include <mem.h>
#include <phoneme.h>
#include <spellword.h>
#include <saynum.h>
#include <trace.h>


/*
 * Value returned by get_byte() once the input is exhausted; new_char()
 * then propagates it through the look-ahead queue.
 */
#define THE_END -1

/*
 * maximum length of a word
 * (size of the word buffer in have_letter(); the characters of a longer
 * word are still read but are not stored)
 */
#define MAX_LENGTH 1000

/*
 * Look-ahead queue maintained by new_char(): Char is the current
 * character and Char1, Char2, Char3 are the characters that follow it.
 */
static	int	Char, Char1, Char2, Char3;
/* input text, next read position and length; read by get_byte() */
static	char	*get_byte_data;
static	long	get_byte_pos;
static	long	get_byte_len;
/* output buffer, bytes written so far and growth threshold; see outchar() */
static	char	*put_byte_data;
static	long	put_byte_pos;
static	long	put_byte_max;

/*
 * One peephole rewrite rule, applied by outchar() to the tail of the
 * output each time a character is appended.
 *
 * pattern	text that the end of the output must match.  A '?' stands
 *		for an optional digit '1'..'9'.
 * replacement	text that replaces the matched tail.  A '?' is where the
 *		digit matched by the pattern (if there was one) is written.
 */
typedef struct peephole_ty peephole_ty;
struct peephole_ty
{
	char	*pattern;
	char	*replacement;
};

/*
 * The rules are tried in order and only the first one that matches is
 * applied.  outchar() writes the replacement without a capacity check,
 * relying on the 6 spare bytes it allocates, so a rule must not grow
 * the output by more than that.
 */
static peephole_ty peephole[] =
{
	{ "AA?r",	"(AXR)?",	},
	{ "a?r",	"(AXR)?",	},
	{ "EH?r",	"(EXR)?",	},
	{ "IY?r",	"(IXR)?",	},
	{ "OW?r",	"(OXR)?",	},
	{ "AX?r",	"(OXR)?",	},
	{ "AO?r",	"(OXR)?",	},
	{ "UH?r",	"(UXR)?",	},
};



/*
 * Append one character to the output buffer, growing the buffer when
 * necessary, then apply the first peephole rule (if any) whose pattern
 * matches the end of the output.
 *
 * chr	the character to append
 */

static void outchar _((int));

static void
outchar(chr)
	int		chr;
{
	peephole_ty	*p;

	if (put_byte_pos >= put_byte_max)
	{
		long	nbytes;

		/*
		 * Grow in 32KB steps.  The allocation is 6 bytes larger
		 * than put_byte_max because the peephole replacement below
		 * is written without a further capacity check.
		 */
		put_byte_max += (1L << 15);
		nbytes = put_byte_max + 6; /* for peephole */
		if (!put_byte_data)
			put_byte_data = mem_alloc(nbytes);
		else
			mem_change_size(&put_byte_data, nbytes);
	}
	put_byte_data[put_byte_pos++] = chr;

	/*
	 * see if any of the peephole optimizations apply
	 */
	for (p = peephole; p < ENDOF(peephole); ++p)
	{
		long	pat_idx;	/* next pattern character to test */
		long	out_idx;	/* next output character to test */
		int	match;
		int	digit;

		/*
		 * Pattern and output are compared right to left.  Indexes
		 * are used, not pointers, so that stepping past the front
		 * of either string never forms a pointer before the start
		 * of the array.
		 */
		out_idx = put_byte_pos - 1;
		pat_idx = (long)strlen(p->pattern) - 1;
		match = 1;
		digit = 0;
		while (pat_idx >= 0)
		{
			int	c1, c2;

			if (out_idx < 0)
			{
				/* output is shorter than the pattern */
				match = 0;
				break;
			}
			c1 = p->pattern[pat_idx--];
			c2 = put_byte_data[out_idx--];
			if (c1 == '?')
			{
				/*
				 * Optional digit: remember it if present,
				 * otherwise give the output character back
				 * so it is compared with the next pattern
				 * character.
				 */
				if (c2 < '1' || c2 > '9')
					++out_idx;
				else
					digit = c2;
			}
			else if (c1 != c2)
			{
				match = 0;
				break;
			}
		}
		if (match)
		{
			char	*cp;

			/* overwrite the matched tail with the replacement */
			put_byte_pos = out_idx + 1;
			for (cp = p->replacement; *cp; ++cp)
			{
				if (*cp == '?')
				{
					if (digit)
						put_byte_data[put_byte_pos++] = digit;
				}
				else
					put_byte_data[put_byte_pos++] = *cp;
			}
			break;
		}
	}
}


/*
 * Append every character of a NUL terminated string to the output,
 * one outchar() call per character.
 *
 * string	the text to append; the terminating NUL is not written
 */
void
english_outstring(string)
	char *string;
{
	char	*cp;

	trace(("outstring(\"%s\")\n{\n"/*}*/, string));
	for (cp = string; *cp != '\0'; ++cp)
		outchar(*cp);
	trace((/*{*/"}\n"));
}


static int makeupper _((int));

/*
 * Return the upper case form of a lower case letter; any other value
 * is returned unchanged.
 */
static int
makeupper(character)
	int	character;
{
	return (islower(character) ? toupper(character) : character);
}


/*
 * Fetch the next input byte as a value in the unsigned char range and
 * advance the read position, or return THE_END when the input has been
 * used up.
 */

static int get_byte _((void));

static int
get_byte()
{
	int	c;

	if (get_byte_pos < get_byte_len)
	{
		c = (unsigned char)get_byte_data[get_byte_pos];
		++get_byte_pos;
		return c;
	}
	return THE_END;
}


/*
 * Advance the look-ahead queue (Char, Char1, Char2, Char3) by one
 * character and return the new value of Char.
 *
 * Reading stops at a newline or at THE_END: the queue is never filled
 * past one, and once THE_END has been read it fills the rest of the
 * queue.
 */

static int new_char _((void));

static int
new_char()
{
	static int	*queue[] = { &Char, &Char1, &Char2, &Char3 };
	int		nslots;
	int		j;
	int		k;

	if (Char != '\n' || Char1 != '\n' || Char2 != '\n' || Char3 != '\n')
	{
		/*
		 * The queue is not full of newlines: shift everything up
		 * one place.  A newline or THE_END in the last slot is
		 * kept there (and so propagated) rather than read past.
		 */
		Char = Char1;
		Char1 = Char2;
		Char2 = Char3;
		if (!(Char3 == '\n' || Char3 == THE_END))
			Char3 = get_byte();
		return Char;
	}

	/*
	 * The queue is full of newlines: prime it again.  Slots are
	 * filled in order until one receives a newline (the slots after
	 * it keep their newlines) or THE_END (the slots after it are set
	 * to THE_END as well).
	 */
	nslots = sizeof(queue) / sizeof(queue[0]);
	for (j = 0; j < nslots; ++j)
	{
		*queue[j] = get_byte();
		if (*queue[j] == THE_END)
		{
			for (k = j + 1; k < nslots; ++k)
				*queue[k] = THE_END;
			break;
		}
		if (*queue[j] == '\n')
			break;
	}
	return Char;
}


/*
 * Translate a dollar amount.  The caller invokes this with Char equal
 * to '$' and a digit in Char1.
 *
 * Forms produced:
 *	no decimal part		n DOLLAR(S)
 *	exactly two decimals	n DOLLAR(S) [AND c CENT(S)]
 *	any other decimals	n POINT d d d DOLLARS
 * Commas within the whole dollar digits are skipped.
 */

static void have_dollars _((void));

static void
have_dollars()
{
	long	dollars;
	long	cents;
	char	*unit;

	trace(("have_dollars()\n{\n"/*}*/));

	/* step over the '$' and accumulate the whole dollars */
	dollars = 0L;
	new_char();
	while (isdigit(Char) || Char == ',')
	{
		if (Char != ',')
			dollars = 10 * dollars + (Char-'0');
		new_char();
	}
	say_cardinal(dollars);
	unit = (dollars == 1L ? " DOLLAR " : " DOLLARS ");

	if (!(Char == '.' && isdigit(Char1)))
	{
		/* no decimal point, or no digit after it */
		xlate_word(unit);
	}
	else
	{
		new_char();	/* Skip the period */
		if (isdigit(Char1) && !isdigit(Char2))
		{
			/* exactly two digits: treat them as cents */
			xlate_word(unit);
			if (!(Char == '0' && Char1 == '0'))
			{
				xlate_word(" AND ");
				cents = (Char-'0')*10 + Char1-'0';
				say_cardinal(cents);
				xlate_word(cents == 1L ? " CENT " : " CENTS ");
			}
			new_char();	/* Used Char (tens digit) */
			new_char();	/* Used Char1 (units digit) */
		}
		else
		{
			/* some other number of digits: read them one by one */
			xlate_word(" POINT ");
			while (isdigit(Char))
			{
				say_ascii(Char);
				new_char();
			}
			xlate_word(" DOLLARS ");
		}
	}
	trace((/*{*/"}\n"));
}


/*
 * Deal with a character that starts neither a number, a word nor a
 * dollar amount: a newline is copied to the output, other white space
 * is dropped, and anything else is passed to say_ascii().  The
 * character is then consumed.
 */

static void have_special _((void));

static void
have_special()
{
	trace(("have_special()\n{\n"/*}*/));
	if (Char != '\n')
	{
		if (!isspace(Char))
			say_ascii(Char);
	}
	else
		outchar('\n');
	new_char();
	trace((/*{*/"}\n"));
}


/*
 * Translate a number.  The caller invokes this with a digit in Char.
 *
 * A run of digits followed by the matching two letter ordinal suffix
 * (and then by neither a letter nor a digit) is passed to
 * say_ordinal().  Anything else is passed to say_cardinal(),
 * optionally followed by " POINT " and the decimal digits one at a
 * time, and then by any trailing letters spelled out.
 *
 * The suffix is normally selected by the last digit (1ST, 2ND, 3RD,
 * otherwise TH).  Numbers whose tens digit is 1 take TH whatever the
 * last digit is (11TH, 12TH, 13TH, 111TH), so TH is accepted for them
 * too; the suffix implied by the last digit alone is still accepted.
 */

static void have_number _((void));

static void
have_number()
{
	long	value;
	int	lastdigit;
	int	prevdigit;
	int	letter1;
	int	letter2;
	int	ordinal;

	trace(("have_number()\n{\n"/*}*/));
	value = Char - '0';
	lastdigit = Char;
	prevdigit = 0;		/* no tens digit seen yet */

	/*
	 * NOTE(review): value is a long and nothing here guards against
	 * overflow on a very long run of digits.
	 */
	for (new_char() ; isdigit(Char) ; new_char())
	{
		value = 10 * value + (Char-'0');
		prevdigit = lastdigit;
		lastdigit = Char;
	}

	/* Recognize ordinals based on the last two digits of the number */
	letter1 = makeupper(Char);
	letter2 = makeupper(Char1);
	switch (lastdigit)
	{
	case '1':
		ordinal = (letter1 == 'S' && letter2 == 'T');
		break;

	case '2':
		ordinal = (letter1 == 'N' && letter2 == 'D');
		break;

	case '3':
		ordinal = (letter1 == 'R' && letter2 == 'D');
		break;

	case '0':
	case '4':
	case '5':
	case '6':
	case '7':
	case '8':
	case '9':
		ordinal = (letter1 == 'T' && letter2 == 'H');
		break;

	default:
		ordinal = 0;
		break;
	}
	if (prevdigit == '1' && letter1 == 'T' && letter2 == 'H')
		ordinal = 1;	/* 11TH, 12TH, 13TH, 111TH, ... */

	/* the suffix must not run on into a longer word or number */
	if (isalpha(Char2) || isdigit(Char2))
		ordinal = 0;

	if (ordinal)
	{
		say_ordinal(value);
		new_char();	/* Used Char */
		new_char();	/* Used Char1 */
	}
	else
	{
		say_cardinal(value);

		/* Recognize decimal points */
		if (Char == '.' && isdigit(Char1))
		{
			xlate_word(" POINT ");
			for (new_char() ; isdigit(Char) ; new_char())
			{
				say_ascii(Char);
			}
		}

		/* Spell out trailing abbreviations */
		while (isalpha(Char))
		{
			say_ascii(Char);
			new_char();
		}
	}
	trace((/*{*/"}\n"));
}


/*
 * Handle abbreviations.  Text in buff was followed by '.'
 *
 * buff holds the word in upper case with a blank on either side.  The
 * abbreviations known here are expanded (or, for PHD, spelled out) and
 * the period is consumed; any other word is translated as it stands
 * and the period is left as the current character.
 */

static void abbrev _((char *));

static void
abbrev(buff)
	char	*buff;
{
	int	known;

	trace(("abbrev()\n{\n"/*}*/));
	known = 1;
	if (!strcmp(buff, " DR "))
		xlate_word(" DOCTOR ");
	else if (!strcmp(buff, " MR "))
		xlate_word(" MISTER ");
	else if (!strcmp(buff, " MRS "))
		xlate_word(" MISSUS ");
	else if (!strcmp(buff, " PHD "))
		spell_word(" PHD ");
	else
	{
		known = 0;
		xlate_word(buff);
	}
	if (known)
		new_char();	/* step over the period */
	trace((/*{*/"}\n"));
}


/*
 * Translate a word.  The caller invokes this with a letter or an
 * apostrophe in Char.
 *
 * The word (letters and apostrophes, folded to upper case) is collected
 * into a buffer with a blank on either side and then:
 *	- spelled out if a digit follows it directly,
 *	- passed to say_ascii() if it is a single non-letter (a lone
 *	  apostrophe),
 *	- passed to abbrev() if a period follows it,
 *	- otherwise translated with xlate_word().
 * A hyphen that joins the word to a following letter is skipped.
 */

static void have_letter _((void));

static void
have_letter()
{
	int	count;
	char	buff[MAX_LENGTH];

	trace(("have_letter()\n{\n"/*}*/));
	count = 0;
	buff[count++] = ' ';	/* Required initial blank */

	buff[count++] = makeupper(Char);
	new_char();

	while (isalpha(Char) || Char == '\'')
	{
		/*
		 * Room is kept for the final blank and the NUL; the
		 * excess of an over-long word is read but not stored.
		 */
		if (count < MAX_LENGTH - 2)
			buff[count++] = makeupper(Char);
		new_char();
	}

	buff[count++] = ' ';	/* Required terminating blank */
	buff[count] = 0;

	/* Check for AAANNN type abbreviations */
	if (isdigit(Char))
	{
		spell_word(buff);
		goto done;
	}

	/*
	 * buff is an array of plain char, which may be signed; the ctype
	 * functions need a value in the unsigned char range, hence the
	 * cast.
	 */
	if (count == 3 && !isalpha((unsigned char)buff[1]))	 /* one character, two spaces */
		say_ascii(buff[1]);
	else if (Char == '.')		/* Possible abbreviation */
		abbrev(buff);
	else
		xlate_word(buff);

	if (Char == '-' && isalpha(Char1))
		new_char();	/* Skip hyphens */
	done:
	trace((/*{*/"}\n"));
}


/*
 * Translate English text to phonemes.
 *
 * in		the text to translate
 * inlen	number of bytes of text
 * out_p	where to store the pointer to the output buffer; this
 *		function does not free it, and it is 0 when nothing was
 *		output
 * outlen_p	where to store the number of bytes of output; no
 *		terminating NUL is written
 *
 * The translation state is kept in file scope variables which are
 * reset on every call.
 */
void
english_to_phonemes(in, inlen, out_p, outlen_p)
	char	*in;
	long	inlen;
	char	**out_p;
	long	*outlen_p;
{
	trace(("english_to_phonemes()\n{\n"/*}*/));

	/* input cursor */
	get_byte_data = in;
	get_byte_len = inlen;
	get_byte_pos = 0;

	/* empty output; outchar() allocates the buffer when first needed */
	put_byte_data = 0;
	put_byte_max = 0;
	put_byte_pos = 0;

	/* A queue full of newlines makes new_char() prime the look-ahead */
	Char = Char1 = Char2 = Char3 = '\n';

	/* dispatch on the current character until the input runs out */
	for (new_char(); Char != THE_END; )
	{
		if (isdigit(Char))
		{
			have_number();
			continue;
		}
		if (isalpha(Char) || Char == '\'')
		{
			have_letter();
			continue;
		}
		if (Char == '$' && isdigit(Char1))
		{
			have_dollars();
			continue;
		}
		have_special();
	}

	/* give back the unused part of the output buffer */
	if (put_byte_max > put_byte_pos)
	{
		put_byte_max = put_byte_pos;
		mem_change_size(&put_byte_data, put_byte_max);
	}

	*outlen_p = put_byte_pos;
	*out_p = put_byte_data;
	trace((/*{*/"}\n"));
}
