/* transform.c:  convert Yidtype-like input into hclassic tex-font style
output. 

translates each input character to a TeX string as appropriate for Yiddish.
The input character \ starts literal mode, which is terminated by \n.
Numbers are reversed in order.

Author: Raphael Finkel, 1/92  raphael@ms.uky.edu

*/

#include <stdio.h>
#include <ctype.h>
#include <string.h>
#if defined(__sparc__) || defined(__sgi__)
#	include <strings.h>
#endif

#define MAXNUMSTRING 100 /* length of longest string of digits in input */

/*
 * outputs: translation table, indexed by the 7-bit code of an input
 * character; each entry is the TeX text written for that character.
 *
 *  - An empty string means the character produces no output.
 *  - Most letter entries end in "{}".  When a vowel mark follows the letter,
 *    main() cuts the entry at its last '{' and wraps what is left in the
 *    vowel macro.
 *  - Paired delimiters are stored swapped: '(' yields ")", '[' yields "]",
 *    '{' yields "\}", '<' yields ">", and vice versa (presumably because the
 *    surrounding text is set right-to-left -- confirm against the TeX macros).
 *  - The backslash entry is never looked up: main() treats '\' as the start
 *    of literal mode before consulting this table.
 *  - The digit entries are used only for a digit standing alone; main()
 *    emits longer numeric runs itself, reversed, inside one \roman group.
 */
char *outputs[128] = {
	/* nul */ "" ,
	/* soh ^A */ "\\shindot{\\samech}{}" ,
	/* stx ^B */ "" , /* nekud marker; leave alone */
	/* etx */ "" ,
	/* eot */ "" ,
	/* enq */ "" ,
	/* ack */ "" ,
	/* bel */ "" ,
	/* bs */ "" ,
	/* ht */ "\t" ,
	/* nl */ "\n" ,
	/* vt */ "" ,
	/* np */ "" ,
	/* cr */ "\n" , /* carriage return is output as a newline */
	/* so */ "" ,
	/* si */ "" ,
	/* dle */ "" ,
	/* dc1 */ "" ,
	/* dc2 */ "" ,
	/* dc3 */ "" ,
	/* dc4 */ "" ,
	/* nak */ "" ,
	/* syn */ "" ,
	/* etb */ "" ,
	/* can */ "" ,
	/* em */ "" ,
	/* sub */ "" ,
	/* esc */ "" ,
	/* fs */ "" ,
	/* gs */ "" ,
	/* rs */ "" ,
	/* us */ "" ,
	/* sp */ " " ,
	/* ! */ "{\\roman !}" ,
	/* " */ "\"" ,
	/* # */ "\\aleph{}" ,
	/* $ */ "{\\roman \\$}" ,
	/* % */ "%" , /* straight through for TeX comments */
	/* & */ "{\\roman \\&}" ,
	/* ' */ "'" ,
	/* ( */ "{\\roman )}" , /* swapped with ')' */
	/* ) */ "{\\roman (}" , /* swapped with '(' */
	/* * */ "{\\roman *}" ,
	/* + */ "{\\roman +}" ,
	/* , */ "," ,
	/* - */ "-" ,
	/* . */ "." ,
	/* / */ "{\\roman /}" ,
	/* 0 */ "{\\roman 0}" ,
	/* 1 */ "{\\roman 1}" ,
	/* 2 */ "{\\roman 2}" ,
	/* 3 */ "{\\roman 3}" ,
	/* 4 */ "{\\roman 4}" ,
	/* 5 */ "{\\roman 5}" ,
	/* 6 */ "{\\roman 6}" ,
	/* 7 */ "{\\roman 7}" ,
	/* 8 */ "{\\roman 8}" ,
	/* 9 */ "{\\roman 9}" ,
	/* : */ ":" ,
	/* ; */ "{\\roman ;}" ,
	/* < */ "{\\roman $>$}" , /* swapped with '>' */
	/* = */ "{\\roman =}" ,
	/* > */ "{\\roman $<$}" , /* swapped with '<' */
	/* ? */ "{\\roman ?}" ,
	/* @ */ "{\\roman @}" ,
	/* A */ "\\yud\\yud{}" ,
	/* B */ "\\raphe{\\bet}{}" ,
	/* C */ "\\ftzadik{}" ,
	/* D */ "" ,
	/* E */ "" ,
	/* F */ "\\ffeh{}" ,
	/* G */ "\\raphe{\\gimel}{}" ,
	/* H */ "\\chet{}" ,
	/* I */ "\\pasakhtsveyyud{}" ,
	/* J */ "" ,
	/* K */ "\\Kaf{}" ,
	/* L */ "" ,
	/* M */ "\\fmem{}" ,
	/* N */ "\\fnun{}" ,
	/* O */ "\\vav\\yud{}" ,
	/* P */ "\\peh{}" , /* no dot, no raphe.  Needed in Hebrew */
	/* Q */ "\\sinwithdot{}" , /* needed in Hebrew */
	/* R */ "" ,
	/* S */ "\\shin{}" ,
	/* T */ "\\tav{}" ,
	/* U */ "" ,
	/* V */ "" ,
	/* W */ "\\Tav{}" ,
	/* X */ "\\fchaf{}" ,
	/* Y */ "" ,
	/* Z */ "\\raphe{\\zion}{}" ,
	/* [ */ "{\\roman ]}" , /* swapped with ']' */
	/* \\ */ "" , /* unused: main() handles '\' as the literal-mode escape */
	/* ] */ "{\\roman [}" , /* swapped with '[' */
	/* ^ */ "\\verb+^+" ,
	/* _ */ "{\\roman \\_}" ,
	/* ` */ "{\\roman `}" ,
	/* a */ "\\patach{\\aleph}{}" ,
	/* b */ "\\bet{}" ,
	/* c */ "\\tzadi{}" ,
	/* d */ "\\dalet{}" ,
	/* e */ "\\ayin{}" ,
	/* f */ "\\raphe{\\feh}{}" ,
	/* g */ "\\gimel{}" ,
	/* h */ "\\heh{}" ,
	/* i */ "\\chiriq{\\yud}{}" ,
	/* j */ "" ,
	/* k */ "\\kuf{}" ,
	/* l */ "\\lamed{}" ,
	/* m */ "\\mem{}" ,
	/* n */ "\\nun{}" ,
	/* o */ "\\kamatz{\\aleph}{}" ,
	/* p */ "\\Peh{}" ,
	/* q */ "" ,
	/* r */ "\\resh{}" ,
	/* s */ "\\samech{}" ,
	/* t */ "\\tet{}" ,
	/* u */ "\\Vav{}" ,
	/* v */ "\\tsveyvov{}" ,
	/* w */ "\\vav{}" ,
	/* x */ "\\chaf{}" ,
	/* y */ "\\yud{}" ,
	/* z */ "\\zion{}" ,
	/* { */ "\\}" , /* swapped with '}' */
	/* | */ "{\\roman |}" ,
	/* } */ "\\{" , /* swapped with '{' */
	/* ~ */ "{\\roman ~}" ,
	/* del */ "",
};

/* isneutral(c): nonzero when c is one of  . / $ % + - , : ~
   These characters are LTR if between numbers, even in RTL text.
   The explicit NUL test is required because strchr() would otherwise
   match the terminator of the lookup string. */
#define isneutral(c) \
	((c) != '\0' && strchr("./$%+-,:~", (c)) != NULL)

/* readChar: return the next input character, or EOF.  Values handed to
   unreadChar() are returned first (most recently pushed first); stdin is
   read only when nothing is pending. */
static int readChar(int *pending, int *pendingCount)
{
	if (*pendingCount > 0)
		return pending[--*pendingCount];
	return getchar();
}

/* unreadChar: push c back so that the next readChar() returns it.  c may be
   EOF, which keeps end-of-input visible to the next reader.  ISO C only
   guarantees one character of ungetc() push-back, and the number handling in
   main() may need to return several, hence this private stack.  A push onto a
   full stack is dropped; main() sizes the stack so that cannot happen. */
static void unreadChar(int *pending, int *pendingCount, int pendingMax, int c)
{
	if (*pendingCount < pendingMax)
		pending[(*pendingCount)++] = c;
}

/*
 * main: filter stdin to stdout, translating each character through outputs[].
 *
 *  - '\' starts literal mode: everything up to and including the next newline
 *    is copied unchanged, except that '_' is written as "\_".
 *  - A run of digits, neutral punctuation and '?' is written reversed inside
 *    one {\roman ...} group; trailing neutrals are returned to the input and
 *    translated on their own.  Runs longer than MAXNUMSTRING are split.
 *  - Any other character is looked up in outputs[].  If it is followed by a
 *    vowel marker (optionally followed by the dagesh marker), the letter is
 *    wrapped in the matching vowel macro instead.
 *
 * Takes no arguments.  Returns 0 on success, 1 on a usage error or if
 * writing to stdout failed.
 */
int main(int argc, char *argv[])
{
	int c;
	const char *form = 0;
		/* form is non-null if a vowel (nekud) is in progress */
	int pending[MAXNUMSTRING + 2];
		/* push-back stack: the number branch returns at most
		MAXNUMSTRING characters at once */
	int pendingCount = 0;
	const int pendingMax = (int) (sizeof pending / sizeof pending[0]);

	if (argc != 1) {
		fprintf(stderr, "Usage: %s (uses stdin, stdout)\n",
			argc > 0 ? argv[0] : "transform");
		return(1);
	}
	while ((c = readChar(pending, &pendingCount)) != EOF) {
		if (c == '\\') { /* special escape: terminated by '\n' */
			do {
				putchar(c);
				c = readChar(pending, &pendingCount);
				if (c == '_') putchar('\\');
			} while (c != '\n' && c != EOF);
				/* the EOF test stops an unterminated final line from
				looping forever */
			putchar('\n');
			if (c == EOF) break;
		} else if ((isdigit(c) || isneutral(c)) && (c != '-') ) {
			/* collect numbers, reverse */
			char numbuf[MAXNUMSTRING], *numPtr = numbuf;
			int last, i;
			do { /* collect, never writing past the end of numbuf */
				*numPtr++ = (char) c;
				c = readChar(pending, &pendingCount);
			} while (numPtr < numbuf + MAXNUMSTRING &&
				(isdigit(c) || isneutral(c) || c == '?'));
				/* we need to be able to say 19?? and reverse it all. */
			/* may have gone too far.  Put back any chars at end that are
			neutral */
			unreadChar(pending, &pendingCount, pendingMax, c);
			numPtr--; /* back to last one we have stored in numbuf */
			while (isneutral(*numPtr) && numPtr > numbuf) {
				unreadChar(pending, &pendingCount, pendingMax,
					(unsigned char) *numPtr);
				numPtr--;
			}
			last = (int) (numPtr - numbuf); /* index of last char kept */
			if (last == 0) { /* it was only neutrals or just 1 char */
				/* output just the one, to make progress */
				fputs(outputs[(unsigned char) numbuf[0]], stdout);
				continue;
			}
			fputs("{\\roman ", stdout);
			/* indexed loop: a pointer must not step below numbuf */
			for (i = last; i >= 0; --i) { /* output reversed */
				if ((numbuf[i] == '$') || (numbuf[i] == '%'))
					putchar('\\'); /* TeX doesn't like bare $ or % */
				putchar(numbuf[i]);
			}
			fputs("}", stdout);
		} else { /* usual case.  */
			char *where;
			int next; /* int, so EOF stays distinct from byte 0xFF */
			int dageshOn = 0;
			next = readChar(pending, &pendingCount);
			if (next == EOF) {
				form = 0;
				unreadChar(pending, &pendingCount, pendingMax, next);
			} else {
				/* NOTE(review): the marker byte inside each character
				constant below (and in the dagesh test further down) is a
				non-printing character that does not show in this listing;
				confirm every label still holds its byte.  The switch is on
				a char value so the labels compare exactly as before. */
				switch ((char) next) {
					case '': form = "\\patach"; break;
					case '': form = "\\kamatz"; break;
					case '': form = "\\segol"; break;
					case '': form = "\\chiriq"; break;
					case '': form = "\\kibutz"; break;
					case '': form = "\\shva"; break;
					case '': form = "\\tzere"; break;
					case '': form = "\\sindot"; break;
					case '': form = "\\dagesh"; break;
					case '': form = "\\chatafpatach"; break;
					case '': form = "\\chatafkamatz"; break;
					case '': form = "\\chatafsegol"; break;
					default:
						form = 0;
						unreadChar(pending, &pendingCount, pendingMax, next);
				} /* switch */
			}
			if (form) { /* try for a dagesh */
				next = readChar(pending, &pendingCount);
				if (next != EOF && (char) next == '') {
					dageshOn = 1;
				} else {
					dageshOn = 0;
					unreadChar(pending, &pendingCount, pendingMax, next);
				}
			}
			if (form) { /* vowel in progress */
				char letter[MAXNUMSTRING];
				if ((int) c >= 128) {
					fprintf(stderr, "Character %c unrecognized\n", c);
					c = '=';
				}
				snprintf(letter, sizeof letter, "%s", outputs[(int) c]);
				/* cut at the last '{' to remove the trailing {} */
				if ((where = strrchr(letter, '{'))) *where = 0;
				if (dageshOn) {
					fprintf(stdout, "%s{\\dagesh{%s}}{}", form, letter);
				} else {
					fprintf(stdout, "%s{%s}{}", form, letter);
				}
				form = 0;
			} else {
				if (c == 0222) {
					fputs("---", stdout);
				} else if (c == 0223) {
					fputs(",,", stdout);
				} else if (c == 0224) {
					fputs("\"", stdout);
				} else if (c >= 128) {
					/* any other byte outside the table is dropped */
				} else if (c > 0) { /* otherwise spurious vowel; ignore */
					fputs(outputs[(int) c], stdout);
				}
			} // vowel not in progress
		} // usual case
	} // each char
	if (fflush(stdout) == EOF || ferror(stdout)) {
		fprintf(stderr, "%s: error writing output\n", argv[0]);
		return(1);
	}
	return(0);
} /* main */

