#!/usr/bin/perl -w

# convert external Yiddish into internal form

# Author: Raphael Finkel, 1998
# See the COPYRIGHT file enclosed with this distribution.

# With no conversion flag on the command line, behave as if -q (QText)
# had been given.
$ARGV[0] = "-q" unless defined $ARGV[0];

# Return the characters of a string in reverse order.
#   $orig - the string to reverse (an undefined value is treated as "").
# Returns the reversed string.
# The earlier recursive version returned the result of s///e, i.e. the
# number of substitutions made, for any input of three or more characters,
# and never actually recursed because the call sat inside a quoted string.
sub myreverse {
	my ($orig) = @_;
	$orig = "" unless defined $orig;
	# scalar() forces string reversal even when called in list context.
	return scalar reverse($orig);
}

# Encode one 16-bit code unit as a UTF-8 byte string.
#   $left  - single-character string holding the high byte
#   $right - single-character string holding the low byte
# An empty string counts as byte 0, because ord("") is 0.
# Returns a string of one, two or three bytes.  Each code unit is encoded
# on its own; surrogate pairs are not combined.
sub ToUTF {
	my ($left, $right) = @_;
	my $mychar = ord($left) * 256 + ord($right);
	if ($mychar <= 0x7F) { # 7 sig bits; plain 7-bit ascii
		return chr($mychar);
	}
	if ($mychar <= 0x7FF) { # 11 sig bits; Hebrew is in this range
		return chr(0xC0 | (($mychar >> 6) & 0x1F))
			. chr(0x80 | ($mychar & 0x3F));
	}
	if ($mychar <= 0xFFFF) { # 16 sig bits
		return chr(0xE0 | (($mychar >> 12) & 0x0F))
			. chr(0x80 | (($mychar >> 6) & 0x3F))
			. chr(0x80 | ($mychar & 0x3F));
	}
	# Only reachable when an argument is not a single byte; return an
	# empty string so callers never interpolate an undefined value.
	return "";
} # ToUTF

# Watchdog: if the script is still running ten seconds from now, the
# SIGALRM handler ends it with exit status 1.
$SIG{ALRM} = sub { exit 1 };
alarm 10;
# Populate %CodeTable for the input encoding selected by $ARGV[0].
# Each key is a byte sequence found in the external text; each value is the
# replacement in the internal form.  convert() applies every entry to each
# input line as a global substitution.  An unrecognised flag prints a usage
# message and exits with status 1.
# NOTE(review): the Mac test below uses an unanchored pattern given as a
# string, so any argument that merely contains "-m" selects the Mac table.
if ($ARGV[0] =~ "-m(x?)") { # mac conversions
	$CodeTable{"\x81"} = "I";  # Pasakh tsvey yud
	$CodeTable{"\xa0"} = " ";  # space
	$CodeTable{"\xa1"} = "!";  # RTO + ! 
	$CodeTable{"\xa2"} = "\"";  # RTO + " 
	$CodeTable{"\xa4"} = "\$";  # RTO + $ 
	$CodeTable{"\xa5"} = "%";  # RTO + % 
	$CodeTable{"\xa7"} = "'";  # RTO + ' 
	# Parentheses are deliberately swapped (right-to-left text).
	$CodeTable{"\xa8"} = ")";  # RTO + ( 
	$CodeTable{"\xa9"} = "(";  # RTO + ) 
	$CodeTable{"\xaa"} = "*";  # RTO + * 
	$CodeTable{"\xab"} = "+";  # RTO + + 
	$CodeTable{"\xac"} = ",";  # RTO + , 
	$CodeTable{"\xad"} = "-";  # RTO + - 
	$CodeTable{"\xae"} = ".";  # RTO + . 
	# Digits are wrapped as "\x20\x2e" digit "\x20\x2c"; the standard
	# postconversions in convert() strip and reverse these wrappers.
	$CodeTable{"\xb0"} = "\x20\x2e0\x20\x2c";  # old nisus
	$CodeTable{"\xb1"} = "\x20\x2e1\x20\x2c";  # old nisus
	$CodeTable{"\xb2"} = "\x20\x2e2\x20\x2c";  # old nisus
	$CodeTable{"\xb3"} = "\x20\x2e3\x20\x2c";  # old nisus
	$CodeTable{"\xb4"} = "\x20\x2e4\x20\x2c";  # old nisus
	$CodeTable{"\xb5"} = "\x20\x2e5\x20\x2c";  # old nisus
	$CodeTable{"\xb6"} = "\x20\x2e6\x20\x2c";  # old nisus
	$CodeTable{"\xb7"} = "\x20\x2e7\x20\x2c";  # old nisus
	$CodeTable{"\xb8"} = "\x20\x2e8\x20\x2c";  # old nisus
	$CodeTable{"\xb9"} = "\x20\x2e9\x20\x2c";  # old nisus
	$CodeTable{"\xba"} = ":";  # RTO + : 
	$CodeTable{"\xbb"} = ";";  # RTO + ; 
	$CodeTable{"\xbd"} = "=";  # RTO + = 
	$CodeTable{"\xbf"} = "?";  # RTO + ? 
	$CodeTable{"\xc0"} = "p";  # old nisus
	$CodeTable{"\xc1"} = "W";  # old nisus
	$CodeTable{"\xc2"} = "K";  # old nisus
	$CodeTable{"\xc4"} = "o";  # old nisus
	# Vowel points left standing alone are dropped (empty replacement).
	$CodeTable{"\xcb"} = "";  # singleton komets
	$CodeTable{"\xcc"} = "";  # singleton pasekh
	$CodeTable{"\xc6"} = "";  # singleton dagesh
	$CodeTable{"\xd1"} = "---";  # old nisus 
	$CodeTable{"\xd2"} = "B";  # old nisus 
	$CodeTable{"\xd3"} = "i";  # old nisus 
	$CodeTable{"\xd4"} = "I";  # old nisus 
	$CodeTable{"\xd5"} = "p";  # old nisus 
	$CodeTable{"\xd7"} = "Q";  # HEBREW LETTER SHIN WITH SIN DOT 
	$CodeTable{"\xd8"} = "a";  # old nisus
	$CodeTable{"\xdc"} = ",,";  # old nisus 
	$CodeTable{"\xe0"} = "#";  # SHTUMER ALEPH
	$CodeTable{"\xe1"} = "b";  # HEBREW LETTER BET 
	$CodeTable{"\xe2"} = "g";  # HEBREW LETTER GIMEL 
	$CodeTable{"\xe3"} = "d";  # HEBREW LETTER DALET 
	$CodeTable{"\xe4"} = "h";  # HEBREW LETTER HE 
	$CodeTable{"\xe5"} = "w";  # HEBREW LETTER VAV 
	$CodeTable{"\xe6"} = "z";  # HEBREW LETTER ZAYIN 
	$CodeTable{"\xe7"} = "H";  # HEBREW LETTER HET 
	$CodeTable{"\xe8"} = "t";  # HEBREW LETTER TET 
	$CodeTable{"\xe9"} = "y";  # HEBREW LETTER YOD 
	$CodeTable{"\xea"} = "X";  # HEBREW LETTER FINAL KAF 
	$CodeTable{"\xeb"} = "x";  # HEBREW LETTER KAF 
	$CodeTable{"\xec"} = "l";  # HEBREW LETTER LAMED 
	$CodeTable{"\xed"} = "M";  # HEBREW LETTER FINAL MEM 
	$CodeTable{"\xee"} = "m";  # HEBREW LETTER MEM 
	$CodeTable{"\xef"} = "N";  # HEBREW LETTER FINAL NUN 
	$CodeTable{"\xf0"} = "n";  # HEBREW LETTER NUN 
	$CodeTable{"\xf1"} = "s";  # HEBREW LETTER SAMEKH 
	$CodeTable{"\xf2"} = "e";  # HEBREW LETTER AYIN 
	$CodeTable{"\xf3"} = "F";  # HEBREW LETTER FINAL PE 
	# $CodeTable{"\xf4"} = "P";  # HEBREW LETTER PE 
	$CodeTable{"\xf4"} = "f";  # nisus
	$CodeTable{"\xf5"} = "C";  # HEBREW LETTER FINAL TSADI 
	$CodeTable{"\xf6"} = "c";  # HEBREW LETTER TSADI 
	$CodeTable{"\xf7"} = "k";  # HEBREW LETTER QOF 
	$CodeTable{"\xf8"} = "r";  # HEBREW LETTER RESH 
	$CodeTable{"\xf9"} = "S";  # HEBREW LETTER SHIN 
	$CodeTable{"\xfa"} = "T";  # HEBREW LETTER TAV 
	# Brackets and braces are swapped in pairs, like the parentheses above.
	$CodeTable{"\xfb"} = "{";  # RTO + } 
	$CodeTable{"\xfc"} = "[";  # RTO + ] 
	$CodeTable{"\xfd"} = "}";  # RTO + { 
	$CodeTable{"\xfe"} = "]";  # RTO + [ 
} elsif ($ARGV[0] eq "-s") { # MS conversions
	$CodeTable{"\xd8"} = "\"";  # Quote
	$CodeTable{"\xe2"} = "g";  # HEBREW LETTER GIMEL 
	$CodeTable{"\xe3"} = "d";  # HEBREW LETTER DALET 
	$CodeTable{"\xe4"} = "h";  # HEBREW LETTER HE 
	$CodeTable{"\xe6"} = "z";  # HEBREW LETTER ZAYIN 
	$CodeTable{"\xe7"} = "H";  # HEBREW LETTER HET 
	$CodeTable{"\xe8"} = "t";  # HEBREW LETTER TET 
	$CodeTable{"\xe9"} = "y";  # HEBREW LETTER YOD 
	$CodeTable{"\xea"} = "X";  # HEBREW LETTER FINAL KAF 
	$CodeTable{"\xec"} = "l";  # HEBREW LETTER LAMED 
	$CodeTable{"\xed"} = "M";  # HEBREW LETTER FINAL MEM 
	$CodeTable{"\xee"} = "m";  # HEBREW LETTER MEM 
	$CodeTable{"\xef"} = "N";  # HEBREW LETTER FINAL NUN 
	$CodeTable{"\xf0"} = "n";  # HEBREW LETTER NUN 
	$CodeTable{"\xf1"} = "s";  # HEBREW LETTER SAMEKH 
	$CodeTable{"\xf2"} = "e";  # HEBREW LETTER AYIN 
	$CodeTable{"\xf3"} = "F";  # HEBREW LETTER FINAL PE 
	$CodeTable{"\xf7"} = "k";  # HEBREW LETTER QOF 
	$CodeTable{"\xf8"} = "r";  # HEBREW LETTER RESH 
	$CodeTable{"\xf9"} = "S";  # HEBREW LETTER SHIN 
	$CodeTable{"\xe1"} = "b";  # HEBREW LETTER BET 
	$CodeTable{"\xe5"} = "w";  # HEBREW LETTER VAV 
	$CodeTable{"\xeb"} = "x";  # HEBREW LETTER KAF 
	$CodeTable{"\xf4"} = "P";  # HEBREW LETTER PE 
	$CodeTable{"\xfa"} = "T";  # HEBREW LETTER TAV 
	$CodeTable{"\xe0"} = "#";  # SHTUMER ALEPH
	$CodeTable{"\xf5"} = "C";  # HEBREW LETTER FINAL TSADI 
	$CodeTable{"\xf6"} = "c";  # HEBREW LETTER TSADI 
	$CodeTable{"\xd5"} = "O";  # HEBREW LIGATURE YIDDISH VAV YOD
	$CodeTable{"\xd4"} = "v";  # HEBREW LIGATURE YIDDISH DOUBLE VAV
	$CodeTable{"\xd6"} = "A";  # HEBREW LIGATURE YIDDISH DOUBLE YOD
} elsif ($ARGV[0] eq "-q") { # qtext conversions
	$CodeTable{"\xe0"} = "#";  # YIDDISH LETTER ALEF
	$CodeTable{"\xd9"} = "B";  # YIDDISH LETTER VEYS
	$CodeTable{"\xe1"} = "b";  # YIDDISH LETTER BEYS
	$CodeTable{"\xe2"} = "g";  # YIDDISH LETTER GIMEL
	$CodeTable{"\xe3"} = "d";  # YIDDISH LETTER DALED
	$CodeTable{"\xe4"} = "h";  # YIDDISH LETTER HE
	$CodeTable{"\xe5"} = "w";  # YIDDISH LETTER VOV
	$CodeTable{"\xda"} = "u";  # YIDDISH LETTER MELUPN-VOV
	$CodeTable{"\xe6"} = "z";  # YIDDISH LETTER ZAYIN
	$CodeTable{"\xe7"} = "H";  # YIDDISH LETTER KHES
	$CodeTable{"\xe8"} = "t";  # YIDDISH LETTER TES
	$CodeTable{"\xe9"} = "y";  # YIDDISH LETTER YUD
	$CodeTable{"\xea"} = "X";  # YIDDISH LETTER FINAL KHOF
	$CodeTable{"\xeb"} = "x";  # YIDDISH LETTER KHOF
	$CodeTable{"\xdd"} = "K";     # YIDDISH LETTER KOF
	$CodeTable{"\xec"} = "l";  # YIDDISH LETTER LAMED
	$CodeTable{"\xed"} = "M";  # YIDDISH LETTER SHLOS-MEM
	$CodeTable{"\xee"} = "m";  # YIDDISH LETTER MEM
	$CodeTable{"\xef"} = "N";  # YIDDISH LETTER LANGE-NUN
	$CodeTable{"\xf0"} = "n";  # YIDDISH LETTER NUN
	$CodeTable{"\xf1"} = "s";  # YIDDISH LETTER SAMEKH
	$CodeTable{"\xf2"} = "e";  # YIDDISH LETTER AYIN
	$CodeTable{"\xf3"} = "F";  # YIDDISH LETTER LANGE-PE
	$CodeTable{"\xdf"} = "f";  # YIDDISH LETTER FE
	$CodeTable{"\xde"} = "p";  # YIDDISH LETTER PE
	$CodeTable{"\xf5"} = "C";  # YIDDISH LETTER LANGE-TSADIK
	$CodeTable{"\xf6"} = "c";  # YIDDISH LETTER TSADIK
	$CodeTable{"\xf7"} = "k";  # YIDDISH LETTER KUF
	$CodeTable{"\xf8"} = "r";  # YIDDISH LETTER REYSH
	$CodeTable{"\xfb"} = "Q";  # YIDDISH LETTER SIN
	$CodeTable{"\xf9"} = "S";  # YIDDISH LETTER SHIN
	$CodeTable{"\xd5"} = "O";  # YIDDISH LETTER VOV-YOD
	$CodeTable{"\xfa"} = "T";  # YIDDISH LETTER SOF
	$CodeTable{"\xfc"} = "W";  # YIDDISH LETTER TOF
	$CodeTable{"\xd4"} = "v";  # YIDDISH LETTER TSVEY-VOV
	$CodeTable{"\xd6"} = "A";  # YIDDISH LETTER TSVEY-YUD
	$CodeTable{"\xdc"} = "I";  # YIDDISH LETTER PASEKH-TSVEY-YUD
	$CodeTable{"\xd7"} = "a";  # YIDDISH LETTER PASEKH-ALEF
	$CodeTable{"\xd8"} = "o";  # YIDDISH LETTER KOMETS-ALEF
	$CodeTable{"\xdb"} = "i";  # YIDDISH LETTER PINTL-YUD
} elsif (lc($ARGV[0]) =~ /^-(u|t)/) { # UTF-8 conversions
	# This branch also serves -U and -Ur, because the flag is lower-cased
	# before matching.  Keys are the UTF-8 bytes that ToUTF(high, low)
	# produces for a 16-bit code unit.
	# NOTE(review): many calls below pass "" as the high byte.  ord("")
	# is 0, so as written ToUTF("", "\xd0") encodes U+00D0, not the
	# U+05D0 (HEBREW LETTER ALEF) named in the trailing comment.
	# Presumably a literal 0x05 byte was lost from this copy of the file;
	# confirm against the original before relying on these keys.
	$CodeTable{ToUTF("\x20", "\x2c")} = "";  # Directional note
	$CodeTable{ToUTF("\x20", "\x2e")} = "";  # Directional note
	$CodeTable{ToUTF("\x20", "\x13")} = "-";  # Dash
	$CodeTable{ToUTF("\x20", "\x14")} = '-';  # Dash
	$CodeTable{ToUTF("\x20", "\x1d")} = '"';  # Close quote
	$CodeTable{ToUTF("\x05", "\x1c")} = '"';  # Open quote
	$CodeTable{ToUTF("\x05", "\xbe")} = '-';  # Hyphen
	$CodeTable{ToUTF("\xfe", "\xff")} = "";  # Start of UCS-16
	$CodeTable{ToUTF("", "\xd0")} = "#";  # HEBREW LETTER ALEF
	$CodeTable{ToUTF("", "\xd1")} = "b";  # HEBREW LETTER BET
	$CodeTable{ToUTF("", "\xd2")} = "g";  # HEBREW LETTER GIMEL
	$CodeTable{ToUTF("", "\xd3")} = "d";  # HEBREW LETTER DALET
	$CodeTable{ToUTF("", "\xd4")} = "h";  # HEBREW LETTER HE
	$CodeTable{ToUTF("", "\xd5")} = "w";  # HEBREW LETTER VAV
	$CodeTable{ToUTF("", "\xd6")} = "z";  # HEBREW LETTER ZAYIN
	$CodeTable{ToUTF("", "\xd7")} = "H";  # HEBREW LETTER HET
	$CodeTable{ToUTF("", "\xd8")} = "t";  # HEBREW LETTER TET
	$CodeTable{ToUTF("", "\xd9")} = "y";  # HEBREW LETTER YOD
	$CodeTable{ToUTF("", "\xda")} = "X";  # HEBREW LETTER FINAL KAF
	$CodeTable{ToUTF("", "\xdb")} = "x";  # HEBREW LETTER KAF
	$CodeTable{ToUTF("", "\xdc")} = "l";  # HEBREW LETTER LAMED
	$CodeTable{ToUTF("", "\xdd")} = "M";  # HEBREW LETTER FINAL MEM
	$CodeTable{ToUTF("", "\xde")} = "m";  # HEBREW LETTER MEM
	$CodeTable{ToUTF("", "\xdf")} = "N";  # HEBREW LETTER FINAL NUN
	$CodeTable{ToUTF("", "\xe0")} = "n";  # HEBREW LETTER NUN
	$CodeTable{ToUTF("", "\xe1")} = "s";  # HEBREW LETTER SAMEKH
	$CodeTable{ToUTF("", "\xe2")} = "e";  # HEBREW LETTER AYIN
	$CodeTable{ToUTF("", "\xe3")} = "F";  # HEBREW LETTER FINAL PE
	$CodeTable{ToUTF("", "\xe4")} = "p";  # HEBREW LETTER PE
	$CodeTable{ToUTF("", "\xe5")} = "C";	 # HEBREW LETTER FINAL TSADI
	$CodeTable{ToUTF("", "\xe6")} = "c";  # HEBREW LETTER TSADI
	$CodeTable{ToUTF("", "\xe7")} = "k";  # HEBREW LETTER QOF
	$CodeTable{ToUTF("", "\xe8")} = "r";  # HEBREW LETTER RESH
	$CodeTable{ToUTF("", "\xe9")} = "S";  # HEBREW LETTER SHIN
	$CodeTable{ToUTF("", "\xf1")} = "O";  # HEBREW LIGATURE YIDDISH VAV YOD
	$CodeTable{ToUTF("", "\xea")} = "T";  # HEBREW LETTER TAV
	$CodeTable{ToUTF("", "\xf0")} = "v";  # HEBREW LIGATURE YIDDISH DOUBLE VAV
	$CodeTable{ToUTF("", "\xf2")} = "A";  # HEBREW LIGATURE YIDDISH DOUBLE YOD
	$CodeTable{ToUTF("", "\xb4")} = "\x83"; # HEBREW POINT HIRIQ
	$CodeTable{ToUTF("", "\xc2")} = ""; # HEBREW POINT SIN DOT
	$CodeTable{ToUTF("", "\xbf")} = ""; # HEBREW POINT RAFE
	$CodeTable{ToUTF("", "\xb7")} = ""; # HEBREW POINT PATAH
	$CodeTable{ToUTF("", "\xb8")} = ""; # HEBREW POINT QAMATS
	$CodeTable{ToUTF("", "\xb6")} = ""; # HEBREW POINT SEGOL
	# NOTE(review): same key as the HIRIQ entry above, so this assignment
	# replaces its "\x83" value with "".
	$CodeTable{ToUTF("", "\xb4")} = ""; # HEBREW POINT KHIRIK
	$CodeTable{ToUTF("", "\xbb")} = ""; # HEBREW POINT KUBUTS
	$CodeTable{ToUTF("", "\xb0")} = ""; # HEBREW POINT SHVA
	$CodeTable{ToUTF("", "\xb5")} = ""; # HEBREW POINT TSERE
	$CodeTable{ToUTF("", "\xb9")} = ""; # HEBREW POINT KHOLAM
	$CodeTable{ToUTF("", "\xbc")} = ""; # HEBREW POINT DAGESH OR MAPIQ
	$CodeTable{ToUTF("", "\xb2")} = ""; # HEBREW POINT HATAF PATAH
	$CodeTable{ToUTF("", "\xb3")} = ""; # HEBREW POINT HATAF QAMATS
	$CodeTable{ToUTF("", "\xb1")} = ""; # HEBREW POINT HATAF SEGOL
	# NOTE(review): as written, "\x00" and "" both give a high byte of 0,
	# so this is once more the same key as the two "\xb4" entries above.
	$CodeTable{ToUTF("\x00", "\xb4")} = "'"; # single quote
	$CodeTable{ToUTF("\x20", "\x1e")} = "\""; # initial double-quote
	$CodeTable{ToUTF("\x20", "\x1c")} = "''"; # final double-quote
	# NOTE(review): the arguments here are a space and an empty string;
	# presumably raw bytes were intended -- confirm against the original.
	$CodeTable{ToUTF(" ", "")} = ""; # quotation dash
	$CodeTable{ToUTF("\x20", "\x15")} = ""; # quotation dash
} else {
	# Unknown flag: report the supported options on STDOUT and give up.
	print "Unknown conversion: $ARGV[0].\n" ,
	"\tI only handle -u, -U, -Ur -q, -m, -s. \n" ,
	"\t(UTF-8, Unicode-16, Shuffled Unicode-16, QText, Mac, MS)\n" ,
	"\t-r in addition means the input is reversed.\n";
exit(1);
}

# Prepare an English fragment: rewrite the "<<" and ">>" markers, then
# return the text with its characters reversed.
#   $text - the fragment to process.
# NOTE(review): both replacements are empty in this copy, although the
# original comment speaks of "special characters".  Presumably non-printing
# placeholder bytes belong in the replacement side; confirm against the
# original file.
# reverse() reverses the characters only in scalar context; the callers in
# convert() use the result in a string concatenation, which supplies it.
sub treat { # reverse changing << and >> to special characters
	my ($text) = @_;
	$text =~ s/<<//g;
	$text =~ s/>>//g;
	return(reverse($text));
} # treat

# Convert one line of external text into the internal form.
#   $line - the text to convert.
# Returns the converted string.
# Steps, in order:
#   1. encoding-specific multi-byte substitutions, chosen by $ARGV[0];
#   2. every %CodeTable entry applied as a global substitution;
#   3. postconversions common to all encodings, plus extra ones when
#      $ARGV[0] is exactly -u or -t.
# The order of the substitutions matters: longer sequences are rewritten
# before the shorter sequences they contain.
sub convert {
	my ($line) = @_;
	# print "line begins as [$line]\n";
	if ($ARGV[0] =~ "-m") { # mac conversions
		# $line =~ s/\xe1\xd8/B/g;  # HEBREW LETTER BET, HEBREW POINT RAFE 
		$line =~ s/\xe5\xc6/u/g;  # HEBREW LETTER VAV, DAGESH OR MAPIQ 
		$line =~ s/\xe5\xe5/v/g;  # HEBREW LETTER VAV, VAV
		$line =~ s/\xeb\xc6/K/g;  # HEBREW LETTER KAF, DAGESH OR MAPIQ 
		# $line =~ s/\xf4\xd8/f/g;  # HEBREW LETTER PE, HEBREW POINT RAFE 
		$line =~ s/\xf4\xc6/p/g;  # HEBREW LETTER PE, HEBREW POINT DAGESH
		$line =~ s/\xfa\xc6/W/g;  # HEBREW LETTER TAV, DAGESH OR MAPIQ 
		$line =~ s/\xe9\xcf/i/g;  # HEBREW LETTER YOD, HEBREW POINT HIRIQ 
		$line =~ s/\xe9\xe9\xcc/I/g;  # HEBREW LETTER YOD, YOD, PATAH 
		$line =~ s/\xe5\xe9/O/g;  # HEBREW LETTER VAV, HEBREW LETTER YOD 
		$line =~ s/\xe9\xe9/A/g;  # HEBREW LETTER YOD [twice] 
		$line =~ s/\xe0\xcc/a/g;  # HEBREW LETTER ALEF, HEBREW POINT PATAH 
		$line =~ s/\xe0\xcb/o/g;  # HEBREW LETTER ALEF, HEBREW POINT QAMATS 
		$line =~ s/</&lt;/g;  # HTML oddity
		if ($ARGV[0] =~ "-mx") { # additional mac conversions
			# the following for Lucas Buryn
			$line =~ s/\xe0\xde/o/g;  # HEBREW LETTER ALEF, HEBREW POINT QAMATS 
			$line =~ s/\xde/o/g;  # HEBREW POINT QAMATS by itself; mistakenly
			$line =~ s/\xc9/.../g;  # three dots in a row
			$line =~ s/
/\n/g;  # carriage return
			$line =~ s/\xc8/u/g;  # melupn vov?
			$line =~ s/\xd1/---/g;  # dash
			$line =~ s/\xd0/-/g;  # hyphen
			$line =~ s/\xa0/ /g;  # inter-word space
			$line =~ s/\xb0/0/g; 
			$line =~ s/\xb1/1/g; 
			$line =~ s/\xb2/2/g; 
			$line =~ s/\xb3/3/g; 
			$line =~ s/\xb4/4/g; 
			$line =~ s/\xb5/5/g; 
			$line =~ s/\xb6/6/g; 
			$line =~ s/\xb7/7/g; 
			$line =~ s/\xb8/8/g; 
			$line =~ s/\xb9/9/g; 
			$line =~ s/(\d+)/reverse($1)/eg;  # reverse number
			# the following for Hirshe-Dovid Katz
			$line =~ s/\x00//g; 
			$line =~ s/\x01//g; 
			$line =~ s/\x02//g; 
			$line =~ s/\x03//g; 
			$line =~ s/\x04//g; 
			$line =~ s/\x05//g; 
			$line =~ s/\xac/o/g; 
			$line =~ s/\xbb/T/g; 
			$line =~ s/\xbc/;/g; 
			$line =~ s/\xbd/W/g; 
			$line =~ s/\xc0/o/g; 
			$line =~ s/\xc1/C/g; 
			$line =~ s/\xc2/a/g; 
			$line =~ s/\xc4/K/g; 
			$line =~ s/\xc5/"/g; # ``
			$line =~ s/\xc7/-/g; 
			# These bytes are rewritten here, before the %CodeTable pass
			# below would give them their ordinary Mac meaning.
			$line =~ s/\xfa/,/g;  # not T
			$line =~ s/\xfb/I/g;  # not {
			$line =~ s/\xfc/A/g;  # not [
			$line =~ s/\xfe/f/g;  # not ]
			$line =~ s/\xfd/"/g;  # not }
			$line =~ s/\xf5/./g;  # not C
			$line =~ s/\xff/'/g;
			$line =~ s/ o / --- /g;
		} # -mx
	} elsif ($ARGV[0] eq "-s") { # MS conversions
		$line =~ s/\xe1\xcf/B/g;  # HEBREW LETTER BET, HEBREW POINT RAFE
		$line =~ s/\xe5\xcc/u/g;  # HEBREW LETTER VAV, HEBREW POINT DAGESH
		$line =~ s/\xeb\xcc/K/g;  # HEBREW LETTER KAF, HEBREW POINT DAGESH
		$line =~ s/\xf4\xcf/f/g;  # HEBREW LETTER PE, HEBREW POINT RAFE
		$line =~ s/\xf4\xcc/p/g;  # HEBREW LETTER PE, HEBREW POINT DAGESH
		$line =~ s/\xf9\xd2/Q/g;  # HEBREW LETTER SHIN, HEBREW POINT SIN DOT
		$line =~ s/\xfa\xcc/W/g;  # HEBREW LETTER TAV, HEBREW POINT DAGESH
		$line =~ s/\xd6\xc7/I/g;  # HEBREW LIGATURE YIDDISH DOUBLE YOD, PATAH
		$line =~ s/\xe0\xc7/a/g;  # HEBREW LETTER ALEF, HEBREW POINT PATAH
		$line =~ s/\xe0\xc8/o/g;  # HEBREW LETTER ALEF, HEBREW POINT QAMATS
		$line =~ s/\xe9\xc4/i/g;  # HEBREW LETTER YOD, HEBREW POINT HIRIQ
	} elsif ($ARGV[0] eq "-q") { # QText conversions
		# English sections are passed through treat(), which reverses them.
		$line =~ s/<lang=eng>([^<]*)</"\n\\english{" . treat($1).  "}\n<"/eg;
		$line =~ s/\\english\{([^\}]*)\}/"\\english{" . treat($1) ."}"/eg;
		$line =~ s/<[^>]*>//g; # remove QText tags
		# NOTE(review): several patterns and replacements below are empty
		# in this copy.  An empty pattern makes Perl reuse the last
		# successfully matched pattern, which is surely not intended;
		# presumably non-printing placeholder bytes were lost.  Confirm
		# against the original file.
		$line =~ s//</g; # restore HTML tags inside english sections
		$line =~ s//>/g; # restore HTML tags inside english sections
		$line =~ s/\[//g; # interchange ( ) [ ]; quite tedious
		$line =~ s/]/[/g;
		$line =~ s//]/g;
		$line =~ s/\(//g;
		$line =~ s/\)/(/g;
		$line =~ s//)/g;
	} elsif ($ARGV[0] eq "-u") { # UTF-8 conversions
		# no conversions now; we do them later.
		# system "echo \"$line\" | od -x";
	} elsif ($ARGV[0] eq "-U") { # Unicode-16: convert to UTF-8
		# Each pair of characters is taken as (high, low) byte.  "." does
		# not match a newline, so pairs containing one are left alone.
		$line =~ s/(.)(.)/ToUTF($1,$2)/eg;
		# NOTE(review): this rewrites the global flag, so every later call
		# takes the "-u" branch above and skips the UTF-16 conversion.
		$ARGV[0] = "-u";
	} elsif ($ARGV[0] eq "-Ur") { # Unicode-16, shuffled: convert to UTF-8
		# Same as -U, but each pair is taken as (low, high) byte.
		$line =~ s/(.)(.)/ToUTF($2,$1)/eg;
		# NOTE(review): same global-flag rewrite as in the -U branch.
		$ARGV[0] = "-u";
	}
	# Apply the per-encoding table.  Keys are visited in hash order, and
	# each key is interpolated into the pattern as a regex, not quoted.
	foreach $code (keys %CodeTable) {
		# print "trying code $code\n" if $line =~ /$code/;
		$line =~ s/$code/$CodeTable{$code}/eg;
		# print "$line";
	}
	# standard postconversions
	$line =~ s/(^|\s|"|-)A/$1yi/g;  # misspelling
	$line =~ s/\x20\x2c\x20\x2e//g; # remove adjacent PDF/RTO
	$line =~ s/\x20\x2e(\d*)\x20\x2c/reverse($1)/eg; # manual reverse
	# -U and -Ur reach this block only because their branches above have
	# already rewritten $ARGV[0] to "-u".
	if ($ARGV[0] =~ /^-(u|t)$/) { # UTF-8 post-conversions
		$line =~ s/y\x83/i/g; # khirik-yud => i
		# NOTE(review): as written, the three single-letter rules below
		# (b, p, S) replace every occurrence of the letter, while their
		# comments describe letter + point pairs.  Presumably a trailing
		# byte was lost from each pattern; confirm against the original.
		# Once every "p" has become "f", the pe-dagesh rule cannot match.
		$line =~ s/b/B/g; # beys-rafe => veys
		$line =~ s/w\x88/u/g; # vav-mapik => u
		$line =~ s/x\x88/K/g; # khaf-dagesh => K
		$line =~ s/p/f/g; # pe-rafe => feh
		$line =~ s/p\x88/p/g; # pe-dagesh => pe
		$line =~ s/S/Q/g; # shin-sindot => sin
		$line =~ s/T\x88/W/g; # sof-dagesh => tav
		$line =~ s/A\x80/I/g; # tsvey|yud-pasakh => pasakh|tsvey|yud
		$line =~ s/#\x80/a/g; # alef-pasakh => pasakh|alef
		$line =~ s/#\x81/o/g; # alef-komets => komets|alef
		$line =~ s/\x92/---/g; # long dash
		$line =~ s/\x93/,,/g; # double comma
		$line =~ s/\x94/''/g; # double quote
		$line =~ s/\x95/.../g; # three dots
		# Precomposed characters, matched by their UTF-8 byte sequences.
		$line =~ s/\xef\xac\xaf/o/g; # precomposed komets|alef
		$line =~ s/\xef\xac\x9f/I/g; # precomposed pasakh|tsvey|yud
		$line =~ s/\xef\xac\x9d/i/g; # precomposed khirik-yud
		$line =~ s/\xef\xac\xb5/u/g; # precomposed melupn-vov
		$line =~ s/\xef\xac\xae/a/g; # precomposed pasakh|alef
		$line =~ s/\xef\xad\x8c/B/g; # precomposed veys|alef
		$line =~ s/\xef\xad\x84/p/g; # precomposed peh
		$line =~ s/\xef\xad\x8e/f/g; # precomposed feh
		$line =~ s/\xd7\xb1/O/g; # precomposed vov|yud
		# NOTE(review): the next three patterns are empty in this copy
		# (see the note in the QText branch); presumably each held a
		# single placeholder byte.
		$line =~ s//^o/g; # komets 
		$line =~ s//^i/g; # khirik 
		$line =~ s//^a/g; # pasakh 
		$line =~ s/\x87/^O/g; # kholam
		$line =~ s/yy/ey/g; # yud yud => tsvey|yud
		$line =~ s/ww/v/g; # vov vov => tsvey|vov
	}
	# NOTE(review): there is no /g here, so only the first "ts" in the line
	# is separated, and /s has no effect because the pattern has no ".".
	# Possibly /g was intended -- confirm.
	$line =~ s/ts(\b|[^h])/t|s$1/s; # prophylactic | between t and s
	$line =~ s/zh/zS/g;
	return($line);
} # convert

# $/ = ""; # read it all in one chunk
# Main driver: read STDIN a line at a time as raw bytes, convert each line,
# and print it wrapped to roughly 70 columns.
$| = 1; # unbuffered output, so interactive use sees results at once
binmode STDIN, ":raw";
binmode STDOUT, ":raw";

# A second argument of -r means every input line arrives reversed.
my $input_is_reversed = defined($ARGV[1]) && $ARGV[1] eq "-r";

while (defined(my $text = <STDIN>)) {
	$text = reverse($text) if $input_is_reversed;
	# Disabled experiment, kept for reference:
	# while ($text =~ s/\xa0([^\xa0]*)\xa0\\spell ([^\xa0]*)\xa0/\xa0/) {
		# print "\\spell $2 $1\n";
	# }
	$text = convert($text);
	$text =~ s/\r/\n/g; # carriage returns become newlines
	# \oysleyg is written out as \spell, with its two arguments separated
	# by single spaces.
	$text =~ s/\\oysleyg\s*(\S+)\s*(\S+)\n/\\spell $1 $2\n/g;
	chomp $text;
	# Break after the first whitespace found at or beyond column 70, and
	# repeat on whatever is left.
	while ($text =~ /^(.{70}\S*)\s(.*)$/) {
		my ($head, $tail) = ($1, $2);
		print "$head\n";
		$text = $tail;
	}
	print $text, "\n";
}

# :vim:fileencoding=latin-1:
