// -*- Mode: C++ -*-
// This program scans a LaTeX document for Fukhian text passages and
// typesets them using its knowledge of ligatures.  A new LaTeX file is
// written to standard output.

#include <stdlib.h>
#include <stdio.h>
#include <string.h>

#define forever for(;;)

// Kinds of token reported by readTeXtoken.  The enumerators are numbered
// 0..7 in declaration order.
enum tokentype {
  space,     // 0: run of blanks, tabs, CR and LF
  comment,   // 1: '%' up to and including the end of the line
  onechar,   // 2: any single character without special meaning
  command,   // 3: backslash plus the command name
  openenv,   // 4: '{'
  closeenv,  // 5: '}'
  math,      // 6: '$'
  eof        // 7: end of input
};

// Processing mode stored on the environment stack (fuchstack).
// Numbered 0..2 in declaration order.
enum fuchmode {
  fuchtext,  // 0: single characters are passed to insertfuch
  normtext,  // 1: text is copied to the output unchanged
  mathtext   // 2: pushed by convert at a '$', popped at the next '$'
};

// Placeholder that readsize stores in those abst fields that are not read
// from the size file because they do not apply to the letter's category.
#define TRANSPARENT 999999

// Measurements of one side (left or right) of a glyph form, as read by
// readsize: lo/o only for categories accepted by oben(), u/lu only for
// unten(), U/LU only for UNTEN(); b is always read.  Fields that are not
// read are set to TRANSPARENT.
// NOTE(review): presumably o/u/U are distances in an upper, lower and deep
// lower zone -- confirm against the fuchletter.size format.
struct abst {
  int lo,o,b,u,lu,U,LU;
};

// Description of one form (NORM, LEFT, RIGHT or BOTH) of a letter.
struct fuchinfo {
  int  code;               // code in the font; 0 = this form does not exist
  int  width;              // width read from the size file
  char cat;                // NOTE(review): not assigned by readsize -- confirm use
  abst left, right; // measurements of the left and right side
};

// One entry of the letter table read from fuchletter.size.
struct letter {
  char *name;       // first word after `char' in the size file (strdup'ed)
  bool needsspace;  // initial letters have true, everything else false
  char cat;         // category character, tested by oben/unten/UNTEN
  fuchinfo form[4]; // indexed by the NORM/LEFT/RIGHT/BOTH constants below
#define NORM  0
#define LEFT  1
#define RIGHT 2
#define BOTH  3
};

// Node of the singly linked environment stack.  Each node records the
// brace depth (env) it was pushed at and the mode that applies inside it.
struct fuchstack {
  fuchstack *next;   // node below this one, NULL at the bottom
  int       env;     // value of envcount when the node was pushed
  fuchmode  mode;    // mode that applies while this node is on top

  fuchstack (fuchstack *anext, int aenv, fuchmode amode)
    : next (anext),
      env (aenv),
      mode (amode)
  {
  }

  // Destroying a node destroys every node below it as well; a caller that
  // wants to drop just one node has to clear `next' first.
  ~fuchstack ()
  {
    delete next;  // deleting a null pointer does nothing
  }
};

// Global program state.
letter *table[100];      // letters in the order readsize appended them
int    anztable= 0;      // number of used entries in table
letter *fuchchar[256];   // lookup by the code of a letter's NORM form (see init)
FILE   *f= NULL,         // input stream, opened in main
       *fdebug= NULL;    // optional second file argument, opened for writing in main
int    lx= 1, ly= 1;     // current input column / line, used by error()
#define maxoutx 78
int    outx= 0;          // current output column, maintained by outtext
char   *longbracket[50]; // longbracket[i-200] is the output string
bool   nofuch= false;    // true after %F-
char   mypath[200];      // directory part of argv[0], set in main

// Prints `err' and the current input position (globals ly and lx) to
// stderr, then terminates the program with exit status 1.
void error (const char *err)
{
  FILE *out= stderr;

  fprintf (out, "Error: %s\n", err);
  fprintf (out, "Line: %d, before column: %d\n", ly, lx);
  exit (1);
}

// Maps a pair of adjacent characters to the code of a combined bracket
// glyph, or returns 0 when the pair does not combine.
//   ")" followed by one of * - ? ! :   -> 211..215
//   one of * - ? ! : followed by "("   -> 201..205
//   "]!" -> 216,   "![" -> 206
// A leading ')' always takes precedence over a trailing '('.
unsigned char combine (char c1, char c2)
{
  static const char marks[5]= { '*', '-', '?', '!', ':' };

  if (c1==')' || c2=='(')
  {
    const int  closing= (c1==')');
    const char mark= closing ? c2 : c1;
    const int  base= closing ? 211 : 201;
    for (int i= 0; i<5; i++)
      if (marks[i]==mark) return base+i;
    return 0;
  }

  if (c1==']' && c2=='!') return 216;
  if (c1=='!' && c2=='[') return 206;
  return 0;
}


// File-local notion of white space: blank, line feed and tab only
// (a carriage return is not treated as white space here).
bool isspace (int c)
{
  switch (c)
  {
    case ' ':
    case '\n':
    case '\t':
      return true;
    default:
      return false;
  }
}

// True for the categories 'o', 'b' and 'B'; readsize reads the lo/o
// measurements only for these.
bool oben (char cat)
{
  switch (cat)
  {
    case 'o':
    case 'b':
    case 'B':
      return true;
    default:
      return false;
  }
}

// True for the categories 'u', 'U', 'b' and 'B'; readsize reads the u/lu
// measurements only for these.
bool unten (char cat)
{
  switch (cat)
  {
    case 'u':
    case 'U':
    case 'b':
    case 'B':
      return true;
    default:
      return false;
  }
}

// True for the categories 'U' and 'B'; readsize reads the U/LU
// measurements only for these.
bool UNTEN (char cat)
{
  switch (cat)
  {
    case 'U':
    case 'B':
      return true;
    default:
      return false;
  }
}

// True for the categories 'U' and 'u', i.e. those accepted by unten()
// but not by oben().
bool nurunten (char cat)
{
  switch (cat)
  {
    case 'U':
    case 'u':
      return true;
    default:
      return false;
  }
}

// Reads the next white-space delimited word from `f' into `s'.
// Leading white space and '#' comments (up to the end of the line) are
// skipped first.
//
// Returns true when a word was read.  Returns false, with `s' set to the
// empty string, when the end of the file is reached before any word
// starts.  (The previous version stored (char) EOF as a one-character word
// and therefore never reported the end of the file.)
//
// `s' must provide room for at least 200 bytes, which is what every caller
// in this file passes; characters beyond that limit are read but dropped.
bool nextword (FILE *f, char *s)
{
  enum { maxword= 200 };
  char *const limit= s + maxword - 1;  // last position, kept for the '\0'
  int c;

  for (;;) {  // skip white space and comments
    while ((c=fgetc(f))!=EOF && isspace(c)) {}
    if (c!='#') break;
    while ((c=fgetc(f))!=EOF && c!='\n') {}
  }

  if (c==EOF) {
    *s= 0;
    return false;
  }

  do {
    if (s<limit) *s++= (char) c;
  } while ((c=fgetc(f))!=EOF && !isspace(c));
  *s= 0;
  return true;
}

void B(bool b)
{
  if (!b) error ("fuchletter.size: unexpected EOF");
}

// Reads the next word from `f' and returns its leading decimal number
// (0 when the word does not start with a number).  Aborts through B()
// when no word is available.
int nextint (FILE *f)
{
  char word[200];

  B(nextword (f, word));
  return (int) strtol (word, NULL, 10);
}

void readsize (FILE *f)
{
  char t[200];
  char cat;
  B(nextword(f, t));
  table[anztable]= new letter;
  table[anztable]->name= strdup (t);
  table[anztable]->needsspace= false;

  B(nextword(f, t));
  table[anztable]->cat= cat= t[1];

  for (int i=0; i<4; i++) {
    B(nextword(f, t));
    if (!strcasecmp (t, "none")) {
      table[anztable]->form[i].code= 0;
    }
    else {
      if (t[1]=='\\') {
        if (t[3])
          table[anztable]->form[i].code= combine(t[2], t[3]);
        else
          table[anztable]->form[i].code= t[2];
      }
      else
        table[anztable]->form[i].code= atoi (t);

      table[anztable]->form[i].width= nextint (f);

      // LINKS:
      if (oben(cat)) {
        table[anztable]->form[i].left.lo= nextint (f);
        table[anztable]->form[i].left.o= nextint (f);
      }
      else {
        table[anztable]->form[i].left.lo= TRANSPARENT;
        table[anztable]->form[i].left.o= TRANSPARENT;
      }

      table[anztable]->form[i].left.b= nextint (f);

      if (unten (cat)) { 
        table[anztable]->form[i].left.u= nextint (f);
        table[anztable]->form[i].left.lu= nextint (f);
      }
      else {
        table[anztable]->form[i].left.u= TRANSPARENT;
        table[anztable]->form[i].left.lu= TRANSPARENT;
      }
  
      if (UNTEN (cat)) {
        table[anztable]->form[i].left.U= nextint (f);
        table[anztable]->form[i].left.LU= nextint (f);
      }
      else {
        table[anztable]->form[i].left.U= TRANSPARENT;
        table[anztable]->form[i].left.LU= TRANSPARENT;
      }
  
      // RECHTS:
      if (oben(cat)) {
        table[anztable]->form[i].right.lo= nextint (f);
        table[anztable]->form[i].right.o= nextint (f);
      }
      else {
        table[anztable]->form[i].right.lo= TRANSPARENT;
        table[anztable]->form[i].right.o= TRANSPARENT;
      }
  
      table[anztable]->form[i].right.b= nextint (f);
  
      if (unten (cat)) {
        table[anztable]->form[i].right.u= nextint (f);
        table[anztable]->form[i].right.lu= nextint (f);
      }
      else {
        table[anztable]->form[i].right.u= TRANSPARENT;
        table[anztable]->form[i].right.lu= TRANSPARENT;
      }
  
      if (UNTEN (cat)) {
        table[anztable]->form[i].right.U= nextint (f);
        table[anztable]->form[i].right.LU= nextint (f);
      }
      else {
        table[anztable]->form[i].right.U= TRANSPARENT;
        table[anztable]->form[i].right.LU= TRANSPARENT;
      }
    }
  }

  anztable++;
}

void init ()
{
  int i;
  char t[200];
  sprintf (t, "%s/fuchletter.size", mypath);
  FILE *f= fopen (t, "r");
  if (!f)
    error ("file not found: fuchletter.size");

  memset (longbracket, 0, sizeof (longbracket));

  while (nextword (f, t)) {
    if (!strcasecmp (t, "char")) readsize (f);
    else
      error ("fuchletter.size: `char' expected");
  }

  fclose (f);
  exit (1);

  memset (fuchchar, 0, sizeof (fuchchar));

  for (i= 0; i<anztable; i++)
    fuchchar [table [i]->form[NORM].code]= table [i];

}

//////////////////////////////////////////////////////////////////////////////

// Puts a new node with the given brace depth and mode on top of `st'.
void push (fuchstack *&st, int env, fuchmode mode)
{
  fuchstack *top= new fuchstack (st, env, mode);
  st= top;
}

// Removes the top node of `st'.  It is an error if the stack is empty
// afterwards; an already empty stack is left untouched.
void pop (fuchstack *&st)
{
  if (!st) return;

  fuchstack *top= st;
  st= top->next;
  if (!st) error ("Stack underflow");

  top->next= NULL;  // detach, otherwise the destructor frees the whole stack
  delete top;
}

//////////////////////////////////////////////////////////////////////////////

// True for the characters that continue a TeX command name here:
// the ASCII letters and '@'.
bool iscommandchar (char c)
{
  if (c=='@') return true;
  if ('a'<=c && c<='z') return true;
  return 'A'<=c && c<='Z';
}

// Reads one character from the input stream `f' and keeps the position
// counters up to date: a line feed starts a new line, a carriage return
// resets the column, anything else advances the column.
// The fgetc result is narrowed to char, so the end of the input is
// returned as (char) EOF.
char readchar ()
{
  const char ch= fgetc (f);

  switch (ch)
  {
    case '\n':
      ly++;
      lx= 1;
      break;

    case '\r':
      lx= 1;
      break;

    default:
      lx++;
  }
  return ch;
}

// Pushes `c' back onto the input stream and steps the column counter
// back; when the column reaches 0 the line counter is decremented as well.
void unreadchar (char c)
{
  if (--lx == 0) ly--;
  ungetc (c, f);
}

// Reads the next token from the input into `token' (NUL terminated) and
// stores its kind in `type'.
//
// The first character decides the kind:
//   end of input       -> eof, empty token
//   '%'                -> comment, up to and including the line end
//   blank/tab/CR/LF    -> space, the whole run of such characters
//   '\\'               -> command: the backslash, the following character
//                         and every further iscommandchar character
//   '$', '{', '}'      -> math / openenv / closeenv, one character
//   anything else      -> onechar
//
// Side effect: a comment that starts with "%F+" clears the global `nofuch',
// one that starts with "%F-" sets it.
//
// NOTE(review): nothing limits how much is written to `token'; the caller
// in this file provides 256 bytes, so long comments or white-space runs can
// overflow it.  The character is held in a `char', so a byte equal to
// (char) EOF cannot be told apart from the end of the input.
void readTeXtoken (char *token, tokentype &type)
{
  char c= readchar ();
  int  ii;   // number of comment characters examined so far
  switch (c)
  {
    case EOF:
       type= eof;
       break;

    case '%':
       type= comment;
       *(token++)= c;
       ii= 0;
       do
       {
         c= readchar ();
         if (ii==0 && c=='F') { // special syntax: %F+ and %F- (the help text writes #F+ / #F-)
           *token++= c;
           c= readchar ();
           if (c=='+') nofuch= false;
           else
           if (c=='-') nofuch= true;
           ii++;
         }
         ii++;
         if (c!=EOF) *(token++)= c;
       }
       while (c!= '\n' && c!= '\r' && c!= EOF);
       break;

    case ' ':
    case '\t':
    case '\r':
    case '\n':
       type= space;
       do
       {
         *(token++)= c;
         c= readchar ();
       }
       while (c==' '||c=='\t'||c=='\n'||c=='\r');
       unreadchar (c);   // first character after the run belongs to the next token
       break;

    case '\\':
       type= command;
       *(token++)=c;
       c= readchar (); // the next character certainly belongs to the command
       do
       {
         *(token++)= c;
         c= readchar ();
       }
       while (iscommandchar (c));
       unreadchar (c);   // give back the character that ended the name
       break;

    case '$':
       type= math;
       *(token++)= c;
       break;

    case '{':
       type= openenv;
       *(token++)= c;
       break;

    case '}':
       type= closeenv;
       *(token++)= c;
       break;

    default:
       type= onechar;
       *(token++)= c;
  }
  *token= 0;
}

//////////////////////////////////////////////////////////////////////////////

void outtext (const char *steuer, const char *inhalt, bool breakifness)
{
  char buffer[1000], *c;
  sprintf (buffer, steuer, inhalt);
  for (c=buffer; *c; c++)
  {
    if (*c=='\n' || *c=='\r') outx= 0;
    else
      outx++;                                                 
  }
  if (outx>maxoutx && breakifness)  // Ui, ich war überrascht, daß ich das
                                    // schon programmiert hatte, als ich
                                    // es entdeckte.
  {
    printf ("\n");
    outx= 0;
  }
  printf ("%s", buffer);
}

// Writes a single line break through outtext, which resets the output
// column counter.
void outnl ()
{
  outtext ("\n", "", false);
}

//////////////////////////////////////////////////////////////////////////////
// State of the ligature engine for the fuch text currently being built.
int xo, xu, xU; // >0: letter protrudes to the right
                // <0: there is room to the left
                // =0: no room, letter does not protrude to the right
char fuchpuff [1000], *fc= fuchpuff; // output buffer and its write position
char old, oldoldcat; // pending character (0 = none) and category of the one before it
char sonder;         // special character held back for combine() (0 = none)
int  oldform;        // form (NORM/LEFT/RIGHT/BOTH) chosen so far for `old'

// Appends one character to fuchpuff, expanding special codes.
void fuchstore (unsigned char c);

// Passes every character of the NUL-terminated string `s' to fuchstore.
void fuchstorestr (const char *s)
{
  while (*s)
    fuchstore (*s++);
}

void fuchstore (unsigned char c)
{
  char puff[20];
  if (c>=200 && c<=249 && longbracket [c-200])
    fuchstorestr (longbracket [c-200]);
  else
  if (c>=128)
  {
    sprintf (puff, "\\char%d ", c);
    fuchstorestr (puff);
  }
  else
  if (c=='[')
    fuchstorestr ("\\char91 ");
  else
  if (c==']')
    fuchstorestr ("\\char93 ");
  else
  {
    *(fc++)= c;
  }
  // printf("\n %2d %2d %2d %2c %5d %s", xo, xu, xU, c, fc, fuchpuff);
}

// Resets the ligature engine: empty buffer, no pending character, no
// held-back special character, all three position counters at 0.
void initfuch ()
{
  fc= fuchpuff;

  xo= xu= xU= 0;

  old= 0;
  sonder= 0;
  oldform= NORM;
  oldoldcat= 'B';  // everything occupied
}

// Works out how many connector characters ('-') are needed so that a
// letter of category `cat' with the left measurements o/u/U fits after the
// current text without touching it.  Only the measurements that apply to
// the category are considered.
//
// Returns that number; -1 means the letters do not even touch.  When the
// number is positive the counters xo/xu/xU are moved by it and, if
// `doinsert' is true, the connectors are stored.
int insertbinde (char cat, int o, int u, int U, bool doinsert)
{
  int need= -1;

  if (oben (cat)  && xo+o>need) need= xo+o;
  if (unten (cat) && xu+u>need) need= xu+u;
  if (UNTEN (cat) && xU+U>need) need= xU+U;

  if (need<=0) return need;

  xo-= need;
  xu-= need;
  xU-= need;

  if (doinsert)
    for (int left= need; left>0; left--)
      fuchstore ('-');

  return need;
}

// Stores exactly one connector character and moves all three position
// counters back by one.
void insertbindechar ()
{
  xo-= 1;
  xu-= 1;
  xU-= 1;
  fuchstore ('-');
}

// Updates the position counters for a letter of category `cat' with the
// measurements `f': a counter whose zone the category uses takes the
// letter's right-hand value, the others move back by the letter's width.
// The character `c' is stored only when `doinsert' is true.
void insertletter (char c, char cat, const fuchinfo &f, bool doinsert)
{
  xo= oben (cat)  ? f.right.o : xo - f.width;
  xu= unten (cat) ? f.right.u : xu - f.width;
  xU= UNTEN (cat) ? f.right.U : xU - f.width;

  if (!doinsert) return;
  fuchstore (c);
}

bool testlig (char catone, const fuchinfo &one,
              char cattwo, const fuchinfo &two)
// guckt, ob die Buchstaben als Ligatur an den Text passen.
{
  int rxo= xo,  // alte Werte retten
      rxu= xu,
      rxU= xU,
      anz;

  insertbinde  (catone, one.left.o, one.left.u, one.left.U, false);
  insertletter (0, catone, one, false);

  anz= insertbinde (cattwo, two.left.o, two.left.u, two.left.U, false);
  // wieviele Leerstellen würden eingefügt?

  xo= rxo;
  xu= rxu;
  xU= rxU;

  return anz <= 0;  // wenn gar nichts eingefügt werden muß, true
}

// True when one category is 'o' and the other is accepted by nurunten()
// ('u' or 'U'), and neither of the two is 'b' or 'B'.
bool touchimpossible (char cat1, char cat2)
{
  if (cat1=='b' || cat2=='b' || cat1=='B' || cat2=='B')
    return false;

  if (cat1=='o' && nurunten (cat2))
    return true;

  return cat2=='o' && nurunten (cat1);
}

// Adds one more connector when the letters touch (anz >= 0), the previous
// letter is in its NORM form and touching is possible for the two
// categories; the extra connector keeps the letters from sticking
// together.
void insertbindecharperhaps (char cat, int anz)
{
  if (anz<0) return;
  if (oldform!=NORM) return;
  if (touchimpossible (cat, oldoldcat)) return;

  insertbindechar ();
}

// True for the punctuation and bracket characters that may take part in a
// combine() pair:  * : ! - ? ( ) [ ]
bool issonder (char c)
{
  switch (c)
  {
    case '*':
    case ':':
    case '!':
    case '-':
    case '?':
    case '(':
    case ')':
    case '[':
    case ']':
      return true;
    default:
      return false;
  }
}

// Returns extra TeX code to emit between the adjacent characters `old' and
// `c', or NULL when the pair needs none.  A negative kern is returned for
// 'h' next to 'f', 'd' or 'b', in either order.
//
// The kern string is a const array; binding a string literal to a
// non-const char * is not valid in current C++.
const char *special (char old, char c)
{
  static const char kern1[]= "\\kern-0.08em ";
  if ( (old=='h' && (c=='f' || c=='d' || c=='b')) ||
       (c=='h' && (old=='f' || old=='d' || old=='b'))
     )
    return kern1;
  return NULL;
}

// Emits whatever has to go in front of a letter with category `cat' and
// left measurements po/pu/pU.
//  - needsspace: the required distance is written as a "\kern<n>pt "
//    (only when it is positive); no connectors are stored.
//  - otherwise: connectors are stored and one more may be added by
//    insertbindecharperhaps.
void putzwischen (char cat, int po, int pu, int pU, bool needsspace)
{
  if (!needsspace)
  {
    const int count= insertbinde (cat, po, pu, pU, true);
    insertbindecharperhaps ('B', count);
    return;
  }

  const int gap= insertbinde (cat, po, pu, pU, false);
  if (gap<=0) return;

  char kern [100];
  sprintf (kern, "\\kern%dpt ", gap);
  fuchstorestr (kern);
}

void insertfuch (char c)
// dabei gibt es viel zu überprüfen:
{
  int newform= NORM,
      newoldform= oldform,
      v;
  fuchinfo *f;
  const char *specstr= NULL;
  static bool check= true;

  if (check && (sonder || issonder (c)))
  {
    if (sonder)
    {
      v= combine (sonder, c);
      if (v) c= v;
      else
      {
        check= false;
        insertfuch (sonder); // in diesem Falle ist check= false, deshalb
        // c so lassen, wird dann jetzt eingefügt
        check= true;
      }
      sonder= 0;
    }
    else
    {
      sonder= c;
      return;  // raus, sofort!
    }
  }

  if (old)
  {
    if (fuchchar [old])
    {
      newoldform= oldform==NORM ? RIGHT : BOTH;
      if (fuchchar [c] &&
          fuchchar [old]->form[newoldform].code &&
          fuchchar [c]->form[LEFT].code &&
          (fuchchar[old]->form[BOTH].code || // diese Bed. verhindert z.B. l+e
           fuchchar[c]->form[BOTH].code)     //
         )
      {  // Ligatur gefunden?
        if (touchimpossible (fuchchar [old]->cat, fuchchar [c]->cat) &&
            testlig (fuchchar [old]->cat,
                     fuchchar [old]->form[newoldform],
                     fuchchar [c]->cat,
                     fuchchar [c]->form[LEFT])
           )
        { // Ligatur paßt dran. Also einfügen.
          newform= LEFT;
          oldform= newoldform;
        }
      }
      f= &fuchchar [old]->form[oldform];

      putzwischen (fuchchar[old]->cat,
                   f->left.o, f->left.u, f->left.U,
                   fuchchar[old]->needsspace);

      insertletter (fuchchar [old]->form[oldform].code,
                    fuchchar [old]->cat, *f, true);
      oldoldcat= fuchchar [old]->cat;
      if (fuchchar [c] && (specstr= special (old, c)))  // auch noch besonders?
      {
        fuchstorestr (specstr);
        xo= xu= xU= -2000; // c soll im nächsten Durchgang in jedem
                           // Falle passen
      }
    }
    else
    {
      putzwischen ('B', 0, 0, 0, false);
      fuchstore(old);
      oldoldcat= 'B';
      xo= xu= xU= 0;
    }
  }
  else
    oldoldcat= 'B';
  old= c;
  oldform= newform;
}

void flushfuch () // darf (viel) zu oft aufgerufen werden
{
  if (old) insertfuch (0); // letzten Buchstaben rauswerfen
  *fc= 0;                  // nicht hochzählen!
  fc= fuchpuff;
  if (*fuchpuff)
  {
    outtext ("\\wmbox{%s}", fuchpuff, true);
    initfuch ();
  }
}

// Called when fuch mode is entered: writes out anything still collected
// and resets the ligature engine.
void startfuch ()
{
  flushfuch ();
  initfuch ();
}

// Called when fuch mode is left: writes out the collected fuch text.
void stopfuch ()
{
  flushfuch ();
}

//////////////////////////////////////////////////////////////////////////////

// Enters a new mode: pushes (envcount, mode) onto the stack, starts or
// stops the ligature engine when the fuch state changes, and clears
// `fuchfound'.
void dofuch (fuchstack *&st, int envcount, fuchmode mode, bool &fuchfound)
{
  const bool wasfuch= (st->mode==fuchtext);

  push (st, envcount, mode);

  const bool isfuch= (st->mode==fuchtext);
  if (isfuch && !wasfuch) startfuch ();
  if (!isfuch && wasfuch) stopfuch ();

  fuchfound= false;
}

// Leaves the current mode: pops the top of the stack and starts or stops
// the ligature engine when the fuch state changes.
void fuchaus (fuchstack *&st)
{
  const bool wasfuch= (st->mode==fuchtext);

  pop (st);

  const bool isfuch= (st->mode==fuchtext);
  if (isfuch && !wasfuch) startfuch ();
  if (!isfuch && wasfuch) stopfuch ();
}

// Main loop: reads the input token by token, copies everything outside
// fuch passages to the output unchanged and sends the characters of fuch
// passages through the ligature engine.
//
// A command starting with "\fuch" sets `fuchfound'; the next token decides
// its scope: a following '{' makes that group the scope, anything else
// makes the surrounding group the scope.  "\nofuch" switches the
// surrounding group back to normal text.  '$' toggles math mode.
//
// The mode stack is released when the end of the input is reached; the
// old `delete' after the endless loop could never run.
void convert ()
{
  int  envcount= 1;         // current brace depth
  bool fuchfound= false;    // the previous token was a \fuch command
  fuchstack *st= NULL;
  char token [256];
  tokentype type;

  push (st, envcount, normtext);

  forever
  {
    readTeXtoken (token, type);

    switch (type)
    {
      case command:
        flushfuch ();
        if (fuchfound) // then the scope is the surrounding group
          dofuch (st, envcount, fuchtext, fuchfound);

        if (!strncmp (token, "\\nofuch", 7))
          dofuch (st, envcount, normtext, fuchfound);

        fuchfound= !nofuch && !strncmp (token, "\\fuch", 5);
        outtext ("%s", token, false);
        break;

      case openenv:
        flushfuch ();
        outtext ("%s", token, false);
        envcount++;

        if (fuchfound)  // then the scope is the group that opens here
          dofuch (st, envcount, fuchtext, fuchfound);
        break;

      case closeenv:
        flushfuch ();
        fuchfound= false;    // "\fuch}" has no scope

        while (st->env==envcount)
          fuchaus (st);  // back to the enclosing group's mode

        envcount--;
        outtext ("%s", token, false);
        break;

      case math:
        flushfuch ();
        if (st->mode==mathtext)
        {
          outtext ("%s", token, false);
          fuchaus (st);
        }
        else
        {
          dofuch (st, envcount, mathtext, fuchfound);
          outtext ("%s", token, false);
        }
        break;

      case onechar:
        if (fuchfound)
          dofuch (st, envcount, fuchtext, fuchfound);

        if (st->mode==fuchtext)
          insertfuch (token[0]); // the token is a single character
        else
        {
          flushfuch ();
          outtext ("%s", token, false);
        }
        break;

      case space:
      case comment:
        flushfuch ();
        outtext ("%s", token, false);
        break;

      case eof:
        flushfuch ();
        delete st; // never NULL: pop makes sure the stack does not run empty
        return;

      default:
        error ("interner Fehler 0001");
    }
  }
}

// Prints the usage line (and, when `descr' is true, the long description)
// to stderr and terminates the program with exit status 1.
//
// The description now names the markers the tokenizer really recognises,
// %F- and %F+ (written %%F in the format strings below); it used to say
// #F- and #F+.
void help (const char *name, bool descr)
{
#define fp(A) fprintf(stderr, A)
  fprintf (stderr, "usage: %s [options] file [debugfile] [options]\n", name);
  if (descr)
  {
    fp(" This programme converts fukhian text into fukhian text with\n");
    fp(" inserted ligatures and spaces. It searches the input for all\n");
    fp(" TeX commands beginning with `\\fuch'. If an environment follows\n");
    fp(" immediately this is the scope of operation, otherwise the\n");
    fp(" surrounding environment is. Math mode text embedded in $ is\n");
    fp(" ignored.\n");
    fp(" Additionally, you can mark long sections to be untouchable by this\n");
    fp(" programme by using %%F- ... %%F+.  Those are TeX-comments with a\n");
    fp(" special meaning to this programme only.\n");
    fp(" Be careful: TeX commands are not understood. Therefore you must\n");
    fp("  handle \\(, \\), \\[, \\], \\mbox, etc. yourself. Use \\nofuch to\n");
    fp("  tell this programme that inside the current environment you want\n");
    fp("  no more fukhian.\n");
    fp(" E.g:\n");
    fp("  wrong: {\\fuch anyfuch \\mbox {mist} morefuch}\n");
    fp("  right: {\\fuch anyfuch \\mbox {\\nofuch mist} morefuch}\n");
    fp("\n");
    fp("  wrong: {\\fuch anyfuch \\( a^2+b^2=x^2 \\) morefuch}\n");
    fp("  wrong: {\\fuch anyfuch \\nofuch\\( a^2+b^2=c^2 \\) morefuch}\n");
    fp("  right: {\\fuch anyfuch {\\nofuch\\( a^2+b^2=c^2 \\)} morefuch}\n");
    fp("  right: {\\fuch anyfuch $ a^2+b^2=c^2 $ morefuch}\n");
    fp("  right: \\fuch{fuch1 {\\nofuch foo {\\fuch fuch2} bar} fuch3}\n");
  }
  fp("options:\n");
  fp("  -h  : help and description\n");
  exit (1);
}

// Program entry.  Command line: [options] file [debugfile] [options]
//   -h         print usage and description, then exit
//   -<other>   print usage, then exit
//   -          read the input from stdin
//   1st name   input file, 2nd name: debug file (opened for writing)
//
// The directory part of argv[0] is stored in `mypath' so that init() can
// find fuchletter.size next to the executable.
//
// Changes against the previous version: explicit int return type, bounded
// copy of argv[0], and a message when a file cannot be opened (an
// unreadable input file used to make the next argument the input file).
int main (int argc, char **argv)
{
  int len= snprintf (mypath, sizeof (mypath), "%s", argv[0]);
  if (len<0 || (size_t) len>=sizeof (mypath))
  {
    fprintf (stderr, "Error: program path too long\n");
    exit (1);
  }
  char *c= strrchr (mypath, '/');
  if (c) *c= 0;
  else
    strcpy (mypath, ".");

  for (int i=1; i<argc; i++)
  {
    const char *arg= argv[i];

    if (arg[0]=='-' && arg[1])
    {
      help (argv[0], arg[1]=='h');  // does not return
    }
    else if (arg[0]=='-')           // a lone "-" selects stdin
    {
      if (f) help (argv[0], false);
      f= stdin;
    }
    else if (!f)
    {
      f= fopen (arg, "r");
      if (!f)
      {
        perror (arg);
        exit (1);
      }
    }
    else
    {
      if (fdebug) help (argv[0], false);
      fdebug= fopen (arg, "w");
      if (!fdebug)
      {
        perror (arg);
        exit (1);
      }
    }
  }
  if (!f) help (argv[0], false);

  init ();
  convert ();
  exit (0);
}

