klex/src/LexFmt.m3


 Copyright (c) 2000 California Institute of Technology 
 All rights reserved. See the file COPYRIGHT for a full description. 
 $Id: LexFmt.m3,v 1.2 2001-09-19 15:05:08 wagner Exp $ 

MODULE LexFmt;
IMPORT Wr, Thread;
IMPORT Rd, TokSpec;
IMPORT LexParse;
IMPORT Text;
IMPORT TextWr;
IMPORT TextSubs;
IMPORT TextList;
IMPORT Bundle, lexformBundle;
IMPORT DFA;
IMPORT Fmt;
IMPORT DFATrans;
IMPORT CharCodes;
FROM Stdio IMPORT stderr;
REVEAL
  T = Public BRANDED OBJECT
    outMN, tokMN: TEXT;
    tok: TokSpec.T;
    lex: LexParse.T;
    dfa: DFA.T;
    form: Bundle.T;
  OVERRIDES
    writeInterface := WriteInterface;
    writeModule := WriteModule;
    test := Test;
  END;
<* FATAL Thread.Alerted, Wr.Failure *>

PROCEDURE New(from: Rd.T; tok: TokSpec.T;
              outMN, tokMN: TEXT): T =
  VAR
    self := NEW(T,
                outMN := outMN,
                tokMN := tokMN,
                tok := tok,
                lex := LexParse.New(from, tok),
                form := lexformBundle.Get());
  BEGIN
    self.dfa := DFA.FromNFA(self.lex.n);
    RETURN self;
  END New;

PROCEDURE Subs(self: T): TextSubs.T =
  VAR
    subs := NEW(TextSubs.T).init();
  BEGIN
    subs.add("\\\n", "");
    subs.add("%lex", self.outMN);
    subs.add("%tok", self.tokMN);
    subs.add("%gen", "(* Generated by klex *)");
    RETURN subs;
  END Subs;

PROCEDURE GuessToken(tok: TokSpec.T; name, tokMN: TEXT): TEXT =
  VAR
    cur := tok.tokens;
    len := Text.Length(name);
    tokName: TEXT;
    tokLen: INTEGER;
    tokLongest: TEXT := NIL;
    tokLongestLen, dummy: INTEGER := 0;
  BEGIN
    WHILE cur # NIL DO
      tokName := cur.head;
      tokLen := Text.Length(tokName);
      IF len >= tokLen THEN
        IF tokLen >= tokLongestLen THEN
          IF Text.Equal(Text.Sub(name, len - tokLen, tokLen), tokName) THEN
            tokLongestLen := tokLen;
            tokLongest := tokName;
          END;
        END;
      END;
      cur := cur.tail;
    END;
    IF tokLongest = NIL THEN
      IF NOT Text.Equal(name, "skip") THEN
        Wr.PutText(stderr, "Warning: " & CharCodes.Q(name) &
          " is not \"skip\" and contains no token suffix\n");
      END;
      RETURN "EVAL self; RETURN NIL";
    ELSE
      IF NOT Text.Equal(name, tokLongest) THEN
        IF tok.constTokens.get(tokLongest, dummy) THEN
        Wr.PutText(stderr, "Warning: " & CharCodes.Q(name) &
          " is constant but not a token\n");
        END;
      END;
      (* RETURN "NEW(" & tokMN & "." & tokLongest & ")"; *)
      RETURN "RETURN " & tokMN & ".NewPT(self.allocate_" & tokLongest &
             ", TYPECODE(" & tokMN & "." & tokLongest & "))";
    END;
  END GuessToken;

PROCEDURE FmtProcs(self: T; form: TEXT;
                   findDefault: BOOLEAN := FALSE;
                   constCodes: BOOLEAN := FALSE): TEXT =
  VAR
    cur := self.lex.names;
    acc := "";
    subs: TextSubs.T;
    i, dummy: INTEGER := 0;
  BEGIN
    WHILE cur # NIL DO
      INC(i);
      IF constCodes = self.tok.constTokens.get(cur.head, dummy) THEN
        IF NOT findDefault OR NOT Text.Equal(cur.head, "char") THEN
          subs := NEW(TextSubs.T).init();
          subs.add("%tok", self.tokMN);
          subs.add("%name", cur.head);
          subs.add("%code", Fmt.Int(i));
          IF findDefault THEN
            subs.add("%default", GuessToken(self.tok, cur.head, self.tokMN));
          END;
          acc := acc & subs.apply(form);
        END;
      END;
      cur := cur.tail;
    END;
    RETURN acc;
  END FmtProcs;

PROCEDURE WriteInterface(self: T; to: Wr.T) =
  VAR
    subs := Subs(self);
  BEGIN
    subs.add("%methods", FmtProcs(self, "    %name(): Token;\n"));
    Wr.PutText(to, subs.apply(Bundle.Get(self.form, "lexform.i3")));
  END WriteInterface;

PROCEDURE FmtTrans(trans: DFATrans.T): TEXT =
  BEGIN
    RETURN Fmt.Int(ORD(trans.keyBegin)) & "," &
           Fmt.Int(ORD(trans.keyEnd)) & "," &
           Fmt.Int(trans.target) & "," &
           Fmt.Int(trans.prio);
  END FmtTrans;

TYPE
  TableKind = {First, States, Trans};
PROCEDURE FmtTable(self: T; kind: TableKind): TEXT =
  CONST
    lmargin = "    ";
  VAR
    dfa := self.dfa;
    wr := TextWr.New();
    lineLen := 0;
  PROCEDURE PutEntry(t: TEXT) =
    VAR
      len := Text.Length(t);
    BEGIN
      IF lineLen + len > 71 THEN
        lineLen := 0;
        Wr.PutText(wr, ",\n" & lmargin);
      END;
      IF lineLen # 0 THEN
        Wr.PutText(wr, ", ");
        lineLen := lineLen + 2;
      END;
      Wr.PutText(wr, t);
      lineLen := lineLen + len;
    END PutEntry;
  BEGIN
    Wr.PutText(wr, lmargin);
    CASE kind OF
    | TableKind.First =>
      FOR i := FIRST(CHAR) TO LAST(CHAR) DO
        PutEntry(Fmt.Int(ORD(dfa.first[i])));
      END;
    | TableKind.States =>
      FOR i := 1 TO dfa.numStates DO
        WITH state = dfa.statesArray[i] DO
          PutEntry("S{" & FmtTrans(state.next.head) & "," &
            Fmt.Int(state.output) & "}");
        END;
      END;
    | TableKind.Trans =>
      FOR i := 1 TO dfa.numTrans DO
        PutEntry("X{" & FmtTrans(dfa.transArray[i]) & "}");
      END;
    END;
    RETURN TextWr.ToText(wr);
  END FmtTable;

PROCEDURE CountBits(maxVal: INTEGER): INTEGER =
  VAR
    bits: INTEGER := 0;
    bitsRep: INTEGER := 1;
  BEGIN
    WHILE bitsRep <= maxVal DO
      INC(bits);
      bitsRep := bitsRep + bitsRep;
    END;
    RETURN bits;
  END CountBits;

PROCEDURE AddIntRange(subs: TextSubs.T; key: TEXT; maxVal, bits: INTEGER) =
  BEGIN
    subs.add(key & "Val", Fmt.Int(maxVal));
    subs.add(key & "Bits", Fmt.Int(bits));
  END AddIntRange;

PROCEDURE AddIntRanges(self: T; subs: TextSubs.T) =
  VAR
    dfa := self.dfa;
    bitsAlready: INTEGER := 16; (*two bytes already in the record*)
    names := ARRAY[1..3] OF TEXT{"%lastStateRef",
                                 "%lastTransRef",
                                 "%lastOut"};
    maxVals := ARRAY[1..3] OF INTEGER{dfa.numStates,
                                      dfa.numTrans,
                                      TextList.Length(self.lex.names)};
    bits: ARRAY [1..3] OF INTEGER;
  BEGIN
    FOR i := 1 TO 3 DO
      bits[i] := CountBits(maxVals[i]);
    END;
    (*SRC restriction: packed types cannot cross word boundary*)
    FOR i := 1 TO 3 DO
      IF bitsAlready + bits[i] > 32 THEN
        <* ASSERT i # 1 *> (* >64K states? *)
        bits[i-1] := bits[i-1] + 32 - bitsAlready;
        bitsAlready := 0;
      END;
      INC(bitsAlready, bits[i]);
    END;
    FOR i := 1 TO 3 DO
      AddIntRange(subs, names[i], maxVals[i], bits[i]);
    END;

  END AddIntRanges;

PROCEDURE WriteModule(self: T; to: Wr.T) =
  VAR
    procForm := Bundle.Get(self.form, "lexform.proc.m3");
    subs := Subs(self);
  BEGIN
    subs.add("%ovr", FmtProcs(self, "    %name := %name;\n"));
    subs.add("%case",
             FmtProcs(self, "      | %code => result := self.%name();\n") &
             FmtProcs(self, "      | %code => result := %tok.NewConstToken(" &
             "%tok.%name);\n", FALSE, TRUE));
    subs.add("%default", FmtProcs(self, procForm, TRUE));
    subs.add("%alloc", self.tok.fmtVar("    allocate_%name: " &
      self.tokMN & ".Allocator := NIL;\n"));
    subs.add("%purge", self.tok.fmtVar("\n    + " &
      self.tokMN & ".Purge(self.allocate_%name)"));
    subs.add("%First", FmtTable(self, TableKind.First));
    subs.add("%States", FmtTable(self, TableKind.States));
    subs.add("%Trans", FmtTable(self, TableKind.Trans));
    AddIntRanges(self, subs);
    Wr.PutText(to, subs.apply(Bundle.Get(self.form, "lexform.m3")));
  END WriteModule;

PROCEDURE Test(self: T) =
  BEGIN
    DFA.Test(self.dfa);
  END Test;

BEGIN
END LexFmt.