klexlib/derived/RegExpParse.m3


MODULE RegExpParse;
Generated by kyacc
IMPORT RegExpTok;
IMPORT IntIntTbl, IntTextTbl;
IMPORT RTType;
IMPORT Env, Thread, Wr, Fmt, Rd;
FROM Stdio IMPORT stdout;
FROM RegExpTok IMPORT NewPT;
<* FATAL Wr.Failure, Thread.Alerted *>

TYPE
  TypedSymbol = RECORD
    code: INTEGER;
    value: RegExpTok.ParseType;
  END;
CONST
  EOFSymbol = TypedSymbol{code := 0, value := NIL};
  NoToken = TypedSymbol{code := -1, value := NIL};
  NotASymbol = TypedSymbol{code := -1000, value := NIL};

TYPE
  StackElem = RECORD
    state: INTEGER;
    value: TypedSymbol;
  END;
  StackElemArray = REF ARRAY OF StackElem;

  Stack = RECORD
    a: StackElemArray;
    ptr: INTEGER;
  END;

REVEAL
  T = Public BRANDED "RegExpParse" OBJECT
    lex: RegExpTok.Lexer;
    tokenLookup: IntIntTbl.T := NIL; (* M3 type code -> SymCode *)
    symbols: IntTextTbl.T;           (* SymCode -> name *)
    allocate_expr: RegExpTok.Allocator;
  OVERRIDES
    setLex := SetLex;
    parse := Parse;
    purge := Purge;
    paren_expr := paren_expr;
    concat_expr := concat_expr;
    or_expr := or_expr;
    plus_expr := plus_expr;
    star_expr := star_expr;
    quest_expr := quest_expr;
    repeat_expr := repeat_expr;
    ident_expr := ident_expr;
    string_expr := string_expr;
    charRange_expr := charRange_expr;
  END;

TYPE
  SymCode = BITS 9 FOR [0..262];
  (* symbol code:  0 .. 261
     set default:  262 *)

  Action = BITS 6 FOR [0..32];
  (* error:        -1   (not stored in table)
     shift:        1 .. 10
     accept:       11
     reduce:       12 .. 21
     shift&accept: 22
     shift&reduce: 23 .. 32  *)

  StateRef = BITS 5 FOR [0..28];
  (* no more:      0
     next state:   1..28 *)

  S = RECORD
    key: SymCode;
    action: Action;
    next: StateRef;
  END;

  R = RECORD
    length: INTEGER;
    returnCode: INTEGER;
    name: TEXT;
  END;

  Y = RECORD
    code: INTEGER;
    name: TEXT;
  END;

CONST
  States = ARRAY [1..28] OF S {
    S{257,3,11}, S{257,8,11}, S{262,11,12}, S{257,9,11}, S{262,13,13},
    S{257,10,11}, S{262,13,14}, S{41,23,15}, S{262,14,16}, S{262,14,17},
    S{40,2,18}, S{124,6,17}, S{257,5,19}, S{257,7,19}, S{124,4,16},
    S{257,7,20}, S{257,5,20}, S{259,30,21}, S{42,27,22}, S{42,27,23},
    S{260,31,24}, S{43,26,25}, S{43,26,26}, S{261,32,0}, S{63,28,27},
    S{63,28,28}, S{258,29,0}, S{258,29,11}};

  Rules = ARRAY [12..21] OF R {
    R{3, 257, "paren_expr : '(' expr ')'"},
    R{2, 257, "concat_expr : expr expr"},
    R{3, 257, "or_expr : expr '|' expr"},
    R{2, 257, "plus_expr : expr '+'"},
    R{2, 257, "star_expr : expr '*'"},
    R{2, 257, "quest_expr : expr '?'"},
    R{2, 257, "repeat_expr : expr COUNT"},
    R{1, 257, "ident_expr : IDENTIFIER"},
    R{1, 257, "string_expr : STRING"},
    R{1, 257, "charRange_expr : CHAR_RANGE"}
};

  Symbols = ARRAY [1..18] OF Y {
    Y{0,"EOF"}, Y{10,"'\\n'"}, Y{40,"'('"}, Y{41,"')'"}, Y{42,"'*'"},
    Y{43,"'+'"}, Y{45,"'-'"}, Y{63,"'?'"}, Y{91,"'['"}, Y{93,"']'"},
    Y{94,"'^'"}, Y{124,"'|'"}, Y{256,"ERROR"}, Y{257,"expr"}, Y{258,"COUNT"},
    Y{259,"IDENTIFIER"}, Y{260,"STRING"}, Y{261,"CHAR_RANGE"}};

VAR
  Debug := Env.Get("RegExpParseDEBUG") # NIL;

PROCEDURE SetLex(self: T; lex: RegExpTok.Lexer): T =
  BEGIN self.lex := lex; RETURN self; END SetLex;

PROCEDURE Init(self: T) =
  BEGIN (* called on first parse *)
    self.tokenLookup := NEW(IntIntTbl.Default).init(18);
    IF Debug THEN
      self.symbols := NEW(IntTextTbl.Default).init(18);
      FOR i := 1 TO 18 DO
        EVAL self.symbols.put(Symbols[i].code, Symbols[i].name);
      END;
    END;
  END Init;

PROCEDURE NextToken(self: T): TypedSymbol =
  VAR
    symCode, m3code: INTEGER;
    token: RegExpTok.Token;
    found := FALSE;
  BEGIN
    TRY
      token := self.lex.get();
    EXCEPT
      Rd.EndOfFile => RETURN EOFSymbol;
    END;
    m3code := TYPECODE(token);
    IF NOT self.tokenLookup.get(m3code, symCode) THEN
      REPEAT
        m3code := RTType.Supertype(m3code);
        IF m3code = RTType.NoSuchType THEN
          TYPECASE token OF
          | ConstToken => symCode := -1;
          | COUNT => symCode := 258;
          | CHAR_RANGE => symCode := 261;
          | STRING => symCode := 260;
          | IDENTIFIER => symCode := 259;
          ELSE
            <* ASSERT FALSE *>
          END;
          found := TRUE;
        ELSE
          found := self.tokenLookup.get(m3code, symCode);
        END;
      UNTIL found;
      EVAL self.tokenLookup.put(TYPECODE(token), symCode);
    END;
    IF symCode = -1 THEN
      symCode := NARROW(token, ConstToken).val;
    END;
    RETURN TypedSymbol{code := symCode, value := token};
  END NextToken;

PROCEDURE AllocStack(): Stack =
  VAR
    a :=NEW(StackElemArray, 16);
  BEGIN
    a[0] := StackElem{state := 1, value := EOFSymbol};
    RETURN Stack{a := a, ptr := 0};
  END AllocStack;

PROCEDURE Push(VAR stack: Stack; elem: StackElem) =
  VAR
    new: StackElemArray;
  BEGIN
    INC(stack.ptr);
    IF stack.ptr > LAST(stack.a^) THEN
      new := NEW(StackElemArray, NUMBER(stack.a^) * 2);
      SUBARRAY(new^, 0, NUMBER(stack.a^)) := stack.a^;
      stack.a := new;
    END;
    stack.a[stack.ptr] := elem;
  END Push;

PROCEDURE ActionLookup(curState: INTEGER; symbol: TypedSymbol): INTEGER =
  VAR
    cur := curState;
    state: S;
    default := -1;
  BEGIN
    REPEAT
      state := States[cur];
      IF state.key = 262 THEN
        default := state.action;
      ELSIF state.key = symbol.code THEN
        RETURN state.action;
      END;
      cur := state.next;
    UNTIL cur = 0;
    RETURN default;
  END ActionLookup;

PROCEDURE Parse(self: T; exhaustInput: BOOLEAN := TRUE): StartType =
  VAR
    curState: INTEGER := 1;
    stack := AllocStack();
    action: INTEGER;
    symbol, preservedToken: TypedSymbol;
    skipTokenGets: INTEGER := 0;

  PROCEDURE DebugPrint(message: TEXT) = BEGIN
    IF Debug THEN Wr.PutText(stdout,"RegExpParseDEBUG: "&message&"\n");
    END;END DebugPrint;
  PROCEDURE DebugSymbol(message: TEXT) = VAR name: TEXT; BEGIN
   IF Debug THEN EVAL self.symbols.get(symbol.code, name);
    DebugPrint(message & " " & name & "(" &
      Fmt.Int(symbol.code) & ")"); END; END DebugSymbol;
  PROCEDURE DebugState(message: TEXT) = BEGIN IF Debug THEN
    DebugPrint(message & " " & Fmt.Int(curState));END;END DebugState;
  PROCEDURE DebugRule(message: TEXT) = BEGIN IF Debug THEN
    DebugPrint(message&" "&Rules[action].name);END;END DebugRule;

  BEGIN
    IF self.tokenLookup = NIL THEN Init(self); END;
    stack.a[0] := StackElem{state := curState, value := NotASymbol};
    DebugState("starting in state");
    LOOP
      IF skipTokenGets = 2 THEN
        skipTokenGets := 1;
        DebugSymbol("scanning reduced symbol");
      ELSIF skipTokenGets = 1 AND preservedToken # NoToken THEN
        skipTokenGets := 0;
        symbol := preservedToken;
        DebugSymbol("re-scanning input token");
      ELSE
        skipTokenGets := 0;
        symbol := NextToken(self);
        preservedToken := symbol;
        DebugSymbol("input token");
      END;
      action := ActionLookup(curState, symbol);
      IF action >= 22 THEN
        DebugPrint("shifting anonymously");
        Push(stack, StackElem{state := 0, value := symbol});
        DEC(action, 11);
        IF skipTokenGets = 0 THEN
          preservedToken := NoToken;
        END;
      END;
      IF action = -1 THEN
        DebugPrint("syntax error");
        self.lex.error("RegExpParse: syntax error");RETURN NIL;
      ELSIF action <= 10 THEN
        curState := action;
        DebugState("shifting to state");
        Push(stack, StackElem{state := curState, value := symbol});
      ELSIF action = 11 THEN
        DebugPrint("parsing stopped with singleton start symbol on stack");
        <* ASSERT stack.ptr = 1 *>
        IF exhaustInput AND preservedToken = NoToken THEN
          symbol := NextToken(self);
          DebugPrint("getting token to check that it's an EOF");
        END;
        IF symbol.code # 0 THEN
          IF exhaustInput THEN
            DebugPrint("Error: last token was not EOF");
            self.lex.error("RegExpParse: syntax error (parsing stopped before EOF)");
            RETURN NIL;
          END;
          IF preservedToken # NoToken THEN
            self.lex.unget();
            DebugPrint("ungetting last token");
          END;
        END;
        symbol := stack.a[1].value;
        DebugSymbol("returning symbol");
        RETURN symbol.value;
      ELSE
        DebugRule("reducing by rule");
        WITH p=stack.ptr, a=stack.a, v=symbol.value, l=Rules[action].length DO
          CASE action OF
          | 12 => VAR w: expr := NIL;
            p1:expr:=a[p-1].value.value;
            BEGIN self.paren_expr(w, p1); v:=w; END;
          | 13 => VAR w: expr := NIL;
            p1:expr:=a[p-1].value.value;p2:expr:=a[p].value.value;
            BEGIN self.concat_expr(w, p1, p2); v:=w; END;
          | 14 => VAR w: expr := NIL;
            p1:expr:=a[p-2].value.value;p2:expr:=a[p].value.value;
            BEGIN self.or_expr(w, p1, p2); v:=w; END;
          | 15 => VAR w: expr := NIL;
            p1:expr:=a[p-1].value.value;
            BEGIN self.plus_expr(w, p1); v:=w; END;
          | 16 => VAR w: expr := NIL;
            p1:expr:=a[p-1].value.value;
            BEGIN self.star_expr(w, p1); v:=w; END;
          | 17 => VAR w: expr := NIL;
            p1:expr:=a[p-1].value.value;
            BEGIN self.quest_expr(w, p1); v:=w; END;
          | 18 => VAR w: expr := NIL;
            p1:expr:=a[p-1].value.value;p2:COUNT:=a[p].value.value;
            BEGIN self.repeat_expr(w, p1, p2); v:=w; END;
          | 19 => VAR w: expr := NIL;
            p1:IDENTIFIER:=a[p].value.value;
            BEGIN self.ident_expr(w, p1); v:=w; END;
          | 20 => VAR w: expr := NIL;
            p1:STRING:=a[p].value.value;
            BEGIN self.string_expr(w, p1); v:=w; END;
          | 21 => VAR w: expr := NIL;
            p1:CHAR_RANGE:=a[p].value.value;
            BEGIN self.charRange_expr(w, p1); v:=w; END;
          ELSE
            <* ASSERT FALSE *>
          END;
          FOR i := p - l + 1 TO p DO a[i].value.value.discard(); END;
          DEC(p, l);
          curState := a[p].state;
        END;
        DebugState("popping to state");
        symbol.code := Rules[action].returnCode;
        skipTokenGets := 2;
      END;
    END;
  END Parse;

PROCEDURE Purge(self: T): INTEGER =
  BEGIN
    RETURN 0
      + RegExpTok.Purge(self.allocate_expr);
  END Purge;
default methods
PROCEDURE paren_expr(self: T;
 VAR result: expr;<*UNUSED*>p1: expr) = BEGIN
 IF result=NIL THEN
   result:=NewPT(self.allocate_expr,TYPECODE(expr));
 END;END paren_expr;

PROCEDURE concat_expr(self: T;
 VAR result: expr;<*UNUSED*>p1: expr;<*UNUSED*>p2: expr) = BEGIN
 IF result=NIL THEN
   result:=NewPT(self.allocate_expr,TYPECODE(expr));
 END;END concat_expr;

PROCEDURE or_expr(self: T;
 VAR result: expr;<*UNUSED*>p1: expr;<*UNUSED*>p2: expr) = BEGIN
 IF result=NIL THEN
   result:=NewPT(self.allocate_expr,TYPECODE(expr));
 END;END or_expr;

PROCEDURE plus_expr(self: T;
 VAR result: expr;<*UNUSED*>p1: expr) = BEGIN
 IF result=NIL THEN
   result:=NewPT(self.allocate_expr,TYPECODE(expr));
 END;END plus_expr;

PROCEDURE star_expr(self: T;
 VAR result: expr;<*UNUSED*>p1: expr) = BEGIN
 IF result=NIL THEN
   result:=NewPT(self.allocate_expr,TYPECODE(expr));
 END;END star_expr;

PROCEDURE quest_expr(self: T;
 VAR result: expr;<*UNUSED*>p1: expr) = BEGIN
 IF result=NIL THEN
   result:=NewPT(self.allocate_expr,TYPECODE(expr));
 END;END quest_expr;

PROCEDURE repeat_expr(self: T;
 VAR result: expr;<*UNUSED*>p1: expr;<*UNUSED*>p2: COUNT) = BEGIN
 IF result=NIL THEN
   result:=NewPT(self.allocate_expr,TYPECODE(expr));
 END;END repeat_expr;

PROCEDURE ident_expr(self: T;
 VAR result: expr;<*UNUSED*>p1: IDENTIFIER) = BEGIN
 IF result=NIL THEN
   result:=NewPT(self.allocate_expr,TYPECODE(expr));
 END;END ident_expr;

PROCEDURE string_expr(self: T;
 VAR result: expr;<*UNUSED*>p1: STRING) = BEGIN
 IF result=NIL THEN
   result:=NewPT(self.allocate_expr,TYPECODE(expr));
 END;END string_expr;

PROCEDURE charRange_expr(self: T;
 VAR result: expr;<*UNUSED*>p1: CHAR_RANGE) = BEGIN
 IF result=NIL THEN
   result:=NewPT(self.allocate_expr,TYPECODE(expr));
 END;END charRange_expr;

BEGIN
END RegExpParse.