{   obcc, a small oberoncompiler for the C16x-family
      based on Oberon-0 by N. Wirth

    Copyright Guido B. (user "guido-b" at http://www.mikrocontroller.net)

    obcs.pas, the scanner, is part of obcc.
    
    obcc is free software: you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
    the Free Software Foundation, either version 3 of the License, or
    (at your option) any later version.

    obcc is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with obcc.  If not, see <http://www.gnu.org/licenses/>.                       }

unit obcs; { Der Scanner }

interface
uses obcd; { Deklarationen einbinden }

CONST 	KW = 34;  { Idlen ist jetzt in obcd }
	(*symbols*) null_ = 0;
	times_ = 1; div_ = 3; mod_ = 4; and_ = 5; plus_ = 6; minus_ = 7; 
	andb_ = 8; orb_ = 9; xorb_ = 10; or_ = 11;
	eql_ = 12; neq_ = 13; lss_ = 14; geq_ = 15; leq_ = 16; gtr_ = 17;	
	period_ = 18; comma_ = 19; colon_ = 20; rparen_ = 22; rbrak_ = 23; 
	of_ = 25; then_ = 26; do_ = 27; to_ = 28;
	lparen_ = 29; lbrak_ = 30; not_ = 32; becomes_ = 33; number_ = 34; ident_ = 37; 
	semicolon_ = 38; end_ = 40; else_ = 41; elsif_ = 42; until_ = 43;
	if_ = 44; for_ = 45; while_ = 46; repeat_ = 47;
	array_ = 54; record_ = 55;
	sfr_ = 56; const_ = 57; type_ = 58; var_ = 59; procedure_ = 60; procedureR_ = 61;
	begin_ = 62; 
	module_ = 63; eof_ = 64; str_ = 66; import_ = 67;
	in_ = 68; upto_ = 69; zero_ = 70; lbrace_ = 71; rbrace_ = 72; return_ = 73;
	pointer_ = 74;

VAR 	val: LONGINT;
	id: Ident;
	error: BOOLEAN;
	ch: CHAR;
	nkw: INTEGER;
	errpos: LONGINT;
	keyTab  : ARRAY [1..KW] OF
          RECORD sym: INTEGER; id: String[12]; end;

	Tmod1, Tlog, Taus, Tinc: Text;
	Tpos: Integer;
	incfileopen: BOOLEAN; incpos: INTEGER;
	MainFilename, IncFilename: String;

PROCEDURE Mark(msg: String);
PROCEDURE Warn(msg: String);
PROCEDURE Get(VAR sym: INTEGER);
PROCEDURE Init(Filename:String; pos: LONGINT);
PROCEDURE closefiles;
FUNCTION Readmod(var ch:char):Boolean;

implementation

PROCEDURE Mark(msg: String);
VAR p: LONGINT;
BEGIN p := TPos;
  IF p > errpos THEN 
    IF incfileopen THEN Begin
      Writeln(Tlog, '  Line ', p:4, '/',incpos:4,' : ', msg);
      Writeln(IncFilename,'(', incpos, ',1) Error: ', msg); 
    END
    ELSE Begin 
      Writeln(Tlog, '  Line ', p:4, ' : ', msg);
      Writeln(MainFilename,'(', p, ',1) Error: ', msg); 
    End;
  errpos := p; error := TRUE
END {Mark};


PROCEDURE Warn(msg: String);
VAR p: LONGINT;
BEGIN p := TPos;
  IF incfileopen THEN
    Writeln('Warning:  Line ', p:4, '/',incpos:4,' : ', msg)
  ELSE Writeln('Warning: Line ', p:4, ' : ', msg); 
END {Warn};

Function Cap(c:Char):Char;
BEGIN 	if (ord(c) > 96) AND (ord(c)<123) then
	  CAP :=  chr(ord(c) - 32)
	else CAP := c;
end;

FUNCTION Readmod(var ch:char):Boolean;

BEGIN Readmod := TRUE;
      IF incfileopen  THEN BEGIN
        IF eof(Tinc) THEN BEGIN
          close(Tinc); incfileopen := FALSE; ch := chr(59); { semicolon } END
        ELSE
          IF eoln(Tinc) THEN BEGIN
            readln(Tinc); incpos := incpos + 1; ch := chr(13); END
          ELSE read(Tinc, ch);
      END
      ELSE BEGIN
	if eof(Tmod1) then 
	  IF ch <>'.' THEN BEGIN
	     Mark('unexpected EOF'); close(Tlog); close(Taus); halt(1);
	  END
	  ELSE BEGIN
	    ch := ' ';
	  END
	else if eoln(Tmod1) then begin
	  readln(Tmod1); Tpos := Tpos +1; ch := chr(13); 
	end
	else Begin
	  Read(Tmod1, ch);
	  If UseUpper then ch := Cap(ch); End;
      END;
END {Readmod};

PROCEDURE Get(VAR sym: INTEGER);

var ok: BOOLEAN;

	PROCEDURE Ident;
	  VAR i, k: INTEGER; s: string;
	BEGIN id:='';
	  REPEAT 
	    IF length(id) < IdLen THEN id := id + ch;
	    ok := Readmod(ch);
	  UNTIL (ch < '0') OR (ch > '9') AND (CAP(ch) < 'A') OR (CAP(ch) > 'Z') AND (ch <> '_'); 
	  If id = 'INSERT' THEN 
	    IF incfileopen THEN Mark('Nested INSERT!')
	    ELSE BEGIN
	      s := '';
	      REPEAT ok := Readmod(ch); 
	        IF (ch <>';') THEN s := s + ch;
	      UNTIL (ch = ';');
	      {$i-}
	      assign(Tinc, s); reset(Tinc); {$i+}
	      i := IOresult; 
	      IF i = 0 THEN BEGIN 
	        incfileopen := TRUE; incpos := 1; IncFilename := s;
	        sym := semicolon_; ok := Readmod(ch);
	        Writeln('       INSERT: ', s); END
	      ELSE mark('File not found: ' + s);
	    END
	  ELSE BEGIN
	    IF id = 'PROCEDURE' THEN
	      IF ch = '*' THEN BEGIN
	        id := id + '*'; ok := Readmod(ch); END;
	    k := 0;
	    WHILE (k < nkw) AND (id <> keyTab[k].id) DO INC(k);
	    IF k < nkw THEN sym := keyTab[k].sym ELSE sym := ident_;
	  END;
	END {Ident};

	PROCEDURE Number;
	BEGIN val := 0; sym := number_;
	  REPEAT
	    IF val <= ($FFFF - ORD(ch) + ORD('0')) DIV 10 THEN
	      val := 10 * val + (ORD(ch) - ORD('0'))
	    ELSE BEGIN Mark('number too large'); val := 0
	    END ;
	    ok:=Readmod(ch);
	  UNTIL (ch < '0') OR (ch > '9')
	END {Number};

	PROCEDURE HexNumber;
	  VAR i:integer;
	BEGIN val := 0; sym := number_;
	  REPEAT
	    ch := cap(ch); 
	    IF (ch <= '9') THEN  i := ord(ch) - ord('0')
	    ELSE i := ord(ch) - ord('A') + 10;
	    IF (i >= 0) AND (i < 16) THEN BEGIN
	      IF val <= ($FFFF - i) DIV 16 THEN
	        val := val * 16 + i
	      ELSE Mark('number too large');
	    END
	    ELSE BEGIN mark(' illegal hexnumber'); val := 0; END;
	    ok := Readmod(ch);
	  UNTIL (ch < '0') OR ((ch > '9') AND (ch < 'A')) or (ch > 'F');
	END {HexNumber};

	PROCEDURE BinNumber;
	BEGIN val := 0; sym := number_;
	  REPEAT
	    IF (ch = '0') or (ch = '1') THEN BEGIN
	      IF val <= ($FFFF - ORD(ch) + ORD('0')) DIV 2 THEN
	        val := 2 * val + (ORD(ch) - ORD('0'))
	      ELSE BEGIN Mark('number too large'); val := 0 END;
	    END
	    ELSE BEGIN Mark('illegal binnumber'); val := 0; END;
	    ok:=Readmod(ch);
	  UNTIL (ch < '0') OR (ch > '1');
	END{BinNumber};
	
	PROCEDURE do_String;
	BEGIN
	  id := ''; sym := str_;
	  WHILE (ch <> '"') DO BEGIN
	    id := id + ch; ok := Readmod(ch);
	    IF (length(id) = Idlen - 1) AND (ch <> '"') THEN BEGIN
	      Mark('string too long!'); exit; end;
	  END;
	  IF ODD(LENGTH(id)) THEN  id := id + chr(0)
	  ELSE id := id + chr(0) + chr(0);
	  ok := readmod(ch);
	END {do_String};
	
	PROCEDURE Comment;
	BEGIN ok:=Readmod(ch);
	  REPEAT
	    REPEAT
	      while (ch = '(') do begin
	        ok := Readmod(ch); if (ch = '*') then comment; end;
	        ok := Readmod(ch);
	    UNTIL (ch = '*') OR (not ok);
	    if ok then ok := Readmod(ch);
	  UNTIL (ch = ')') OR (not ok);
	  if not ok then mark('comment not terminated')
	  else ok := Readmod(ch);	
	END {Comment};


BEGIN {Get} ok := TRUE;
	While ok AND (ORD(ch) <= 32) do ok := Readmod(ch);
	If NOT ok then sym:=eof_
	else 
	CASE ch OF
		'&': BEGIN ok:=Readmod(ch); sym := and_; END;
		'*': BEGIN ok:=Readmod(ch); sym := times_; END;
		'+': BEGIN ok:=Readmod(ch); sym := plus_; END;
		'-': BEGIN ok:=Readmod(ch); sym := minus_; END;
		'=': BEGIN ok:=Readmod(ch); sym := eql_; END;
		'#': BEGIN ok:=Readmod(ch); sym := neq_; END;
		'<': BEGIN ok:=Readmod(ch);
			if ch = '=' then begin 
			  ok:=Readmod(ch); sym := leq_; end
			else sym := lss_;
		     end;
		'>': BEGIN ok:=Readmod(ch); 
			if ch = '=' then begin
			  ok:=Readmod(ch); sym := geq_; end
			else sym := gtr_;
		     end;
		';': BEGIN ok:=Readmod(ch); sym := semicolon_; END;
		',': BEGIN ok:=Readmod(ch); sym := comma_; END;
		':': BEGIN ok:=Readmod(ch);
			if ch = '=' then begin 
			  ok:=Readmod(ch); sym := becomes_; end
			else sym := colon_;
		     end;
		'.': BEGIN ok:=Readmod(ch); 
			  IF ch = '.' THEN BEGIN
			    sym := upto_; ok := ReadMod(ch); END
			  ELSE sym := period_; END;
		'(': BEGIN ok:=Readmod(ch);
			if ch = '*' then begin 
			  Comment; Get(sym); end
			else sym := lparen_;
		     end;
		')': BEGIN ok:=Readmod(ch); sym := rparen_; END;
		'[': BEGIN ok:=Readmod(ch); sym := lbrak_; END;
		']': BEGIN ok:=Readmod(ch); sym := rbrak_; END;
		'{': BEGIN ok:=ReadMod(ch); sym := lbrace_; END;
		'}': BEGIN ok:=ReadMod(ch); sym := rbrace_; END;
		'$': BEGIN ok := Readmod(ch); Hexnumber; END;
		'%': BEGIN ok := Readmod(ch); BinNumber; END;
		'"': BEGIN ok := Readmod(ch); do_String; END;
		'0'..'9': Number;
		'A'..'Z', 'a'..'z', '_': Ident;
		'~': BEGIN ok:=Readmod(ch); sym := not_; END;
		else begin ok:=Readmod(ch); sym := null_; end;
	end;
END; {Get}

PROCEDURE Init(Filename:String; pos: LONGINT);

BEGIN 
	error := FALSE; errpos := pos; 
	Assign(Tmod1, Filename+'.mod'); reset(Tmod1);
	Assign(Tlog, filename+'.log'); rewrite(Tlog);
	Assign(Taus, filename+'.asm'); rewrite(Taus);
	MainFilename := Filename + '.mod';
	Read(Tmod1,ch);
	Tpos := 1;
END {Init};

PROCEDURE EnterKW(sym: INTEGER; name: String);

BEGIN keyTab[nkw].sym := sym; keyTab[nkw].id:= Name; INC(nkw)
END {EnterKW};


PROCEDURE closefiles;
Begin close(Taus); close(Tlog); if errpos = 0 then erase(Tlog);
  close(Tmod1);
end;

initialization

	error := TRUE; nkw := 1;  errpos := 0;
	incfileopen := FALSE;
	
	EnterKW(do_, 'DO');
	EnterKW(if_, 'IF');
	EnterKW(of_, 'OF');
	EnterKW(or_, 'OR');
	EnterKW(xorb_, 'XORB');
	EnterKW(orb_, 'ORB');
	EnterKW(andb_, 'ANDB');
	EnterKW(end_, 'END');
	EnterKW(mod_, 'MOD');
	EnterKW(var_, 'VAR');

	EnterKW(else_, 'ELSE');
	EnterKW(then_, 'THEN');
	EnterKW(type_, 'TYPE');	
	EnterKW(array_, 'ARRAY');
	EnterKW(begin_, 'BEGIN');
	EnterKW(const_, 'CONST');
	EnterKW(elsif_, 'ELSIF');
	EnterKW(while_, 'WHILE');
	EnterKW(record_, 'RECORD');
	EnterKW(procedure_, 'PROCEDURE');

	EnterKW(procedureR_, 'PROCEDURE*');
	EnterKW(div_, 'DIV');
	EnterKW(module_, 'MODULE');
	EnterKW(for_, 'FOR');
	EnterKW(repeat_, 'REPEAT');
	EnterKW(to_, 'TO');
	EnterKW(until_, 'UNTIL');
	EnterKW(sfr_, 'SFR');
	EnterKW(import_, 'IMPORT');
	EnterKW(in_, 'IN');
	EnterKW(return_, 'RETURN');
	EnterKW(pointer_, 'POINTER');

end.