Friday 3 August 2012

Natural/ADABAS parser example

// Generator options for this JavaCC/JJTree grammar.
options {
IGNORE_CASE = true; // literals and keywords match regardless of letter case
DEBUG_PARSER = false; // the three DEBUG_* switches leave generated tracing off
DEBUG_LOOKAHEAD = false;
DEBUG_TOKEN_MANAGER = false;
LOOKAHEAD = 2; // default number of tokens inspected at every choice point
FORCE_LA_CHECK=true; // keep choice-conflict checking on even though LOOKAHEAD is not 1
}
PARSER_BEGIN(ARSparser)

/**
 * Command-line driver for the generated parser.
 *
 * <p>With no argument the input is read from standard input; with exactly one
 * argument that argument is opened as the input file. Any other argument count
 * prints a usage message.
 */
public class ARSparser {
    /**
     * Runs the parser once and reports success or the parse/lexical error on
     * standard output.
     *
     * @param args empty to read standard input, or a single file name
     */
    public static void main ( String args [ ] ) {
        ARSparser parser;
        // Set only when this method opened a file itself; System.in is left open.
        java.io.InputStream fileInput = null;

        if (args.length == 0) {
            System.out.println ("\nARSparser: GIRIS OKUNUYOR ...\n\n");
            parser = new ARSparser(System.in);
        } else if (args.length == 1) {
            System.out.println ("ARSparser: Reading the file " + args[0] + " ..." );
            try {
                fileInput = new java.io.FileInputStream(args[0]);
                parser = new ARSparser(fileInput);
            } catch (java.io.FileNotFoundException e) {
                System.out.println ("ARSparser: The file " + args[0] + " was not found.");
                return;
            }
        } else {
            System.out.println ("ARSparser: You must use one of the following:");
            System.out.println (" java ARSparser < file");
            System.out.println ("Or");
            System.out.println (" java ARSparser file");
            return ;
        }

        try {
            // Start() returns the root SimpleNode; call dump("") on it to print the tree.
            parser.Start();
            System.out.println ("\n\nARSparser: PARSE ISLEMI BASARILDI");
        } catch (ParseException e) {
            System.out.println ("ARSparser: There was an error during the parse.");
            System.out.println (e.getMessage());
        } catch (TokenMgrError e) {
            System.out.println ("ARSparser: There was an error.");
            System.out.println (e.getMessage());
        } finally {
            // Release the file handle that the original code leaked.
            if (fileInput != null) {
                try {
                    fileInput.close();
                } catch (java.io.IOException ignored) {
                    // Parsing is already finished; a failed close cannot be acted upon.
                }
            }
        }
    }

    /**
     * Builds a string consisting of {@code l} space characters.
     *
     * @param l number of spaces; zero or a negative value yields the empty string
     * @return the indentation string
     */
    public static String indentString(int l) {
        // Guard the capacity: a negative capacity would throw, while the loop
        // below simply does nothing for l <= 0.
        StringBuilder sb = new StringBuilder(Math.max(l, 0));
        for (int i = 0; i < l; ++i) {
            sb.append(' ');
        }
        return sb.toString();
    }
}
PARSER_END(ARSparser)

//STRUCTURES AND CHARACTERS TO SKIP

// Blanks and tabs are discarded between tokens. Line terminators are NOT skipped
// (those alternatives are commented out below); the productions match "\r\n"
// explicitly instead.
SKIP : {
" "
| "\t"
// | "\n"
// | "\r"
// | "*\r\n"
// | <"rem" (~["\n","\r"])* ("\n" | "\r" | "\r\n")>
}

//STATIC TOKENS
// Literal constants. These are declared before NATURAL_WORD, so an all-digit
// lexeme or the words true/false are reported as constants rather than words.
TOKEN : {
<INTEGER_CONSTANT: (<DIGIT>)+>
// NOTE(review): "-1" is accepted as a logic constant; presumably a legacy
// encoding of a boolean value -- confirm against the sources being parsed.
| <LOGIC_CONSTANT: "true" | "false" | "-1">
// Private helper (the leading # means it is never emitted as a token).
| <#DIGIT: ["0"-"9"]>
}

//RESERVED WORDS
// Keyword tokens, grouped alphabetically. Matching is case-insensitive because
// IGNORE_CASE is set in the options block.
// Multi-word keywords (e.g. "at end of data", "no record found") are single
// literals, so they only match when the words are separated by exactly one blank.
TOKEN : {
/* A */
<AND: "and">
| <ADD: "add">
| <ALARM: "alarm">
| <AT_END_OF_DATA: "at end of data">
| <AT_BREAK_OF: "at break of">
| <AT_TOP_OF_PAGE: "at top of page">
/* B */
| <BY: "by">
| <BOTTOM: "bottom">
/* C */
| <COMPUTE: "compute">
// Marker that introduces a comment line (see COMMENT_LINE).
// NOTE(review): presumably inserted by a preprocessing step -- confirm.
| <COMMENT: "CCMMNNTT">
| <CONTROL: "control">
/* D */
| <DEFINE: "define">
| <DATA: "data">
| <DISPLAY: "display">
/* E */
| <ELSE: "else">
| <END: "end">
| <END_BREAK: "end-break">
| <END_DEFINE: "end-define">
| <END_ENDDATA: "end-enddata">
| <END_FIND: "end-find">
| <END_FOR: "end-for">
| <END_IF: "end-if">
| <END_READ: "end-read">
| <END_ALL: "end-all">
| <END_NOREC: "end-norec">
| <END_REPEAT: "end-repeat">
| <END_TOPPAGE: "end-toppage">
| <ESCAPE: "escape">
/* F */
| <FIND: "find">
| <FROM: "from">
| <FOR: "for">
| <FORMAT: "format">
/* G */
| <GET: "get">
/* I */
| <IF: "if">
| <IN: "in">
| <INPUT : "input">
/* K */
| <KEY: "key">
/* L */
| <LIMIT: "limit">
| <LOGICAL: "logical">
| <LOCAL: "local">
/* M */
| <MOVE: "move">
| <MAP: "map">
| <MARK: "mark">
/* N */
| <NAMED: "named">
| <NOTITLE: "notitle">
| <NOHDR: "nohdr">
| <NOT: "not">
| <NO_REC_FOUND: "no record found">
| <NO_ERASE: "no erase">
/* O */
| <OR: "or">
/* P */
| <PERFORM: "perform">
/* R */
| <READ: "read">
| <RECORD: "record">
| <REPEAT: "repeat">
| <RESET: "reset">
| <REINPUT: "reinput">
| <ROUTINE: "routine">
/* S */
| <SET: "set">
| <SIZE: "size">
| <SORTED: "sorted">
| <STORE: "store">
| <SUBTRACT: "subtract">
| <SUBROUTINE: "subroutine">
/* T */
| <TO: "to">
| <TOP: "top">
| <THRU: "thru">
/* U */
| <USING: "using">
| <UPDATE: "update">
/* W */
| <WINDOW: "window">
| <WITH: "with">
| <WHERE: "where">
| <WRITE: "write">
}
// Parameter-style tokens of the form NAME=value, the ":=" assignment operator
// and the MASK(...) operand.
TOKEN :
{
// SG and IP take exactly one ON/OFF value; a repetition here would also accept
// lexemes such as "SG=ONOFF".
<SG : ("SG=" ("OFF" | "ON" ))>
// INTEGER_CONSTANT already matches one or more digits, so no repetition is needed.
| <LS : ("LS=" <INTEGER_CONSTANT>)>
| <PS : ("PS=" <INTEGER_CONSTANT>)>

| <AD : ("AD=" (("M'_'") | ("MI'_'") | "P" | "I")) >
| <ASSIGNCHARS : <COLONCHAR><EQUALCHAR>>
| <IP : ("IP=" ("OFF" | "ON" ))>
| <MASK : ( "MASK" <LPARENCHAR><NATURAL_WORD><RPARENCHAR>)>
}
// Punctuation, comparison operators, literals and identifiers.
// The token manager prefers the longest match and, for equal lengths, the token
// declared first -- so the declaration order in this block is significant.
TOKEN :
{
// < LEVEL_66: "66" >
// < LEVEL_NUMBER: ( (("0")? ["1"-"9"]) | (["1"-"4"]["0"-"9"]) | "78" ) >
// < INTEGER: (["0"-"9"])+ >
< MINUSCHAR: "-" > // a.k.a. dash
| < LPARENCHAR: "(" >
| < RPARENCHAR: ")" >
| < COLONCHAR: ":" >
| < DOTCHAR: "." >
| < COMMACHAR: "," >
| < DOUBLEDQUOTECHAR: "\"\"" >
| < QUOTECHAR: "\"" >
| < DOUBLEDAPOSTROPHE: "''" >
| < APOSTROPHE: "'" >
| < PLUSCHAR: "+" >
| < ASTERISKCHAR: "*" >
//| < POWEROF: "**" >
| < SLASHCHAR: "/" >
| < DOLLARCHAR: "$" >
| < LESSTHANOREQUAL: "<=" >
| < LESSTHANCHAR: "<" >
| < MORETHANOREQUAL: ">=" >
| < MORETHANCHAR: ">" >
| < EQUALCHAR: "=" >
| < NOTEQUAL: "<>" >
// Word form of "not equal"; declared before NATURAL_WORD so it wins the tie.
| < WNOTEQUAL: "NE" >
| < SHARPCHAR: "#" >
//| < COMPARISON_OP: (["=","<",">"])+>
// Hex literal: h or x followed by hex digits in matching double or single quotes.
| < HEXNUMBER: ["h","x"] ( ( <QUOTECHAR> ( ["0"-"9","a"-"f"] )+ <QUOTECHAR> )
| ( <APOSTROPHE> ( ["0"-"9","a"-"f"] )+ <APOSTROPHE> )
)
>
// String literal in double or single quotes; a doubled delimiter inside the
// string stands for one literal delimiter character.
| < QUOTEDSTRING: ( <QUOTECHAR> (~["\""] | <DOUBLEDQUOTECHAR> )* <QUOTECHAR>
| <APOSTROPHE> (~["'"] | <DOUBLEDAPOSTROPHE> )* <APOSTROPHE>
)
>
// An asterisk immediately followed by exactly three digits.
| <REINPUT_ASTERISK_PARM: <ASTERISKCHAR> ((["0"-"9"])(["0"-"9"])(["0"-"9"])) >
// | <REINPUT_MARK_PARM: (<SHARPCHAR>| <PLUSCHAR>)? <NATURAL_WORD> >
// Identifier: alphanumeric runs joined by one or more '-' or '_' characters.
| < NATURAL_WORD: (["a"-"z","0"-"9"])+ ( (<MINUSCHAR> | "_" )+ (["a"-"z","0"-"9"])+ )* >
// Catch-all: any single character that no other token claims.
| < OTHER_CHARS: ~[] >
}
// A comment line: the COMMENT marker, an asterisk, then free text up to and
// including the terminating CR LF. The whole line is delivered as one token.
TOKEN: {
<COMMENT_LINE: (<COMMENT> <ASTERISKCHAR> (<GENERAL>)* "\r\n")>
// Any character except a line terminator. A hand-written whitelist of allowed
// characters made comment lines containing an unlisted character (a double
// quote, '!', braces, a tab, ...) fail to lex, and tied the grammar to the
// file encoding for non-ASCII letters.
| <#GENERAL: ~["\r","\n"]>
}

// One line of source: either a complete comment line or a Natural statement.
void CodeLine() :
{}
{
(<COMMENT_LINE> | NaturalStatementLine())
}
// Thin wrapper: a statement line is exactly one NaturalStatement.
void NaturalStatementLine() :
{}
{
NaturalStatement()
}
// Dispatches to one of the supported statement kinds. Every alternative except
// ComputationStatement starts with its own distinct token (COMPUTE, MOVE, IF
// or "."); ComputationStatement starts with an optional prefix character and a
// NATURAL_WORD.
void NaturalStatement() :
{}
{
ComputationStatement()
| ComputeStatement()
| MoveStatement()
| IfStatement()
| DotStatament()
}
// A single comparison operator: =, <, <=, >, >=, <> or the word form NE.
void compare() :
{}
{ <EQUALCHAR>
| <LESSTHANCHAR>
| <LESSTHANOREQUAL>
| <MORETHANCHAR>
| <MORETHANOREQUAL>
| <NOTEQUAL>
| <WNOTEQUAL>}

// A line consisting of a single period followed by a mandatory CR LF.
// Prints a trace line when the production is entered.
// (The production name is misspelled, but NaturalStatement refers to it by
// this exact name.)
void DotStatament() :
{System.out.println("Dot statement");}
{
<DOTCHAR> ("\r\n")
}
// MOVE <source> TO <target>+ with an optional trailing CR LF.
// Prints a trace line when the production is entered.
//   source: an integer constant, an optionally qualified ("word.") and optionally
//           prefixed (#, + or *) word, a quoted string, or a parenthesised AD token.
//   target: an optionally prefixed (# or +) word, optionally followed by a
//           parenthesised index that is *, an integer constant or a prefixed word.
void MoveStatement() :
{System.out.println("Move statement");}
{
<MOVE> (<INTEGER_CONSTANT>
| (<NATURAL_WORD><DOTCHAR>)? ((<SHARPCHAR>| <PLUSCHAR> | <ASTERISKCHAR>)?)<NATURAL_WORD>
| <QUOTEDSTRING>
| <LPARENCHAR> <AD> <RPARENCHAR>)
<TO> (
(<SHARPCHAR>| <PLUSCHAR>)? <NATURAL_WORD>
(<LPARENCHAR>
(<ASTERISKCHAR>| <INTEGER_CONSTANT> | (<SHARPCHAR>| <PLUSCHAR>)? <NATURAL_WORD>)
<RPARENCHAR>)?
)+
("\r\n")?
}

// COMPUTE <target> = <operand> { <op> <operand> } with an optional trailing CR LF.
// Prints a trace line when the production is entered.
//   target:        an optionally prefixed (# or +) word.
//   first operand: an integer constant, an optionally prefixed (+ or #) word,
//                  or a quoted string.
//   op:            one of + - * /
//   later operand: an integer constant or a word with an optional # prefix.
// NOTE(review): later operands accept only the # prefix while the first operand
// also accepts + -- confirm whether that asymmetry is intended.
void ComputeStatement() :
{System.out.println("Compute statement");}
{ <COMPUTE>
((<SHARPCHAR>| <PLUSCHAR>)? <NATURAL_WORD>)
<EQUALCHAR>
( <INTEGER_CONSTANT> | (<PLUSCHAR> |<SHARPCHAR>)? <NATURAL_WORD> | <QUOTEDSTRING>)
(
(<PLUSCHAR> | <MINUSCHAR> | <ASTERISKCHAR> | <SLASHCHAR>)
( <INTEGER_CONSTANT> | (<SHARPCHAR>)? <NATURAL_WORD>)
)* ("\r\n")?
}
// Keyword-less assignment: <target> := <value> with an optional trailing CR LF.
//   target: a word with an optional #, + or * prefix.
//   value:  an integer constant, a quoted string, or an optionally prefixed word.
// NOTE(review): the trace text is the same "Compute statement" that
// ComputeStatement prints, so the two cannot be told apart in the output --
// presumably intentional (an assignment being an implicit COMPUTE); confirm.
void ComputationStatement() :
{System.out.println("Compute statement");}
{
(<SHARPCHAR>| <PLUSCHAR> | <ASTERISKCHAR>)? <NATURAL_WORD>
<ASSIGNCHARS>
(<INTEGER_CONSTANT> | <QUOTEDSTRING> | ((<SHARPCHAR>| <PLUSCHAR> | <ASTERISKCHAR>)? <NATURAL_WORD>))
("\r\n")?
}
// IF ... [ELSE ...] END-IF | END-NOREC. Prints a trace line when entered.
// Shape of the rule, in order:
//   1. IF, any number of NOT keywords.
//   2. Either the NO RECORD FOUND keyword, or a condition:
//        operand [ (index) ] compare ( operand+ | MASK )
//        [ THRU operand+ ]
//        { (AND | OR) [operand] [ (index) ] compare [operand] }
//   3. Zero or more nested statements (the "then" part).
//   4. Zero or more ELSE parts, each with at least one statement.
//   5. END-IF or END-NOREC.
// An optional CR LF is tolerated after almost every element, so a condition may
// be spread over several physical lines.
// NOTE(review): the grammar accepts several ELSE parts and lets either closing
// keyword end either form -- confirm whether that leniency is intended.
void IfStatement() :
{System.out.println("If statement");}
{
<IF> ("\r\n")? (<NOT>)* ("\r\n")?
(
(
<NO_REC_FOUND>("\r\n")?
)
|
(
(
// Left-hand operand: optional "qualifier." and optional #, + or * prefix.
((<NATURAL_WORD><DOTCHAR>)?(<SHARPCHAR>| <PLUSCHAR>| <ASTERISKCHAR>)? <NATURAL_WORD>) ("\r\n")?
// Optional parenthesised index on the left-hand operand.
(<LPARENCHAR>
(<INTEGER_CONSTANT> | <QUOTEDSTRING>| (<SHARPCHAR>| <PLUSCHAR>| <ASTERISKCHAR>)? <NATURAL_WORD>)
<RPARENCHAR>
)? ("\r\n")?
compare() ("\r\n")?
// Right-hand side: one or more operands, or a MASK(...) token.
(
( <INTEGER_CONSTANT> | <QUOTEDSTRING>| (<SHARPCHAR>| <PLUSCHAR>| <ASTERISKCHAR>)? <NATURAL_WORD>)+ ("\r\n")?
| <MASK> ("\r\n")?
)
)
// Optional range upper bound.
(<THRU>
( <INTEGER_CONSTANT> | <QUOTEDSTRING>| (<SHARPCHAR>| <PLUSCHAR>| <ASTERISKCHAR>)? <NATURAL_WORD>)+)? ("\r\n")?
// Further conditions chained with AND / OR; both operands are optional here.
( ("\r\n")?
(<AND> | <OR>) ("\r\n")?
((<SHARPCHAR>| <PLUSCHAR> | <ASTERISKCHAR>)? <NATURAL_WORD>)? ("\r\n")?
(<LPARENCHAR>
(<INTEGER_CONSTANT> | <QUOTEDSTRING>| (<SHARPCHAR>| <PLUSCHAR>| <ASTERISKCHAR>)? <NATURAL_WORD>)
<RPARENCHAR>
)? ("\r\n")?
compare() ("\r\n")?
( <INTEGER_CONSTANT> | <QUOTEDSTRING> | (<SHARPCHAR>| <PLUSCHAR>| <ASTERISKCHAR>)? <NATURAL_WORD>)?
)*
) ("\r\n")?
)
// Statements executed when the condition holds.
(NaturalStatement())*
(<ELSE> ("\r\n")?
(NaturalStatement())+)*
(<END_IF> | <END_NOREC>) ("\r\n")?

}
//MAIN UNIT
// Entry point of the grammar: zero or more code lines up to end of input.
// Prints "Start" when entered and returns jjtThis, the JJTree node created for
// this production.
SimpleNode Start () :
{
System.out.println("Start");
}
{
(
CodeLine()
)*
<EOF>
{ return jjtThis; }
}