|
Recursive descent is a way to implement a top-down parser.
The idea is to define a function for each nonterminal in the grammar.
The function for nonterminal N uses the lookahead to make a prediction about which production for N to use, then reads each thing on the right-hand side of that production.
A function match(t) is created to read a token. First, it checks whether the lookahead is t, then it gets the next token, updating the lookahead.
For example, if the right-hand side of the production is if ( E ) S then the parser does the following.
match(TOK_IF);
match('(');
E();
match(')');
S();
The following example not only parses simple expressions but also does some semantic processing: it evaluates the expression.
#include <stdio.h>
#include <ctype.h>
#include <stdlib.h>
#include <limits.h>
/* This is an example of a
* recursive descent parser.
* It evaluates an expression
* using the following
* grammar.
*
* Expr -> Term ExprRest
*
* ExprRest -> (empty)
* | + Expr
* | - Expr
*
* Term -> Factor TermRest
*
* TermRest -> (empty)
* | * Term
* | / Term
*
* Factor -> number
* | ( Expr )
*
* where a number is an
* integer and all operations
* are done using integer
* arithmetic.
*/
#define MAX_DIGITS 10
#define TOK_NUMBER 256
typedef int TOKEN;
TOKEN lookahead = 0;
int attribute = 0;
int nextch = EOF;
void syntaxerror(void);
void advance(void);
TOKEN lex(void);
void match(TOKEN t);
int Expr(void);
int ExprRest(int n);
int Term(void);
int TermRest(int n);
int Factor(void);
/*============================
* syntaxerror():
* Report a syntax error.
*
* Error reporting is very
* crude here.
*============================*/
void syntaxerror(void)
{
printf("Syntax error");
exit(1);
}
/*============================
* advance():
* Skip over white space,
* then read a character.
*
* Store the character that
* was read into nextch.
*============================*/
void advance(void)
{
nextch = getchar();
while(isspace(nextch)) {
nextch = getchar();
}
}
/*===========================
* lex():
* A simple ad-hoc lexer
*
* Read a token and store it
* into lookahead.
*===========================*/
TOKEN lex(void)
{
if(nextch == EOF) {
return EOF;
}
if(nextch == '+' ||
nextch == '-' ||
nextch == '*' ||
nextch == '/' ||
nextch == '(' ||
nextch == ')') {
int tok = nextch;
advance();
return tok;
}
if(isdigit(nextch)) {
int v = 0;
while(isdigit(nextch)) {
v = 10*v + (nextch - '0');
advance();
}
attribute = (int) v;
return TOK_NUMBER;
}
syntaxerror();
return 0;
}
/*===========================
* match(t):
* Check that the current
* lookahead is t.
*
* If so, get the next token
* (which becomes the next
* lookahead token).
*
* If there is a mismatch,
* it is a syntax error.
*===========================*/
void match(TOKEN t)
{
if(lookahead == t) {
lookahead = lex();
}
else {
syntaxerror();
}
}
/*===========================
* Expr():
* Read an expression
* and return its value.
*
* Expr -> Term ExprRest
*===========================*/
int Expr(void)
{
int x = Term();
int y = ExprRest(x);
return y;
}
/*===========================
* ExprRest(n):
*
* ExprRest
* -> (empty)
* Return n
* | + Expr
* Return n + val(Expr)
* | - Expr
* Return n - val(Expr)
*===========================*/
int ExprRest(int n)
{
int x;
if(lookahead == '+') {
match('+');
x = Expr();
return n + x;
}
if(lookahead == '-') {
match('-');
x = Expr();
return n - x;
}
return n;
}
/*===========================
* Term():
* Read a term and
* return its value.
*
* Term -> Factor TermRest
*===========================*/
int Term(void)
{
int x = Factor();
int y = TermRest(x);
return y;
}
/*===========================
* TermRest(n):
*
* TermRest
* -> (empty)
* Return n
* | * Term
* Return n * val(Term)
* | / Term
* Return n / val(Term)
*===========================*/
int TermRest(int n)
{
int x;
if(lookahead == '*') {
match('*');
x = Term();
return n * x;
}
if(lookahead == '/') {
match('/');
x = Term();
return n / x;
}
return n;
}
/*===========================
* Factor():
* Read a factor and return
* its value.
*
* Factor -> number
* | ( Expr )
*===========================*/
int Factor(void)
{
int x;
if(lookahead == '(') {
match('(');
x = Expr();
match(')');
return x;
}
if(lookahead == TOK_NUMBER) {
x = attribute;
match(TOK_NUMBER);
return x;
}
syntaxerror();
return 0;
}
/*===========================
* main():
* Read an expression and
* print its value, or print
* "syntax error" if the
* expression is poorly formed.
*
* The expression ends at the
* end of file.
*===========================*/
int main()
{
int val;
/* Prime the lexer. */
advance();
lookahead = lex();
/* Evaluate the expression. */
val = Expr();
if(lookahead != EOF) {
printf("%d\n", val);
}
else {
syntaxerror();
}
return 0;
}
|