Title: | Tools to Create Formal Language Parser |
---|---|
Description: | R implementation of the common parsing tools 'lex' and 'yacc'. |
Authors: | Marek Jagielski [aut, cre, cph], David M. Beazley [aut, cph], Yasutaka Tanaka [ctb], Henrico Witvliet [ctb] |
Maintainer: | Marek Jagielski <[email protected]> |
License: | MIT + file LICENSE |
Version: | 1.7.3 |
Built: | 2025-02-05 03:10:04 UTC |
Source: | https://github.com/systemincloud/rly |
Build all of the regular expression rules from definitions in the supplied module
lex(module = NA, args = list(), debug = FALSE, debuglog = NA, errorlog = NA)
lex(module = NA, args = list(), debug = FALSE, debuglog = NA, errorlog = NA)
module |
R6 class containing lex rules |
args |
list of arguments that should be passed to constructor |
debug |
turns debug mode on or off |
debuglog |
custom logger for debug messages |
errorlog |
custom logger for error messages |
Lexer ready to use
TOKENS = c('NAME', 'NUMBER') LITERALS = c('=','+','-','*','/', '(',')') Lexer <- R6::R6Class("Lexer", public = list( tokens = TOKENS, literals = LITERALS, t_NAME = '[a-zA-Z_][a-zA-Z0-9_]*', t_NUMBER = function(re='\\d+', t) { t$value <- strtoi(t$value) return(t) }, t_ignore = " \t", t_newline = function(re='\\n+', t) { t$lexer$lineno <- t$lexer$lineno + nchar(t$value) return(NULL) }, t_error = function(t) { cat(sprintf("Illegal character '%s'", t$value[1])) t$lexer$skip(1) return(t) } ) ) lexer <- rly::lex(Lexer) lexer$input("5 + 3") print(lexer$token()$value) # [1] 5 print(lexer$token()$value) # [1] "+" print(lexer$token()$value) # [1] 3
TOKENS = c('NAME', 'NUMBER') LITERALS = c('=','+','-','*','/', '(',')') Lexer <- R6::R6Class("Lexer", public = list( tokens = TOKENS, literals = LITERALS, t_NAME = '[a-zA-Z_][a-zA-Z0-9_]*', t_NUMBER = function(re='\\d+', t) { t$value <- strtoi(t$value) return(t) }, t_ignore = " \t", t_newline = function(re='\\n+', t) { t$lexer$lineno <- t$lexer$lineno + nchar(t$value) return(NULL) }, t_error = function(t) { cat(sprintf("Illegal character '%s'", t$value[1])) t$lexer$skip(1) return(t) } ) ) lexer <- rly::lex(Lexer) lexer$input("5 + 3") print(lexer$token()$value) # [1] 5 print(lexer$token()$value) # [1] "+" print(lexer$token()$value) # [1] 3
The following Lexer class implements the lexer runtime. There are only a few public methods and attributes:
input() - Store a new string in the lexer
token() - Get the next token
clone() - Clone the lexer
lineno - Current line number
lexpos - Current position in the input string
Lexer
Lexer
An R6Class
generator object
The LR Parsing engine
LRParser
LRParser
An R6Class
generator object
Does nothing.
NullLogger
NullLogger
An R6Class
object
debuglog <- NullLogger$new() debuglog$info('This will not print')
debuglog <- NullLogger$new() debuglog$info('This will not print')
This object is a stand-in for a logging object created by the logging module. RLY will use this by default to create things such as the parser.out file. If a user wants more detailed information, they can create their own logging object and pass it into RLY.
RlyLogger
RlyLogger
An R6Class
object
debuglog <- rly::RlyLogger$new(".", "file.out") debuglog$info('This is info message') file.remove("file.out")
debuglog <- rly::RlyLogger$new(".", "file.out") debuglog$info('This is info message') file.remove("file.out")
This function is the entry point to the library
yacc(module = NA, args = list(), method = "LALR", debug = FALSE, start = NA, check_recursion = TRUE, debugfile = "parser.out", outputdir = NA, debuglog = NA, errorlog = NA)
yacc(module = NA, args = list(), method = "LALR", debug = FALSE, start = NA, check_recursion = TRUE, debugfile = "parser.out", outputdir = NA, debuglog = NA, errorlog = NA)
module |
R6 class containing rules |
args |
list of arguments that should be passed to constructor |
method |
type of algorithm |
debug |
turns debug mode on or off |
start |
provide custom start method |
check_recursion |
should yacc look for recursions in rules |
debugfile |
the name of the custom debug output logs |
outputdir |
the directory for custom debug logs |
debuglog |
custom logger for debug messages |
errorlog |
custom logger for error messages |
Parser ready to use
TOKENS = c('NAME', 'NUMBER') LITERALS = c('=','+','-','*','/', '(',')') Parser <- R6::R6Class("Parser", public = list( tokens = TOKENS, literals = LITERALS, # Parsing rules precedence = list(c('left','+','-'), c('left','*','/'), c('right','UMINUS')), # dictionary of names names = new.env(hash=TRUE), p_statement_assign = function(doc='statement : NAME "=" expression', p) { self$names[[as.character(p$get(2))]] <- p$get(4) }, p_statement_expr = function(doc='statement : expression', p) { cat(p$get(2)) cat('\n') }, p_expression_binop = function(doc="expression : expression '+' expression | expression '-' expression | expression '*' expression | expression '/' expression", p) { if(p$get(3) == '+') p$set(1, p$get(2) + p$get(4)) else if(p$get(3) == '-') p$set(1, p$get(2) - p$get(4)) else if(p$get(3) == '*') p$set(1, p$get(2) * p$get(4)) else if(p$get(3) == '/') p$set(1, p$get(2) / p$get(4)) }, p_expression_uminus = function(doc="expression : '-' expression %prec UMINUS", p) { p$set(1, -p$get(3)) }, p_expression_group = function(doc="expression : '(' expression ')'", p) { p$set(1, p$get(3)) }, p_expression_number = function(doc='expression : NUMBER', p) { p$set(1, p$get(2)) }, p_expression_name = function(doc='expression : NAME', p) { p$set(1, self$names[[as.character(p$get(2))]]) }, p_error = function(p) { if(is.null(p)) cat("Syntax error at EOF") else cat(sprintf("Syntax error at '%s'", p$value)) } ) ) parser <- rly::yacc(Parser)
TOKENS = c('NAME', 'NUMBER') LITERALS = c('=','+','-','*','/', '(',')') Parser <- R6::R6Class("Parser", public = list( tokens = TOKENS, literals = LITERALS, # Parsing rules precedence = list(c('left','+','-'), c('left','*','/'), c('right','UMINUS')), # dictionary of names names = new.env(hash=TRUE), p_statement_assign = function(doc='statement : NAME "=" expression', p) { self$names[[as.character(p$get(2))]] <- p$get(4) }, p_statement_expr = function(doc='statement : expression', p) { cat(p$get(2)) cat('\n') }, p_expression_binop = function(doc="expression : expression '+' expression | expression '-' expression | expression '*' expression | expression '/' expression", p) { if(p$get(3) == '+') p$set(1, p$get(2) + p$get(4)) else if(p$get(3) == '-') p$set(1, p$get(2) - p$get(4)) else if(p$get(3) == '*') p$set(1, p$get(2) * p$get(4)) else if(p$get(3) == '/') p$set(1, p$get(2) / p$get(4)) }, p_expression_uminus = function(doc="expression : '-' expression %prec UMINUS", p) { p$set(1, -p$get(3)) }, p_expression_group = function(doc="expression : '(' expression ')'", p) { p$set(1, p$get(3)) }, p_expression_number = function(doc='expression : NUMBER', p) { p$set(1, p$get(2)) }, p_expression_name = function(doc='expression : NAME', p) { p$set(1, self$names[[as.character(p$get(2))]]) }, p_error = function(p) { if(is.null(p)) cat("Syntax error at EOF") else cat(sprintf("Syntax error at '%s'", p$value)) } ) ) parser <- rly::yacc(Parser)
This class is a wrapper around the objects actually passed to each grammar rule. Index lookup and assignment actually read and assign the .value attribute of the underlying YaccSymbol object. The lineno() method returns the line number of a given item (or 0 if not defined). The linespan() method returns a tuple of (startline,endline) representing the range of lines for a symbol. The lexspan() method returns a tuple (lexpos,endlexpos) representing the range of positional information for a symbol.
YaccProduction
YaccProduction
An R6Class
generator object