wip
This commit is contained in:
parent
7789e28631
commit
abd7bcda7f
211
src/grammar.peg
211
src/grammar.peg
@ -1,8 +1,10 @@
|
||||
R"===(
|
||||
|
||||
Root <- skip Statement* eof
|
||||
|
||||
Fn <- KEYWORD_fn LPAREN ParamDeclList RPAREN Block
|
||||
|
||||
VarDecl <- IDENTIFIER (EQUAL Expr)? SEMICOLON
|
||||
VarDecl <- KEYWORD_var IDENTIFIER (EQUAL Expr)? SEMICOLON
|
||||
|
||||
# *** Block Level ***
|
||||
Statement
|
||||
@ -73,7 +75,7 @@ SuffixExpr
|
||||
<- PrimaryTypeExpr (SuffixOp / FnCallArguments)*
|
||||
|
||||
PrimaryTypeExpr
|
||||
<- BUILTINIDENTIFIER FnCallArguments
|
||||
<- BUILTINCALL
|
||||
/ CHAR_LITERAL
|
||||
/ DOT IDENTIFIER
|
||||
/ FLOAT
|
||||
@ -86,6 +88,8 @@ PrimaryTypeExpr
|
||||
/ KEYWORD_true
|
||||
/ STRINGLITERAL
|
||||
|
||||
BUILTINCALL <- BUILTINIDENTIFIER FnCallArguments
|
||||
|
||||
GroupedExpr <- LPAREN Expr RPAREN
|
||||
|
||||
# *** Helper grammar ***
|
||||
@ -93,12 +97,10 @@ BreakLabel <- COLON IDENTIFIER
|
||||
|
||||
BlockLabel <- IDENTIFIER COLON
|
||||
|
||||
WhileContinueExpr <- COLON LPAREN AssignExpr RPAREN
|
||||
|
||||
# Control flow prefixes
|
||||
IfPrefix <- KEYWORD_if LPAREN Expr RPAREN PtrPayload?
|
||||
|
||||
WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr?
|
||||
WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload?
|
||||
|
||||
ForPrefix <- KEYWORD_for LPAREN Expr RPAREN PtrIndexPayload
|
||||
|
||||
@ -141,7 +143,6 @@ BitShiftOp
|
||||
AdditionOp
|
||||
<- PLUS
|
||||
/ MINUS
|
||||
/ PLUS2
|
||||
|
||||
MultiplyOp
|
||||
<- ASTERISK
|
||||
@ -168,15 +169,13 @@ ParamDeclList <- (IDENTIFIER COMMA)* IDENTIFIER?
|
||||
|
||||
# *** Tokens ***
|
||||
INTEGER
|
||||
<- "0b" bin_int skip
|
||||
/ "0o" oct_int skip
|
||||
/ "0x" hex_int skip
|
||||
/ dec_int skip
|
||||
<- "0b" < bin_int > skip
|
||||
/ "0o" < oct_int > skip
|
||||
/ "0x" < hex_int > skip
|
||||
/ < dec_int > skip
|
||||
|
||||
IDENTIFIER
|
||||
<- !keyword [A-Za-z_] [A-Za-z0-9_]* skip
|
||||
/ "@\"" string_char* "\"" skip
|
||||
BUILTINIDENTIFIER <- "@"[A-Za-z_][A-Za-z0-9_]* skip
|
||||
IDENTIFIER <- !keyword < [A-Za-z_] [A-Za-z0-9_]* > skip
|
||||
BUILTINIDENTIFIER <- "@" < [A-Za-z_][A-Za-z0-9_]* > skip
|
||||
|
||||
CHAR_LITERAL <- "'" char_char "'" skip
|
||||
FLOAT
|
||||
@ -190,38 +189,38 @@ STRINGLITERAL
|
||||
/ (line_string skip)+
|
||||
STRINGLITERALSINGLE <- "\"" string_char* "\"" skip
|
||||
|
||||
eof <- !.
|
||||
bin <- [01]
|
||||
bin_ <- '_'? bin
|
||||
oct <- [0-7]
|
||||
oct_ <- '_'? oct
|
||||
hex <- [0-9a-fA-F]
|
||||
hex_ <- '_'? hex
|
||||
dec <- [0-9]
|
||||
dec_ <- '_'? dec
|
||||
~bin_int <- bin bin_*
|
||||
~oct_int <- oct oct_*
|
||||
~dec_int <- dec dec_*
|
||||
~hex_int <- hex hex_*
|
||||
|
||||
bin_int <- bin bin_*
|
||||
oct_int <- oct oct_*
|
||||
dec_int <- dec dec_*
|
||||
hex_int <- hex hex_*
|
||||
~eof <- !.
|
||||
~bin <- [01]
|
||||
~bin_ <- '_'? bin
|
||||
~oct <- [0-7]
|
||||
~oct_ <- '_'? oct
|
||||
~hex <- [0-9a-fA-F]
|
||||
~hex_ <- '_'? hex
|
||||
~dec <- [0-9]
|
||||
~dec_ <- '_'? dec
|
||||
|
||||
ox80_oxBF <- [\200-\277]
|
||||
oxF4 <- '\364'
|
||||
ox80_ox8F <- [\200-\217]
|
||||
oxF1_oxF3 <- [\361-\363]
|
||||
oxF0 <- '\360'
|
||||
ox90_0xBF <- [\220-\277]
|
||||
oxEE_oxEF <- [\356-\357]
|
||||
oxED <- '\355'
|
||||
ox80_ox9F <- [\200-\237]
|
||||
oxE1_oxEC <- [\341-\354]
|
||||
oxE0 <- '\340'
|
||||
oxA0_oxBF <- [\240-\277]
|
||||
oxC2_oxDF <- [\302-\337]
|
||||
~ox80_oxBF <- [\200-\277]
|
||||
~oxF4 <- '\364'
|
||||
~ox80_ox8F <- [\200-\217]
|
||||
~oxF1_oxF3 <- [\361-\363]
|
||||
~oxF0 <- '\360'
|
||||
~ox90_0xBF <- [\220-\277]
|
||||
~oxEE_oxEF <- [\356-\357]
|
||||
~oxED <- '\355'
|
||||
~ox80_ox9F <- [\200-\237]
|
||||
~oxE1_oxEC <- [\341-\354]
|
||||
~oxE0 <- '\340'
|
||||
~oxA0_oxBF <- [\240-\277]
|
||||
~oxC2_oxDF <- [\302-\337]
|
||||
|
||||
# From https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/
|
||||
|
||||
mb_utf8_literal <-
|
||||
~mb_utf8_literal <-
|
||||
oxF4 ox80_ox8F ox80_oxBF ox80_oxBF
|
||||
/ oxF1_oxF3 ox80_oxBF ox80_oxBF ox80_oxBF
|
||||
/ oxF0 ox90_0xBF ox80_oxBF ox80_oxBF
|
||||
@ -231,84 +230,84 @@ mb_utf8_literal <-
|
||||
/ oxE0 oxA0_oxBF ox80_oxBF
|
||||
/ oxC2_oxDF ox80_oxBF
|
||||
|
||||
ascii_char_not_nl_slash_squote <- [\000-\011\013-\046-\050-\133\135-\177]
|
||||
~ascii_char_not_nl_slash_squote <- [\000-\011\013-\046-\050-\133\135-\177]
|
||||
|
||||
char_escape
|
||||
~char_escape
|
||||
<- "\\x" hex hex
|
||||
/ "\\u{" hex+ "}"
|
||||
/ "\\" [nr\\t'"]
|
||||
char_char
|
||||
~char_char
|
||||
<- mb_utf8_literal
|
||||
/ char_escape
|
||||
/ ascii_char_not_nl_slash_squote
|
||||
|
||||
string_char
|
||||
~string_char
|
||||
<- char_escape
|
||||
/ [^\\"\n]
|
||||
|
||||
line_comment <- '//' ![!/][^\n]* / '////' [^\n]*
|
||||
line_string <- ("\\\\" [^\n]* [ \n]*)+
|
||||
skip <- ([ \n] / line_comment)*
|
||||
~line_comment <- '//' ![!/][^\n]* / '////' [^\n]*
|
||||
~line_string <- ("\\\\" [^\n]* [ \n]*)+
|
||||
~skip <- ([ \n] / line_comment)*
|
||||
|
||||
|
||||
AMPERSAND <- '&' ![=] skip
|
||||
AMPERSANDEQUAL <- '&=' skip
|
||||
ASTERISK <- '*' ![*%=] skip
|
||||
ASTERISK2 <- '**' skip
|
||||
ASTERISKEQUAL <- '*=' skip
|
||||
ASTERISKPERCENT <- '*%' ![=] skip
|
||||
ASTERISKPERCENTEQUAL <- '*%=' skip
|
||||
CARET <- '^' ![=] skip
|
||||
CARETEQUAL <- '^=' skip
|
||||
COLON <- ':' skip
|
||||
COMMA <- ',' skip
|
||||
DOT <- '.' ![*.?] skip
|
||||
EQUAL <- '=' ![>=] skip
|
||||
EQUALEQUAL <- '==' skip
|
||||
EXCLAMATIONMARK <- '!' ![=] skip
|
||||
EXCLAMATIONMARKEQUAL <- '!=' skip
|
||||
LARROW <- '<' ![<=] skip
|
||||
LARROW2 <- '<<' skip
|
||||
LARROWEQUAL <- '<=' skip
|
||||
LBRACE <- '{' skip
|
||||
LBRACKET <- '[' skip
|
||||
LPAREN <- '(' skip
|
||||
MINUS <- '-' ![=>] skip
|
||||
MINUSEQUAL <- '-=' skip
|
||||
PERCENT <- '%' ![=] skip
|
||||
PERCENTEQUAL <- '%=' skip
|
||||
PIPE <- '|' ![|=] skip
|
||||
PIPEEQUAL <- '|=' skip
|
||||
PLUS <- '+' ![+=] skip
|
||||
PLUS2 <- '++' skip
|
||||
PLUSEQUAL <- '+=' skip
|
||||
RARROW <- '>' skip
|
||||
RARROW2 <- '>>' skip
|
||||
RARROWEQUAL <- '>=' skip
|
||||
RBRACE <- '}' skip
|
||||
RBRACKET <- ']' skip
|
||||
RPAREN <- ')' skip
|
||||
SEMICOLON <- ';' skip
|
||||
SLASH <- '/' ![=] skip
|
||||
~AMPERSAND <- '&' ![=] skip
|
||||
~AMPERSANDEQUAL <- '&=' skip
|
||||
~ASTERISK <- '*' ![*%=] skip
|
||||
~ASTERISK2 <- '**' skip
|
||||
~ASTERISKEQUAL <- '*=' skip
|
||||
~ASTERISKPERCENT <- '*%' ![=] skip
|
||||
~ASTERISKPERCENTEQUAL <- '*%=' skip
|
||||
~CARET <- '^' ![=] skip
|
||||
~CARETEQUAL <- '^=' skip
|
||||
~COLON <- ':' skip
|
||||
~COMMA <- ',' skip
|
||||
~DOT <- '.' ![*.?] skip
|
||||
~EQUAL <- '=' ![>=] skip
|
||||
~EQUALEQUAL <- '==' skip
|
||||
~EXCLAMATIONMARK <- '!' ![=] skip
|
||||
~EXCLAMATIONMARKEQUAL <- '!=' skip
|
||||
~LARROW <- '<' ![<=] skip
|
||||
~LARROW2 <- '<<' skip
|
||||
~LARROWEQUAL <- '<=' skip
|
||||
~LBRACE <- '{' skip
|
||||
~LBRACKET <- '[' skip
|
||||
~LPAREN <- '(' skip
|
||||
~MINUS <- '-' ![=>] skip
|
||||
~MINUSEQUAL <- '-=' skip
|
||||
~PERCENT <- '%' ![=] skip
|
||||
~PERCENTEQUAL <- '%=' skip
|
||||
~PIPE <- '|' ![|=] skip
|
||||
~PIPEEQUAL <- '|=' skip
|
||||
~PLUS <- '+' ![+=] skip
|
||||
~PLUSEQUAL <- '+=' skip
|
||||
~RARROW <- '>' skip
|
||||
~RARROW2 <- '>>' skip
|
||||
~RARROWEQUAL <- '>=' skip
|
||||
~RBRACE <- '}' skip
|
||||
~RBRACKET <- ']' skip
|
||||
~RPAREN <- ')' skip
|
||||
~SEMICOLON <- ';' skip
|
||||
~SLASH <- '/' ![=] skip
|
||||
|
||||
end_of_word <- ![a-zA-Z0-9_] skip
|
||||
KEYWORD_and <- 'and' end_of_word
|
||||
KEYWORD_break <- 'break' end_of_word
|
||||
KEYWORD_continue <- 'continue' end_of_word
|
||||
KEYWORD_else <- 'else' end_of_word
|
||||
KEYWORD_export <- 'export' end_of_word
|
||||
KEYWORD_false <- 'false' end_of_word
|
||||
KEYWORD_fn <- 'fn' end_of_word
|
||||
KEYWORD_for <- 'for' end_of_word
|
||||
KEYWORD_if <- 'if' end_of_word
|
||||
KEYWORD_nil <- 'nil' end_of_word
|
||||
KEYWORD_or <- 'or' end_of_word
|
||||
KEYWORD_return <- 'return' end_of_word
|
||||
KEYWORD_test <- 'test' end_of_word
|
||||
KEYWORD_true <- 'true' end_of_word
|
||||
KEYWORD_while <- 'while' end_of_word
|
||||
~end_of_word <- ![a-zA-Z0-9_] skip
|
||||
~KEYWORD_and <- 'and' end_of_word
|
||||
~KEYWORD_break <- 'break' end_of_word
|
||||
~KEYWORD_continue <- 'continue' end_of_word
|
||||
~KEYWORD_else <- 'else' end_of_word
|
||||
~KEYWORD_export <- 'export' end_of_word
|
||||
~KEYWORD_false <- 'false' end_of_word
|
||||
~KEYWORD_fn <- 'fn' end_of_word
|
||||
~KEYWORD_for <- 'for' end_of_word
|
||||
~KEYWORD_if <- 'if' end_of_word
|
||||
~KEYWORD_nil <- 'nil' end_of_word
|
||||
~KEYWORD_or <- 'or' end_of_word
|
||||
~KEYWORD_return <- 'return' end_of_word
|
||||
~KEYWORD_test <- 'test' end_of_word
|
||||
~KEYWORD_true <- 'true' end_of_word
|
||||
~KEYWORD_var <- 'var' end_of_word
|
||||
~KEYWORD_while <- 'while' end_of_word
|
||||
|
||||
keyword <- KEYWORD_and
|
||||
~keyword <- KEYWORD_and
|
||||
/ KEYWORD_break
|
||||
/ KEYWORD_continue / KEYWORD_else
|
||||
/ KEYWORD_export
|
||||
@ -316,4 +315,6 @@ keyword <- KEYWORD_and
|
||||
/ KEYWORD_nil / KEYWORD_or
|
||||
/ KEYWORD_return
|
||||
/ KEYWORD_test / KEYWORD_true
|
||||
/ KEYWORD_while
|
||||
/ KEYWORD_var / KEYWORD_while
|
||||
|
||||
)==="
|
209
src/main.cpp
209
src/main.cpp
@ -1,13 +1,15 @@
|
||||
#include <assert.h>

#include <any>
#include <exception>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>

#include <cpp-peglib/peglib.h>
|
||||
|
||||
using namespace peg;
|
||||
using namespace std;
|
||||
using peg::SemanticValues;
|
||||
using std::any_cast;
|
||||
|
||||
int main(void) {
|
||||
int main_old(void) {
|
||||
// (2) Make a parser
|
||||
parser parser(R"(
|
||||
peg::parser parser(R"(
|
||||
# Grammar for Calculator...
|
||||
Additive <- Multitive '+' Additive / Multitive
|
||||
Multitive <- Primary '*' Multitive / Primary
|
||||
@ -49,5 +51,202 @@ int main(void) {
|
||||
|
||||
assert(val == 9);
|
||||
|
||||
std::cout << "hello" << std::endl;
|
||||
return 0;
|
||||
}
|
||||
|
||||
class Func;
|
||||
class Scope;
|
||||
class Identifier;
|
||||
|
||||
// Marker type standing for the language's 'nil' value; it carries no data.
struct Nil {};
|
||||
|
||||
class Context {
|
||||
public:
|
||||
std::unordered_map<std::string_view, Func> builtins;
|
||||
std::shared_ptr<Scope> currentScope;
|
||||
public:
|
||||
void error(std::string_view msg) {
|
||||
std::cout << "Error! " << msg << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * A bare name taken from the source text. eval() produces one for
 * IDENTIFIER and BUILTINIDENTIFIER nodes; the name is resolved to a value
 * (or to a builtin) later by whoever consumes it.
 */
class Identifier {
public:
    std::string identifier;

    // The argument is taken by value and moved into place, so an rvalue
    // string costs no copy. Left implicit to keep existing call sites valid.
    Identifier(std::string id) : identifier(std::move(id)) {}
};
|
||||
|
||||
// table, function, number, string
|
||||
// enum Type {
|
||||
// Type_Table,
|
||||
// Type_Function,
|
||||
// Type_Number,
|
||||
// Type_String,
|
||||
// };
|
||||
|
||||
// Context is only ever passed through by reference in this class, so a
// declaration is all that is needed here.
class Context;

/**
 * Scope - parent scope, local variables, return handler, continue handler,
 * break handler.
 *
 * Variables are looked up in this scope first and then in each enclosing
 * scope in turn. The scope owns both the variable names and their values.
 */
class Scope {
public:
    std::shared_ptr<Scope> parentScope;
    // Owning keys and values: callers usually pass short-lived views and
    // locals, so nothing stored here may refer back to them.
    std::unordered_map<std::string, std::any> vars;
    // std::function<bool(Context&,Args&)> returnHandler;
    // std::function<bool(Context&)> breakHandler;
    // std::function<bool(Context&)> continueHandler;

    /**
     * @param parentScope enclosing scope, or nullptr for the outermost scope
     */
    Scope(std::shared_ptr<Scope> parentScope) : parentScope(std::move(parentScope)) {}

    /**
     * @return true when `name` is defined in this scope or in any enclosing
     *         scope
     */
    bool varExists(Context& ctx, std::string_view name) {
        if (vars.find(std::string(name)) != vars.end()) {
            return true;
        }
        return parentScope != nullptr && parentScope->varExists(ctx, name);
    }

    /**
     * Looks up a variable, searching enclosing scopes when it is not local.
     *
     * @return reference to the stored value; it stays valid until the
     *         variable is removed from its scope
     * @throws std::out_of_range when no scope in the chain defines `name`
     */
    std::any& getVar(Context& cxt, std::string_view name) {
        const auto found = vars.find(std::string(name));
        if (found != vars.end()) {
            return found->second;
        }
        if (parentScope != nullptr) {
            return parentScope->getVar(cxt, name);
        }
        // Thrown by value so that `catch (const std::exception&)` sees it.
        throw std::out_of_range("undefined variable: " + std::string(name));
    }

    /**
     * Assigns `var` to `name`. A variable that already exists in an
     * enclosing scope is updated there; otherwise the variable is created
     * or overwritten in this scope.
     */
    void writeVar(Context& cxt, std::string_view name, std::any& var) {
        std::string key(name);
        const bool isLocal = vars.find(key) != vars.end();
        if (!isLocal && parentScope != nullptr && parentScope->varExists(cxt, name)) {
            parentScope->writeVar(cxt, name, var);
            // Stop here: writing locally as well would shadow the variable
            // that was just updated.
            return;
        }
        vars.insert_or_assign(std::move(key), var);
    }
};
|
||||
|
||||
/**
|
||||
* Arguments passed to a function and also results returned from a function
|
||||
*/
|
||||
class Args {
|
||||
public:
|
||||
std::vector<std::any> args;
|
||||
static Args toArgs(Context& cxt, std::any input) {
|
||||
Args args;
|
||||
if (input.type() == typeid(Identifier)) {
|
||||
auto id = any_cast<Identifier>(input);
|
||||
auto value = cxt.currentScope->getVar(cxt, id.identifier);
|
||||
args.args.push_back(value);
|
||||
}
|
||||
return args;
|
||||
}
|
||||
};
|
||||
|
||||
// Function - captured scope, ast ptr OR native function
|
||||
class Func {
|
||||
public:
|
||||
std::shared_ptr<Scope> capturedScope;
|
||||
// ast ptr
|
||||
std::function<void(Context&,Args&)> nativeFunction;
|
||||
|
||||
public:
|
||||
Func(std::function<void(Context&,Args&)> nativeFunction)
|
||||
: capturedScope(nullptr), nativeFunction(nativeFunction) {}
|
||||
|
||||
void execute(Context& cxt, Args& args) {
|
||||
// TODO implement AST executer
|
||||
nativeFunction(cxt, args);
|
||||
}
|
||||
};
|
||||
|
||||
class BuiltinFunctions {
|
||||
public:
|
||||
static void print(Context& cxt, Args& args) {
|
||||
for (int i=0; i<args.args.size(); i++) {
|
||||
std::any& var = args.args[i];
|
||||
if ( var.type() == typeid(long) ) {
|
||||
std::cout << any_cast<long>(var) << '\t';
|
||||
} else {
|
||||
cxt.error("Attempt to print unsupported type");
|
||||
}
|
||||
}
|
||||
std::cout << std::endl;
|
||||
}
|
||||
};
|
||||
|
||||
/**
 * Evaluates one AST node and, recursively, whatever it needs from its
 * children.
 *
 * @param cxt interpreter state (builtins and the current scope)
 * @param ast node to evaluate
 * @return the node's value: an Identifier for name tokens, a long for
 *         INTEGER, and Nil for statements and for node kinds that are not
 *         handled yet
 */
std::any eval(Context &cxt, peg::Ast& ast) {
    const auto &nodes = ast.nodes;

    if (ast.name == "Root") {
        for (const auto &child : nodes) {
            eval(cxt, *child);
        }
        return Nil();
    }

    if (ast.name == "VarDecl") {
        auto identifier = std::any_cast<Identifier>(eval(cxt, *nodes[0]));
        // The initializer is optional in the grammar; a declaration without
        // one stores Nil rather than indexing past the end of the children.
        std::any value = nodes.size() > 1 ? eval(cxt, *nodes[1]) : std::any(Nil());
        cxt.currentScope->writeVar(cxt, identifier.identifier, value);
        return Nil();
    }

    if (ast.name == "IDENTIFIER" || ast.name == "BUILTINIDENTIFIER") {
        return Identifier(ast.token_to_string());
    }

    if (ast.name == "INTEGER") {
        // TODO parse hex, oct, and binary
        return ast.token_to_number<long>();
    }

    if (ast.name == "BUILTINCALL") {
        auto identifier = std::any_cast<Identifier>(eval(cxt, *nodes[0]));
        Args args = nodes.size() > 1 ? Args::toArgs(cxt, eval(cxt, *nodes[1])) : Args();
        cxt.builtins.at(identifier.identifier).execute(cxt, args);
        // Return here: falling through to the dump below would evaluate the
        // children a second time.
        return Nil();
    }

    // Node kinds without dedicated handling: dump the node and keep walking
    // so that nested statements are still reached.
    std::cout << "AST Name: " << ast.name << std::endl;
    for (const auto &child : nodes) {
        std::cout << "AST child: " << ast.name << " : ";
        eval(cxt, *child);
    }
    return Nil();
}
|
||||
|
||||
/**
 * Parses a small built-in sample program with the grammar from grammar.peg,
 * prints its optimized AST and evaluates it.
 *
 * @return 0 when the sample parsed and ran, 1 on a syntax error
 */
int main(void) {
    peg::parser parser(
        #include "grammar.peg"
    );
    std::string program = R"(
var a = 10;
@print(a);
)";

    parser.enable_ast();
    std::shared_ptr<peg::Ast> ast;

    if (!parser.parse(program, ast)) {
        std::cout << "syntax error..." << std::endl;
        return 1;
    }

    ast = parser.optimize_ast(ast);
    std::cout << ast_to_s(ast) << std::endl;

    Context cxt;

    // init new scope
    auto scope = std::make_shared<Scope>(nullptr);

    // init builtins
    cxt.builtins.emplace("print", Func(BuiltinFunctions::print));
    cxt.currentScope = scope;

    eval(cxt, *ast);

    return 0;
}
|
Loading…
x
Reference in New Issue
Block a user