This commit is contained in:
zuckerberg 2021-07-08 19:05:04 -06:00
parent 7789e28631
commit abd7bcda7f
2 changed files with 310 additions and 110 deletions

View File

@ -1,8 +1,10 @@
R"===(
Root <- skip Statement* eof Root <- skip Statement* eof
Fn <- KEYWORD_fn LPAREN ParamDeclList RPAREN Block Fn <- KEYWORD_fn LPAREN ParamDeclList RPAREN Block
VarDecl <- IDENTIFIER (EQUAL Expr)? SEMICOLON VarDecl <- KEYWORD_var IDENTIFIER (EQUAL Expr)? SEMICOLON
# *** Block Level *** # *** Block Level ***
Statement Statement
@ -73,7 +75,7 @@ SuffixExpr
<- PrimaryTypeExpr (SuffixOp / FnCallArguments)* <- PrimaryTypeExpr (SuffixOp / FnCallArguments)*
PrimaryTypeExpr PrimaryTypeExpr
<- BUILTINIDENTIFIER FnCallArguments <- BUILTINCALL
/ CHAR_LITERAL / CHAR_LITERAL
/ DOT IDENTIFIER / DOT IDENTIFIER
/ FLOAT / FLOAT
@ -86,6 +88,8 @@ PrimaryTypeExpr
/ KEYWORD_true / KEYWORD_true
/ STRINGLITERAL / STRINGLITERAL
BUILTINCALL <- BUILTINIDENTIFIER FnCallArguments
GroupedExpr <- LPAREN Expr RPAREN GroupedExpr <- LPAREN Expr RPAREN
# *** Helper grammar *** # *** Helper grammar ***
@ -93,12 +97,10 @@ BreakLabel <- COLON IDENTIFIER
BlockLabel <- IDENTIFIER COLON BlockLabel <- IDENTIFIER COLON
WhileContinueExpr <- COLON LPAREN AssignExpr RPAREN
# Control flow prefixes # Control flow prefixes
IfPrefix <- KEYWORD_if LPAREN Expr RPAREN PtrPayload? IfPrefix <- KEYWORD_if LPAREN Expr RPAREN PtrPayload?
WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr? WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload?
ForPrefix <- KEYWORD_for LPAREN Expr RPAREN PtrIndexPayload ForPrefix <- KEYWORD_for LPAREN Expr RPAREN PtrIndexPayload
@ -141,7 +143,6 @@ BitShiftOp
AdditionOp AdditionOp
<- PLUS <- PLUS
/ MINUS / MINUS
/ PLUS2
MultiplyOp MultiplyOp
<- ASTERISK <- ASTERISK
@ -168,15 +169,13 @@ ParamDeclList <- (IDENTIFIER COMMA)* IDENTIFIER?
# *** Tokens *** # *** Tokens ***
INTEGER INTEGER
<- "0b" bin_int skip <- "0b" < bin_int > skip
/ "0o" oct_int skip / "0o" < oct_int > skip
/ "0x" hex_int skip / "0x" < hex_int > skip
/ dec_int skip / < dec_int > skip
IDENTIFIER IDENTIFIER <- !keyword < [A-Za-z_] [A-Za-z0-9_]* > skip
<- !keyword [A-Za-z_] [A-Za-z0-9_]* skip BUILTINIDENTIFIER <- "@" < [A-Za-z_][A-Za-z0-9_]* > skip
/ "@\"" string_char* "\"" skip
BUILTINIDENTIFIER <- "@"[A-Za-z_][A-Za-z0-9_]* skip
CHAR_LITERAL <- "'" char_char "'" skip CHAR_LITERAL <- "'" char_char "'" skip
FLOAT FLOAT
@ -190,38 +189,38 @@ STRINGLITERAL
/ (line_string skip)+ / (line_string skip)+
STRINGLITERALSINGLE <- "\"" string_char* "\"" skip STRINGLITERALSINGLE <- "\"" string_char* "\"" skip
eof <- !. ~bin_int <- bin bin_*
bin <- [01] ~oct_int <- oct oct_*
bin_ <- '_'? bin ~dec_int <- dec dec_*
oct <- [0-7] ~hex_int <- hex hex_*
oct_ <- '_'? oct
hex <- [0-9a-fA-F]
hex_ <- '_'? hex
dec <- [0-9]
dec_ <- '_'? dec
bin_int <- bin bin_* ~eof <- !.
oct_int <- oct oct_* ~bin <- [01]
dec_int <- dec dec_* ~bin_ <- '_'? bin
hex_int <- hex hex_* ~oct <- [0-7]
~oct_ <- '_'? oct
~hex <- [0-9a-fA-F]
~hex_ <- '_'? hex
~dec <- [0-9]
~dec_ <- '_'? dec
ox80_oxBF <- [\200-\277] ~ox80_oxBF <- [\200-\277]
oxF4 <- '\364' ~oxF4 <- '\364'
ox80_ox8F <- [\200-\217] ~ox80_ox8F <- [\200-\217]
oxF1_oxF3 <- [\361-\363] ~oxF1_oxF3 <- [\361-\363]
oxF0 <- '\360' ~oxF0 <- '\360'
ox90_0xBF <- [\220-\277] ~ox90_0xBF <- [\220-\277]
oxEE_oxEF <- [\356-\357] ~oxEE_oxEF <- [\356-\357]
oxED <- '\355' ~oxED <- '\355'
ox80_ox9F <- [\200-\237] ~ox80_ox9F <- [\200-\237]
oxE1_oxEC <- [\341-\354] ~oxE1_oxEC <- [\341-\354]
oxE0 <- '\340' ~oxE0 <- '\340'
oxA0_oxBF <- [\240-\277] ~oxA0_oxBF <- [\240-\277]
oxC2_oxDF <- [\302-\337] ~oxC2_oxDF <- [\302-\337]
# From https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/ # From https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/
mb_utf8_literal <- ~mb_utf8_literal <-
oxF4 ox80_ox8F ox80_oxBF ox80_oxBF oxF4 ox80_ox8F ox80_oxBF ox80_oxBF
/ oxF1_oxF3 ox80_oxBF ox80_oxBF ox80_oxBF / oxF1_oxF3 ox80_oxBF ox80_oxBF ox80_oxBF
/ oxF0 ox90_0xBF ox80_oxBF ox80_oxBF / oxF0 ox90_0xBF ox80_oxBF ox80_oxBF
@ -231,84 +230,84 @@ mb_utf8_literal <-
/ oxE0 oxA0_oxBF ox80_oxBF / oxE0 oxA0_oxBF ox80_oxBF
/ oxC2_oxDF ox80_oxBF / oxC2_oxDF ox80_oxBF
ascii_char_not_nl_slash_squote <- [\000-\011\013-\046-\050-\133\135-\177] ~ascii_char_not_nl_slash_squote <- [\000-\011\013-\046-\050-\133\135-\177]
char_escape ~char_escape
<- "\\x" hex hex <- "\\x" hex hex
/ "\\u{" hex+ "}" / "\\u{" hex+ "}"
/ "\\" [nr\\t'"] / "\\" [nr\\t'"]
char_char ~char_char
<- mb_utf8_literal <- mb_utf8_literal
/ char_escape / char_escape
/ ascii_char_not_nl_slash_squote / ascii_char_not_nl_slash_squote
string_char ~string_char
<- char_escape <- char_escape
/ [^\\"\n] / [^\\"\n]
line_comment <- '//' ![!/][^\n]* / '////' [^\n]* ~line_comment <- '//' ![!/][^\n]* / '////' [^\n]*
line_string <- ("\\\\" [^\n]* [ \n]*)+ ~line_string <- ("\\\\" [^\n]* [ \n]*)+
skip <- ([ \n] / line_comment)* ~skip <- ([ \n] / line_comment)*
AMPERSAND <- '&' ![=] skip ~AMPERSAND <- '&' ![=] skip
AMPERSANDEQUAL <- '&=' skip ~AMPERSANDEQUAL <- '&=' skip
ASTERISK <- '*' ![*%=] skip ~ASTERISK <- '*' ![*%=] skip
ASTERISK2 <- '**' skip ~ASTERISK2 <- '**' skip
ASTERISKEQUAL <- '*=' skip ~ASTERISKEQUAL <- '*=' skip
ASTERISKPERCENT <- '*%' ![=] skip ~ASTERISKPERCENT <- '*%' ![=] skip
ASTERISKPERCENTEQUAL <- '*%=' skip ~ASTERISKPERCENTEQUAL <- '*%=' skip
CARET <- '^' ![=] skip ~CARET <- '^' ![=] skip
CARETEQUAL <- '^=' skip ~CARETEQUAL <- '^=' skip
COLON <- ':' skip ~COLON <- ':' skip
COMMA <- ',' skip ~COMMA <- ',' skip
DOT <- '.' ![*.?] skip ~DOT <- '.' ![*.?] skip
EQUAL <- '=' ![>=] skip ~EQUAL <- '=' ![>=] skip
EQUALEQUAL <- '==' skip ~EQUALEQUAL <- '==' skip
EXCLAMATIONMARK <- '!' ![=] skip ~EXCLAMATIONMARK <- '!' ![=] skip
EXCLAMATIONMARKEQUAL <- '!=' skip ~EXCLAMATIONMARKEQUAL <- '!=' skip
LARROW <- '<' ![<=] skip ~LARROW <- '<' ![<=] skip
LARROW2 <- '<<' skip ~LARROW2 <- '<<' skip
LARROWEQUAL <- '<=' skip ~LARROWEQUAL <- '<=' skip
LBRACE <- '{' skip ~LBRACE <- '{' skip
LBRACKET <- '[' skip ~LBRACKET <- '[' skip
LPAREN <- '(' skip ~LPAREN <- '(' skip
MINUS <- '-' ![=>] skip ~MINUS <- '-' ![=>] skip
MINUSEQUAL <- '-=' skip ~MINUSEQUAL <- '-=' skip
PERCENT <- '%' ![=] skip ~PERCENT <- '%' ![=] skip
PERCENTEQUAL <- '%=' skip ~PERCENTEQUAL <- '%=' skip
PIPE <- '|' ![|=] skip ~PIPE <- '|' ![|=] skip
PIPEEQUAL <- '|=' skip ~PIPEEQUAL <- '|=' skip
PLUS <- '+' ![+=] skip ~PLUS <- '+' ![+=] skip
PLUS2 <- '++' skip ~PLUSEQUAL <- '+=' skip
PLUSEQUAL <- '+=' skip ~RARROW <- '>' skip
RARROW <- '>' skip ~RARROW2 <- '>>' skip
RARROW2 <- '>>' skip ~RARROWEQUAL <- '>=' skip
RARROWEQUAL <- '>=' skip ~RBRACE <- '}' skip
RBRACE <- '}' skip ~RBRACKET <- ']' skip
RBRACKET <- ']' skip ~RPAREN <- ')' skip
RPAREN <- ')' skip ~SEMICOLON <- ';' skip
SEMICOLON <- ';' skip ~SLASH <- '/' ![=] skip
SLASH <- '/' ![=] skip
end_of_word <- ![a-zA-Z0-9_] skip ~end_of_word <- ![a-zA-Z0-9_] skip
KEYWORD_and <- 'and' end_of_word ~KEYWORD_and <- 'and' end_of_word
KEYWORD_break <- 'break' end_of_word ~KEYWORD_break <- 'break' end_of_word
KEYWORD_continue <- 'continue' end_of_word ~KEYWORD_continue <- 'continue' end_of_word
KEYWORD_else <- 'else' end_of_word ~KEYWORD_else <- 'else' end_of_word
KEYWORD_export <- 'export' end_of_word ~KEYWORD_export <- 'export' end_of_word
KEYWORD_false <- 'false' end_of_word ~KEYWORD_false <- 'false' end_of_word
KEYWORD_fn <- 'fn' end_of_word ~KEYWORD_fn <- 'fn' end_of_word
KEYWORD_for <- 'for' end_of_word ~KEYWORD_for <- 'for' end_of_word
KEYWORD_if <- 'if' end_of_word ~KEYWORD_if <- 'if' end_of_word
KEYWORD_nil <- 'nil' end_of_word ~KEYWORD_nil <- 'nil' end_of_word
KEYWORD_or <- 'or' end_of_word ~KEYWORD_or <- 'or' end_of_word
KEYWORD_return <- 'return' end_of_word ~KEYWORD_return <- 'return' end_of_word
KEYWORD_test <- 'test' end_of_word ~KEYWORD_test <- 'test' end_of_word
KEYWORD_true <- 'true' end_of_word ~KEYWORD_true <- 'true' end_of_word
KEYWORD_while <- 'while' end_of_word ~KEYWORD_var <- 'var' end_of_word
~KEYWORD_while <- 'while' end_of_word
keyword <- KEYWORD_and ~keyword <- KEYWORD_and
/ KEYWORD_break / KEYWORD_break
/ KEYWORD_continue / KEYWORD_else / KEYWORD_continue / KEYWORD_else
/ KEYWORD_export / KEYWORD_export
@ -316,4 +315,6 @@ keyword <- KEYWORD_and
/ KEYWORD_nil / KEYWORD_or / KEYWORD_nil / KEYWORD_or
/ KEYWORD_return / KEYWORD_return
/ KEYWORD_test / KEYWORD_true / KEYWORD_test / KEYWORD_true
/ KEYWORD_while / KEYWORD_var / KEYWORD_while
)==="

View File

@ -1,13 +1,15 @@
#include <cpp-peglib/peglib.h> #include <cpp-peglib/peglib.h>
#include <assert.h> #include <assert.h>
#include <iostream> #include <iostream>
#include <unordered_map>
#include <exception>
using namespace peg; using peg::SemanticValues;
using namespace std; using std::any_cast;
int main(void) { int main_old(void) {
// (2) Make a parser // (2) Make a parser
parser parser(R"( peg::parser parser(R"(
# Grammar for Calculator... # Grammar for Calculator...
Additive <- Multitive '+' Additive / Multitive Additive <- Multitive '+' Additive / Multitive
Multitive <- Primary '*' Multitive / Primary Multitive <- Primary '*' Multitive / Primary
@ -49,5 +51,202 @@ int main(void) {
assert(val == 9); assert(val == 9);
std::cout << "hello" << std::endl; return 0;
}
class Func;
class Scope;
class Identifier;
/**
 * Empty tag type that represents the language's 'nil' type.
 * It has no members; only its type carries meaning.
 */
struct Nil {};
/**
 * Interpreter state handed to every evaluation step: the table of builtin
 * functions and the scope that is currently active.
 */
class Context {
public:
    // Builtin functions keyed by name. The keys are non-owning views, so the
    // characters they refer to must stay alive as long as the entry does.
    std::unordered_map<std::string_view, Func> builtins;

    // The scope that is currently active.
    std::shared_ptr<Scope> currentScope;

    /**
     * Reports an error by writing "Error! " followed by msg and a newline to
     * standard output (the stream is flushed).
     */
    void error(std::string_view msg) {
        std::ostream& out = std::cout;
        out << "Error! ";
        out << msg;
        out << std::endl;
    }
};
/**
 * Holds the name of an identifier.
 */
class Identifier {
public:
    // The identifier's name.
    std::string identifier;

    // Stores a copy of id as the name.
    Identifier(std::string id)
        : identifier{id} {}
};
// table, function, number, string
// enum Type {
// Type_Table,
// Type_Function,
// Type_Number,
// Type_String,
// };
/**
 * A variable scope: a table of local variables plus a link to the parent
 * scope. Lookups and writes that miss locally are forwarded up the parent
 * chain.
 *
 * Planned but not implemented yet: return, continue and break handlers.
 */
class Scope {
public:
    /**
     * Thrown by getVar() when a name is defined neither in this scope nor in
     * any of its parents.
     */
    class UndefinedVariable : public std::exception {
    public:
        const char* what() const noexcept override {
            return "undefined variable";
        }
    };

    // Parent scope, or null for the outermost scope.
    std::shared_ptr<Scope> parentScope;

    // Local variables. The map owns its keys and values, so an entry stays
    // valid after the name/value objects the caller passed in are destroyed.
    std::unordered_map<std::string, std::any> vars;

    // std::function<bool(Context&,Args&)> returnHandler;
    // std::function<bool(Context&)> breakHandler;
    // std::function<bool(Context&)> continueHandler;

    Scope(std::shared_ptr<Scope> parentScope) : parentScope(parentScope) {
    }

    /**
     * Returns true if name is defined in this scope or in any parent scope.
     */
    bool varExists(Context& ctx, std::string_view name) {
        if (vars.find(std::string(name)) != vars.end()) {
            return true;
        }
        return parentScope != nullptr && parentScope->varExists(ctx, name);
    }

    /**
     * Looks up a variable in this scope, then in the parent chain.
     *
     * @return a reference to the stored value; it refers into the owning
     *         scope's table.
     * @throws UndefinedVariable (a std::exception) if the name is not defined
     *         in any scope of the chain. Nothing is created on a miss.
     */
    std::any& getVar(Context& cxt, std::string_view name) {
        const auto found = vars.find(std::string(name));
        if (found != vars.end()) {
            return found->second;
        }
        if (parentScope == nullptr) {
            // doesn't exist at all
            throw UndefinedVariable();
        }
        return parentScope->getVar(cxt, name);
    }

    /**
     * Stores a copy of var under name.
     *
     * If the name is not local but already exists in a parent scope, that
     * existing variable is overwritten and nothing is added locally.
     * Otherwise the variable is created or overwritten in this scope.
     */
    void writeVar(Context& cxt, std::string_view name, const std::any& var) {
        const std::string key(name);
        const bool isLocal = vars.find(key) != vars.end();
        if (!isLocal && parentScope != nullptr && parentScope->varExists(cxt, name)) {
            parentScope->writeVar(cxt, name, var);
            // Done: falling through would also create a shadowing local copy.
            return;
        }
        vars.insert_or_assign(key, var);
    }
};
/**
* Arguments passed to a function and also results returned from a function
*/
class Args {
public:
std::vector<std::any> args;
static Args toArgs(Context& cxt, std::any input) {
Args args;
if (input.type() == typeid(Identifier)) {
auto id = any_cast<Identifier>(input);
auto value = cxt.currentScope->getVar(cxt, id.identifier);
args.args.push_back(value);
}
return args;
}
};
/**
 * A callable value: a captured scope plus either an AST pointer or a native
 * function. Only the native-function form is implemented so far.
 */
class Func {
public:
    // Scope captured by the function; always null for native functions.
    std::shared_ptr<Scope> capturedScope = nullptr;
    // ast ptr
    // Native implementation that execute() invokes.
    std::function<void(Context&,Args&)> nativeFunction;

    // Wraps a native function; no scope is captured.
    Func(std::function<void(Context&,Args&)> nativeFunction)
        : nativeFunction(nativeFunction) {}

    /**
     * Runs the function with the given arguments by calling nativeFunction.
     */
    void execute(Context& cxt, Args& args) {
        // TODO implement AST executer
        nativeFunction(cxt, args);
    }
};
/**
 * Native implementations of the language's builtin functions.
 */
class BuiltinFunctions {
public:
    /**
     * Writes every argument that holds a long to stdout, each followed by a
     * tab, then ends the line. Arguments of any other type are reported
     * through cxt.error() and skipped.
     */
    static void print(Context& cxt, Args& args) {
        for (std::any& item : args.args) {
            if (item.type() != typeid(long)) {
                cxt.error("Attempt to print unsupported type");
                continue;
            }
            std::cout << std::any_cast<long>(item) << '\t';
        }
        std::cout << std::endl;
    }
};
/**
 * Evaluates an AST node and returns its value.
 *
 * Handled nodes:
 *  - Root: evaluates every child in order; yields Nil.
 *  - VarDecl: evaluates the identifier and its optional initializer, then
 *    writes the variable through cxt.currentScope; yields Nil.
 *  - IDENTIFIER / BUILTINIDENTIFIER: yields an Identifier holding the token.
 *  - INTEGER: yields the token converted to long.
 *  - BUILTINCALL: looks the builtin up in cxt.builtins and executes it;
 *    yields Nil.
 *
 * Any other node is not implemented yet: its name is traced to stdout, its
 * children are evaluated, and Nil is returned.
 */
std::any eval(Context &cxt, peg::Ast& ast) {
    const auto &nodes = ast.nodes;

    if (ast.name == "Root") {
        for (const auto &child : nodes) {
            eval(cxt, *child);
        }
        return Nil();
    }

    if (ast.name == "VarDecl") {
        auto identifier = std::any_cast<Identifier>(eval(cxt, *nodes[0]));
        // The grammar makes the initializer optional, so the node may have
        // only the identifier child; such a variable starts out as nil.
        std::any value = nodes.size() > 1 ? eval(cxt, *nodes[1]) : std::any(Nil());
        cxt.currentScope->writeVar(cxt, identifier.identifier, value);
        return Nil();
    }

    if (ast.name == "IDENTIFIER" || ast.name == "BUILTINIDENTIFIER") {
        return Identifier(ast.token_to_string());
    }

    if (ast.name == "INTEGER") {
        // TODO parse hex, oct, and binary
        return ast.token_to_number<long>();
    }

    if (ast.name == "BUILTINCALL") {
        auto identifier = std::any_cast<Identifier>(eval(cxt, *nodes[0]));
        auto args = Args::toArgs(cxt, eval(cxt, *nodes[1]));
        const auto builtin = cxt.builtins.find(identifier.identifier);
        if (builtin == cxt.builtins.end()) {
            cxt.error("Call to unknown builtin function");
            return Nil();
        }
        builtin->second.execute(cxt, args);
        // Return here: this node is fully handled and must not fall into the
        // unhandled-node trace below, which would evaluate its children again.
        return Nil();
    }

    // Unhandled node type: trace it and still evaluate its children.
    std::cout << "AST Name: " << ast.name << std::endl;
    for (const auto &child : nodes) {
        std::cout << "AST child: " << ast.name << " : ";
        eval(cxt, *child);
    }
    return Nil();
}
/**
 * Parses a small built-in sample program with the grammar from grammar.peg,
 * prints the optimized AST and evaluates it.
 *
 * @return 0 when the program parsed and was evaluated, 1 on a syntax error.
 */
int main(void) {
    peg::parser parser(
#include "grammar.peg"
    );

    std::string s = R"(
var a = 10;
@print(a);
)";

    parser.enable_ast();
    std::shared_ptr<peg::Ast> ast;
    if (!parser.parse(s, ast)) {
        std::cout << "syntax error..." << std::endl;
        return 1;
    }

    ast = parser.optimize_ast(ast);
    std::cout << ast_to_s(ast) << std::endl;

    Context cxt;

    // init builtins
    cxt.builtins.emplace("print", Func(BuiltinFunctions::print));

    // init new scope: the outermost scope has no parent
    cxt.currentScope = std::make_shared<Scope>(nullptr);

    eval(cxt, *ast);
    return 0;
}