This commit is contained in:
zuckerberg 2021-07-08 19:05:04 -06:00
parent 7789e28631
commit abd7bcda7f
2 changed files with 310 additions and 110 deletions

View File

@ -1,8 +1,10 @@
R"===(
Root <- skip Statement* eof
Fn <- KEYWORD_fn LPAREN ParamDeclList RPAREN Block
VarDecl <- IDENTIFIER (EQUAL Expr)? SEMICOLON
VarDecl <- KEYWORD_var IDENTIFIER (EQUAL Expr)? SEMICOLON
# *** Block Level ***
Statement
@ -73,7 +75,7 @@ SuffixExpr
<- PrimaryTypeExpr (SuffixOp / FnCallArguments)*
PrimaryTypeExpr
<- BUILTINIDENTIFIER FnCallArguments
<- BUILTINCALL
/ CHAR_LITERAL
/ DOT IDENTIFIER
/ FLOAT
@ -86,6 +88,8 @@ PrimaryTypeExpr
/ KEYWORD_true
/ STRINGLITERAL
BUILTINCALL <- BUILTINIDENTIFIER FnCallArguments
GroupedExpr <- LPAREN Expr RPAREN
# *** Helper grammar ***
@ -93,12 +97,10 @@ BreakLabel <- COLON IDENTIFIER
BlockLabel <- IDENTIFIER COLON
WhileContinueExpr <- COLON LPAREN AssignExpr RPAREN
# Control flow prefixes
IfPrefix <- KEYWORD_if LPAREN Expr RPAREN PtrPayload?
WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr?
WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload?
ForPrefix <- KEYWORD_for LPAREN Expr RPAREN PtrIndexPayload
@ -141,7 +143,6 @@ BitShiftOp
AdditionOp
<- PLUS
/ MINUS
/ PLUS2
MultiplyOp
<- ASTERISK
@ -168,15 +169,13 @@ ParamDeclList <- (IDENTIFIER COMMA)* IDENTIFIER?
# *** Tokens ***
INTEGER
<- "0b" bin_int skip
/ "0o" oct_int skip
/ "0x" hex_int skip
/ dec_int skip
<- "0b" < bin_int > skip
/ "0o" < oct_int > skip
/ "0x" < hex_int > skip
/ < dec_int > skip
IDENTIFIER
<- !keyword [A-Za-z_] [A-Za-z0-9_]* skip
/ "@\"" string_char* "\"" skip
BUILTINIDENTIFIER <- "@"[A-Za-z_][A-Za-z0-9_]* skip
IDENTIFIER <- !keyword < [A-Za-z_] [A-Za-z0-9_]* > skip
BUILTINIDENTIFIER <- "@" < [A-Za-z_][A-Za-z0-9_]* > skip
CHAR_LITERAL <- "'" char_char "'" skip
FLOAT
@ -190,38 +189,38 @@ STRINGLITERAL
/ (line_string skip)+
STRINGLITERALSINGLE <- "\"" string_char* "\"" skip
eof <- !.
bin <- [01]
bin_ <- '_'? bin
oct <- [0-7]
oct_ <- '_'? oct
hex <- [0-9a-fA-F]
hex_ <- '_'? hex
dec <- [0-9]
dec_ <- '_'? dec
~bin_int <- bin bin_*
~oct_int <- oct oct_*
~dec_int <- dec dec_*
~hex_int <- hex hex_*
bin_int <- bin bin_*
oct_int <- oct oct_*
dec_int <- dec dec_*
hex_int <- hex hex_*
~eof <- !.
~bin <- [01]
~bin_ <- '_'? bin
~oct <- [0-7]
~oct_ <- '_'? oct
~hex <- [0-9a-fA-F]
~hex_ <- '_'? hex
~dec <- [0-9]
~dec_ <- '_'? dec
ox80_oxBF <- [\200-\277]
oxF4 <- '\364'
ox80_ox8F <- [\200-\217]
oxF1_oxF3 <- [\361-\363]
oxF0 <- '\360'
ox90_0xBF <- [\220-\277]
oxEE_oxEF <- [\356-\357]
oxED <- '\355'
ox80_ox9F <- [\200-\237]
oxE1_oxEC <- [\341-\354]
oxE0 <- '\340'
oxA0_oxBF <- [\240-\277]
oxC2_oxDF <- [\302-\337]
~ox80_oxBF <- [\200-\277]
~oxF4 <- '\364'
~ox80_ox8F <- [\200-\217]
~oxF1_oxF3 <- [\361-\363]
~oxF0 <- '\360'
~ox90_0xBF <- [\220-\277]
~oxEE_oxEF <- [\356-\357]
~oxED <- '\355'
~ox80_ox9F <- [\200-\237]
~oxE1_oxEC <- [\341-\354]
~oxE0 <- '\340'
~oxA0_oxBF <- [\240-\277]
~oxC2_oxDF <- [\302-\337]
# From https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/
mb_utf8_literal <-
~mb_utf8_literal <-
oxF4 ox80_ox8F ox80_oxBF ox80_oxBF
/ oxF1_oxF3 ox80_oxBF ox80_oxBF ox80_oxBF
/ oxF0 ox90_0xBF ox80_oxBF ox80_oxBF
@ -231,84 +230,84 @@ mb_utf8_literal <-
/ oxE0 oxA0_oxBF ox80_oxBF
/ oxC2_oxDF ox80_oxBF
ascii_char_not_nl_slash_squote <- [\000-\011\013-\046-\050-\133\135-\177]
~ascii_char_not_nl_slash_squote <- [\000-\011\013-\046-\050-\133\135-\177]
char_escape
~char_escape
<- "\\x" hex hex
/ "\\u{" hex+ "}"
/ "\\" [nr\\t'"]
char_char
~char_char
<- mb_utf8_literal
/ char_escape
/ ascii_char_not_nl_slash_squote
string_char
~string_char
<- char_escape
/ [^\\"\n]
line_comment <- '//' ![!/][^\n]* / '////' [^\n]*
line_string <- ("\\\\" [^\n]* [ \n]*)+
skip <- ([ \n] / line_comment)*
~line_comment <- '//' ![!/][^\n]* / '////' [^\n]*
~line_string <- ("\\\\" [^\n]* [ \n]*)+
~skip <- ([ \n] / line_comment)*
AMPERSAND <- '&' ![=] skip
AMPERSANDEQUAL <- '&=' skip
ASTERISK <- '*' ![*%=] skip
ASTERISK2 <- '**' skip
ASTERISKEQUAL <- '*=' skip
ASTERISKPERCENT <- '*%' ![=] skip
ASTERISKPERCENTEQUAL <- '*%=' skip
CARET <- '^' ![=] skip
CARETEQUAL <- '^=' skip
COLON <- ':' skip
COMMA <- ',' skip
DOT <- '.' ![*.?] skip
EQUAL <- '=' ![>=] skip
EQUALEQUAL <- '==' skip
EXCLAMATIONMARK <- '!' ![=] skip
EXCLAMATIONMARKEQUAL <- '!=' skip
LARROW <- '<' ![<=] skip
LARROW2 <- '<<' skip
LARROWEQUAL <- '<=' skip
LBRACE <- '{' skip
LBRACKET <- '[' skip
LPAREN <- '(' skip
MINUS <- '-' ![=>] skip
MINUSEQUAL <- '-=' skip
PERCENT <- '%' ![=] skip
PERCENTEQUAL <- '%=' skip
PIPE <- '|' ![|=] skip
PIPEEQUAL <- '|=' skip
PLUS <- '+' ![+=] skip
PLUS2 <- '++' skip
PLUSEQUAL <- '+=' skip
RARROW <- '>' skip
RARROW2 <- '>>' skip
RARROWEQUAL <- '>=' skip
RBRACE <- '}' skip
RBRACKET <- ']' skip
RPAREN <- ')' skip
SEMICOLON <- ';' skip
SLASH <- '/' ![=] skip
~AMPERSAND <- '&' ![=] skip
~AMPERSANDEQUAL <- '&=' skip
~ASTERISK <- '*' ![*%=] skip
~ASTERISK2 <- '**' skip
~ASTERISKEQUAL <- '*=' skip
~ASTERISKPERCENT <- '*%' ![=] skip
~ASTERISKPERCENTEQUAL <- '*%=' skip
~CARET <- '^' ![=] skip
~CARETEQUAL <- '^=' skip
~COLON <- ':' skip
~COMMA <- ',' skip
~DOT <- '.' ![*.?] skip
~EQUAL <- '=' ![>=] skip
~EQUALEQUAL <- '==' skip
~EXCLAMATIONMARK <- '!' ![=] skip
~EXCLAMATIONMARKEQUAL <- '!=' skip
~LARROW <- '<' ![<=] skip
~LARROW2 <- '<<' skip
~LARROWEQUAL <- '<=' skip
~LBRACE <- '{' skip
~LBRACKET <- '[' skip
~LPAREN <- '(' skip
~MINUS <- '-' ![=>] skip
~MINUSEQUAL <- '-=' skip
~PERCENT <- '%' ![=] skip
~PERCENTEQUAL <- '%=' skip
~PIPE <- '|' ![|=] skip
~PIPEEQUAL <- '|=' skip
~PLUS <- '+' ![+=] skip
~PLUSEQUAL <- '+=' skip
~RARROW <- '>' skip
~RARROW2 <- '>>' skip
~RARROWEQUAL <- '>=' skip
~RBRACE <- '}' skip
~RBRACKET <- ']' skip
~RPAREN <- ')' skip
~SEMICOLON <- ';' skip
~SLASH <- '/' ![=] skip
end_of_word <- ![a-zA-Z0-9_] skip
KEYWORD_and <- 'and' end_of_word
KEYWORD_break <- 'break' end_of_word
KEYWORD_continue <- 'continue' end_of_word
KEYWORD_else <- 'else' end_of_word
KEYWORD_export <- 'export' end_of_word
KEYWORD_false <- 'false' end_of_word
KEYWORD_fn <- 'fn' end_of_word
KEYWORD_for <- 'for' end_of_word
KEYWORD_if <- 'if' end_of_word
KEYWORD_nil <- 'nil' end_of_word
KEYWORD_or <- 'or' end_of_word
KEYWORD_return <- 'return' end_of_word
KEYWORD_test <- 'test' end_of_word
KEYWORD_true <- 'true' end_of_word
KEYWORD_while <- 'while' end_of_word
~end_of_word <- ![a-zA-Z0-9_] skip
~KEYWORD_and <- 'and' end_of_word
~KEYWORD_break <- 'break' end_of_word
~KEYWORD_continue <- 'continue' end_of_word
~KEYWORD_else <- 'else' end_of_word
~KEYWORD_export <- 'export' end_of_word
~KEYWORD_false <- 'false' end_of_word
~KEYWORD_fn <- 'fn' end_of_word
~KEYWORD_for <- 'for' end_of_word
~KEYWORD_if <- 'if' end_of_word
~KEYWORD_nil <- 'nil' end_of_word
~KEYWORD_or <- 'or' end_of_word
~KEYWORD_return <- 'return' end_of_word
~KEYWORD_test <- 'test' end_of_word
~KEYWORD_true <- 'true' end_of_word
~KEYWORD_var <- 'var' end_of_word
~KEYWORD_while <- 'while' end_of_word
keyword <- KEYWORD_and
~keyword <- KEYWORD_and
/ KEYWORD_break
/ KEYWORD_continue / KEYWORD_else
/ KEYWORD_export
@ -316,4 +315,6 @@ keyword <- KEYWORD_and
/ KEYWORD_nil / KEYWORD_or
/ KEYWORD_return
/ KEYWORD_test / KEYWORD_true
/ KEYWORD_while
/ KEYWORD_var / KEYWORD_while
)==="

View File

@ -1,13 +1,15 @@
#include <assert.h>
#include <any>
#include <exception>
#include <functional>
#include <iostream>
#include <memory>
#include <stdexcept>
#include <string>
#include <string_view>
#include <unordered_map>
#include <utility>
#include <vector>
#include <cpp-peglib/peglib.h>
using namespace peg;
using namespace std;
using peg::SemanticValues;
using std::any_cast;
int main(void) {
int main_old(void) {
// (2) Make a parser
parser parser(R"(
peg::parser parser(R"(
# Grammar for Calculator...
Additive <- Multitive '+' Additive / Multitive
Multitive <- Primary '*' Multitive / Primary
@ -49,5 +51,202 @@ int main(void) {
assert(val == 9);
std::cout << "hello" << std::endl;
return 0;
}
class Func;
class Scope;
class Identifier;
/// Empty marker type standing in for the language's 'nil' value.
/// It carries no state; eval() returns an instance of it when a node
/// produces no meaningful result.
class Nil {};
/// Interpreter-wide state that is threaded through evaluation.
class Context {
public:
    /// Builtin functions, looked up by name (main() registers "print").
    std::unordered_map<std::string_view, Func> builtins;
    /// Scope against which identifiers are currently resolved and written.
    std::shared_ptr<Scope> currentScope;

    /// Reports a runtime error by writing "Error! <msg>" to stdout and flushing.
    void error(std::string_view msg) {
        std::cout << "Error! ";
        std::cout << msg;
        std::cout << std::endl;
    }
};
/// Wrapper marking a string as a *name* (of a variable or builtin) rather
/// than a string value, so evaluation results can be told apart by type.
class Identifier {
public:
    /// The identifier's text.
    std::string identifier;

    /// Takes the name by value and moves it into place, so passing a temporary
    /// costs no copy. Intentionally left implicit so existing conversions from
    /// std::string keep compiling.
    Identifier(std::string id) : identifier(std::move(id)) {}
};
// table, function, number, string
// enum Type {
// Type_Table,
// Type_Function,
// Type_Number,
// Type_String,
// };
// Scope only ever handles Context by reference, so a declaration is enough here.
class Context;

/**
 * Scope - parent scope, local variables, return handler, continue handler, break handler
 *
 * One level of the variable environment. Lookups that miss locally are
 * forwarded to parentScope; a null parentScope marks the outermost scope.
 */
class Scope {
public:
    std::shared_ptr<Scope> parentScope;
    // Names and values are stored as owned copies. Callers pass views and
    // references to short-lived locals, so keeping only the view/reference
    // would leave dangling entries behind.
    std::unordered_map<std::string, std::any> vars;
    // std::function<bool(Context&,Args&)> returnHandler;
    // std::function<bool(Context&)> breakHandler;
    // std::function<bool(Context&)> continueHandler;
public:
    Scope(std::shared_ptr<Scope> parentScope) : parentScope(std::move(parentScope)) {
    }

    /**
     * Returns true if `name` is defined in this scope or in any ancestor scope.
     */
    bool varExists(Context& ctx, std::string_view name) {
        if (vars.find(std::string(name)) != vars.end()) {
            return true;
        }
        return parentScope != nullptr && parentScope->varExists(ctx, name);
    }

    /**
     * Looks up `name` in this scope, then in each ancestor in turn, and returns
     * a reference to the stored value.
     *
     * Throws std::out_of_range if the variable is not defined anywhere.
     */
    std::any& getVar(Context& cxt, std::string_view name) {
        const auto found = vars.find(std::string(name));
        if (found != vars.end()) {
            return found->second;
        }
        if (parentScope != nullptr) {
            return parentScope->getVar(cxt, name);
        }
        // doesn't exist at all
        throw std::out_of_range("undefined variable: " + std::string(name));
    }

    /**
     * Stores a copy of `var` under `name`.
     *
     * If the name is not local but is already defined in an ancestor, that
     * ancestor's variable is updated and no local shadow is created. Otherwise
     * the variable is created or overwritten in this scope.
     */
    void writeVar(Context& cxt, std::string_view name, std::any& var) {
        std::string key(name);
        const bool isLocal = vars.find(key) != vars.end();
        if (!isLocal && parentScope != nullptr && parentScope->varExists(cxt, name)) {
            parentScope->writeVar(cxt, name, var);
            return;
        }
        vars.insert_or_assign(std::move(key), var);
    }
};
/**
* Arguments passed to a function and also results returned from a function
*/
class Args {
public:
std::vector<std::any> args;
static Args toArgs(Context& cxt, std::any input) {
Args args;
if (input.type() == typeid(Identifier)) {
auto id = any_cast<Identifier>(input);
auto value = cxt.currentScope->getVar(cxt, id.identifier);
args.args.push_back(value);
}
return args;
}
};
// Function - captured scope, ast ptr OR native function
//
// Callable value of the interpreter. Only natively implemented functions
// are supported so far.
class Func {
public:
    // Scope captured by the function; always null for native functions.
    std::shared_ptr<Scope> capturedScope;
    // ast ptr
    // Native implementation invoked by execute().
    std::function<void(Context&,Args&)> nativeFunction;

    // Wraps a native callable; no scope is captured.
    Func(std::function<void(Context&,Args&)> nativeFunction)
        : capturedScope(), nativeFunction(nativeFunction) {}

    // Runs the function with the given arguments.
    void execute(Context& cxt, Args& args) {
        // TODO implement AST executer
        nativeFunction(cxt, args);
    }
};
/// Native implementations of the language's builtin functions.
class BuiltinFunctions {
public:
    /// Writes every argument to stdout, each followed by a tab, then ends the
    /// line. Only `long` values can be printed; any other argument is reported
    /// through Context::error and skipped.
    static void print(Context& cxt, Args& args) {
        for (std::any& value : args.args) {
            // The pointer form of any_cast yields null unless the held type is exactly long.
            if (const long* number = std::any_cast<long>(&value)) {
                std::cout << *number << '\t';
                continue;
            }
            cxt.error("Attempt to print unsupported type");
        }
        std::cout << std::endl;
    }
};
/**
 * Recursively evaluates an AST node and returns its value.
 *
 * Handled node names:
 *  - "Root":        evaluates every child in order; yields Nil.
 *  - "VarDecl":     child 0 is the name, optional child 1 the initializer; the
 *                   value (Nil when there is no initializer) is written to the
 *                   current scope; yields Nil.
 *  - "IDENTIFIER" / "BUILTINIDENTIFIER": yields an Identifier built from the token.
 *  - "INTEGER":     yields the token parsed as a long.
 *  - "BUILTINCALL": child 0 names the builtin, optional child 1 supplies the
 *                   arguments; the builtin is executed; yields Nil. An unknown
 *                   builtin is reported through Context::error.
 *
 * Any other node is dumped to stdout for debugging, its children are
 * evaluated, and Nil is returned.
 */
std::any eval(Context &cxt, peg::Ast& ast) {
    const auto &nodes = ast.nodes;

    if (ast.name == "Root") {
        for (const auto& child : nodes) {
            eval(cxt, *child);
        }
        return Nil();
    }

    if (ast.name == "VarDecl") {
        auto identifier = std::any_cast<Identifier>(eval(cxt, *nodes[0]));
        // The grammar makes the initializer optional, so child 1 may be absent.
        std::any value = nodes.size() > 1 ? eval(cxt, *nodes[1]) : std::any(Nil());
        cxt.currentScope->writeVar(cxt, identifier.identifier, value);
        return Nil();
    }

    if (ast.name == "IDENTIFIER" || ast.name == "BUILTINIDENTIFIER") {
        return Identifier(ast.token_to_string());
    }

    if (ast.name == "INTEGER") {
        // TODO parse hex, oct, and binary
        return ast.token_to_number<long>();
    }

    if (ast.name == "BUILTINCALL") {
        auto identifier = std::any_cast<Identifier>(eval(cxt, *nodes[0]));
        Args args = nodes.size() > 1 ? Args::toArgs(cxt, eval(cxt, *nodes[1])) : Args();

        const auto builtin = cxt.builtins.find(identifier.identifier);
        if (builtin == cxt.builtins.end()) {
            const std::string message = "Unknown builtin function '@" + identifier.identifier + "'";
            cxt.error(message);
            return Nil();
        }
        builtin->second.execute(cxt, args);
        // Return here: falling through would dump the node and evaluate its
        // children a second time.
        return Nil();
    }

    // Unhandled node kind: dump it and descend so nested nodes still run.
    std::cout << "AST Name: " << ast.name << std::endl;
    for (const auto& child : nodes) {
        std::cout << "AST child: " << ast.name << " : ";
        eval(cxt, *child);
    }
    return Nil();
}
/**
 * Entry point: builds a parser from the grammar in grammar.peg, parses a small
 * hard-coded program, prints the optimized AST and evaluates it.
 *
 * Returns 0 on success, or 1 after printing "syntax error..." when the
 * program does not parse.
 */
int main(void) {
    peg::parser parser(
#include "grammar.peg"
    );

    std::string source = R"(
var a = 10;
@print(a);
)";

    parser.enable_ast();
    std::shared_ptr<peg::Ast> ast;
    if (!parser.parse(source, ast)) {
        std::cout << "syntax error..." << std::endl;
        return 1;
    }

    ast = parser.optimize_ast(ast);
    std::cout << ast_to_s(ast) << std::endl;

    Context cxt;
    // init builtins
    cxt.builtins.emplace("print", Func(BuiltinFunctions::print));
    // init new scope (no parent: this is the outermost scope)
    cxt.currentScope = std::make_shared<Scope>(nullptr);

    eval(cxt, *ast);
    return 0;
}