diff --git a/src/grammar.peg b/src/grammar.peg index 83d461f..1ac5ab6 100644 --- a/src/grammar.peg +++ b/src/grammar.peg @@ -1,8 +1,10 @@ +R"===( + Root <- skip Statement* eof Fn <- KEYWORD_fn LPAREN ParamDeclList RPAREN Block -VarDecl <- IDENTIFIER (EQUAL Expr)? SEMICOLON +VarDecl <- KEYWORD_var IDENTIFIER (EQUAL Expr)? SEMICOLON # *** Block Level *** Statement @@ -73,7 +75,7 @@ SuffixExpr <- PrimaryTypeExpr (SuffixOp / FnCallArguments)* PrimaryTypeExpr - <- BUILTINIDENTIFIER FnCallArguments + <- BUILTINCALL / CHAR_LITERAL / DOT IDENTIFIER / FLOAT @@ -86,6 +88,8 @@ PrimaryTypeExpr / KEYWORD_true / STRINGLITERAL +BUILTINCALL <- BUILTINIDENTIFIER FnCallArguments + GroupedExpr <- LPAREN Expr RPAREN # *** Helper grammar *** @@ -93,12 +97,10 @@ BreakLabel <- COLON IDENTIFIER BlockLabel <- IDENTIFIER COLON -WhileContinueExpr <- COLON LPAREN AssignExpr RPAREN - # Control flow prefixes IfPrefix <- KEYWORD_if LPAREN Expr RPAREN PtrPayload? -WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr? +WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? ForPrefix <- KEYWORD_for LPAREN Expr RPAREN PtrIndexPayload @@ -141,7 +143,6 @@ BitShiftOp AdditionOp <- PLUS / MINUS - / PLUS2 MultiplyOp <- ASTERISK @@ -168,15 +169,13 @@ ParamDeclList <- (IDENTIFIER COMMA)* IDENTIFIER? # *** Tokens *** INTEGER - <- "0b" bin_int skip - / "0o" oct_int skip - / "0x" hex_int skip - / dec_int skip + <- "0b" < bin_int > skip + / "0o" < oct_int > skip + / "0x" < hex_int > skip + / < dec_int > skip -IDENTIFIER - <- !keyword [A-Za-z_] [A-Za-z0-9_]* skip - / "@\"" string_char* "\"" skip -BUILTINIDENTIFIER <- "@"[A-Za-z_][A-Za-z0-9_]* skip +IDENTIFIER <- !keyword < [A-Za-z_] [A-Za-z0-9_]* > skip +BUILTINIDENTIFIER <- "@" < [A-Za-z_][A-Za-z0-9_]* > skip CHAR_LITERAL <- "'" char_char "'" skip FLOAT @@ -190,38 +189,38 @@ STRINGLITERAL / (line_string skip)+ STRINGLITERALSINGLE <- "\"" string_char* "\"" skip -eof <- !. -bin <- [01] -bin_ <- '_'? bin -oct <- [0-7] -oct_ <- '_'? oct -hex <- [0-9a-fA-F] -hex_ <- '_'? hex -dec <- [0-9] -dec_ <- '_'? dec +~bin_int <- bin bin_* +~oct_int <- oct oct_* +~dec_int <- dec dec_* +~hex_int <- hex hex_* -bin_int <- bin bin_* -oct_int <- oct oct_* -dec_int <- dec dec_* -hex_int <- hex hex_* +~eof <- !. +~bin <- [01] +~bin_ <- '_'? bin +~oct <- [0-7] +~oct_ <- '_'? oct +~hex <- [0-9a-fA-F] +~hex_ <- '_'? hex +~dec <- [0-9] +~dec_ <- '_'? dec -ox80_oxBF <- [\200-\277] -oxF4 <- '\364' -ox80_ox8F <- [\200-\217] -oxF1_oxF3 <- [\361-\363] -oxF0 <- '\360' -ox90_0xBF <- [\220-\277] -oxEE_oxEF <- [\356-\357] -oxED <- '\355' -ox80_ox9F <- [\200-\237] -oxE1_oxEC <- [\341-\354] -oxE0 <- '\340' -oxA0_oxBF <- [\240-\277] -oxC2_oxDF <- [\302-\337] +~ox80_oxBF <- [\200-\277] +~oxF4 <- '\364' +~ox80_ox8F <- [\200-\217] +~oxF1_oxF3 <- [\361-\363] +~oxF0 <- '\360' +~ox90_0xBF <- [\220-\277] +~oxEE_oxEF <- [\356-\357] +~oxED <- '\355' +~ox80_ox9F <- [\200-\237] +~oxE1_oxEC <- [\341-\354] +~oxE0 <- '\340' +~oxA0_oxBF <- [\240-\277] +~oxC2_oxDF <- [\302-\337] # From https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/ -mb_utf8_literal <- +~mb_utf8_literal <- oxF4 ox80_ox8F ox80_oxBF ox80_oxBF / oxF1_oxF3 ox80_oxBF ox80_oxBF ox80_oxBF / oxF0 ox90_0xBF ox80_oxBF ox80_oxBF @@ -231,84 +230,84 @@ mb_utf8_literal <- / oxE0 oxA0_oxBF ox80_oxBF / oxC2_oxDF ox80_oxBF -ascii_char_not_nl_slash_squote <- [\000-\011\013-\046-\050-\133\135-\177] +~ascii_char_not_nl_slash_squote <- [\000-\011\013-\046-\050-\133\135-\177] -char_escape +~char_escape <- "\\x" hex hex / "\\u{" hex+ "}" / "\\" [nr\\t'"] -char_char +~char_char <- mb_utf8_literal / char_escape / ascii_char_not_nl_slash_squote -string_char +~string_char <- char_escape / [^\\"\n] -line_comment <- '//' ![!/][^\n]* / '////' [^\n]* -line_string <- ("\\\\" [^\n]* [ \n]*)+ -skip <- ([ \n] / line_comment)* +~line_comment <- '//' ![!/][^\n]* / '////' [^\n]* +~line_string <- ("\\\\" [^\n]* [ \n]*)+ +~skip <- ([ \n] / line_comment)* -AMPERSAND <- '&' ![=] skip -AMPERSANDEQUAL <- '&=' skip -ASTERISK <- '*' ![*%=] skip -ASTERISK2 <- '**' skip -ASTERISKEQUAL <- '*=' skip -ASTERISKPERCENT <- '*%' ![=] skip -ASTERISKPERCENTEQUAL <- '*%=' skip -CARET <- '^' ![=] skip -CARETEQUAL <- '^=' skip -COLON <- ':' skip -COMMA <- ',' skip -DOT <- '.' ![*.?] skip -EQUAL <- '=' ![>=] skip -EQUALEQUAL <- '==' skip -EXCLAMATIONMARK <- '!' ![=] skip -EXCLAMATIONMARKEQUAL <- '!=' skip -LARROW <- '<' ![<=] skip -LARROW2 <- '<<' skip -LARROWEQUAL <- '<=' skip -LBRACE <- '{' skip -LBRACKET <- '[' skip -LPAREN <- '(' skip -MINUS <- '-' ![=>] skip -MINUSEQUAL <- '-=' skip -PERCENT <- '%' ![=] skip -PERCENTEQUAL <- '%=' skip -PIPE <- '|' ![|=] skip -PIPEEQUAL <- '|=' skip -PLUS <- '+' ![+=] skip -PLUS2 <- '++' skip -PLUSEQUAL <- '+=' skip -RARROW <- '>' skip -RARROW2 <- '>>' skip -RARROWEQUAL <- '>=' skip -RBRACE <- '}' skip -RBRACKET <- ']' skip -RPAREN <- ')' skip -SEMICOLON <- ';' skip -SLASH <- '/' ![=] skip +~AMPERSAND <- '&' ![=] skip +~AMPERSANDEQUAL <- '&=' skip +~ASTERISK <- '*' ![*%=] skip +~ASTERISK2 <- '**' skip +~ASTERISKEQUAL <- '*=' skip +~ASTERISKPERCENT <- '*%' ![=] skip +~ASTERISKPERCENTEQUAL <- '*%=' skip +~CARET <- '^' ![=] skip +~CARETEQUAL <- '^=' skip +~COLON <- ':' skip +~COMMA <- ',' skip +~DOT <- '.' ![*.?] skip +~EQUAL <- '=' ![>=] skip +~EQUALEQUAL <- '==' skip +~EXCLAMATIONMARK <- '!' ![=] skip +~EXCLAMATIONMARKEQUAL <- '!=' skip +~LARROW <- '<' ![<=] skip +~LARROW2 <- '<<' skip +~LARROWEQUAL <- '<=' skip +~LBRACE <- '{' skip +~LBRACKET <- '[' skip +~LPAREN <- '(' skip +~MINUS <- '-' ![=>] skip +~MINUSEQUAL <- '-=' skip +~PERCENT <- '%' ![=] skip +~PERCENTEQUAL <- '%=' skip +~PIPE <- '|' ![|=] skip +~PIPEEQUAL <- '|=' skip +~PLUS <- '+' ![+=] skip +~PLUSEQUAL <- '+=' skip +~RARROW <- '>' skip +~RARROW2 <- '>>' skip +~RARROWEQUAL <- '>=' skip +~RBRACE <- '}' skip +~RBRACKET <- ']' skip +~RPAREN <- ')' skip +~SEMICOLON <- ';' skip +~SLASH <- '/' ![=] skip -end_of_word <- ![a-zA-Z0-9_] skip -KEYWORD_and <- 'and' end_of_word -KEYWORD_break <- 'break' end_of_word -KEYWORD_continue <- 'continue' end_of_word -KEYWORD_else <- 'else' end_of_word -KEYWORD_export <- 'export' end_of_word -KEYWORD_false <- 'false' end_of_word -KEYWORD_fn <- 'fn' end_of_word -KEYWORD_for <- 'for' end_of_word -KEYWORD_if <- 'if' end_of_word -KEYWORD_nil <- 'nil' end_of_word -KEYWORD_or <- 'or' end_of_word -KEYWORD_return <- 'return' end_of_word -KEYWORD_test <- 'test' end_of_word -KEYWORD_true <- 'true' end_of_word -KEYWORD_while <- 'while' end_of_word +~end_of_word <- ![a-zA-Z0-9_] skip +~KEYWORD_and <- 'and' end_of_word +~KEYWORD_break <- 'break' end_of_word +~KEYWORD_continue <- 'continue' end_of_word +~KEYWORD_else <- 'else' end_of_word +~KEYWORD_export <- 'export' end_of_word +~KEYWORD_false <- 'false' end_of_word +~KEYWORD_fn <- 'fn' end_of_word +~KEYWORD_for <- 'for' end_of_word +~KEYWORD_if <- 'if' end_of_word +~KEYWORD_nil <- 'nil' end_of_word +~KEYWORD_or <- 'or' end_of_word +~KEYWORD_return <- 'return' end_of_word +~KEYWORD_test <- 'test' end_of_word +~KEYWORD_true <- 'true' end_of_word +~KEYWORD_var <- 'var' end_of_word +~KEYWORD_while <- 'while' end_of_word -keyword <- KEYWORD_and +~keyword <- KEYWORD_and / KEYWORD_break / KEYWORD_continue / KEYWORD_else / KEYWORD_export @@ -316,4 +315,6 @@ keyword <- KEYWORD_and / KEYWORD_nil / KEYWORD_or / KEYWORD_return / KEYWORD_test / KEYWORD_true - / KEYWORD_while \ No newline at end of file + / KEYWORD_var / KEYWORD_while + +)===" \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp index fca8d2c..480d2cc 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,13 +1,15 @@ #include #include #include +#include +#include -using namespace peg; -using namespace std; +using peg::SemanticValues; +using std::any_cast; -int main(void) { +int main_old(void) { // (2) Make a parser - parser parser(R"( + peg::parser parser(R"( # Grammar for Calculator... Additive <- Multitive '+' Additive / Multitive Multitive <- Primary '*' Multitive / Primary @@ -49,5 +51,202 @@ int main(void) { assert(val == 9); - std::cout << "hello" << std::endl; + return 0; +} + +class Func; +class Scope; +class Identifier; + +// represents the 'nil' type +class Nil { +}; + +class Context { +public: + std::unordered_map builtins; + std::shared_ptr currentScope; +public: + void error(std::string_view msg) { + std::cout << "Error! " << msg << std::endl; + } +}; + +class Identifier { +public: + std::string identifier; + Identifier(std::string id) : identifier(id) {} +}; + +// table, function, number, string +// enum Type { +// Type_Table, +// Type_Function, +// Type_Number, +// Type_String, +// }; + +// Scope - parent scope, local variables, return handler, continue handler, break handler +class Scope { +public: + std::shared_ptr parentScope; + std::unordered_map vars; + // std::function returnHandler; + // std::function breakHandler; + // std::function continueHandler; + +public: + Scope(std::shared_ptr parentScope) : parentScope(parentScope) { + } + + bool varExists(Context& ctx, std::string_view name) { + if (vars.find(name) == vars.end()) { + if (parentScope.get() == nullptr) { + return false; + } else { + return parentScope.get()->varExists(ctx, name); + } + } else { + return true; + } + } + + /** + * Looks up var and creates it in the current scope if it doesn't exist + */ + std::any& getVar(Context& cxt, std::string_view name) { + if (vars.find(name) == vars.end()) { + if (varExists(cxt, name)) { + return parentScope.get()->getVar(cxt, name); + } else { + // doesn't exist at all + throw new std::exception(); + } + } + return vars.at(name); + } + + void writeVar(Context& cxt, std::string_view name, std::any& var) { + if (vars.find(name) == vars.end()) { + if (varExists(cxt, name)) { + parentScope.get()->writeVar(cxt, name, var); + } + } + vars.insert_or_assign(name, var); + } +}; + +/** + * Arguments passed to a function and also results returned from a function + */ +class Args { +public: + std::vector args; + static Args toArgs(Context& cxt, std::any input) { + Args args; + if (input.type() == typeid(Identifier)) { + auto id = any_cast(input); + auto value = cxt.currentScope->getVar(cxt, id.identifier); + args.args.push_back(value); + } + return args; + } +}; + +// Function - captured scope, ast ptr OR native function +class Func { +public: + std::shared_ptr capturedScope; + // ast ptr + std::function nativeFunction; + +public: + Func(std::function nativeFunction) + : capturedScope(nullptr), nativeFunction(nativeFunction) {} + + void execute(Context& cxt, Args& args) { + // TODO implement AST executer + nativeFunction(cxt, args); + } +}; + +class BuiltinFunctions { +public: + static void print(Context& cxt, Args& args) { + for (int i=0; i(var) << '\t'; + } else { + cxt.error("Attempt to print unsupported type"); + } + } + std::cout << std::endl; + } +}; + +std::any eval(Context &cxt, peg::Ast& ast) { + const auto &nodes = ast.nodes; + if (ast.name == "Root") { + for (int i=0; i(eval(cxt, *nodes[0])); + auto value = eval(cxt, *nodes[1]); + cxt.currentScope->writeVar(cxt, identifier.identifier, value); + return Nil(); + } else if (ast.name == "IDENTIFIER" || ast.name == "BUILTINIDENTIFIER") { + return Identifier(ast.token_to_string()); + } else if (ast.name == "INTEGER") { + // TODO parse hex, oct, and binary + return ast.token_to_number(); + } else if (ast.name == "BUILTINCALL" ) { + auto identifier = any_cast(eval(cxt, *nodes[0])); + auto args = Args::toArgs(cxt, eval(cxt, *nodes[1])); + cxt.builtins.at(identifier.identifier).execute(cxt, args); + } + + std::cout << "AST Name: " << ast.name << std::endl; + for (int i=0; i ast; + + if (parser.parse(s, ast)) { + ast = parser.optimize_ast(ast); + std::cout << ast_to_s(ast) << std::endl; + + Context cxt; + + // init new scope + std::shared_ptr scope(new Scope(nullptr)); + + // init builtins + cxt.builtins.emplace("print", Func(BuiltinFunctions::print)); + cxt.currentScope = scope; + + eval(cxt, *ast); + + return 0; + } + + std::cout << "syntax error..." << std::endl; + + return 1; } \ No newline at end of file