commit 7789e2863115fc06a01c329574f692dfab0f1016 Author: zuckerberg Date: Wed Jul 7 15:52:42 2021 -0400 Initial grammar diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1899660 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +build +.vscode \ No newline at end of file diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 0000000..86a9430 --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,24 @@ +cmake_minimum_required(VERSION 3.7) + +project(dpro) + +set(THIRD_PARTY_INCLUDE_DIRS third_party) +set(SRC_DIRS src) + +set(CMAKE_CXX_STANDARD 17) +file(GLOB SRCS ${SRC_DIRS}/*.cpp ${SRC_DIRS}/*.h) + +add_executable(${CMAKE_PROJECT_NAME} ${SRCS}) + +# find_package(GLEW REQUIRED) +# find_package(SDL2 REQUIRED) + +set(THREADS_PREFER_PTHREAD_FLAG ON) +find_package(Threads) + +if(CMAKE_SYSTEM_NAME STREQUAL "Linux") + set(add_link_deps Threads::Threads) +endif() + +include_directories(${THIRD_PARTY_INCLUDE_DIRS}) +target_link_libraries(${CMAKE_PROJECT_NAME} ${add_link_deps}) \ No newline at end of file diff --git a/default.nix b/default.nix new file mode 100644 index 0000000..84b3888 --- /dev/null +++ b/default.nix @@ -0,0 +1,25 @@ +{ pkgs ? import <nixpkgs> { } }: +pkgs.stdenv.mkDerivation rec { + pname = "dpro"; + version = "0.1.0"; + + src = ./.; + + buildInputs = [ + pkgs.cmake + pkgs.gdb + ]; + + configurePhase = '' + cmake . + ''; + + buildPhase = '' + make + ''; + + installPhase = '' + mkdir -p $out/bin + mv dpro $out/bin + ''; +} diff --git a/src/grammar.peg b/src/grammar.peg new file mode 100644 index 0000000..83d461f --- /dev/null +++ b/src/grammar.peg @@ -0,0 +1,319 @@ +Root <- skip Statement* eof + +Fn <- KEYWORD_fn LPAREN ParamDeclList RPAREN Block + +VarDecl <- IDENTIFIER (EQUAL Expr)? SEMICOLON + +# *** Block Level *** +Statement + <- VarDecl + / IfStatement + / LabeledStatement + / AssignExpr SEMICOLON + +IfStatement + <- IfPrefix BlockExpr ( KEYWORD_else Payload? Statement )? + / IfPrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? 
Statement ) + +LabeledStatement <- BlockLabel? (Block / LoopStatement) + +LoopStatement <- ForStatement / WhileStatement + +ForStatement + <- ForPrefix BlockExpr ( KEYWORD_else Statement )? + / ForPrefix AssignExpr ( SEMICOLON / KEYWORD_else Statement ) + +WhileStatement + <- WhilePrefix BlockExpr ( KEYWORD_else Payload? Statement )? + / WhilePrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement ) + +BlockExpr <- BlockLabel? Block + +# *** Expression Level (rules ordered from loosest-binding to tightest-binding) *** +AssignExpr <- Expr (AssignOp Expr)? + +Expr <- BoolOrExpr + +BoolOrExpr <- BoolAndExpr (KEYWORD_or BoolAndExpr)* + +BoolAndExpr <- CompareExpr (KEYWORD_and CompareExpr)* + +CompareExpr <- BitwiseExpr (CompareOp BitwiseExpr)? + +BitwiseExpr <- BitShiftExpr (BitwiseOp BitShiftExpr)* + +BitShiftExpr <- AdditionExpr (BitShiftOp AdditionExpr)* + +AdditionExpr <- MultiplyExpr (AdditionOp MultiplyExpr)* + +MultiplyExpr <- PrefixExpr (MultiplyOp PrefixExpr)* + +PrefixExpr <- PrefixOp* PrimaryExpr + +PrimaryExpr + <- IfExpr + / KEYWORD_break BreakLabel? Expr? + / KEYWORD_continue BreakLabel? + / KEYWORD_return Expr? + / BlockLabel? LoopExpr + / Block + / SuffixExpr + +IfExpr <- IfPrefix Expr (KEYWORD_else Payload? Expr)? + +Block <- LBRACE Statement* RBRACE + +LoopExpr <- ForExpr / WhileExpr + +ForExpr <- ForPrefix Expr (KEYWORD_else Expr)? + +WhileExpr <- WhilePrefix Expr (KEYWORD_else Payload? Expr)? + +SuffixExpr + <- PrimaryTypeExpr (SuffixOp / FnCallArguments)* + +PrimaryTypeExpr + <- BUILTINIDENTIFIER FnCallArguments + / CHAR_LITERAL + / DOT IDENTIFIER + / FLOAT + / Fn + / GroupedExpr + / IDENTIFIER + / INTEGER + / KEYWORD_false + / KEYWORD_nil + / KEYWORD_true + / STRINGLITERAL + +GroupedExpr <- LPAREN Expr RPAREN + +# *** Helper grammar *** +BreakLabel <- COLON IDENTIFIER + +BlockLabel <- IDENTIFIER COLON + +WhileContinueExpr <- COLON LPAREN AssignExpr RPAREN + +# Control flow prefixes: keyword, parenthesised condition, then a capture payload (optional for if/while) +IfPrefix <- KEYWORD_if LPAREN Expr RPAREN PtrPayload? 
+ +WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr? + +ForPrefix <- KEYWORD_for LPAREN Expr RPAREN PtrIndexPayload + +# Payloads: identifiers captured between pipes, optionally prefixed by an asterisk and, for loops over a range, followed by a second identifier +Payload <- PIPE IDENTIFIER PIPE + +PtrPayload <- PIPE ASTERISK? IDENTIFIER PIPE + +PtrIndexPayload <- PIPE ASTERISK? IDENTIFIER (COMMA IDENTIFIER)? PIPE + +# Operators +AssignOp + <- ASTERISKEQUAL + / PERCENTEQUAL + / PLUSEQUAL + / MINUSEQUAL + / AMPERSANDEQUAL + / CARETEQUAL + / PIPEEQUAL + / ASTERISKPERCENTEQUAL + / EQUAL + +CompareOp + <- EQUALEQUAL + / EXCLAMATIONMARKEQUAL + / LARROW + / RARROW + / LARROWEQUAL + / RARROWEQUAL + +BitwiseOp + <- AMPERSAND + / CARET + / PIPE + +BitShiftOp + <- LARROW2 + / RARROW2 + +AdditionOp + <- PLUS + / MINUS + / PLUS2 + +MultiplyOp + <- ASTERISK + / SLASH + / PERCENT + / ASTERISK2 + / ASTERISKPERCENT + +PrefixOp + <- EXCLAMATIONMARK + / MINUS + / AMPERSAND + +SuffixOp + <- LBRACKET Expr RBRACKET + / DOT IDENTIFIER + +FnCallArguments <- LPAREN ExprList RPAREN + +# Lists (comma-separated; may be empty and may end with a trailing comma) +ExprList <- (Expr COMMA)* Expr? + +ParamDeclList <- (IDENTIFIER COMMA)* IDENTIFIER? + +# *** Tokens *** +INTEGER + <- "0b" bin_int skip + / "0o" oct_int skip + / "0x" hex_int skip + / dec_int skip + +IDENTIFIER + <- !keyword [A-Za-z_] [A-Za-z0-9_]* skip + / "@\"" string_char* "\"" skip +BUILTINIDENTIFIER <- "@"[A-Za-z_][A-Za-z0-9_]* skip + +CHAR_LITERAL <- "'" char_char "'" skip +FLOAT + <- "0x" hex_int "." hex_int ([pP] [-+]? dec_int)? skip + / dec_int "." dec_int ([eE] [-+]? dec_int)? skip + / "0x" hex_int "."? [pP] [-+]? dec_int skip + / dec_int "."? [eE] [-+]? dec_int skip + +STRINGLITERAL + <- STRINGLITERALSINGLE + / (line_string skip)+ +STRINGLITERALSINGLE <- "\"" string_char* "\"" skip + +eof <- !. +bin <- [01] +bin_ <- '_'? bin +oct <- [0-7] +oct_ <- '_'? oct +hex <- [0-9a-fA-F] +hex_ <- '_'? hex +dec <- [0-9] +dec_ <- '_'? 
dec + +bin_int <- bin bin_* +oct_int <- oct oct_* +dec_int <- dec dec_* +hex_int <- hex hex_* + +ox80_oxBF <- [\200-\277] +oxF4 <- '\364' +ox80_ox8F <- [\200-\217] +oxF1_oxF3 <- [\361-\363] +oxF0 <- '\360' +ox90_0xBF <- [\220-\277] +oxEE_oxEF <- [\356-\357] +oxED <- '\355' +ox80_ox9F <- [\200-\237] +oxE1_oxEC <- [\341-\354] +oxE0 <- '\340' +oxA0_oxBF <- [\240-\277] +oxC2_oxDF <- [\302-\337] + +# From https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/ + +mb_utf8_literal <- + oxF4 ox80_ox8F ox80_oxBF ox80_oxBF + / oxF1_oxF3 ox80_oxBF ox80_oxBF ox80_oxBF + / oxF0 ox90_0xBF ox80_oxBF ox80_oxBF + / oxEE_oxEF ox80_oxBF ox80_oxBF + / oxED ox80_ox9F ox80_oxBF + / oxE1_oxEC ox80_oxBF ox80_oxBF + / oxE0 oxA0_oxBF ox80_oxBF + / oxC2_oxDF ox80_oxBF + +ascii_char_not_nl_slash_squote <- [\000-\011\013-\046\050-\133\135-\177] + +char_escape + <- "\\x" hex hex + / "\\u{" hex+ "}" + / "\\" [nr\\t'"] +char_char + <- mb_utf8_literal + / char_escape + / ascii_char_not_nl_slash_squote + +string_char + <- char_escape + / [^\\"\n] + +line_comment <- '//' ![!/][^\n]* / '////' [^\n]* +line_string <- ("\\\\" [^\n]* [ \n]*)+ +skip <- ([ \n] / line_comment)* + + +AMPERSAND <- '&' ![=] skip +AMPERSANDEQUAL <- '&=' skip +ASTERISK <- '*' ![*%=] skip +ASTERISK2 <- '**' skip +ASTERISKEQUAL <- '*=' skip +ASTERISKPERCENT <- '*%' ![=] skip +ASTERISKPERCENTEQUAL <- '*%=' skip +CARET <- '^' ![=] skip +CARETEQUAL <- '^=' skip +COLON <- ':' skip +COMMA <- ',' skip +DOT <- '.' ![*.?] skip +EQUAL <- '=' ![>=] skip +EQUALEQUAL <- '==' skip +EXCLAMATIONMARK <- '!' 
![=] skip +EXCLAMATIONMARKEQUAL <- '!=' skip +LARROW <- '<' ![<=] skip +LARROW2 <- '<<' skip +LARROWEQUAL <- '<=' skip +LBRACE <- '{' skip +LBRACKET <- '[' skip +LPAREN <- '(' skip +MINUS <- '-' ![=>] skip +MINUSEQUAL <- '-=' skip +PERCENT <- '%' ![=] skip +PERCENTEQUAL <- '%=' skip +PIPE <- '|' ![|=] skip +PIPEEQUAL <- '|=' skip +PLUS <- '+' ![+=] skip +PLUS2 <- '++' skip +PLUSEQUAL <- '+=' skip +RARROW <- '>' ![>=] skip +RARROW2 <- '>>' skip +RARROWEQUAL <- '>=' skip +RBRACE <- '}' skip +RBRACKET <- ']' skip +RPAREN <- ')' skip +SEMICOLON <- ';' skip +SLASH <- '/' ![=] skip + +end_of_word <- ![a-zA-Z0-9_] skip +KEYWORD_and <- 'and' end_of_word +KEYWORD_break <- 'break' end_of_word +KEYWORD_continue <- 'continue' end_of_word +KEYWORD_else <- 'else' end_of_word +KEYWORD_export <- 'export' end_of_word +KEYWORD_false <- 'false' end_of_word +KEYWORD_fn <- 'fn' end_of_word +KEYWORD_for <- 'for' end_of_word +KEYWORD_if <- 'if' end_of_word +KEYWORD_nil <- 'nil' end_of_word +KEYWORD_or <- 'or' end_of_word +KEYWORD_return <- 'return' end_of_word +KEYWORD_test <- 'test' end_of_word +KEYWORD_true <- 'true' end_of_word +KEYWORD_while <- 'while' end_of_word + +keyword <- KEYWORD_and + / KEYWORD_break + / KEYWORD_continue / KEYWORD_else + / KEYWORD_export + / KEYWORD_false / KEYWORD_fn / KEYWORD_for / KEYWORD_if + / KEYWORD_nil / KEYWORD_or + / KEYWORD_return + / KEYWORD_test / KEYWORD_true + / KEYWORD_while \ No newline at end of file diff --git a/src/main.cpp b/src/main.cpp new file mode 100644 index 0000000..fca8d2c --- /dev/null +++ b/src/main.cpp @@ -0,0 +1,53 @@ +#include +#include +#include + +using namespace peg; +using namespace std; + +int main(void) { + // (2) Make a parser + parser parser(R"( + # Grammar for Calculator... 
+ Additive <- Multitive '+' Additive / Multitive + Multitive <- Primary '*' Multitive / Primary + Primary <- '(' Additive ')' / Number + Number <- < [0-9]+ > + %whitespace <- [ \t]* + )"); + + assert(static_cast<bool>(parser) == true); + + // (3) Setup actions + parser["Additive"] = [](const SemanticValues &vs) { + switch (vs.choice()) { + case 0: // "Multitive '+' Additive" + return any_cast<int>(vs[0]) + any_cast<int>(vs[1]); + default: // "Multitive" + return any_cast<int>(vs[0]); + } + }; + + parser["Multitive"] = [](const SemanticValues &vs) { + switch (vs.choice()) { + case 0: // "Primary '*' Multitive" + return any_cast<int>(vs[0]) * any_cast<int>(vs[1]); + default: // "Primary" + return any_cast<int>(vs[0]); + } + }; + + parser["Number"] = [](const SemanticValues &vs) { + return vs.token_to_number<int>(); + }; + + // (4) Parse + parser.enable_packrat_parsing(); // Enable packrat parsing. + + int val = 0; // initialised so a failed parse cannot leave it indeterminate + parser.parse(" (1 + 2) * 3 ", val); + + assert(val == 9); + + std::cout << "hello" << std::endl; +} \ No newline at end of file diff --git a/third_party/cpp-peglib b/third_party/cpp-peglib new file mode 160000 index 0000000..4109480 --- /dev/null +++ b/third_party/cpp-peglib @@ -0,0 +1 @@ +Subproject commit 4109480a0cb2d6067c8b123b09da6e277a11599d