Initial grammar

This commit is contained in:
zuckerberg 2021-07-07 15:52:42 -04:00
commit 7789e28631
6 changed files with 424 additions and 0 deletions

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
# Presumably the out-of-tree CMake build directory -- confirm against the local workflow.
build
# Per-user Visual Studio Code workspace settings.
.vscode

24
CMakeLists.txt Normal file
View File

@ -0,0 +1,24 @@
# Build script for the dpro executable.
#
# 3.8 is the first CMake release that accepts CXX_STANDARD 17, so it is the
# real minimum for this file. The upper bound opts in to newer policy defaults
# when a newer CMake is used.
cmake_minimum_required(VERSION 3.8...3.29)
project(dpro LANGUAGES CXX)

# Project-wide language level. STANDARD_REQUIRED turns an unsupported standard
# into a configure-time error instead of a silent fallback to an older one.
set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

# Sources are listed explicitly: a glob is only evaluated at configure time,
# so newly added files would be missed until CMake is re-run.
add_executable(${PROJECT_NAME}
  src/main.cpp
)

# third_party holds the cpp-peglib submodule; main.cpp includes
# <cpp-peglib/peglib.h> relative to this directory.
target_include_directories(${PROJECT_NAME} PRIVATE
  "${CMAKE_CURRENT_SOURCE_DIR}/third_party"
)

# The thread library is only linked on Linux, as before. REQUIRED makes a
# missing thread library fail here with a clear message rather than later
# through an unresolved Threads::Threads target.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
  set(THREADS_PREFER_PTHREAD_FLAG ON)
  find_package(Threads REQUIRED)
  target_link_libraries(${PROJECT_NAME} PRIVATE Threads::Threads)
endif()

25
default.nix Normal file
View File

@ -0,0 +1,25 @@
# Nix derivation for dpro: configures with CMake in the source directory,
# builds with make, and installs the resulting binary into $out/bin.
{ pkgs ? import <nixpkgs> { } }:

pkgs.stdenv.mkDerivation {
  pname = "dpro";
  version = "0.1.0";

  src = ./.;

  # Programs that run on the build machine go in nativeBuildInputs so the
  # derivation also behaves correctly when cross-compiling.
  # NOTE(review): gdb is not used by any phase below; presumably it is here
  # for debugging inside nix-shell -- confirm.
  nativeBuildInputs = [
    pkgs.cmake
    pkgs.gdb
  ];

  # The phases are overridden explicitly, so the pre/post hooks are invoked
  # by hand to keep overrides via overrideAttrs working.
  configurePhase = ''
    runHook preConfigure
    cmake .
    runHook postConfigure
  '';

  buildPhase = ''
    runHook preBuild
    make
    runHook postBuild
  '';

  installPhase = ''
    runHook preInstall
    mkdir -p "$out/bin"
    mv dpro "$out/bin"
    runHook postInstall
  '';
}

319
src/grammar.peg Normal file
View File

@ -0,0 +1,319 @@
# Entry point: leading whitespace/comments, then any number of statements,
# then end of input.
Root <- skip Statement* eof
# Function literal: `fn (params) { ... }`. The rule has no name slot; it is
# reached through PrimaryTypeExpr.
Fn <- KEYWORD_fn LPAREN ParamDeclList RPAREN Block
# An identifier, optionally followed by `= Expr`, terminated by `;`.
VarDecl <- IDENTIFIER (EQUAL Expr)? SEMICOLON
# *** Block Level ***
# A single statement. Alternatives are tried in order, so VarDecl is attempted
# before the general `AssignExpr ;` form.
Statement
<- VarDecl
/ IfStatement
/ LabeledStatement
/ AssignExpr SEMICOLON
# `if` in statement position. A block body needs no terminator and may be
# followed by `else`; an expression body must be followed by `;` or `else`.
IfStatement
<- IfPrefix BlockExpr ( KEYWORD_else Payload? Statement )?
/ IfPrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement )
# A block or a loop, optionally preceded by `label:`.
LabeledStatement <- BlockLabel? (Block / LoopStatement)
LoopStatement <- ForStatement / WhileStatement
# Same block-body / expression-body split as IfStatement; the `else` branch of
# a `for` takes no payload.
ForStatement
<- ForPrefix BlockExpr ( KEYWORD_else Statement )?
/ ForPrefix AssignExpr ( SEMICOLON / KEYWORD_else Statement )
# Same split again; the `else` branch of a `while` may carry a `|name|` payload.
WhileStatement
<- WhilePrefix BlockExpr ( KEYWORD_else Payload? Statement )?
/ WhilePrefix AssignExpr ( SEMICOLON / KEYWORD_else Payload? Statement )
# A block with an optional leading label.
BlockExpr <- BlockLabel? Block
# *** Expression Level ***
# An expression optionally followed by one assignment operator and a
# right-hand side; assignments do not chain.
AssignExpr <- Expr (AssignOp Expr)?
# Binary operators, loosest binding first: or, and, comparison, bitwise,
# shift, additive, multiplicative. Each level repeats with `*`, except
# CompareExpr, which allows at most one comparison operator.
Expr <- BoolOrExpr
BoolOrExpr <- BoolAndExpr (KEYWORD_or BoolAndExpr)*
BoolAndExpr <- CompareExpr (KEYWORD_and CompareExpr)*
CompareExpr <- BitwiseExpr (CompareOp BitwiseExpr)?
BitwiseExpr <- BitShiftExpr (BitwiseOp BitShiftExpr)*
BitShiftExpr <- AdditionExpr (BitShiftOp AdditionExpr)*
AdditionExpr <- MultiplyExpr (AdditionOp MultiplyExpr)*
MultiplyExpr <- PrefixExpr (MultiplyOp PrefixExpr)*
# Any number of prefix operators applied to a primary expression.
PrefixExpr <- PrefixOp* PrimaryExpr
# Control-flow expressions, blocks, and finally plain suffix expressions.
# Keyword-led alternatives come first; SuffixExpr is the fallback.
PrimaryExpr
<- IfExpr
/ KEYWORD_break BreakLabel? Expr?
/ KEYWORD_continue BreakLabel?
/ KEYWORD_return Expr?
/ BlockLabel? LoopExpr
/ Block
/ SuffixExpr
# Expression forms of if/for/while: the body is an Expr and `else` is optional.
IfExpr <- IfPrefix Expr (KEYWORD_else Payload? Expr)?
# Brace-delimited sequence of statements.
Block <- LBRACE Statement* RBRACE
LoopExpr <- ForExpr / WhileExpr
ForExpr <- ForPrefix Expr (KEYWORD_else Expr)?
WhileExpr <- WhilePrefix Expr (KEYWORD_else Payload? Expr)?
# A primary value followed by any mix of indexing, field access and calls.
SuffixExpr
<- PrimaryTypeExpr (SuffixOp / FnCallArguments)*
# Literals, identifiers, function literals and parenthesised expressions.
# FLOAT is listed before INTEGER, so under ordered choice a literal such as
# `1.5` is tried as a float before INTEGER could take just its leading digits.
PrimaryTypeExpr
<- BUILTINIDENTIFIER FnCallArguments
/ CHAR_LITERAL
/ DOT IDENTIFIER
/ FLOAT
/ Fn
/ GroupedExpr
/ IDENTIFIER
/ INTEGER
/ KEYWORD_false
/ KEYWORD_nil
/ KEYWORD_true
/ STRINGLITERAL
GroupedExpr <- LPAREN Expr RPAREN
# *** Helper grammar ***
# Label reference after break/continue: `:name`.
BreakLabel <- COLON IDENTIFIER
# Label definition in front of a block or loop: `name:`.
BlockLabel <- IDENTIFIER COLON
# Optional clause after a while condition: `: (AssignExpr)`.
WhileContinueExpr <- COLON LPAREN AssignExpr RPAREN
# Control flow prefixes
# Each prefix is the keyword plus a parenthesised condition/operand. The
# payload is optional for if/while and mandatory for `for`.
IfPrefix <- KEYWORD_if LPAREN Expr RPAREN PtrPayload?
WhilePrefix <- KEYWORD_while LPAREN Expr RPAREN PtrPayload? WhileContinueExpr?
ForPrefix <- KEYWORD_for LPAREN Expr RPAREN PtrIndexPayload
# Payloads
# `|name|`, `|*name|`, and `|*name, index|` capture forms respectively; the
# `*` and the second identifier are optional where shown.
Payload <- PIPE IDENTIFIER PIPE
PtrPayload <- PIPE ASTERISK? IDENTIFIER PIPE
PtrIndexPayload <- PIPE ASTERISK? IDENTIFIER (COMMA IDENTIFIER)? PIPE
# Operators
# Each rule groups the operator tokens accepted at one precedence level of the
# expression grammar.
# NOTE(review): there is no `/=` alternative here and no SLASHEQUAL token in
# this file, yet SLASH rejects a following `=`, so `/=` cannot be lexed at all
# -- confirm whether that is intended.
AssignOp
<- ASTERISKEQUAL
/ PERCENTEQUAL
/ PLUSEQUAL
/ MINUSEQUAL
/ AMPERSANDEQUAL
/ CARETEQUAL
/ PIPEEQUAL
/ ASTERISKPERCENTEQUAL
/ EQUAL
# Equality and ordering comparisons.
CompareOp
<- EQUALEQUAL
/ EXCLAMATIONMARKEQUAL
/ LARROW
/ RARROW
/ LARROWEQUAL
/ RARROWEQUAL
BitwiseOp
<- AMPERSAND
/ CARET
/ PIPE
BitShiftOp
<- LARROW2
/ RARROW2
# `+`, `-` and `++`. PLUS itself refuses to match when another `+` follows,
# which is what keeps the later PLUS2 alternative reachable.
AdditionOp
<- PLUS
/ MINUS
/ PLUS2
# `*`, `/`, `%`, `**` and `*%`. ASTERISK refuses to match before `*` or `%`,
# which keeps the longer alternatives reachable.
MultiplyOp
<- ASTERISK
/ SLASH
/ PERCENT
/ ASTERISK2
/ ASTERISKPERCENT
# Unary operators accepted by PrefixExpr: `!`, `-`, `&`.
PrefixOp
<- EXCLAMATIONMARK
/ MINUS
/ AMPERSAND
# Postfix forms accepted by SuffixExpr: `[expr]` indexing and `.name` access.
SuffixOp
<- LBRACKET Expr RBRACKET
/ DOT IDENTIFIER
# Parenthesised argument list of a call.
FnCallArguments <- LPAREN ExprList RPAREN
# Lists
# Comma-separated items. Both lists may be empty, and because every item but
# the last is written as `item ,`, a trailing comma is accepted.
ExprList <- (Expr COMMA)* Expr?
ParamDeclList <- (IDENTIFIER COMMA)* IDENTIFIER?
# *** Tokens ***
# Every token rule ends with `skip`, so a token consumes the whitespace and
# comments that follow it.
# Integer literal: binary (0b), octal (0o), hexadecimal (0x) or decimal.
INTEGER
<- "0b" bin_int skip
/ "0o" oct_int skip
/ "0x" hex_int skip
/ dec_int skip
# Either a word that is not a keyword, or the quoted form `@"..."`.
IDENTIFIER
<- !keyword [A-Za-z_] [A-Za-z0-9_]* skip
/ "@\"" string_char* "\"" skip
# `@name`; only used in front of call arguments (see PrimaryTypeExpr).
BUILTINIDENTIFIER <- "@"[A-Za-z_][A-Za-z0-9_]* skip
# A single character or escape between single quotes.
CHAR_LITERAL <- "'" char_char "'" skip
# Float literal, four forms in order: hex with fraction and optional [pP]
# exponent; decimal with fraction and optional [eE] exponent; hex with a
# mandatory exponent; decimal with a mandatory exponent.
FLOAT
<- "0x" hex_int "." hex_int ([pP] [-+]? dec_int)? skip
/ dec_int "." dec_int ([eE] [-+]? dec_int)? skip
/ "0x" hex_int "."? [pP] [-+]? dec_int skip
/ dec_int "."? [eE] [-+]? dec_int skip
# Either one double-quoted string or one or more `\\`-prefixed line strings.
STRINGLITERAL
<- STRINGLITERALSINGLE
/ (line_string skip)+
STRINGLITERALSINGLE <- "\"" string_char* "\"" skip
# End of input: succeeds only when no character is left.
eof <- !.
# Digit classes. The `_`-suffixed variants accept one optional underscore in
# front of a digit; the *_int rules use them for every digit after the first,
# so `1_000` matches while a leading or doubled underscore does not.
bin <- [01]
bin_ <- '_'? bin
oct <- [0-7]
oct_ <- '_'? oct
hex <- [0-9a-fA-F]
hex_ <- '_'? hex
dec <- [0-9]
dec_ <- '_'? dec
bin_int <- bin bin_*
oct_int <- oct oct_*
dec_int <- dec dec_*
hex_int <- hex hex_*
# Byte classes used by mb_utf8_literal. The values are written as octal
# escapes; each rule name spells the same range in hexadecimal
# (e.g. \200-\277 is 0x80-0xBF).
ox80_oxBF <- [\200-\277]
oxF4 <- '\364'
ox80_ox8F <- [\200-\217]
oxF1_oxF3 <- [\361-\363]
oxF0 <- '\360'
ox90_0xBF <- [\220-\277]
oxEE_oxEF <- [\356-\357]
oxED <- '\355'
ox80_ox9F <- [\200-\237]
oxE1_oxEC <- [\341-\354]
oxE0 <- '\340'
oxA0_oxBF <- [\240-\277]
oxC2_oxDF <- [\302-\337]
# From https://lemire.me/blog/2018/05/09/how-quickly-can-you-check-that-a-string-is-valid-unicode-utf-8/
# One multi-byte UTF-8 sequence: a lead byte followed by one to three
# continuation bytes. Four-byte forms are tried first, the two-byte form last.
# The restricted second-byte ranges after 0xE0, 0xED, 0xF0 and 0xF4 presumably
# follow the validity table from the linked article -- confirm.
mb_utf8_literal <-
oxF4 ox80_ox8F ox80_oxBF ox80_oxBF
/ oxF1_oxF3 ox80_oxBF ox80_oxBF ox80_oxBF
/ oxF0 ox90_0xBF ox80_oxBF ox80_oxBF
/ oxEE_oxEF ox80_oxBF ox80_oxBF
/ oxED ox80_ox9F ox80_oxBF
/ oxE1_oxEC ox80_oxBF ox80_oxBF
/ oxE0 oxA0_oxBF ox80_oxBF
/ oxC2_oxDF ox80_oxBF
# Any 7-bit ASCII byte except newline (\012), single quote (\047) and
# backslash (\134). The three ranges stop just short of each excluded byte;
# a stray `-` between \046 and \050 previously let the class cover \047.
ascii_char_not_nl_slash_squote <- [\000-\011\013-\046\050-\133\135-\177]
# Escape sequences: `\xNN` (exactly two hex digits), `\u{...}` (one or more
# hex digits) and the single-character escapes \n \r \\ \t \' \".
char_escape
<- "\\x" hex hex
/ "\\u{" hex+ "}"
/ "\\" [nr\\t'"]
# Content of a character literal: one multi-byte UTF-8 sequence, one escape,
# or one plain ASCII character.
char_char
<- mb_utf8_literal
/ char_escape
/ ascii_char_not_nl_slash_squote
# Content of a double-quoted string: an escape, or any character other than
# backslash, double quote and newline.
string_char
<- char_escape
/ [^\\"\n]
# `//` comment running to end of line. `//!` and `///` are rejected by the
# lookahead, but a line starting with four slashes is accepted by the second
# alternative.
line_comment <- '//' ![!/][^\n]* / '////' [^\n]*
# One or more lines that each start with `\\`; spaces and newlines between
# consecutive lines are consumed.
line_string <- ("\\\\" [^\n]* [ \n]*)+
# Inter-token filler: spaces, newlines and line comments. Tabs are not
# included.
skip <- ([ \n] / line_comment)*
# Punctuation and operator tokens. Each rule matches its literal text and then
# consumes trailing whitespace/comments via `skip`. Where one operator is a
# prefix of a longer one, the shorter token carries a negative lookahead so it
# does not swallow the start of the longer token.
AMPERSAND <- '&' ![=] skip
AMPERSANDEQUAL <- '&=' skip
ASTERISK <- '*' ![*%=] skip
ASTERISK2 <- '**' skip
ASTERISKEQUAL <- '*=' skip
ASTERISKPERCENT <- '*%' ![=] skip
ASTERISKPERCENTEQUAL <- '*%=' skip
CARET <- '^' ![=] skip
CARETEQUAL <- '^=' skip
COLON <- ':' skip
COMMA <- ',' skip
DOT <- '.' ![*.?] skip
EQUAL <- '=' ![>=] skip
EQUALEQUAL <- '==' skip
EXCLAMATIONMARK <- '!' ![=] skip
EXCLAMATIONMARKEQUAL <- '!=' skip
LARROW <- '<' ![<=] skip
LARROW2 <- '<<' skip
LARROWEQUAL <- '<=' skip
LBRACE <- '{' skip
LBRACKET <- '[' skip
LPAREN <- '(' skip
MINUS <- '-' ![=>] skip
MINUSEQUAL <- '-=' skip
PERCENT <- '%' ![=] skip
PERCENTEQUAL <- '%=' skip
PIPE <- '|' ![|=] skip
PIPEEQUAL <- '|=' skip
PLUS <- '+' ![+=] skip
PLUS2 <- '++' skip
PLUSEQUAL <- '+=' skip
# Mirrors LARROW: must not match the first character of `>>` or `>=`.
# CompareOp lists RARROW before RARROWEQUAL, so without this lookahead the
# ordered choice would commit to `>` and `>=` could never be recognised.
RARROW <- '>' ![>=] skip
RARROW2 <- '>>' skip
RARROWEQUAL <- '>=' skip
RBRACE <- '}' skip
RBRACKET <- ']' skip
RPAREN <- ')' skip
SEMICOLON <- ';' skip
SLASH <- '/' ![=] skip
# A keyword must not be directly followed by an identifier character, so that
# e.g. `iffy` is not read as `if` + `fy`; trailing whitespace/comments are then
# consumed.
end_of_word <- ![a-zA-Z0-9_] skip
KEYWORD_and <- 'and' end_of_word
KEYWORD_break <- 'break' end_of_word
KEYWORD_continue <- 'continue' end_of_word
KEYWORD_else <- 'else' end_of_word
KEYWORD_export <- 'export' end_of_word
KEYWORD_false <- 'false' end_of_word
KEYWORD_fn <- 'fn' end_of_word
KEYWORD_for <- 'for' end_of_word
KEYWORD_if <- 'if' end_of_word
KEYWORD_nil <- 'nil' end_of_word
KEYWORD_or <- 'or' end_of_word
KEYWORD_return <- 'return' end_of_word
KEYWORD_test <- 'test' end_of_word
KEYWORD_true <- 'true' end_of_word
KEYWORD_while <- 'while' end_of_word
# The full reserved-word set; IDENTIFIER uses `!keyword` to refuse these.
# KEYWORD_export and KEYWORD_test are not referenced by any other rule shown
# in this file, so they are reserved without being used by the grammar.
keyword <- KEYWORD_and
/ KEYWORD_break
/ KEYWORD_continue / KEYWORD_else
/ KEYWORD_export
/ KEYWORD_false / KEYWORD_fn / KEYWORD_for / KEYWORD_if
/ KEYWORD_nil / KEYWORD_or
/ KEYWORD_return
/ KEYWORD_test / KEYWORD_true
/ KEYWORD_while

53
src/main.cpp Normal file
View File

@ -0,0 +1,53 @@
#include <cpp-peglib/peglib.h>
#include <assert.h>
#include <iostream>
using namespace peg;
using namespace std;
int main(void) {
// (2) Make a parser
parser parser(R"(
# Grammar for Calculator...
Additive <- Multitive '+' Additive / Multitive
Multitive <- Primary '*' Multitive / Primary
Primary <- '(' Additive ')' / Number
Number <- < [0-9]+ >
%whitespace <- [ \t]*
)");
assert(static_cast<bool>(parser) == true);
// (3) Setup actions
parser["Additive"] = [](const SemanticValues &vs) {
switch (vs.choice()) {
case 0: // "Multitive '+' Additive"
return any_cast<int>(vs[0]) + any_cast<int>(vs[1]);
default: // "Multitive"
return any_cast<int>(vs[0]);
}
};
parser["Multitive"] = [](const SemanticValues &vs) {
switch (vs.choice()) {
case 0: // "Primary '*' Multitive"
return any_cast<int>(vs[0]) * any_cast<int>(vs[1]);
default: // "Primary"
return any_cast<int>(vs[0]);
}
};
parser["Number"] = [](const SemanticValues &vs) {
return vs.token_to_number<int>();
};
// (4) Parse
parser.enable_packrat_parsing(); // Enable packrat parsing.
int val;
parser.parse(" (1 + 2) * 3 ", val);
assert(val == 9);
std::cout << "hello" << std::endl;
}

1
third_party/cpp-peglib vendored Submodule

@ -0,0 +1 @@
Subproject commit 4109480a0cb2d6067c8b123b09da6e277a11599d