29 #include "../base/vector.h" 47 : name(), rule_ids(), id(0)
48 , first(
Lexer::MaxTokenID()), follow(
Lexer::MaxTokenID()), nullable(false) { ; }
56 ParseRule(
size_t sid) : symbol_id(sid), pattern() { ; }
69 template <
typename T,
typename... EXTRAS>
71 new_pattern.
push_back( GetID((
size_t) std::forward<T>(arg)) );
72 BuildRule(new_pattern, std::forward<EXTRAS>(extras)...);
76 int GetSymbolPos(
const std::string &
name)
const {
77 for (
size_t i = 0; i < symbols.
size(); i++) {
78 if (symbols[i].name == name)
return (
int) i;
84 int GetIDPos(
size_t id)
const {
90 size_t AddSymbol(
const std::string & name) {
93 new_symbol.
id = cur_symbol_id++;
94 const size_t out_pos = symbols.
size();
101 : lexer(in_lexer), symbols(), rules()
102 , cur_symbol_id(in_lexer.MaxTokenID()), active_pos(0) { ; }
111 size_t GetID(
const std::string & name) {
112 int spos = GetSymbolPos(name);
113 if (spos >= 0)
return symbols[(size_t)spos].
id;
118 size_t new_spos = AddSymbol(name);
119 return symbols[new_spos].id;
125 const size_t spos = symbol_id - lexer.
MaxTokenID();
126 return symbols[spos].name;
131 active_pos = GetSymbolPos(name);
132 if (active_pos == -1) active_pos = (int) AddSymbol(name);
138 size_t pos = (size_t) GetSymbolPos( name );
143 template <
typename... STATES>
145 emp_assert(active_pos >= 0 && active_pos < (
int) symbols.
size(), active_pos);
147 auto rule_id = rules.
size();
150 BuildRule(rules.
back().pattern, states...);
151 if (rules.
back().pattern.size() == 0) symbols[(
size_t)active_pos].nullable =
true;
156 template <
typename... STATES>
157 size_t AddRule(
const std::string & name, STATES &&... states) {
158 const size_t id = GetID(name);
159 active_pos = GetSymbolPos(name);
160 Rule(std::forward<STATES>(states)...);
165 void Process(std::istream & is,
bool test_valid=
true) {
174 bool progress =
true;
178 for (
auto & r : rules) {
179 auto & s = symbols[r.symbol_id];
180 if (s.nullable)
continue;
183 bool cur_nullable =
true;
184 for (
size_t sid : r.pattern) {
185 int pos = GetIDPos(sid);
186 if (pos < 0 || symbols[(
size_t)pos].
nullable ==
false) { cur_nullable =
false;
break; }
188 if (cur_nullable) { s.nullable =
true; progress =
true;
break; }
202 void Print(std::ostream & os=std::cout)
const {
203 os << symbols.
size() <<
" parser symbols available." <<
std::endl;
204 for (
const auto & s : symbols) {
205 os <<
"symbol '" << s.name <<
"' (id " << s.id <<
") has " 206 << s.rule_ids.size() <<
" patterns.";
207 if (s.nullable) os <<
" [NULLABLE]";
209 for (
size_t rid : s.rule_ids) {
212 if (p.
size() == 0) os <<
" [empty]";
213 for (
size_t x : p) os <<
" " << GetName(x) <<
"(" << x <<
")";
void Process(std::istream &is, bool test_valid=true)
Convert an input stream into a parse tree (TO FINISH!)
Definition: Parser.h:165
size_t id
What is the unique ID of this symbol?
Definition: Parser.h:40
emp::vector< size_t > pattern
The pattern that this rule is triggered by.
Definition: Parser.h:54
std::string GetName(size_t symbol_id) const
Conversion ot a sybol ID to its name.
Definition: Parser.h:123
static bool TokenOK(size_t id)
Definition: Lexer.h:75
ParseSymbol()
Definition: Parser.h:46
A drop-in replacement for std::vector<bool>, but with extra bitwise logic features.
Definition: BitVector.h:39
A drop-in replacement for std::vector<bool>, with additional bitwise logic features.
void push_back(PB_Ts &&...args)
Definition: vector.h:189
A lexer with a set of token types (and associated regular expressions)
Definition: Lexer.h:64
A rule for how parsing should work.
Definition: ConfigParser.h:21
size_t GetID(const std::string &name)
Converstion of a symbol name to its ID.
Definition: Parser.h:111
ParseRule(size_t sid)
Definition: Parser.h:56
static constexpr size_t MaxTokenID()
How many total token types are allowed in this lexer?
Definition: Lexer.h:93
size_t size() const
Definition: vector.h:151
void emplace_back(ARGS &&...args)
Definition: vector.h:219
emp::BitVector first
What tokens can begin this symbol?
Definition: Parser.h:42
Parser & operator()(const std::string &name)
Provide a symbol to the compiler and set it as active.
Definition: Parser.h:130
static const PrintStr endl("<br>")
Pre-define emp::endl to insert a "<br>" and thus acting like a newline.
A general-purpose, fast lexer.
Parser(Lexer &in_lexer)
Definition: Parser.h:100
bool nullable
Can this symbol be converted to nothing?
Definition: Parser.h:44
size_t AddRule(const std::string &name, STATES &&...states)
Specify the name of the symbol and add a rule to it, returning the symbol id.
Definition: Parser.h:157
void Print(std::ostream &os=std::cout) const
Print the current status of this parser (for debugging)
Definition: Parser.h:202
A single symbol in a grammer including the patterns that generate it.
Definition: Parser.h:37
~Parser()
Definition: Parser.h:103
If we are in emscripten, make sure to include the header.
Definition: array.h:37
std::string name
Unique name for this parse symbol.
Definition: Parser.h:38
std::string GetTokenName(size_t id) const
Get the name associated with a token type (you provide the ID)
Definition: Lexer.h:104
Lexer & GetLexer()
Definition: Parser.h:105
emp::BitVector follow
What tokens can come after this symbol?
Definition: Parser.h:43
size_t symbol_id
The ID of the symbol that this rule should simplify to.
Definition: Parser.h:53
#define emp_assert(...)
Definition: assert.h:199
T & back()
Definition: vector.h:183
size_t GetID(size_t id) const
Trivial conversions of ID to ID...
Definition: Parser.h:108
ParseSymbol & GetParseSymbol(const std::string &name)
Get the parser symbol information associated with a provided name.
Definition: Parser.h:137
Full information about a parser, including a lexer, symbols, and rules.
Definition: Parser.h:60
Parser & Rule(STATES...states)
Use the currently active symbol and attach a rule to it.
Definition: Parser.h:144
size_t GetTokenID(const std::string &name) const
Get the ID associated with a token type (you provide the token name)
Definition: Lexer.h:96
emp::vector< size_t > rule_ids
Which rules apply to this symbol?
Definition: Parser.h:39