11 #ifndef EMP_CONFIG_LEXER_H 12 #define EMP_CONFIG_LEXER_H 18 #include "../base/errors.h" 26 std::map<std::string, emp::Token> command_map;
30 std::string cur_lexeme;
39 std::map<std::string, Pattern> patterns;
// Constructor: binds the lexer to the input stream `in_stream` (stored in `is`),
// starts the counter for auto-assigned token ids at 256, and fills `command_map`
// with the keyword -> Token entries listed below.
// NOTE(review): 256 presumably keeps generated ids clear of single-character
// tokens (GetToken builds a Token directly from a raw character) - confirm.
// NOTE(review): this listing omits some original lines (see the embedded line
// numbers); the closing brace of the constructor is not shown here.
43 ConfigLexer(std::istream & in_stream) : is(in_stream), next_token_id(256) {
// Reserved words recognised by the hand-written identifier path in GetToken.
44 command_map[
"print"] =
Token(Token::COMMAND_PRINT);
45 command_map[
"include"] =
Token(Token::COMMAND_INCLUDE);
46 command_map[
"if"] =
Token(Token::COMMAND_IF);
47 command_map[
"else"] =
Token(Token::COMMAND_ELSE);
48 command_map[
"while"] =
Token(Token::COMMAND_WHILE);
49 command_map[
"break"] =
Token(Token::COMMAND_BREAK);
50 command_map[
"continue"] =
Token(Token::COMMAND_CONTINUE);
51 command_map[
"return"] =
Token(Token::COMMAND_RETURN);
52 command_map[
"function"] =
Token(Token::COMMAND_FUNCTION);
53 command_map[
"foreach"] =
Token(Token::COMMAND_FOREACH);
// The one built-in function name in the map; it uses a FUN_ id, not a COMMAND_ id.
54 command_map[
"random"] =
Token(Token::FUN_RANDOM);
// Register a named lexer pattern in `patterns`.
//   name    - key under which the pattern is stored; duplicates trigger a warning.
//   pattern - pattern text, stored as given.
//   id      - token id to use; 0 (the default) means pick the next free id.
//   ignore  - stored with the pattern; defaults to false.
// NOTE(review): original lines 67-68, 71 and 73 onward are missing from this
// listing, so the rest of the duplicate-name branch and the int return value
// are not visible here.
64 int AddPattern(
const std::string & name,
const std::string & pattern,
int id=0,
bool ignore=
false) {
// A pattern with this name already exists: warn the library user.
// NOTE(review): the message says the new pattern is ignored, so this branch
// presumably returns early on the lines not shown - confirm.
65 if (patterns.find(name) != patterns.end()) {
66 emp::LibraryWarning(
"Attempting to add multiple lexer patterns for '", name,
"'. Ignoring.");
// No explicit id supplied: take the next auto-assigned id and advance the counter.
69 if (!
id)
id = next_token_id++;
// Explicit id at or above the counter: move the counter past it so that later
// auto-assigned ids cannot collide with it.
70 else if (
id >= next_token_id) next_token_id =
id+1;
// Store (or overwrite) the entry keyed by name.
72 patterns[name] = { name, pattern, id, ignore };
// Registers the built-in patterns through AddPattern, each with a fixed Token id.
// NOTE(review): the cross-reference text at the end of this listing places
// AddDefaultPatterns() at original line 76, so these calls are presumably its
// body; the signature line and several pattern lines (for example 82, 84-86,
// 93, 96-102) are missing from this listing.
// Whitespace and comments are registered with ignore=true.
77 AddPattern(
"WHITESPACE",
"[ \t\r]", Token::WHITESPACE,
true);
78 AddPattern(
"COMMENT",
"#.*", Token::COMMENT,
true);
// Literals.
79 AddPattern(
"INT_LIT",
"[0-9]+", Token::INT_LIT);
// NOTE(review): the FLOAT_LIT pattern has an apostrophe between the dot and the
// fractional digits. This looks like a typo: the hand-written float path in
// GetToken accepts digits, a dot, then digits with no apostrophe. Confirm
// against the regex engine before changing the literal.
80 AddPattern(
"FLOAT_LIT",
"[0-9]+[.]'[0-9]+", Token::FLOAT_LIT);
81 AddPattern(
"CHAR_LIT",
"'(.|(\\\\[\\\\'nt]))'", Token::CHAR_LIT);
83 AddPattern(
"STRING_LIT",
"[\"](\\\\[nt\"\\]|[^\\\"])*\"", Token::STRING_LIT);
// Statement terminator: a newline or a semicolon.
87 AddPattern(
"ENDLINE",
"[\n;]", Token::ENDLINE);
// Compound assignment operators. Each operator is wrapped in escaped double
// quotes inside the pattern text.
// NOTE(review): presumably the regex engine treats a double-quoted run as a
// literal string, which would keep + and * from acting as operators - confirm.
88 AddPattern(
"CASSIGN_ADD",
"\"+=\"", Token::CASSIGN_ADD);
89 AddPattern(
"CASSIGN_SUB",
"\"-=\"", Token::CASSIGN_SUB);
90 AddPattern(
"CASSIGN_MULT",
"\"*=\"", Token::CASSIGN_MULT);
91 AddPattern(
"CASSIGN_DIV",
"\"/=\"", Token::CASSIGN_DIV);
92 AddPattern(
"CASSIGN_MOD",
"\"%=\"", Token::CASSIGN_MOD);
// Comparison operators (only two of them are visible in this listing).
94 AddPattern(
"COMP_NEQU",
"!=", Token::COMP_NEQU);
95 AddPattern(
"COMP_LESS",
"<", Token::COMP_LESS);
// Keywords: the same words and Token ids that the constructor puts in command_map.
103 AddPattern(
"COMMAND_PRINT",
"print", Token::COMMAND_PRINT);
104 AddPattern(
"COMMAND_INCLUDE",
"include", Token::COMMAND_INCLUDE);
105 AddPattern(
"COMMAND_IF",
"if", Token::COMMAND_IF);
106 AddPattern(
"COMMAND_ELSE",
"else", Token::COMMAND_ELSE);
107 AddPattern(
"COMMAND_WHILE",
"while", Token::COMMAND_WHILE);
108 AddPattern(
"COMMAND_FOREACH",
"foreach", Token::COMMAND_FOREACH);
109 AddPattern(
"COMMAND_BREAK",
"break", Token::COMMAND_BREAK);
110 AddPattern(
"COMMAND_CONTINUE",
"continue", Token::COMMAND_CONTINUE);
111 AddPattern(
"COMMAND_FUNCTION",
"function", Token::COMMAND_FUNCTION);
112 AddPattern(
"COMMAND_RETURN",
"return", Token::COMMAND_RETURN);
// Built-in function name.
115 AddPattern(
"FUN_RANDOM",
"random", Token::FUN_RANDOM);
// Hand-written tokenizer loop. Reads characters from `is` into next_char and
// returns one Token per call.
// NOTE(review): the cross-reference text at the end of this listing places
// GetToken() at original line 121, so this is presumably its body. The
// switch/case scaffolding, the branch conditions and many other lines are
// missing from this listing; the comments below describe only what is visible.
// NOTE(review): the loop runs while next_char is positive, which presumably
// stops at end of stream (get() yields a negative EOF value). The declared type
// of next_char is not visible; if it is a signed char, bytes of 0x80 and above
// would also end the loop - confirm.
122 while (next_char > 0) {
// Numeric literal: start the lexeme with the current character, then append
// every following digit.
124 cur_lexeme.resize(1);
125 cur_lexeme[0] = next_char;
126 while (
is_digit(next_char = is.get())) {
127 cur_lexeme.push_back(next_char);
// A dot after the integer part makes it a float: append the dot and any
// following digits. Zero fractional digits are accepted here, unlike the
// registered FLOAT_LIT pattern.
129 if (next_char ==
'.') {
130 cur_lexeme.push_back(
'.');
131 while (
is_digit(next_char = is.get())) {
132 cur_lexeme.push_back(next_char);
134 return Token(Token::FLOAT_LIT, cur_lexeme);
136 return Token(Token::INT_LIT, cur_lexeme);
// Identifier or keyword: collect identifier characters into the lexeme.
139 cur_lexeme.resize(1);
140 cur_lexeme[0] = next_char;
141 while (
is_idchar(next_char = is.get())) {
142 cur_lexeme.push_back(next_char);
// If the lexeme is a reserved word, return the Token stored for it. What is
// returned for other identifiers is on lines not shown here.
146 auto map_ptr = command_map.find(cur_lexeme);
147 if (map_ptr != command_map.end()) {
148 return map_ptr->second;
// Everything else: remember the current character and read one ahead, so the
// branches below can test for two-character operators.
156 char prev_char = next_char;
157 next_char = is.get();
// Skip the rest of the line, then step past the newline.
// NOTE(review): there is no end-of-stream check. If the input ends before a
// newline, get() keeps returning EOF and this loop would never terminate - confirm and guard.
161 while (next_char !=
'\n') next_char = is.get();
162 next_char = is.get();
// String literal: copy characters verbatim until the closing double quote,
// then step past it.
// NOTE(review): there is no end-of-stream check here either (an unterminated
// string would spin forever), and no escape handling, although the registered
// STRING_LIT pattern allows backslash escapes - confirm intended behavior.
165 cur_lexeme.resize(0);
166 next_char = is.get();
167 while (next_char !=
'\"') { cur_lexeme.push_back(next_char); next_char = is.get(); }
168 next_char = is.get();
169 return Token(Token::STRING_LIT, cur_lexeme);
176 return Token(Token::ENDLINE);
// Two-character operators. Each test checks whether the look-ahead character
// completes a compound operator and, if so, consumes it and returns the
// compound token. The case labels that select on the first character are on
// lines not shown here.
178 if (next_char ==
'=') { next_char = is.get();
return Token(Token::CASSIGN_ADD); }
181 if (next_char ==
'=') { next_char = is.get();
return Token(Token::CASSIGN_SUB); }
184 if (next_char ==
'=') { next_char = is.get();
return Token(Token::CASSIGN_MULT); }
187 if (next_char ==
'=') { next_char = is.get();
return Token(Token::CASSIGN_DIV); }
190 if (next_char ==
'=') { next_char = is.get();
return Token(Token::CASSIGN_MOD); }
193 if (next_char ==
'=') { next_char = is.get();
return Token(Token::COMP_EQU); }
// The two ordering comparisons fall back to their single-character token when
// no equals sign follows.
196 if (next_char ==
'=') { next_char = is.get();
return Token(Token::COMP_LTE); }
197 return Token(Token::COMP_LESS);
199 if (next_char ==
'=') { next_char = is.get();
return Token(Token::COMP_GTE); }
200 return Token(Token::COMP_GTR);
202 if (next_char ==
'=') { next_char = is.get();
return Token(Token::COMP_NEQU); }
// Doubled ampersand and doubled pipe form the boolean operators.
205 if (next_char ==
'&') { next_char = is.get();
return Token(Token::BOOL_AND); }
208 if (next_char ==
'|') { next_char = is.get();
return Token(Token::BOOL_OR); }
// Character literal: a backslash introduces an escape, and the character after
// it is translated to the value it denotes.
// NOTE(review): the r and double-quote escapes are accepted here but are not
// in the registered CHAR_LIT pattern - confirm which is intended.
211 if (next_char ==
'\\') {
212 next_char = is.get();
214 case 'n': next_char =
'\n';
break;
215 case 'r': next_char =
'\r';
break;
216 case 't': next_char =
'\t';
break;
217 case '\\': next_char =
'\\';
break;
218 case '\'': next_char =
'\'';
break;
219 case '\"': next_char =
'\"';
break;
// Keep the literal value, then require the closing single quote; anything else
// yields an ERROR token.
222 prev_char = next_char;
223 next_char = is.get();
224 if (next_char !=
'\'')
return Token(Token::ERROR);
225 next_char = is.get();
226 return Token(Token::CHAR_LIT, std::string(1,prev_char));
// Fallback: build the token directly from the raw character.
237 return Token(prev_char);
~ConfigLexer()
Definition: ConfigLexer.h:62
int GetMaxToken() const
Definition: ConfigLexer.h:118
Unknown modifier; will trigger error.
bool is_idchar(char test_char)
Determine if a character is a letter, digit, or underscore.
Definition: string_utils.h:195
Information about a token instance from an input stream.
Definition: Lexer.h:48
ID
Definition: struct.h:26
emp::Token GetToken()
Definition: ConfigLexer.h:121
void AddDefaultPatterns()
Definition: ConfigLexer.h:76
bool is_digit(char test_char)
Determine if a character is a digit.
Definition: string_utils.h:185
If we are in emscripten, make sure to include the header.
Definition: array.h:37
Definition: ConfigLexer.h:24
int AddPattern(const std::string &name, const std::string &pattern, int id=0, bool ignore=false)
Definition: ConfigLexer.h:64
ConfigLexer(std::istream &in_stream)
Definition: ConfigLexer.h:43
void LibraryWarning(Ts &&...msg)
Library user has made an error in how they are using the library.
Definition: errors.h:149