12 #ifndef EMP_LEXER_UTILS_H 13 #define EMP_LEXER_UTILS_H 18 #include "../base/vector.h" 33 static inline DFA to_DFA(
const NFA & nfa,
int keep_invalid=
false) {
35 std::map<std::set<size_t>,
size_t> id_map;
36 std::vector<std::set<size_t>> state_stack;
37 state_stack.emplace_back(nfa.
GetStart());
38 id_map[state_stack[0]] = 0;
41 while (state_stack.size()) {
43 std::set<size_t> cur_state = state_stack.back();
44 const size_t cur_id = id_map[cur_state];
45 state_stack.pop_back();
52 std::set<size_t> next_state = nfa.
GetNext(sym, cur_state);
53 if (next_state.size() == 0 && !keep_invalid)
continue;
59 for (
auto x : remove_set) next_state.erase(x);
62 if (id_map.find(next_state) == id_map.end()) {
63 const size_t next_id = dfa.
GetSize();
64 id_map[next_state] = next_id;
66 state_stack.emplace_back(next_state);
70 const size_t next_id = id_map[next_state];
82 for (
size_t from = 0; from < dfa.
GetSize(); from++) {
84 for (
size_t sym = 0; sym < t.size(); sym++) {
85 if (t[sym] == -1)
continue;
86 nfa.AddTransition(from, (
size_t) t[sym], sym);
95 template <
typename T1>
97 return to_NFA(std::forward<T1>(in));
101 template <
typename T1,
typename T2,
typename... Ts>
103 NFA nfa_out(
to_NFA(std::forward<T1>(in1)) );
105 return MergeNFA(nfa_out, std::forward<Ts>(others)...);
110 template <
typename T1,
typename T2,
typename... Ts>
119 DFAStatus(
size_t _state,
const std::string & _seq) : state(_state), sequence(_seq) { ; }
129 while (next_id < traverse_set.
size()) {
130 const auto cur_status = traverse_set[next_id++];
132 for (
size_t sym = 0; sym < t.size(); sym++) {
133 const int next_state = t[sym];
134 if (next_state == -1)
continue;
135 std::string cur_str(cur_status.sequence);
136 cur_str += (char) sym;
137 if (min_size <= cur_str.size() ) {
138 if (dfa.
IsStop(next_state))
return cur_str;
static const NFA & to_NFA(const NFA &nfa)
Converting NFA to MFA – no change needed.
Definition: lexer_utils.h:30
const emp::array< int, NUM_SYMBOLS > & GetTransitions(size_t from) const
Return an array of all transitions associated with a specified state.
Definition: DFA.h:50
static NFA MergeNFA(T1 &&in)
Merge multiple automata into one NFA (base case, single converstion)
Definition: lexer_utils.h:96
size_t state
Definition: lexer_utils.h:117
A Deterministic Finite Automata simulator.
bool IsEmpty(size_t state) const
Test if this state has only empty transitions from it, and not stop state.
Definition: NFA.h:186
stop_t GetStop(size_t state) const
Get any stop value associated with the provided state.
Definition: NFA.h:177
A dynamic NFA class, for easily building non-determanistic finite automata.
Definition: NFA.h:37
static const constexpr size_t NUM_SYMBOLS
Definition: NFA.h:39
void Merge(const tNFA< NUM_SYMBOLS, STOP_TYPE > &nfa2)
Merge another NFA into this one.
Definition: NFA.h:189
A drop-in replacement for std::vector<bool>, with additional bitwise logic features.
void push_back(PB_Ts &&...args)
Definition: vector.h:189
bool IsStop(int state) const
Test if a state has a stop.
Definition: DFA.h:82
DFAStatus(size_t _state, const std::string &_seq)
Definition: lexer_utils.h:119
size_t size() const
Definition: vector.h:151
static const DFA & to_DFA(const DFA &dfa)
Converting DFA to DFA – no change needed.
Definition: lexer_utils.h:27
void emplace_back(ARGS &&...args)
Definition: vector.h:219
const std::set< size_t > & GetStart() const
Return start state and all others reachable through empty transitions.
Definition: NFA.h:72
static DFA MergeDFA(T1 &&in1, T2 &&in2, Ts &&...others)
Merge multiple automata (DFA, NFA, RegEx) into one DFA.
Definition: lexer_utils.h:111
size_t GetSize() const
How many states is this DFA using?
Definition: DFA.h:39
Structure to track the current status of a DFA.
Definition: lexer_utils.h:116
void Resize(size_t new_size)
Add Additional empty states.
Definition: DFA.h:42
std::set< size_t > GetNext(size_t sym, size_t from_id=0) const
Return the states reachable from the current state given the provided symbol.
Definition: NFA.h:78
If we are in emscripten, make sure to include the header.
Definition: array.h:37
void AddStop(size_t state, stop_t stop_val=1)
Set the stop value only if it's higher than the current stop value.
Definition: DFA.h:69
std::string sequence
Definition: lexer_utils.h:118
void SetTransition(size_t from, size_t to, size_t sym)
Add a specific transition associated with an input symbol.
Definition: DFA.h:55
std::string FindExample(const DFA &dfa, const size_t min_size=1)
Method to find an example string that satisfies a DFA.
Definition: lexer_utils.h:123
stop_t GetStop(int state) const
Get the stop value associated with a state.
Definition: DFA.h:76
A Non-deterministic Finite Automata simulator.