Fixed a lot of bugs. Actually gets through the entire experimental grammer. (A largeish experimental grammer for Kraken written to continue testing and to really start language development.
This commit is contained in:
121
src/RegEx.cpp
121
src/RegEx.cpp
@@ -2,9 +2,15 @@
|
||||
|
||||
RegEx::RegEx(std::string inPattern) {
|
||||
pattern = inPattern;
|
||||
construct();
|
||||
deperenthesize();
|
||||
}
|
||||
|
||||
void RegEx::construct() {
|
||||
std::vector<RegExState*> previousStates;
|
||||
std::vector<RegExState*> currentStates;
|
||||
std::stack<std::pair<std::vector<RegExState*>, RegExState*> > perenStack;
|
||||
std::stack<std::pair<std::vector<RegExState*>, std::vector<RegExState*> > > perenStack;
|
||||
bool alternating = false;
|
||||
begin = new RegExState();
|
||||
currentStates.push_back(begin);
|
||||
for (int i = 0; i < pattern.length(); i++) {
|
||||
@@ -42,11 +48,7 @@ RegEx::RegEx(std::string inPattern) {
|
||||
{
|
||||
std::cout << "Alternation at " << i << " in " << pattern << std::endl;
|
||||
//alternation
|
||||
i++;
|
||||
RegExState* next = new RegExState(pattern[i]);
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < previousStates.size(); j++)
|
||||
previousStates[j]->addNext(next);
|
||||
currentStates.push_back(next);
|
||||
alternating = true;
|
||||
}
|
||||
|
||||
break;
|
||||
@@ -57,17 +59,35 @@ RegEx::RegEx(std::string inPattern) {
|
||||
//Create a peren node with an inner empty node
|
||||
RegExState* next = new RegExState(new RegExState());
|
||||
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
|
||||
currentStates[j]->addNext(next);
|
||||
if (alternating) {
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < previousStates.size(); j++)
|
||||
previousStates[j]->addNext(next);
|
||||
|
||||
previousStates.clear();
|
||||
//Save both current states here as well as the current preren
|
||||
std::vector<RegExState*> saveStates = currentStates;
|
||||
// saveStates.insert(saveStates.end(), currentStates.begin(), currentStates.end())
|
||||
perenStack.push(std::make_pair(saveStates, next));
|
||||
//Save both current states here as well as the current preren
|
||||
std::vector<RegExState*> savePreviousStates = previousStates;
|
||||
currentStates.push_back(next);
|
||||
std::vector<RegExState*> saveCurrentStates = currentStates;
|
||||
perenStack.push(std::make_pair(savePreviousStates, saveCurrentStates));
|
||||
|
||||
currentStates.clear();
|
||||
currentStates.push_back(next->getInner());
|
||||
previousStates.clear();
|
||||
currentStates.clear();
|
||||
currentStates.push_back(next->getInner());
|
||||
alternating = false;
|
||||
} else {
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
|
||||
currentStates[j]->addNext(next);
|
||||
|
||||
//Save both current states here as well as the current preren
|
||||
std::vector<RegExState*> savePreviousStates = currentStates;
|
||||
currentStates.clear();
|
||||
currentStates.push_back(next);
|
||||
std::vector<RegExState*> saveCurrentStates = currentStates;
|
||||
perenStack.push(std::make_pair(savePreviousStates, saveCurrentStates));
|
||||
|
||||
previousStates.clear();
|
||||
currentStates.clear();
|
||||
currentStates.push_back(next->getInner());
|
||||
}
|
||||
std::cout << "Peren is " << next << " Inner is " << currentStates[0] << " = " << next->getInner() << std::endl;
|
||||
}
|
||||
break;
|
||||
@@ -77,15 +97,16 @@ RegEx::RegEx(std::string inPattern) {
|
||||
std::cout << "End peren at " << i << " in " << pattern << std::endl;
|
||||
//perentheses
|
||||
//Pop off the states that will now be the previous states and the peren node which will now be the current node
|
||||
std::pair<std::vector<RegExState*>, RegExState*> savedPair = perenStack.top();
|
||||
std::pair<std::vector<RegExState*>, std::vector<RegExState*> > savedPair = perenStack.top();
|
||||
perenStack.pop();
|
||||
//Make the it so
|
||||
previousStates = savedPair.first;
|
||||
//Make sure the end of the inner stuff points back to the peren node
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++)
|
||||
currentStates[j]->addNext(savedPair.second);
|
||||
currentStates[j]->addNext(savedPair.second[savedPair.second.size()-1]);
|
||||
//currentStates[j]->addNext(*(savedPair.second.end()));
|
||||
currentStates.clear();
|
||||
currentStates.push_back(savedPair.second);
|
||||
currentStates = savedPair.second;
|
||||
}
|
||||
break;
|
||||
|
||||
@@ -93,42 +114,42 @@ RegEx::RegEx(std::string inPattern) {
|
||||
{
|
||||
i++;
|
||||
std::cout << "Escape! Escaping: " << pattern[i] << std::endl;
|
||||
//Ahh, it's escaping a special character
|
||||
RegExState* next = new RegExState(pattern[i]);
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
|
||||
currentStates[j]->addNext(next);
|
||||
std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << currentStates[j] << std::endl;
|
||||
}
|
||||
|
||||
previousStates.clear();
|
||||
// previousStates.insert(previousStates.begin(), currentStates.begin(), currentStates.end());
|
||||
previousStates = currentStates;
|
||||
currentStates.clear();
|
||||
currentStates.push_back(next);
|
||||
//Ahh, it's escaping a special character, so fall through to the default.
|
||||
}
|
||||
break;
|
||||
default:
|
||||
{
|
||||
std::cout << "Regular" << std::endl;
|
||||
//Ahh, it's regular
|
||||
RegExState* next = new RegExState(pattern[i]);
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
|
||||
currentStates[j]->addNext(next);
|
||||
std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << currentStates[j] << std::endl;
|
||||
//If we're alternating, add next as the next for each previous state, and add self to currentStates
|
||||
if (alternating) {
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < previousStates.size(); j++) {
|
||||
previousStates[j]->addNext(next);
|
||||
std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << previousStates[j] << std::endl;
|
||||
}
|
||||
currentStates.push_back(next);
|
||||
alternating = false;
|
||||
} else {
|
||||
//If we're not alternating, add next as next for all the current states, make the current states the new
|
||||
//previous states, and add ourself as the new current state.
|
||||
for (std::vector<RegExState*>::size_type j = 0; j < currentStates.size(); j++) {
|
||||
currentStates[j]->addNext(next);
|
||||
std::cout << "Adding " << next << ", which is " << pattern[i] << " to " << currentStates[j] << std::endl;
|
||||
}
|
||||
previousStates.clear();
|
||||
previousStates = currentStates;
|
||||
currentStates.clear();
|
||||
currentStates.push_back(next);
|
||||
}
|
||||
|
||||
previousStates.clear();
|
||||
// previousStates.insert(previousStates.begin(), currentStates.begin(), currentStates.end());
|
||||
previousStates = currentStates;
|
||||
currentStates.clear();
|
||||
currentStates.push_back(next);
|
||||
}
|
||||
}
|
||||
}
|
||||
//last one is goal state
|
||||
for (std::vector<RegExState*>::size_type i = 0; i < currentStates.size(); i++)
|
||||
currentStates[i]->addNext(NULL);
|
||||
}
|
||||
|
||||
void RegEx::deperenthesize() {
|
||||
std::cout << "About to de-perenthesize " << begin->toString() << std::endl;
|
||||
|
||||
//Now go through and expand the peren nodes to regular nodes
|
||||
@@ -144,13 +165,13 @@ RegEx::RegEx(std::string inPattern) {
|
||||
if ((*nextStates)[j] != NULL && (*nextStates)[j]->getInner() != NULL) {
|
||||
//Fix all the next references pointing to the peren node to point to the inner nodes. (if more than one, push back to add others)
|
||||
std::vector<RegExState*>* insideNextStates = (*nextStates)[j]->getInner()->getNextStates();
|
||||
std::cout << "insideNextStates = " << insideNextStates << " [0] " << (*insideNextStates)[0] << std::endl;
|
||||
//std::cout << "insideNextStates = " << insideNextStates << " [0] " << (*insideNextStates)[0] << std::endl;
|
||||
RegExState* perenState = (*nextStates)[j];
|
||||
(*nextStates)[j] = (*insideNextStates)[0];
|
||||
std::cout << "So now nextstates[j] = " << (*nextStates)[j] << std::endl;
|
||||
//std::cout << "So now nextstates[j] = " << (*nextStates)[j] << std::endl;
|
||||
for (std::vector<RegExState*>::size_type k = 1; k < insideNextStates->size(); k++)
|
||||
nextStates->push_back((*insideNextStates)[k]);
|
||||
std::cout << "Replaced beginning: " << begin->toString() << std::endl;
|
||||
//std::cout << "Replaced beginning: " << begin->toString() << std::endl;
|
||||
//Now, if the peren node is self-referential (has a repitition operator after i), fix it's self-references in the same manner
|
||||
std::vector<RegExState*>* perenNextNodes = perenState->getNextStates();
|
||||
for (std::vector<RegExState*>::size_type k = 0; k < perenNextNodes->size(); k++) {
|
||||
@@ -166,17 +187,17 @@ RegEx::RegEx(std::string inPattern) {
|
||||
traversalList.push_back(perenState->getInner());
|
||||
for (std::vector<RegExState*>::size_type k = 0; k < traversalList.size(); k++) {
|
||||
std::vector<RegExState*>* nextTraversalStates = traversalList[k]->getNextStates();
|
||||
std::cout << "Traversing! nextTraversalStates from traversalList " << traversalList[k] << " char = " << traversalList[k]->getCharacter() << std::endl;
|
||||
std::cout << "with children: ";
|
||||
for (std::vector<RegExState*>::size_type l = 0; l < nextTraversalStates->size(); l++)
|
||||
std::cout << (*nextTraversalStates)[l]->getCharacter() << " ";
|
||||
std::cout << std::endl;
|
||||
//std::cout << "Traversing! nextTraversalStates from traversalList " << traversalList[k] << " char = " << traversalList[k]->getCharacter() << std::endl;
|
||||
//std::cout << "with children:" << std::endl;
|
||||
//for (std::vector<RegExState*>::size_type l = 0; l < nextTraversalStates->size(); l++)
|
||||
// std::cout << "\t\"" << (*nextTraversalStates)[l]->getCharacter() << "\"" << std::endl;
|
||||
//std::cout << std::endl;
|
||||
for (std::vector<RegExState*>::size_type l = 0; l < nextTraversalStates->size(); l++) {
|
||||
//If this node is equal to the peren node we came from, then that means we've reached the end of the inner part of the peren
|
||||
//And we now replace this reference with the next nodes from the peren node
|
||||
std::cout << "Traversal Next is on " << (*nextTraversalStates)[l]->getCharacter() << std::endl;
|
||||
//std::cout << "Traversal Next is on " << (*nextTraversalStates)[l]->getCharacter() << std::endl;
|
||||
if ((*nextTraversalStates)[l] == perenState) {
|
||||
std::cout << "nextTraversalStates[l] = to perenState!" << std::endl;
|
||||
// std::cout << "nextTraversalStates[l] = to perenState!" << std::endl;
|
||||
std::vector<RegExState*> endPerenNextStates = *(perenState->getNextStates());
|
||||
(*nextTraversalStates)[l] = endPerenNextStates[0];
|
||||
for (std::vector<RegExState*>::size_type n = 1; n < endPerenNextStates.size(); n++)
|
||||
|
||||
Reference in New Issue
Block a user