Created
April 5, 2018 03:00
-
-
Save maixuanhan/43948380a8a99503f6b7285770929c56 to your computer and use it in GitHub Desktop.
Word count problem
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// mxhan | |
// Problem 1: word counting | |
#include <cstdlib> | |
#include <cstring> | |
#include <fstream> | |
#include <iostream> | |
#include <string> | |
using namespace std; | |
inline int hashChar(char c) | |
{ | |
return c - 'A' - (c > 'Z' ? 32 : 0); | |
} | |
inline char toChar(int i) | |
{ | |
return 'a' + i; | |
} | |
inline bool isChar(char c) | |
{ | |
return 'A' <= c && c <= 'Z' || 'a' <= c && c <= 'z'; | |
} | |
class TreeNode | |
{ | |
public: | |
TreeNode() | |
{ | |
this->counter = 0; | |
memset(this->dict, 0x00, sizeof(this->dict)); | |
} | |
~TreeNode() | |
{ | |
for (int i = 0; i < 26; ++i) | |
{ | |
if (this->dict[i] != NULL) delete this->dict[i]; | |
} | |
} | |
int counter; | |
TreeNode* dict[26]; | |
}; | |
class Word | |
{ | |
public: | |
Word() | |
{ | |
// this->word = ""; | |
this->counter = 0; | |
} | |
string word; | |
int counter; | |
}; | |
class Fsm | |
{ | |
public: | |
Fsm() | |
{ | |
this->head = new TreeNode; | |
this->current = this->head; | |
this->wordCount = 0; | |
this->currentWordIdx = 0; | |
this->words = NULL; | |
} | |
~Fsm() | |
{ | |
delete this->head; | |
if (this->words != NULL) delete[] this->words; | |
} | |
void Read(char c) | |
{ | |
if (isChar(c)) | |
{ | |
int idx = hashChar(c); | |
if (this->current->dict[idx] == NULL) | |
{ | |
this->current->dict[idx] = new TreeNode; | |
} | |
this->current = this->current->dict[idx]; | |
} | |
else | |
{ | |
EndRead(); | |
} | |
} | |
void EndRead() | |
{ | |
if (this->current != this->head) | |
{ | |
this->current->counter++; | |
if (this->current->counter == 1) // new word | |
{ | |
this->wordCount++; | |
} | |
} | |
this->current = this->head; | |
} | |
void ExtractData() | |
{ | |
if (this->wordCount <= 0) return; | |
// Init result list | |
if (this->words != NULL) delete[] this->words; | |
this->words = new Word[this->wordCount]; | |
this->currentWordIdx = 0; | |
this->ExtractNode(this->head, string()); | |
} | |
void SortData() | |
{ | |
// TODO: qsort algorithm if the wordCount is big | |
for (int i = 0; i < this->wordCount - 1; ++i) | |
{ | |
for (int j = i + 1; j < this->wordCount; ++j) | |
{ | |
if (this->words[i].counter > this->words[j].counter) | |
{ | |
// swap 2 words | |
Word temp = this->words[i]; | |
this->words[i] = this->words[j]; | |
this->words[j] = temp; | |
} | |
} | |
} | |
} | |
void PrintData(int n, bool isDescendingMode) | |
{ | |
if (isDescendingMode) | |
{ | |
for (int i = 0; i < n && i < this->wordCount; ++i) | |
{ | |
cout << (i + 1) << ".\t" << this->words[this->wordCount - 1 - i].word << " - " | |
<< this->words[this->wordCount - 1 - i].counter << endl; | |
} | |
} | |
else | |
{ | |
for (int i = 0; i < n && i < this->wordCount; ++i) | |
{ | |
cout << (i + 1) << ".\t" << this->words[i].word << " - " << this->words[i].counter << endl; | |
} | |
} | |
} | |
private: | |
void ExtractNode(TreeNode* node, string baseStr) | |
{ | |
if (node == NULL) return; | |
if (node->counter > 0) | |
{ | |
this->words[this->currentWordIdx].word = baseStr; | |
this->words[this->currentWordIdx].counter = node->counter; | |
this->currentWordIdx++; | |
} | |
for (int i = 0; i < 26; ++i) | |
{ | |
if (node->dict[i] != NULL) | |
{ | |
ExtractNode(node->dict[i], baseStr + toChar(i)); | |
} | |
} | |
} | |
private: | |
TreeNode* head; // head TreeNode's counter is alway zero | |
TreeNode* current; | |
Word* words; | |
int wordCount; | |
int currentWordIdx; | |
}; | |
int main(int argc, char* argv[]) | |
{ | |
if (argc != 4) | |
{ | |
cerr << "Wrong command usage.\n" << endl; | |
return 1; | |
} | |
char* fileName = argv[1]; | |
char* command = argv[2]; | |
int count = atoi(argv[3]); | |
if (count <= 0) | |
{ | |
// fair enough, no entry found | |
return 0; | |
} | |
Fsm f; | |
string line; | |
ifstream input(fileName); | |
if (input.is_open()) | |
{ | |
char c = input.get(); | |
while (input.good()) | |
{ | |
f.Read(c); | |
c = input.get(); | |
} | |
f.EndRead(); | |
input.close(); | |
} | |
else | |
{ | |
cerr << "File not found.\n" << endl; | |
return 1; | |
} | |
f.ExtractData(); | |
f.SortData(); | |
if (hashChar(command[0]) == hashChar('A')) | |
{ | |
// Ascending mode | |
f.PrintData(count, false); | |
} | |
else if (hashChar(command[0]) == hashChar('D')) | |
{ | |
// Descending mode | |
f.PrintData(count, true); | |
} | |
else | |
{ | |
cerr << "Unrecognized query mode.\n" << endl; | |
return 1; | |
} | |
return 0; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Compile and test
novel.txt