Skip to content

Instantly share code, notes, and snippets.

@ultrasilicon
Created November 30, 2018 04:53
Show Gist options
  • Save ultrasilicon/efa3445156b4503b34a0f6bd595d2393 to your computer and use it in GitHub Desktop.
Save ultrasilicon/efa3445156b4503b34a0f6bd595d2393 to your computer and use it in GitHub Desktop.
/*******************************************************************************
* Name : stairclimber.cpp
* Author : Han Zheng
* Date : 28 Nov. 2018
* Description : Lists the number of ways to climb n stairs.
* Pledge : I pledge my honor that I have abided by the Stevens Honor System.
******************************************************************************/
#include <iostream>
#include <vector>
#include <algorithm>
#include <sstream>
#include <iomanip>
#include <functional>
#include <fstream>
#include <queue>
using namespace std;
struct TrieNode;
static int g_limit = 0;
static string g_data;
static vector<string> g_words;
struct TrieNode
{
TrieNode()
{}
TrieNode(const char& key, TrieNode* parent)
: key_(key)
, parent_(parent)
{}
int num_ = 0;
char key_ = '0';
char padding[3];
array<TrieNode*, 27> children_{};
TrieNode* parent_;
static TrieNode root;
};
TrieNode TrieNode::root;
void trie_insert(const string &s, TrieNode *n)
{
TrieNode *cur = n;
for(const char& c : s)
{
size_t index = static_cast<size_t>(c) - '`';
if(!cur->children_[index])
cur->children_[index] = new TrieNode(c, cur);
cur = cur->children_[index];
}
++ cur->num_;
}
string trie_recover(TrieNode *n)
{
string r;
TrieNode* cur = n;
while(cur->key_ != '0')
{
if(cur->key_ == '`')
r.push_back('\'');
else
r.push_back(cur->key_);
cur = cur->parent_;
}
std::reverse(r.begin(), r.end());
return r;
}
static auto cmp = [](TrieNode* l, TrieNode* r) {
return l->num_ < r->num_;
};
static priority_queue<TrieNode*, std::vector<TrieNode*>, decltype(cmp)> pq(cmp);
void print_queue(priority_queue<TrieNode*, std::vector<TrieNode*>, decltype(cmp)>& q) {
int size = q.top()->num_;
int digit;
if(size < 10)
digit = 1;
else if(size < 100)
digit = 2;
else if(size < 1000)
digit = 3;
else if(size < 10000)
digit = 4;
else if(size < 100000)
digit = 5;
else if(size < 1000000)
digit = 6;
else if(size < 10000000)
digit = 7;
else if(size < 100000000)
digit = 8;
else
digit = 9;
cout << std::right;
while(!q.empty()) {
cout << setw(digit) << q.top()->num_ << ". " << trie_recover(q.top()) << " \n";
q.pop();
}
}
void collect(TrieNode *n)
{
if(n->num_ != 0)
pq.push(n);
for(TrieNode* it : n->children_)
if(it)
collect(it);
}
void wash_data()
{
bool jump = false;
int wordBegin = 0;
auto head = g_data.begin();
for(auto it = g_data.begin(); it != g_data.end(); ++ it)
{
if(jump)
{
jump = false;
continue;
}
if((*it >= 'a' && *it <= 'z'))
{
*head = *it;
wordBegin = 0;
}
else if(*it == ' ')
{
*head = *it;
wordBegin = 1;
}
else if(*it >= 'A' && *it <= 'Z')
{
*head = 32 + *it;
wordBegin = 0;
}
else if(*it == '\'')
{
if(wordBegin == 1)
{
*head = ' ';
wordBegin = 1;
}
else if(wordBegin == 2)
{
*head = ' ';
wordBegin = 0;
}
else
*head = '`';
}
else if(*it == '\\')
jump = true;
else
*head = ' ';
head ++;
}
g_data.erase(head, g_data.cend());
string tmp;
stringstream ss(g_data);
while (ss >> tmp)
g_words.push_back(tmp);
}
int main(int argc, char * const argv[]) {
if(argc < 2)
{
cout << "Usage: ./commonwordfinder <filename> [limit]" << endl;
return 0;
}
ifstream file;
file.open(argv[1]);
if(!file)
{
cout << "Usage: ./commonwordfinder <filename> [limit]" << endl;
return 0;
}
if(argc == 3)
{
istringstream iss(argv[2]);
if(!iss >> g_limit)
{
cout << "Error: Invalid limit '" << argv[2] << "' received." << endl;
return 0;
}
}
cout << "1" << endl;
g_data = string((std::istreambuf_iterator<char>(file)), (std::istreambuf_iterator<char>()));
cout << "2" << endl;
wash_data();
cout << "3" << endl;
for(const string &s : g_words)
{
trie_insert(s, &TrieNode::root);
}
cout << "4" << endl;
TrieNode *n = &TrieNode::root;
collect(n);
print_queue(pq);
return 0;
}
@ultrasilicon
Copy link
Author

/*******************************************************************************

  • Name : stairclimber.cpp
  • Author : Han Zheng
  • Date : 28 Nov. 2018
  • Description : Lists the number of ways to climb n stairs.
  • Pledge : I pledge my honor that I have abided by the Stevens Honor System.
    ******************************************************************************/

#include
#include
#include
#include
#include
#include
#include
#include

using namespace std;

struct TrieNode;

static int g_limit = 0;
static string g_data;

struct TrieNode
{
TrieNode()
{}

TrieNode(const char& key, TrieNode* parent)
: key_(key)
, parent_(parent)
{}

int num_ = 0;
char key_ = '0';
char padding[3];
array<TrieNode*, 27> children_{};
TrieNode* parent_;

static TrieNode root;
};

TrieNode TrieNode::root;

void trie_insert(const string &s, TrieNode *n)
{
TrieNode *cur = n;
for(const char& c : s)
{
size_t index = static_cast<size_t>(c) - '`';
if(!cur->children_[index])
cur->children_[index] = new TrieNode(c, cur);
cur = cur->children_[index];
}
++ cur->num_;
}

string trie_recover(TrieNode n)
{
string r;
TrieNode
cur = n;
while(cur->key_ != '0')
{
if(cur->key_ == '`')
r.push_back(''');
else
r.push_back(cur->key_);
cur = cur->parent_;
}
std::reverse(r.begin(), r.end());
return r;
}

static auto cmp = [](TrieNode* l, TrieNode* r) {
return l->num_ < r->num_;
};
static priority_queue<TrieNode*, std::vector<TrieNode*>, decltype(cmp)> pq(cmp);

void print_queue(priority_queue<TrieNode*, std::vector<TrieNode*>, decltype(cmp)>& q) {
int size = q.top()->num_;
int digit;
if(size < 10)
digit = 1;
else if(size < 100)
digit = 2;
else if(size < 1000)
digit = 3;
else if(size < 10000)
digit = 4;
else if(size < 100000)
digit = 5;
else if(size < 1000000)
digit = 6;
else if(size < 10000000)
digit = 7;
else if(size < 100000000)
digit = 8;
else
digit = 9;
cout << std::right;
while(!q.empty()) {
cout << setw(digit) << q.top()->num_ << ". " << trie_recover(q.top()) << " \n";
q.pop();
}
}

void collect(TrieNode n)
{
if(n->num_ != 0)
pq.push(n);
for(TrieNode
it : n->children_)
if(it)
collect(it);
}

void wash_data()
{
bool jump = false;
int wordBegin = 0;
auto head = g_data.begin();
for(auto it = g_data.begin(); it != g_data.end(); ++ it)
{
if(jump)
{
jump = false;
continue;
}
if((*it >= 'a' && *it <= 'z'))
{
*head = *it;
wordBegin = 0;
}
else if(*it == ' ')
{
*head = *it;
wordBegin = 1;
}
else if(*it >= 'A' && *it <= 'Z')
{
*head = 32 + *it;
wordBegin = 0;
}
else if(*it == ''')
{
if(wordBegin == 1)
{
*head = ' ';
wordBegin = 1;
}
else if(wordBegin == 2)
{
*head = ' ';
wordBegin = 0;
}
else
*head = '`';
}
else if(*it == '\')
jump = true;
else
*head = ' ';
head ++;
}
g_data.erase(head, g_data.cend());

// string tmp;
// stringstream ss(g_data);
// while (ss >> tmp)
// trie_insert(tmp, &TrieNode::root);

string tmp;
auto begin = g_data.begin();
auto end = begin;
for(auto it = g_data.cbegin(); it != g_data.cend(); ++ it)
{
if(*it == ' ')
{
if(begin == end)
continue;
trie_insert(string(begin, end), &TrieNode::root);
begin = end; //?
}
}
}

int main(int argc, char * const argv[]) {
ios_base::sync_with_stdio(false);
cin.tie(nullptr);

if(argc < 2)
{
cout << "Usage: ./commonwordfinder [limit]" << endl;
return 0;
}

ifstream file;
file.open(argv[1]);
if(!file)
{
cout << "Usage: ./commonwordfinder [limit]" << endl;
return 0;
}

if(argc == 3)
{
istringstream iss(argv[2]);
if(!iss >> g_limit)
{
cout << "Error: Invalid limit '" << argv[2] << "' received." << endl;
return 0;
}
}
cout << "1" << endl;

g_data = string((std::istreambuf_iterator(file)), (std::istreambuf_iterator()));
cout << "2" << endl;
wash_data();
cout << "3" << endl;

clock_t begin = clock();

cout << "4" << endl;
collect(&TrieNode::root);
print_queue(pq);

clock_t end = clock();
double elapsed_secs = double(end - begin) / CLOCKS_PER_SEC;
cout << "Algo:" << elapsed_secs << endl;

return 0;
}

@ultrasilicon
Copy link
Author

/*******************************************************************************

  • Name : stairclimber.cpp
  • Author : Han Zheng
  • Date : 28 Nov. 2018
  • Description : Lists the number of ways to climb n stairs.
  • Pledge : I pledge my honor that I have abided by the Stevens Honor System.
    ******************************************************************************/

#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include
#include

using namespace std;

struct TrieNode;

static int g_limit = 10;
static string g_data;
static size_t g_max_word_width = 0;

struct TrieNode
{
TrieNode()
{}

TrieNode(const char& key, TrieNode* parent)
: key_(key)
, parent_(parent)
{}

int value_ = 0;
char key_ = '0';
char padding[3];
array<TrieNode*, 27> children_{};
TrieNode* parent_;

static TrieNode root;
};

TrieNode TrieNode::root;

void trie_insert(const string &s, TrieNode *n)
{
TrieNode *cur = n;
for(const char& c : s)
{
size_t index = static_cast<size_t>(c) - '`';
if(!cur->children_[index])
cur->children_[index] = new TrieNode(c, cur);
cur = cur->children_[index];
}
++ cur->value_;
}

string trie_recover(TrieNode n)
{
string r;
TrieNode
cur = n;
while(cur->key_ != '0')
{
if(cur->key_ == '`')
r.push_back(''');
else
r.push_back(cur->key_);
cur = cur->parent_;
}
std::reverse(r.begin(), r.end());
return r;
}

static auto ranking_cmp = [](TrieNode* l, TrieNode* r) {
return l->value_ < r->value_;
};
static priority_queue<TrieNode*, std::vector<TrieNode*>, decltype(ranking_cmp)> ranking_pq(ranking_cmp);

int num_digit(const int& num){
int r;
if(num < 10)
r = 1;
else if(num < 100)
r = 2;
else if(num < 1000)
r = 3;
else if(num < 10000)
r = 4;
else if(num < 100000)
r = 5;
else if(num < 1000000)
r = 6;
else if(num < 10000000)
r = 7;
else if(num < 100000000)
r = 8;
else
r = 9;
return r;
}

void print_queue(priority_queue<TrieNode*, std::vector<TrieNode*>, decltype(ranking_cmp)>& q) {
cout << "Total unique words: " << (int)q.size() <<'\n'<< std::right;

list<tuple<int, set>> r;
int currentCount1 = q.top()->value_;
set tmpSet;
while(!q.empty())
{
tmpSet.insert(trie_recover(q.top()));
q.pop();
if(!q.top())
break;
int tmpV = q.top()->value_;

  if(tmpV != currentCount1)
    {
      r.push_back(make_tuple(currentCount1, tmpSet));
      tmpSet.clear();
      currentCount1 = tmpV;
    }
}

r.push_back(make_tuple(currentCount1, tmpSet));

int indexWidth = 0;
int wordWidth = 0;
int limit = g_limit;
for(auto it : r)
{
if(limit == 0)
break;
for(auto jt : get<1>(it))
{
if(limit == 0)
break;
indexWidth ++;
if((int)jt.size() > wordWidth)
wordWidth = jt.size();
-- limit;
}
}
wordWidth ++;

int index = 0;
for(auto it : r)
{
for(auto jt : get<1>(it))
{
if(g_limit == 0)
return;
cout << setw(num_digit(indexWidth)) << ++ index << ". "
<< std::left << setw((int)wordWidth) << jt
<< get<0>(it) << '\n' << std::right;
-- g_limit;
}
}
}

void collect(TrieNode n)
{
if(n->value_ != 0)
ranking_pq.push(n);
for(TrieNode
it : n->children_)
if(it)
collect(it);
}

void wash_data()
{
bool jump = false;
int wordBegin = 0;
auto head = g_data.begin();
for(auto it = g_data.begin(); it != g_data.end(); ++ it)
{
if(jump)
{
jump = false;
continue;
}
if((*it >= 'a' && *it <= 'z'))
{
*head = *it;
wordBegin = 0;
}
else if(*it == ' ')
{
*head = *it;
wordBegin = 1;
}
else if(*it >= 'A' && *it <= 'Z')
{
*head = 32 + *it;
wordBegin = 0;
}
else if(*it == ''')
{
if(wordBegin == 1)
{
*head = ' ';
wordBegin = 1;
}
else if(wordBegin == 2)
{
*head = ' ';
wordBegin = 0;
}
else
*head = '`';
}
else if(*it == '\')
jump = true;
else
*head = ' ';
head ++;
}
g_data.erase(head, g_data.cend());

string tmp;
stringstream ss(g_data);
while (ss >> tmp)
{
if(tmp.size() > g_max_word_width)
g_max_word_width = tmp.size();
trie_insert(tmp, &TrieNode::root);
}
}

int main(int argc, char * const argv[]) {
ios_base::sync_with_stdio(false);
cin.tie(nullptr);

if(argc < 2)
{
cout << "Usage: ./commonwordfinder [limit]" << endl;
return 0;
}

ifstream file;
file.open(argv[1]);
if(!file)
{
cout << "Error: Cannot open file '"<< argv[1] <<"' for input.\n" << endl;
return 0;
}

if(argc == 3)
{
istringstream iss(argv[2]);
if(!(iss >> g_limit))
{
cout << "Error: Invalid limit '" << argv[2] << "' received." << endl;
return 0;
}
if(g_limit < 0)
{
cout << "Error: Invalid limit '" << argv[2] << "' received." << endl;
return 0;
}
}

g_data = string((std::istreambuf_iterator(file)), (std::istreambuf_iterator()));
wash_data();

collect(&TrieNode::root);
print_queue(ranking_pq);

return 0;
}

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment