Skip to content

Instantly share code, notes, and snippets.

@miquels
Created October 18, 2020 07:59
Show Gist options
  • Save miquels/b93ed7af98bc3bc6022b474c4fd66ab2 to your computer and use it in GitHub Desktop.
Save miquels/b93ed7af98bc3bc6022b474c4fd66ab2 to your computer and use it in GitHub Desktop.
word count
# Cargo manifest for the `wc` word-count binary.
[package]
name = "wc"
version = "0.1.0"
edition = "2018"
[dependencies]
# Pattern matching for words and for the `.txt` file-name filter.
regex = "1"
# Lets the compiled regexes live in statics.
lazy_static = "1"
use std::collections::HashMap;
use std::fs;
use std::io::{self, BufRead};
use regex::Regex;
// Regexes shared by `main`, kept in statics so each pattern is compiled only once.
// The patterns are literals, so the `unwrap()` calls can only fail on a typo in the pattern itself.
lazy_static::lazy_static! {
// A "word" is a run of two or more letters a-z, matched case-insensitively.
static ref WORD_REGEX: Regex = Regex::new(r"(?i)[a-z]{2,}").unwrap();
// Selects paths whose name ends in ".txt", in any letter case.
static ref FNAME_REGEX: Regex = Regex::new(r"(?i)\.txt$").unwrap();
}
/// Prints the ten most frequent words found in the `.txt` files of the current directory.
///
/// Every regular file in `.` whose name matches `FNAME_REGEX` is read line by line, and each
/// `WORD_REGEX` match is tallied. Matching is case-insensitive but counting is not: `The` and
/// `the` are tallied as separate words. Output is one `<count> <word>` line per word, highest
/// count first; words with equal counts are listed alphabetically so the result is the same on
/// every run.
///
/// # Errors
///
/// Returns the underlying `io::Error` if the directory cannot be listed, a selected file cannot
/// be opened, or a line cannot be read (for example because it is not valid UTF-8). Directory
/// entries that fail to be read individually are skipped.
fn main() -> io::Result<()> {
    let mut word_counts: HashMap<String, u32> = HashMap::new();

    let text_files = fs::read_dir(".")?
        .filter_map(|entry| entry.ok())
        .map(|entry| entry.path())
        .filter(|path| path.is_file() && FNAME_REGEX.is_match(&path.to_string_lossy()));

    for path in text_files {
        let file = fs::File::open(path)?;
        for line in io::BufReader::new(file).lines() {
            let line = line?;
            // Only the whole match is needed, so `find_iter` avoids building capture groups.
            for word in WORD_REGEX.find_iter(&line) {
                *word_counts.entry(word.as_str().to_owned()).or_insert(0) += 1;
            }
        }
    }

    let mut word_array: Vec<(String, u32)> = word_counts.into_iter().collect();
    // Counts are `u32`, so a total order (`cmp`) is available. The secondary key on the word
    // makes ties independent of the HashMap's randomized iteration order.
    word_array.sort_unstable_by(|a, b| b.1.cmp(&a.1).then_with(|| a.0.cmp(&b.0)));

    // `take` handles the case of fewer than ten distinct words without manual bounds math.
    for (word, count) in word_array.iter().take(10) {
        println!("{} {}", count, word);
    }
    Ok(())
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment