Created
January 18, 2024 03:46
-
-
Save icub3d/34dd7a95ab84e8acf6df2fc82b09b988 to your computer and use it in GitHub Desktop.
Byes vs Chars in Rust
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// A helper function to print some details about a string. | |
fn string_details(string: &str) { | |
println!("string: {}", string); | |
println!("char details (char: index byte_index bytes)"); | |
// The index and byte_index may be different for non-ASCII characters. | |
for (index, (byte_index, c)) in string.char_indices().enumerate() { | |
println!( | |
" {:?}: {:02} {:02} {:?}", | |
c, | |
index, | |
byte_index, | |
c.to_string().as_bytes() | |
); | |
} | |
println!(); | |
} | |
fn main() { | |
// Print some information about an ASCII string. | |
let english = "Hello, world"; | |
string_details(english); | |
// Print some information about a UTF-8 string. | |
let korean = "μλ μΈμ"; | |
string_details(korean); | |
let emoji = "ππ"; | |
string_details(emoji); | |
// Let's see how long it takes to count vowels in a non-trivially sized string using the | |
// chars() iterator. | |
let data = std::fs::read_to_string("/usr/share/dict/cracklib-small").unwrap(); | |
let now = std::time::Instant::now(); | |
let mut vowels = 0; | |
for c in data.chars() { | |
if ['a', 'e', 'i', 'o', 'u'].contains(&c) { | |
vowels += 1; | |
} | |
} | |
println!("{} vowels in {:?}", vowels, now.elapsed()); | |
// Now let's see how long it takes to count vowels in a non-trivially sized string using the | |
// bytes() iterator. | |
let data = std::fs::read_to_string("/usr/share/dict/cracklib-small").unwrap(); | |
let now = std::time::Instant::now(); | |
let mut vowels = 0; | |
for b in data.bytes() { | |
if [b'a', b'e', b'i', b'o', b'u'].contains(&b) { | |
vowels += 1; | |
} | |
} | |
println!("{} vowels in {:?}", vowels, now.elapsed()); | |
} | |
// https://leetcode.com/problems/minimum-window-substring/description/ | |
pub fn min_window_substring_chars(s: &str, t: &str) -> String { | |
use std::collections::HashMap; | |
// *state* | |
let mut counts: HashMap<char, isize> = t.chars().fold(HashMap::new(), |mut counts, c| { | |
*counts.entry(c).or_default() += 1; | |
counts | |
}); | |
let (mut min_left, mut min_right) = (0, 0); | |
// *iterate* | |
// | |
// Track our left position and the character at that position. | |
let mut left_iter = s.chars().enumerate(); | |
let (mut left, mut left_char) = left_iter.next().unwrap(); | |
for (right, b) in s.chars().enumerate() { | |
// *update state* | |
if let Some(count) = counts.get_mut(&b) { | |
*count -= 1 | |
} | |
while counts.values().all(|&count| count <= 0) { | |
if right - left + 1 < (min_right - min_left + 1) || (min_left == 0 && min_right == 0) { | |
(min_left, min_right) = (left, right); | |
} | |
if let Some(count) = counts.get_mut(&left_char) { | |
*count += 1; | |
} | |
// Move left. We need to make sure we don't move past the end of the string. If we do, | |
// we know we are done. | |
if let Some((l, c)) = left_iter.next() { | |
left = l; | |
left_char = c; | |
} else { | |
break; | |
} | |
} | |
} | |
s.chars() | |
.skip(min_left) | |
.take(min_right - min_left + 1) | |
.collect() | |
} | |
// https://leetcode.com/problems/minimum-window-substring/description/ | |
// Let's modify our min_window_substring function to be more performant assuming we only get ASCII. | |
// If we do need to be mindful of UTF-8, we can use the chars() iterator instead of the bytes(). | |
pub fn min_window_substring(s: &str, t: &str) -> String { | |
use std::collections::HashMap; | |
// *state* | |
let mut counts: HashMap<u8, isize> = t.bytes().fold(HashMap::new(), |mut counts, b| { | |
*counts.entry(b).or_default() += 1; | |
counts | |
}); | |
let mut min_window = ""; | |
// *iterate* | |
// | |
// Track our left position and the character at that position. | |
let mut left_iter = s.bytes().enumerate(); | |
let (mut left, mut left_char) = left_iter.next().unwrap(); | |
for (right, b) in s.bytes().enumerate() { | |
// *update state* | |
if let Some(count) = counts.get_mut(&b) { | |
*count -= 1 | |
} | |
while counts.values().all(|&count| count <= 0) { | |
if right - left + 1 < min_window.len() || min_window.is_empty() { | |
min_window = &s[left..=right]; | |
} | |
if let Some(count) = counts.get_mut(&left_char) { | |
*count += 1; | |
} | |
// Move left. We need to make sure we don't move past the end of the string. If we do, | |
// we know we are done. | |
if let Some((l, c)) = left_iter.next() { | |
left = l; | |
left_char = c; | |
} else { | |
break; | |
} | |
} | |
} | |
min_window.to_string() | |
} | |
#[cfg(test)] | |
mod test { | |
use super::*; | |
#[test] | |
fn test_min_window_substring() { | |
let s = "ADOBECODEBANC"; | |
let t = "ABC"; | |
assert_eq!(min_window_substring(s, t), "BANC"); | |
let s = "a"; | |
let t = "a"; | |
assert_eq!(min_window_substring(s, t), "a"); | |
let s = "a"; | |
let t = "aa"; | |
assert_eq!(min_window_substring(s, t), ""); | |
let s = "a"; | |
let t = "b"; | |
assert_eq!(min_window_substring(s, t), ""); | |
let s = "aa"; | |
let t = "aa"; | |
assert_eq!(min_window_substring(s, t), "aa"); | |
let s = "bbaa"; | |
let t = "aba"; | |
assert_eq!(min_window_substring(s, t), "baa"); | |
let s = "bbaa"; | |
let t = "abb"; | |
assert_eq!(min_window_substring(s, t), "bba"); | |
let s = "bbaa"; | |
let t = "abaaa"; | |
assert_eq!(min_window_substring(s, t), ""); | |
} | |
#[test] | |
fn test_min_window_substring_chars() { | |
let s = "ADOBECODEBANC"; | |
let t = "ABC"; | |
assert_eq!(min_window_substring_chars(s, t), "BANC"); | |
let emojis = "πππππππππ"; | |
let t = "ππ"; | |
assert_eq!(min_window_substring_chars(emojis, t), "ππ"); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment