Created
February 15, 2019 18:17
-
-
Save Geal/84775215be3b4d5978173165373c7dbb to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#[macro_use] | |
extern crate nom; | |
#[macro_use] | |
extern crate bencher; | |
extern crate fnv; | |
use fnv::FnvHashMap as HashMap; | |
use bencher::{Bencher, black_box}; | |
use nom::{digit, be_u32, IResult, Err, ErrorKind, InputTakeAtPosition, Convert, recognize_float, | |
ParseTo, Slice, InputLength, Needed,HexDisplay}; | |
named!(first<u32>, flat_map!(digit, parse_to!(u32))); | |
named!(second<u32>, call!(be_u32)); | |
fn or<'b, I: Clone, O, E>(input: I, fns: &'b[&'b Fn(I) -> IResult<I, O, E>]) -> IResult<I, O, E> { | |
let mut index = 0; | |
for f in fns.iter() { | |
match f(input.clone()) { | |
Err(Err::Error(_)) => {}, | |
rest => return rest, | |
} | |
} | |
Err(Err::Error(error_position!(input, ErrorKind::Alt))) | |
} | |
fn separated<I: Clone, O1, O2, O3, E, F, G, H>(input: I, first: F, sep: G, second: H) -> IResult<I, (O1, O3), E> | |
where F: Fn(I) -> IResult<I, O1, E>, | |
G: Fn(I) -> IResult<I, O2, E>, | |
H: Fn(I) -> IResult<I, O3, E> { | |
let (input, o1) = first(input)?; | |
let (input, _) = sep(input)?; | |
second(input).map(|(i, o2)| (i, (o1, o2))) | |
} | |
fn delimited<I: Clone, O1, O2, O3, E, F, G, H>(input: I, first: F, sep: G, second: H) -> IResult<I, O2, E> | |
where F: Fn(I) -> IResult<I, O1, E>, | |
G: Fn(I) -> IResult<I, O2, E>, | |
H: Fn(I) -> IResult<I, O3, E> { | |
let (input, _) = first(input)?; | |
let (input, o2) = sep(input)?; | |
second(input).map(|(i, _)| (i, o2)) | |
} | |
fn take_while<'a, T: 'a, F>(input: &'a [T], cond: F) -> IResult<&'a [T], &'a [T]> | |
where F: Fn(T) -> bool, | |
&'a [T]: nom::InputTakeAtPosition<Item=T> { | |
input.split_at_position(cond) | |
} | |
fn map<I, O1, O2, F, G>(input: I, first: F, second: G) -> IResult<I, O2> | |
where F: Fn(I) -> IResult<I, O1>, | |
G: Fn(O1) -> O2 { | |
first(input).map(|(i, o1)| (i, second(o1))) | |
} | |
fn flat_map<I: Clone+From<O1>, O1, O2, F, G>(input: I, first: F, second: G) -> IResult<I, O2> | |
where F: Fn(I) -> IResult<I, O1>, | |
G: Fn(O1) -> IResult<O1, O2> { | |
let (i, o1) = first(input)?; | |
second(o1).map(|(_, o2)| (i, o2)).map_err(Err::convert) | |
} | |
fn many0<I: Clone+InputLength, O, F>(input: I, mut f: F) -> IResult<I, Vec<O>> | |
where F: FnMut(I) -> IResult<I, O> { | |
let mut i = input; | |
let mut acc = Vec::new(); | |
loop { | |
let i_ = i.clone(); | |
match f(i_) { | |
Err(_) => return Ok((i, acc)), | |
Ok((i2, o)) => { | |
if i.input_len() == i2.input_len() { | |
return Err(Err::Error(error_position!(i, ErrorKind::Many0))) | |
} | |
i = i2; | |
acc.push(o); | |
if i.input_len() == 0 { | |
return Ok((i, acc)); | |
} | |
} | |
} | |
} | |
} | |
fn separated_list<I: Clone+InputLength, O, O2, F, G>(input: I, mut sep: G, mut f: F) -> IResult<I, Vec<O>> | |
where F: FnMut(I) -> IResult<I, O>, | |
G: FnMut(I) -> IResult<I, O2> { | |
let mut acc = Vec::new(); | |
let (input, o) = f(input)?; | |
acc.push(o); | |
let mut i = input; | |
loop { | |
if i.input_len() == 0 { | |
return Ok((i, acc)); | |
} | |
let i_ = i.clone(); | |
match sep(i_) { | |
Err(_) => return Ok((i, acc)), | |
Ok((i2, _)) => { | |
if i.input_len() == i2.input_len() { | |
return Err(Err::Error(error_position!(i, ErrorKind::Many0))) | |
} | |
let i2_ = i2.clone(); | |
match f(i2_) { | |
Err(_) => return Ok((i, acc)), | |
Ok((i3, o)) => { | |
if i2.input_len() == i3.input_len() { | |
return Err(Err::Error(error_position!(i, ErrorKind::Many0))) | |
} | |
i = i3; | |
acc.push(o); | |
} | |
} | |
} | |
} | |
} | |
} | |
//fn char<F>(c: char) -> F | |
// where F: Fn(&[u8]) -> IResult<&[u8], char> { | |
fn char(c: char) -> impl Fn(&[u8]) -> IResult<&[u8], char> { | |
move |i:&[u8]| { | |
if i.len() == 0 { | |
Err(Err::Incomplete(Needed::Unknown)) | |
} else { | |
//beware of utf8 | |
if i[0] as char == c { | |
Ok((&i[1..], c)) | |
} else { | |
Err(Err::Error(error_position!(i, ErrorKind::Char))) | |
} | |
} | |
} | |
} | |
fn tag<'b, 'a: 'b>(t: &'a [u8]) -> impl Fn(&'b [u8]) -> IResult<&'b [u8], &'b [u8]> { | |
move |i:&'b [u8]| { | |
tag!(i, t) | |
} | |
} | |
fn value<I, O1, O2, F>(input: I, f: F, o: O2) -> IResult<I, O2> | |
where F: Fn(I) -> IResult<I, O1> { | |
f(input).map(|(i, _)| (i, o)) | |
} | |
/****************************/ | |
fn parser(input: &[u8]) -> IResult<&[u8], u32> { | |
or(input, &[&first, &second]) | |
} | |
/// Returns `true` for every byte except the double quote (`"`) and the
/// backslash (`\`).
pub fn is_string_character(c: u8) -> bool {
    //FIXME: should validate unicode character
    match c {
        b'"' | b'\\' => false,
        _ => true,
    }
}
/// Returns `true` when `c` is a space, tab, carriage return or line feed.
pub fn is_space(c: u8) -> bool {
    match c {
        b' ' | b'\t' | b'\r' | b'\n' => true,
        _ => false,
    }
}
//named!(sp, take_while!(is_space)); | |
fn sp(input: &[u8]) -> IResult<&[u8], &[u8]> { | |
take_while(input, is_space) | |
} | |
fn sp2(input: &[u8]) -> IResult<&[u8], &[u8]> { | |
let chars = b" \t\r\n"; | |
take_while(input, |c| chars.contains(&c)) | |
} | |
//named!(float<f64>, flat_map!(recognize_float, parse_to!(f64))); | |
fn float<'a>(i: &'a [u8]) -> IResult<&'a [u8], f64> { | |
let second = |i: &'a [u8]| { | |
match i.parse_to() { | |
Some(o) => Ok((i.slice(i.input_len()..), o)), | |
None => Err(Err::Error(error_position!(i, ErrorKind::ParseTo))) | |
} | |
}; | |
flat_map(i, recognize_float, second) | |
} | |
#[derive(Debug, PartialEq)] | |
pub enum JsonValue<'a> { | |
Str(&'a str), | |
Boolean(bool), | |
Num(f64), | |
Array(Vec<JsonValue<'a>>), | |
Object(HashMap<&'a str, JsonValue<'a>>), | |
} | |
use std::str; | |
fn parse_str(input: &[u8]) -> IResult<&[u8], &str> { | |
map_res!(input, | |
escaped!(take_while1!(is_string_character), '\\', one_of!("\"bfnrt\\")), | |
str::from_utf8 | |
) | |
} | |
fn string(input: &[u8]) -> IResult<&[u8], &str> { | |
delimited(input, char('\"'), parse_str, char('\"')) | |
} | |
fn boolean(input: &[u8]) -> IResult<&[u8], bool> { | |
or(input, &[ | |
&|i| { value(i, tag(&b"false"[..]), false) }, | |
&|i| { value(i, tag(&b"true"[..]), true) } | |
]) | |
} | |
fn array(input: &[u8]) -> IResult<&[u8], Vec<JsonValue>> { | |
delimited(input, | |
char('['), | |
|i| separated_list(i, char(','), json_value), | |
char(']') | |
) | |
} | |
fn key_value(input: &[u8]) -> IResult<&[u8], (&str, JsonValue)> { | |
separated(input, string, char(':'), json_value) | |
} | |
fn hash_internal(input: &[u8]) -> nom::IResult<&[u8], HashMap<&str, JsonValue>> { | |
match key_value(input) { | |
Err(nom::Err::Error(_)) => Ok((input, HashMap::default())), | |
Err(e) => Err(e), | |
Ok((i, (key, value))) => { | |
let mut map = HashMap::default(); | |
map.insert(key, value); | |
let mut input = i; | |
loop { | |
match do_parse!(input, sp >> char!(',') >> kv: key_value >> (kv)) { | |
Err(nom::Err::Error(_)) => break Ok((input, map)), | |
Err(e) => break Err(e), | |
Ok((i, (key, value))) => { | |
map.insert(key, value); | |
input = i; | |
} | |
} | |
} | |
} | |
} | |
} | |
named!( | |
hash<HashMap<&str, JsonValue>>, | |
delimited!( | |
char!('{'), | |
return_error!( | |
hash_internal | |
), | |
preceded!(sp, char!('}')) | |
) | |
); | |
fn json_value(input: &[u8]) -> IResult<&[u8], JsonValue> { | |
or(input, &[ | |
&|i| { map(i, string, JsonValue::Str) }, | |
&|i| { map(i, float, JsonValue::Num) }, | |
&|i| { map(i, array, JsonValue::Array) }, | |
&|i| { map(i, hash, JsonValue::Object) }, | |
&|i| { map(i, boolean, JsonValue::Boolean) }, | |
]) | |
} | |
fn root(input: &[u8]) -> IResult<&[u8], JsonValue> { | |
or(input, &[ | |
&|i| { map(i, array, JsonValue::Array) }, | |
&|i| { map(i, hash, JsonValue::Object) }, | |
]) | |
} | |
/* | |
named!( | |
value<JsonValue>, | |
preceded!(sp, alt!( | |
map!(string, JsonValue::Str) | | |
map!(float, JsonValue::Num) | | |
map!(array, JsonValue::Array) | | |
map!(hash, JsonValue::Object) | | |
map!(boolean, JsonValue::Boolean) | |
)) | |
); | |
named!( | |
root<JsonValue>, | |
delimited!( | |
call!(sp), | |
alt!( | |
map!(hash, JsonValue::Object) | | |
map!(array, JsonValue::Array) | |
), | |
not!(complete!(sp)) | |
) | |
); | |
*/ | |
fn test_many(input: &[u8]) -> IResult<&[u8], Vec<&[u8]>> { | |
let mut counter = 0; | |
let res = many0(input, | |
|i| { | |
counter = counter + 1; | |
tag!(i, "abcd") | |
}); | |
println!("counter: {}", counter); | |
res | |
} | |
/// Checks that `test_many` splits three consecutive `abcd` tags and consumes
/// the whole input.
///
/// The counter printed by `test_many` can be inspected with
/// `cargo test -- --nocapture`; the test no longer has to fail for it to be
/// visible.
#[test]
fn manytest() {
    let expected = vec![&b"abcd"[..], &b"abcd"[..], &b"abcd"[..]];
    assert_eq!(test_many(&b"abcdabcdabcd"[..]), Ok((&b""[..], expected)));
}
fn basic(b: &mut Bencher) { | |
let data = b"{\"a\":42,\"b\":[\"x\",\"y\",12],\"c\":{\"hello\":\"world\"}}"; | |
b.bytes = data.len() as u64; | |
parse(b, &data[..]) | |
} | |
fn parse<'a>(b: &mut Bencher, buffer: &'a[u8]) { | |
assert!(root(buffer).is_ok()); | |
b.iter(|| { | |
let mut buf = black_box(buffer); | |
match root(buf) { | |
Ok((i, o)) => { | |
return o; | |
} | |
Err(err) => { | |
if let &nom::Err::Error(nom::Context::Code(ref i, ref e)) = &err { | |
panic!("got err {:?} at:\n{}", e, i.to_hex(16)); | |
} else { | |
panic!("got err: {:?}", err) | |
} | |
}, | |
} | |
}); | |
} | |
benchmark_group!(json, basic); | |
benchmark_main!(json); |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment