Last active
February 3, 2018 23:09
-
-
Save mondeja/a8215d34ee1c0c850d7b3f64ab6b2260 to your computer and use it in GitHub Desktop.
Comparison of parsing badly formatted JSON text with the json decoder and with the re module.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from re import sub | |
from timeit import timeit | |
from json import loads | |
from requests import get | |
def test(res):
    """Return True when ``res`` looks like a correctly typed ticker payload.

    The payload passes when it is a list whose first element is a dict that
    stores an integer under the ``"last_updated"`` key.

    Any other input -- including an empty list or a first entry without that
    key -- yields False instead of raising.
    """
    if not isinstance(res, list) or not res:
        return False
    first = res[0]
    if not isinstance(first, dict):
        return False
    last_updated = first.get("last_updated")
    # bool is a subclass of int; reject it so True/False never pass as a number.
    return isinstance(last_updated, int) and not isinstance(last_updated, bool)


# Despite its name this is a validator, not a test case; keep pytest-style
# collectors from trying to run it.
test.__test__ = False
# The endpoint returns its numbers as quoted strings (badly formatted JSON).
url = "https://api.coinmarketcap.com/v1/ticker/"

# Matches a JSON string holding nothing but a number: an optional single minus
# sign, digits and an optional fraction. The capture group is the bare number.
number_pattern = r'"(-?\d+(?:\.\d+)?)"'

# Parsing raw text response: strip the quotes around numbers before decoding.
raw_text = loads(sub(number_pattern, r"\1", get(url).text))

# Parsing JSON responses
raw_json_1 = get(url).json()
raw_json_2 = loads(get(url).text)

responses = [
    {"title": "Parsed with: re.sub()",
     "response": raw_text,
     "setup": "from re import sub;from json import loads;",
     # Built from number_pattern so the benchmarked snippet cannot drift from
     # the call above; raw strings avoid invalid escape sequences.
     "code": r"loads(sub(r'%s', r'\1', get(url).text))" % number_pattern},
    {"title": "Parsed with: requests.get(...).json()",
     "response": raw_json_1,
     "setup": "",
     "code": "get(url).json()"},
    {"title": "Parsed with: json.loads()",
     "response": raw_json_2,
     "setup": "from json import loads",
     "code": "loads(get(url).text)"}
]

print("\n============================================\n")
for res in responses:
    print("\t%s" % res["title"])
    print("\n>>> %s" % res["code"])
    # Only the first 200 characters of the parsed response are shown.
    print("%s ..." % str(res["response"])[0:200])

    result = test(res["response"])
    print("\n--------> Test %s! <--------" % ("passed" if result else "failed"))

    # NOTE: the timed snippet performs the HTTP request itself and runs once,
    # so the figure includes network latency, not just parsing time.
    bench = timeit(res["code"],
                   setup="url='%s';from requests import get;%s" % (url, res["setup"]),
                   number=1)
    print("Benchmark: %f" % bench)
    print("\n============================================\n")
""" Output:
============================================
Parsed with: re.sub()
>>> loads(sub(r'"(\d+(?:\.\d+)?)"', r'\1', get(url).text))
[{'id': 'bitcoin', 'name': 'Bitcoin', 'symbol': 'BTC', 'rank': 1, 'price_usd': 9308.99, 'price_btc': 1.0, '24h_volume_usd': 7456650000.0, 'market_cap_usd': 156795386599, 'available_supply': 16843437.0 ...
--------> Test passed! <--------
Benchmark: 0.151925
============================================
Parsed with: requests.get(...).json()
>>> get(url).json()
[{'id': 'bitcoin', 'name': 'Bitcoin', 'symbol': 'BTC', 'rank': '1', 'price_usd': '9308.99', 'price_btc': '1.0', '24h_volume_usd': '7456650000.0', 'market_cap_usd': '156795386599', 'available_supply': ...
--------> Test failed! <--------
Benchmark: 0.136815
============================================
Parsed with: json.loads()
>>> loads(get(url).text)
[{'id': 'bitcoin', 'name': 'Bitcoin', 'symbol': 'BTC', 'rank': '1', 'price_usd': '9308.99', 'price_btc': '1.0', '24h_volume_usd': '7456650000.0', 'market_cap_usd': '156795386599', 'available_supply': ...
--------> Test failed! <--------
Benchmark: 0.162506
============================================
"""
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment