Last active
January 25, 2019 00:42
-
-
Save oschonrock/b76b1fbf4e01bfe6b30b8a42921bf38d to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?php | |
/** | |
* intention of this class is not to do anything too fancy | |
* it basically just runs one query, but it does so with ease and in a scalable and safe way | |
* ie it won't block, you don't have to worry about setting up a new connection | |
* it will reuse the connection and the prepared statement if you make multiple calls | |
* been tested and refined in high volume production for 1 year. | |
* | |
* to use it just customise the __contrsuct below with your DB details and call: | |
* | |
* Hibp::isPawned($password) | |
* | |
* which retruns a boolean | |
* | |
*/ | |
class Hibp extends PDO | |
{ | |
private static $con; | |
private static $stmt; | |
// see here for notes on how to load this DB from the source (delimited text file) | |
// https://gist.github.com/oschonrock/b76b1fbf4e01bfe6b30b8a42921bf38d | |
public function __construct(String $dsn = 'mysql:dbname=hibp;host=hostname', | |
String $username = 'username', String $passwd = 'password', | |
Array $options = []) | |
{ | |
return parent::__construct($dsn, $username, $passwd, $options); | |
} | |
// public only for testing | |
public static function getConnection() | |
{ | |
if (!self::$con instanceof self) | |
{ | |
self::$con = new self(); | |
// experimemt with getting int and float values from mysqlnd, and always use FETCH_OBJ, because it has the nicest syntax | |
self::$con->setAttribute(PDO::ATTR_STRINGIFY_FETCHES, false); | |
self::$con->setAttribute(PDO::ATTR_EMULATE_PREPARES, false); | |
self::$con->setAttribute(PDO::ATTR_DEFAULT_FETCH_MODE, PDO::FETCH_OBJ); | |
} | |
return self::$con; | |
} | |
private static function getStmt() | |
{ | |
if (!self::$stmt instanceof PDOStatement) | |
{ | |
$con = self::getConnection(); | |
self::$stmt = $con->prepare("SET STATEMENT max_statement_time=1 FOR | |
select SQL_NO_CACHE cnt from pw where hash = :sha1_password limit 1;"); | |
} | |
return self::$stmt; | |
} | |
public static function isPawned(String $plaintext_password) | |
{ | |
try | |
{ | |
$stmt = self::getStmt(); | |
} | |
catch (PDOException $e) | |
{ | |
return false; // if db not found, then assume pw is not pawned | |
} | |
if ($stmt === false) | |
{ | |
return false; // if stmt prepare fails, then assume pw is not pawned | |
} | |
$bind_ok = $stmt->bindValue(':sha1_password', sha1($plaintext_password, true), PDO::PARAM_STR); | |
if ($bind_ok === false) | |
{ | |
return false; // if stmt->bindValue fails, then assume pw is not pawned | |
} | |
$stmt->execute(); | |
if ($stmt === false) | |
{ | |
return false; // if stmt->execute fails, then assume pw is not pawned | |
} | |
$cnt = $stmt->fetchColumn(); | |
if ($cnt === false) | |
{ | |
return false; // if empty result, then pw is not pawned | |
} | |
// if count is 1 or more (true by definition really if we get here), then password has been pawned | |
// $cnt is already type int due to PDO::ATTR_STRINGIFY_FETCHES = false | |
return $cnt > 0; | |
} | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
-- spent some time finding a good table structure and a fastest way to load the "Have I Been pawned" https://haveibeenpwned.com/Passwords | |
-- list of compromised passwords into a MariaDB or MySQL database for fast, unlimited, local querying | |
drop table if exists pw; | |
-- store the sha1 hashes in a binary(20) column (most space efficient) | |
-- use that column as the PK | |
-- PK uses default BTREE type index, because HASH type is not available for disk based storage engines (only MEMORY/HEAP) | |
-- don't add PK until after LOAD DATE INFILE is complete, because that is about twice as fast | |
-- use Aria engine with no transactions for MariaDB, use MyIsam for MySQL, because these are faster than InnoDB and totally sufficient | |
create table pw ( | |
hash binary(20) not null, | |
cnt int(11) not null | |
) engine=aria TRANSACTIONAL=0 TABLE_CHECKSUM=0; | |
-- tell load data infile about the windows line-ends in the source file | |
-- specify ":" as the field separator | |
-- use local variables for each row and do some simple data transformation | |
-- trim(@cnt) gets rid of spaces at end of each line and prevents "truncation" warnings for the cnt column | |
-- HIBP is now much better about ensuring their cnt values do not have redundant spaces, but we leave the trim() as | |
-- it doesn't hurt. | |
-- unhex(@txthash) transforms the "hexdgit format" of the sha1 hashes into binary for the "hash" column | |
load data infile 'pwned-passwords-ordered-by-hash.txt' into table pw fields terminated by ':' LINES TERMINATED BY '\r\n' | |
(@txthash, @cnt) | |
set hash = unhex(@txthash), cnt = trim(@cnt); | |
-- takes about 30mins, on Core i5 with spinning disks (seems evenly CPU and I/O bound on this machine) | |
alter table pw add primary key(hash); | |
-- takes about 30mins on same machine (CPU bound on this hardware) | |
-- ensure you use the "sha1-..-ordered-by-hash" version of the 7z/txt file, as this is about 3x as fast (already nearly sorted) | |
-- querying the table with a plaintext password of "password" | |
-- using SQL_NO_CACHE to prove this is not cached, and it would probably be pointless | |
-- using SET STATEMENT max_statement_time=1 FOR syntax is mariadb specific, | |
-- but ensures we don't block during login for more than 1 second (eg during inserting a new copy of HIBP DB) | |
SET STATEMENT max_statement_time=1 FOR | |
select SQL_NO_CACHE cnt from pw where hash = unhex(sha1('password')); | |
+---------+ | |
| cnt | | |
+---------+ | |
| 3303003 | | |
+---------+ | |
1 row in set (0.01 sec) | |
-- Space usage Data 17.1 GiB | |
-- Index 11.3 GiB | |
-- Total 28.4 GiB | |
-- where to put this table? | |
-- Recommend to put it a separate database from your main application | |
-- this way it won't inflate your backups or copies for dev environments | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment