Skip to content

Instantly share code, notes, and snippets.

@iwasaki-kenta
Last active November 28, 2019 15:30
Show Gist options
  • Save iwasaki-kenta/22f12d3d8d41048799e3ff678768563e to your computer and use it in GitHub Desktop.
Save iwasaki-kenta/22f12d3d8d41048799e3ff678768563e to your computer and use it in GitHub Desktop.
Cuckoo Filter. Assume content is pre-hashed with a 256-bit hash function.
import {Bucket, CuckooFilter} from "../src/cuckoo";
import {hash} from "../src/crypto";
import {assert} from "chai";
import {SmartBuffer} from "smart-buffer";
describe('CuckooFilter', () => {
it('sets and gets properly', () => {
const filter = new CuckooFilter(4, 100, 500);
for (let i = 0; i < 100; i++) {
const id = hash(Buffer.of(i));
assert.isTrue(filter.set(id));
assert.isTrue(filter.contains(id));
}
let evicted: number = 0;
for (let i = 0; i < 100; i++) {
const id = hash(Buffer.of(i));
if (!filter.contains(id)) {
evicted++;
}
}
assert.isBelow(evicted, 50);
const reconstructed = CuckooFilter.unmarshal(SmartBuffer.fromBuffer(filter.marshal()));
it('marshals and unmarshals properly', () => assert.deepStrictEqual(filter, reconstructed));
let count: number = 0;
for (let i = 0; i < 100; i++) {
const id = hash(Buffer.of(i));
if (!reconstructed.contains(id)) {
count++;
}
}
assert.equal(evicted, count);
})
});
import {toBigIntBE} from 'bigint-buffer';
import {Deserializable, hash, Serializable, statics} from '@perlin/noise';
import {SmartBuffer} from 'smart-buffer';
const EMPTY_FINGERPRINT = Buffer.alloc(32);
@statics<Deserializable>()
export class Bucket implements Serializable {
public constructor(
private readonly capacity: number,
private readonly entries: Buffer[] = [...new Array(capacity)].map(
() => undefined
)
) {}
get full() {
return this.entries.filter(stored => stored).length === this.capacity;
}
get empty() {
return this.entries.length === 0;
}
public static unmarshal(buf: SmartBuffer, capacityPerBucket = 100): Bucket {
return new Bucket(
capacityPerBucket,
[...new Array(capacityPerBucket)].map(() =>
buf.readUInt8() === 1 ? buf.readBuffer(32) : undefined
)
);
}
set(id: Buffer): boolean {
if (this.full) return false;
const index = this.entries.findIndex(stored => !stored);
this.entries[index] = id;
return true;
}
contains(id: Buffer): boolean {
return this.entries.some(
stored => stored && Buffer.compare(stored, id) === 0
);
}
delete(id: Buffer): boolean {
if (this.empty) return false;
const index = this.entries.findIndex(
stored => Buffer.compare(stored, id) === 0
);
if (index === -1) return false;
this.entries[index] = undefined;
}
swap(id: Buffer): Buffer {
const index = Math.floor(Math.random() * this.entries.length);
[id, this.entries[index]] = [this.entries[index], id];
return id;
}
marshal(): Buffer {
const w = new SmartBuffer();
this.entries.forEach(stored =>
w.writeBuffer(
stored ? Buffer.concat([Buffer.of(1), stored]) : Buffer.of(0)
)
);
return w.toBuffer();
}
}
@statics<Deserializable>()
export class CuckooFilter implements Serializable {
private readonly buckets: Bucket[];
private size = 0;
public constructor(
public readonly numBuckets: number = 100,
public readonly capacityPerBucket: number = 8,
public readonly maxEvictionKicks: number = 500,
buckets: Bucket[] = [...new Array(numBuckets)].map(
() => new Bucket(capacityPerBucket)
)
) {
this.buckets = buckets;
}
public static unmarshal(
buf: SmartBuffer,
maxEvictionKicks = 500
): CuckooFilter {
const numBuckets = buf.readUInt16BE();
const capacityPerBucket = buf.readUInt8();
const size = buf.readUInt32BE();
const filter = new CuckooFilter(
numBuckets,
capacityPerBucket,
maxEvictionKicks,
[...new Array(numBuckets)].map(() =>
Bucket.unmarshal(
SmartBuffer.fromBuffer(buf.readBuffer(buf.readUInt32BE())),
capacityPerBucket
)
)
);
filter.size = size;
return filter;
}
public set(id: Buffer): boolean {
if (this.contains(id)) return false;
const [first, second, firstIndex, secondIndex] = this.twoBuckets(id);
if (first.set(id) || second.set(id)) {
this.size++;
return true;
}
for (
let i = 0,
index = Math.floor(Math.random() * 2) === 1 ? firstIndex : secondIndex;
i < this.maxEvictionKicks;
i++
) {
const bucket = this.buckets[index];
const swapped = bucket.swap(id) || EMPTY_FINGERPRINT;
index =
(index ^
Number(BigInt(toBigIntBE(hash(swapped))) % BigInt(this.numBuckets))) %
this.numBuckets;
if (bucket.set(swapped)) {
this.size++;
return true;
}
}
return false;
}
public contains(id: Buffer): boolean {
if (this.size === 0) return false;
const [first, second] = this.twoBuckets(id);
return first.contains(id) || second.contains(id);
}
public delete(id: Buffer): boolean {
if (this.size === 0) return false;
const [first, second] = this.twoBuckets(id);
if (first.delete(id) || second.delete(id)) {
this.size--;
return true;
}
return false;
}
public marshal(): Buffer {
const w = new SmartBuffer();
w.writeUInt16BE(this.numBuckets);
w.writeUInt8(this.capacityPerBucket);
w.writeUInt32BE(this.size);
this.buckets.forEach(bucket => {
const data = bucket.marshal();
w.writeUInt32BE(data.byteLength);
w.writeBuffer(data);
});
return w.toBuffer();
}
private twoBuckets(id: Buffer): [Bucket, Bucket, number, number] {
const [firstIndex, secondIndex] = this.twoIndices(id);
return [
this.buckets[firstIndex],
this.buckets[secondIndex],
firstIndex,
secondIndex
];
}
private twoIndices(id: Buffer): [number, number] {
const first = Number(BigInt(toBigIntBE(id)) % BigInt(this.numBuckets));
const second =
(first ^ Number(BigInt(toBigIntBE(hash(id))) % BigInt(this.numBuckets))) %
this.numBuckets;
return [first, second];
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment