I have some early benchmark results for our work on a high performance NATS server in Go.
Quick Summary:
We can process ~2M msgs/sec through the system, and the ingress and egress are fairly well balanced.
The basics of the architecture are intelligent buffering and IO calls, fast hashing algorithms and subject distributor/routing, and a zero-allocation hand-written protocol parser.
In addition, I used quite a bit of inlining to avoid function overhead, no use of defer, and little to no object allocation within the fast path. I will share more details and the code at a future date.
============================
2012 MacBook Air 11" i7 2GHz
OSX - Mountain Lion
Go version go1.0.3
============================
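Note: The MB/s figures in these benchmarks count the bytes of each complete PUB protocol message (the "PUB <subject> <size>" line, the payload, and both CRLF terminators), which is the value the driver passes to b.SetBytes.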
~/gnatsd> go test --bench="." --run="zzz"
PASS
Benchmark____PubNoPayload 5000000 560 ns/op 19.64 MB/s
Benchmark___PubMinPayload 5000000 593 ns/op 20.23 MB/s
Benchmark__PubTinyPayload 5000000 603 ns/op 24.85 MB/s
Benchmark_PubSmallPayload 5000000 668 ns/op 37.38 MB/s
Benchmark___PubMedPayload 2000000 812 ns/op 70.17 MB/s
Benchmark_PubLargePayload 200000 10787 ns/op 223.96 MB/s
Benchmark__________PubSub 1000000 1056 ns/op
Benchmark__PubSubTwoConns 1000000 1056 ns/op
Benchmark__PubTwoQueueSub 1000000 2182 ns/op
Benchmark_PubFourQueueSub 1000000 2822 ns/op
ok github.com/apcera/gnatsd/test 29.113s
// This is a snippet of the benchmark driver code.
// flushConnection writes a PING to c and waits up to 50ms for the reply,
// which it reads into buf. The benchmark is aborted via b.Fatalf if the
// write, the deadline setup or the read fails, or if the reply is not exactly
// six bytes beginning with 'P' (the shape of "PONG\r\n").
//
// buf is scratch space supplied by the caller; it must be able to hold at
// least six bytes.
func flushConnection(b *testing.B, c net.Conn, buf []byte) {
	if _, err := c.Write([]byte("PING\r\n")); err != nil {
		b.Fatalf("Failed write of PING: %v\n", err)
	}
	if err := c.SetReadDeadline(time.Now().Add(50 * time.Millisecond)); err != nil {
		b.Fatalf("Failed to set read deadline: %v\n", err)
	}
	n, err := c.Read(buf)
	if err != nil {
		b.Fatalf("Failed read: %v\n", err)
	}
	// Either condition alone means the reply is not a PONG, so they are
	// joined with ||. Checking n first also keeps buf[0] from being indexed
	// when nothing was read.
	if n != 6 || buf[0] != 'P' {
		// Report only the bytes actually received, not the whole scratch buffer.
		b.Fatalf("Failed read of PONG: %s\n", buf[:n])
	}
}
// benchPub times b.N publishes of payload to subject over a single client
// connection to a freshly started server on PERF_PORT.
//
// Setup (server start, connect handshake, building the PUB message) and
// teardown happen with the timer stopped. The timed region covers the
// buffered writes, the final flush, and one PING/PONG round trip through
// flushConnection. b.SetBytes receives the length of the whole PUB message
// (command line, payload and CRLFs), so the reported MB/s includes protocol
// overhead.
func benchPub(b *testing.B, subject, payload string) {
	b.StopTimer()
	s = startServer(b, PERF_PORT, "")
	// Cleanup is deferred so the server and the connection are released even
	// when a Fatalf below ends the benchmark goroutine early. Deferred calls
	// run last-in first-out: the connection closes first, then the server
	// stops.
	defer s.stopServer()
	c := createClientConn(b, "localhost", PERF_PORT)
	defer c.Close()
	doDefaultConnect(b, c)
	bw := bufio.NewWriterSize(c, defaultSendBufSize)
	sendOp := []byte(fmt.Sprintf("PUB %s %d\r\n%s\r\n", subject, len(payload), payload))
	b.SetBytes(int64(len(sendOp)))
	buf := make([]byte, 1024)
	b.StartTimer()
	for i := 0; i < b.N; i++ {
		// Errors are not checked per write: a bufio.Writer remembers the
		// first write error and returns it from Flush, which is checked below.
		bw.Write(sendOp)
	}
	if err := bw.Flush(); err != nil {
		b.Fatalf("Failed flush of published messages: %v\n", err)
	}
	flushConnection(b, c, buf)
	b.StopTimer()
}
// Benchmark____PubNoPayload runs benchPub on subject "a" with an empty
// payload.
func Benchmark____PubNoPayload(b *testing.B) {
	const (
		subject = "a"
		payload = ""
	)
	benchPub(b, subject, payload)
}
// Benchmark___PubMinPayload runs benchPub on subject "a" with the one-byte
// payload "b".
func Benchmark___PubMinPayload(b *testing.B) {
	const (
		subject = "a"
		payload = "b"
	)
	benchPub(b, subject, payload)
}
// Benchmark__PubTinyPayload runs benchPub on subject "foo" with the two-byte
// payload "ok".
func Benchmark__PubTinyPayload(b *testing.B) {
	const (
		subject = "foo"
		payload = "ok"
	)
	benchPub(b, subject, payload)
}
// Benchmark_PubSmallPayload runs benchPub on subject "foo" with the 11-byte
// payload "hello world".
func Benchmark_PubSmallPayload(b *testing.B) {
	const (
		subject = "foo"
		payload = "hello world"
	)
	benchPub(b, subject, payload)
}
// Benchmark___PubMedPayload runs benchPub on subject "foo" with a 43-byte
// pangram as the payload.
func Benchmark___PubMedPayload(b *testing.B) {
	const (
		subject = "foo"
		payload = "The quick brown fox jumps over the lazy dog"
	)
	benchPub(b, subject, payload)
}
// Benchmark_PubLargePayload runs benchPub on subject "foo" with a payload of
// "hello world " repeated 200 times (2400 bytes). The payload is assembled
// with the timer stopped.
func Benchmark_PubLargePayload(b *testing.B) {
	const (
		chunk  = "hello world "
		copies = 200
	)
	b.StopTimer()
	// Fill a buffer sized for the final payload, then convert it once.
	raw := make([]byte, 0, copies*len(chunk))
	for n := 0; n < copies; n++ {
		raw = append(raw, chunk...)
	}
	b.StartTimer()
	benchPub(b, "foo", string(raw))
}
"In addition, I used quite a bit of inlining to avoid function overhead" - Isn't the compiler supposed to do that for you?