先用bitmap来标记哪些field存在,每个field只占1bit,按1000个field来算只需要125个字节的开销,然后存在的field记录偏移,每个偏移4个字节。还可以采用inline value的方式进一步优化空间,低于4字节的field可以将value直接记录在偏移字段,不需要额外的偏移存储。
按照现在的物料数据进行测试:===>新数据格式空间大小=>inline优化后的空间大小(inline优化比率) 从数据可以看出,能够将单条物料控制在5K大小,inline的能够平均优化5%空间,1000万物料大概50G内存。
package com.weibo.hotwb.inspect;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import static java.nio.charset.StandardCharsets.UTF_8;
public class DataStruct {
// 内存结构 start
private final int maxBitMapSize;
private final long[] bitmaps;
private final int[] bitmapCounts;
private final List<byte[]> o_array; // 4byte data
private final List<Byte> v_array; // 1Byte flag
private final List<byte[]> largerBinary;
// 内存结构 end
private final transient ByteArrayOutputStream stream; // binary
private transient byte[] finalBinary;
public DataStruct(final int cap) {
maxBitMapSize = cap >> 6;
bitmaps = new long[maxBitMapSize];
bitmapCounts = new int[maxBitMapSize - 1];
o_array = new ArrayList<>();
v_array = new ArrayList<>();
largerBinary = new ArrayList<>();
stream = new ByteArrayOutputStream(1024 * 8);
}
public void appendWith4Bytes(final int f, final int d) {
int nthBitmap = f >> 6;
bitmaps[nthBitmap] |= 1L << (f & 0x3f);
v_array.add((byte) 0x0);
byte[] a = int2Bytes(d);
o_array.add(a);
largerBinary.add(new byte[]{0x0});
}
public void appendExcess4Bytes(final int f, final String d) {
int nthBitmap = f >> 6;
bitmaps[nthBitmap] |= 1L << (f & 0x3f);
v_array.add((byte) 0x1);
byte[] b = d.getBytes(UTF_8);
byte[] ll = int2Bytes(stream.size());
o_array.add(ll);
largerBinary.add(b);
stream.write(b, 0, b.length);
}
public byte[] get(final int f) {
int nthBitmap = f >> 6;
if ((bitmaps[nthBitmap] & (1L << f & 0x3f)) == 0L) {
return null;
}
int nthItem = Long.bitCount(bitmaps[nthBitmap] & (1L << (f & 0x3f) - 1));
if (nthBitmap > 0) {
nthItem += bitmapCounts[nthBitmap - 1];
}
byte[] bytes = o_array.get(nthItem);
int x = bytes2Int(bytes);
byte b = v_array.get(nthItem);
int i = 0;
return bytes;
}
public byte[] buildBinary() throws IOException {
// aflter flush
stream.flush();
int totalStreamSize = stream.size();
stream.close();
// fill bitmapCounts
int bitmaps_xp = 0;
for (int i = 0; i < maxBitMapSize; ++i) {
int c = Long.bitCount(bitmaps[i]);
if (i > 0) {
bitmapCounts[i - 1] = bitmaps_xp;
}
bitmaps_xp += c;
}
ByteArrayOutputStream outputStream = new ByteArrayOutputStream(1024 * 8);
outputStream.write(int2Bytes(bitmaps.length * 8 + bitmapCounts.length * 4 + v_array.size() + o_array.size() * 4 + totalStreamSize));
outputStream.write(int2Bytes(bitmaps_xp));
for (int i = 0; i < bitmaps.length; ++i) {
outputStream.write(long2Bytes(bitmaps[i]));
}
for (int i = 0; i < bitmapCounts.length; ++i) {
outputStream.write(int2Bytes(bitmapCounts[i]));
}
outputStream.flush();
final int offset = outputStream.size();
final int oArrayBytes = o_array.size() * 4;
for (int i = 0; i < o_array.size(); i++) {
if (v_array.get(i) == 0x1) {
int off = bytes2Int(o_array.get(i));
o_array.set(i, int2Bytes(offset + off + oArrayBytes));
}
}
for (int i = 0; i < o_array.size(); i++) {
outputStream.write(o_array.get(i), 0, 4);
}
if (totalStreamSize > 0) {
byte[] extra = stream.toByteArray();
outputStream.write(extra, 0, extra.length);
}
outputStream.flush();
finalBinary = outputStream.toByteArray();
return finalBinary;
}
// singed
static byte[] int2Bytes(int d) {
byte[] a = new byte[4];
a[0] = (byte) d;
a[1] = (byte) (d >> 8);
a[2] = (byte) (d >> 16);
a[3] = (byte) (d >> 24);
return a;
}
static int bytes2Int(byte[] d) {
int a = 0;
for (int i = 0; i < 4; i++) {
// signed byte to unsigned
int v = (d[i] & 0xff);
a |= v << (i * 8);
}
return a;
}
static byte[] long2Bytes(long d) {
byte[] a = new byte[8];
a[0] = (byte) d;
a[1] = (byte) (d >> 8);
a[2] = (byte) (d >> 16);
a[3] = (byte) (d >> 24);
a[4] = (byte) (d >> 32);
a[5] = (byte) (d >> 40);
a[6] = (byte) (d >> 48);
a[7] = (byte) (d >> 56);
return a;
}
public static void main(String[] args) throws IOException {
DataStruct dataStruct = new DataStruct(1024);
dataStruct.appendWith4Bytes(1, 4);
dataStruct.appendExcess4Bytes(2, "sssss");
byte[] out = dataStruct.buildBinary();
dataStruct.get(2);
int j = 0;
}
}