Last active
January 3, 2020 10:39
-
-
Save niujiabenben/f79b438bdb9f8001fd9a784a086b43f7 to your computer and use it in GitHub Desktop.
lmdb使用示例
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#include <lmdb.h> | |
#include "common.h" | |
#include "util.h" | |
#include "timer.h" | |
int main(int argc, char *argv[]) { | |
google::InitGoogleLogging(argv[0]); | |
google::LogToStderr(); | |
const std::string lmdb_file("./data/sample_lmdb"); | |
const std::string single_dir("./data/single_dir"); | |
const std::string multiple_dir("./data/multiple_dir"); | |
const std::string sample_file("./data/samples.txt"); | |
std::ifstream infile(sample_file); | |
CHECK(infile.is_open()) << "Failed to open file: " << sample_file; | |
std::vector<std::string> samples; | |
std::vector<std::string> names; | |
std::string path; | |
std::string name; | |
while (infile >> path >> name) { | |
for (int i = 0; i < 10; ++i) { | |
samples.push_back(path); | |
names.push_back(std::to_string(i) + "_" + name); | |
} | |
} | |
infile.close(); | |
LOG(INFO) << "Total samples: " << samples.size(); | |
Timer timer; | |
MDB_env* env = NULL; | |
MDB_txn* txn = NULL; | |
MDB_dbi dbi; | |
MDB_val key; | |
MDB_val data; | |
CHECK_EQ(mdb_env_create(&env), 0); | |
CHECK_EQ(mdb_env_set_maxreaders(env, 1), 0); | |
CHECK_EQ(mdb_env_set_mapsize(env, 1073741824L * 1024L), 0); | |
int option = MDB_FIXEDMAP | MDB_NOLOCK; | |
CHECK_EQ(mdb_env_open(env, lmdb_file.c_str(), option, 0664), 0); | |
CHECK_EQ(mdb_txn_begin(env, NULL, 0, &txn), 0); | |
CHECK_EQ(mdb_dbi_open(txn, NULL, 0, &dbi), 0); | |
for (size_t i = 0; i < samples.size(); ++i) { | |
const auto content = ReadFile(samples[i], true); | |
CHECK(!content.empty()) << "Failed to read file: " << samples[i]; | |
timer.Start(); | |
key.mv_size = names[i].length(); | |
key.mv_data = (void*) names[i].data(); | |
data.mv_size = content.length(); | |
data.mv_data = (void*) content.data(); | |
CHECK_EQ(mdb_put(txn, dbi, &key, &data, 0), 0); | |
timer.Accumulate(); | |
if (i % 1000 == 0) { | |
CHECK_EQ(mdb_txn_commit(txn), 0); | |
CHECK_EQ(mdb_txn_begin(env, NULL, 0, &txn), 0); | |
CHECK_EQ(mdb_dbi_open(txn, NULL, 0, &dbi), 0); | |
LOG(INFO) << i << ": " << timer.AverageMilliSeconds(); | |
} | |
} | |
timer.Start(); | |
LOG(INFO) << "final: " << timer.MilliSeconds(); | |
mdb_dbi_close(env, dbi); | |
mdb_env_close(env); | |
return 0; | |
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#! /home/chenli/Documents/tools/anaconda3/envs/pytorch/bin/python | |
# coding: utf-8 | |
import os | |
import lmdb | |
import time | |
import logging | |
import init | |
import lib.util | |
import PIL.Image | |
def run_convert_to_lmdb(args):
    """Store every image listed in ``args.sample_file`` in an LMDB database.

    Args:
        args: Parsed command-line namespace. This function reads
            ``args.sample_file`` (text file whose first whitespace-separated
            field on each line is an image path relative to
            ``args.sample_root``), ``args.sample_root`` and ``args.lmdb_file``
            (location passed to ``lmdb.open``).

    Each image is converted to RGB and written under the key ``name.encode()``
    with ``image.tobytes()`` as the value. Only the raw pixel bytes are
    stored; the image width and height are not recorded in the database.
    All records are written in a single write transaction.
    """
    with open(args.sample_file, "r") as srcfile:
        # Skip blank lines instead of failing with IndexError on split()[0].
        samples = [
            fields[0] for fields in (line.split() for line in srcfile)
            if fields
        ]

    # map_size is given in bytes; the value below is 256 GiB.
    map_size = 256 * 1024 * 1024 * 1024
    env = lmdb.open(args.lmdb_file, map_size=map_size)
    try:
        with env.begin(write=True) as txn:
            start_time = time.time()
            for i, name in enumerate(samples):
                path = os.path.join(args.sample_root, name)
                # convert() returns an independent in-memory image, so the
                # source file handle can be released right away.
                with PIL.Image.open(path) as source:
                    image = source.convert("RGB")
                txn.put(key=name.encode(), value=image.tobytes())
                # Report progress at most once every 5 seconds.
                if time.time() - start_time > 5:
                    logging.info("Progress: {}/{}".format(i, len(samples)))
                    start_time = time.time()
    finally:
        # Close the environment even when reading or writing a sample fails.
        env.close()
# Command-line entry point: parse the options, set up logging and run the
# conversion.
if __name__ == "__main__":
    import argparse

    parser = argparse.ArgumentParser(
        description="Convert dataset to lmdb format.")
    parser.add_argument(
        "--lmdb_file", type=str,
        help="directory where the lmdb-formated database is saved.")
    # Project helper; presumably registers --sample_file and --sample_root
    # with the given defaults -- confirm against lib.util.
    lib.util.add_common_argument(parser, {
        "sample_file": "",
        "sample_root": ""
    })
    args = parser.parse_args()
    # Validate explicitly rather than with `assert`, which is removed when
    # Python runs with -O and reports no usage information on failure.
    if args.lmdb_file is None:
        parser.error("--lmdb_file is required")
    lib.util.initialize_logger()
    run_convert_to_lmdb(args)
    logging.info("Done!")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment