Skip to content

Instantly share code, notes, and snippets.

@dantin
Last active March 11, 2016 10:00
Show Gist options
  • Save dantin/ba5228cae47aa8df1a22 to your computer and use it in GitHub Desktop.
Save dantin/ba5228cae47aa8df1a22 to your computer and use it in GitHub Desktop.
PutMerge sample Hadoop example
/**
*
* Copies all files in a local directory to HDFS, merging them into a single HDFS file.
*
*/
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
/**
 * Command-line tool that concatenates the files found directly inside a local
 * directory into one output file on the default (configured) file system.
 *
 * <p>Usage: {@code PutMerge <local input dir> <output file>}
 */
public class PutMerge {

    /** Size in bytes of the buffer used to copy data between streams. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Appends the content of each file in the local input directory to the output file.
     * Sub-directories of the input directory are skipped.
     *
     * @param args {@code args[0]} is the local input directory,
     *             {@code args[1]} is the path of the output file to create
     * @throws IllegalArgumentException if fewer than two arguments are supplied
     * @throws IOException if listing, reading or writing fails; the error is propagated
     *                     so the process terminates with a failure status instead of
     *                     silently reporting success
     */
    public static void main(String[] args) throws IOException {
        if (args == null || args.length < 2) {
            throw new IllegalArgumentException(
                    "Usage: PutMerge <local input dir> <output file>");
        }

        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        FileSystem local = FileSystem.getLocal(conf);

        Path inputDir = new Path(args[0]); // Specify input directory
        Path hdfsFile = new Path(args[1]); // Specify output file

        // List the inputs before creating the output, so a bad input directory
        // does not leave an empty output file behind.
        FileStatus[] inputFiles = local.listStatus(inputDir);

        // One buffer is reused for every file.
        byte[] buffer = new byte[BUFFER_SIZE];

        // try-with-resources guarantees the streams are closed even when a copy fails.
        try (FSDataOutputStream out = hdfs.create(hdfsFile)) {
            for (FileStatus inputFile : inputFiles) {
                if (inputFile.isDirectory()) {
                    // A directory cannot be opened as a stream; skip it rather than abort.
                    continue;
                }
                Path inputPath = inputFile.getPath();
                System.out.println(inputPath.getName());

                try (FSDataInputStream in = local.open(inputPath)) {
                    int bytesRead;
                    while ((bytesRead = in.read(buffer)) > 0) {
                        out.write(buffer, 0, bytesRead);
                    }
                }
            }
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment