Created
August 3, 2015 20:19
-
-
Save ewhitebloom/388e36db459949df90fa to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package edu.hu.bigdata; | |
import java.io.BufferedReader; | |
import java.io.FileReader; | |
import java.util.*; | |
import org.bson.Document; | |
import org.bson.json.JsonParseException; | |
import com.mongodb.MongoClient; | |
import com.mongodb.client.MongoCollection; | |
import com.mongodb.client.MongoDatabase; | |
public class MongoDBInterface { | |
public static void main(String[] args) throws Exception { | |
MongoDBInterface myInterface = new MongoDBInterface(); | |
MongoCollection care_providers = myInterface | |
.connectToDatabaseCollection("careport_development_mongo", | |
"care_providers"); | |
String csvPath = "/Users/eric.whitebloom/Desktop/NPPES_Data_Dissemination_April_2015/npidata_20050523-20150412.csv"; | |
try (BufferedReader br = new BufferedReader(new FileReader(csvPath))) { | |
String line; | |
String[] relevantCodes = { "314000000X", "251E00000X", | |
"282E00000X", "283X00000X" }; | |
long start = System.currentTimeMillis(); | |
while ((line = br.readLine()) != null) { | |
lineExtractor(line, relevantCodes, myInterface, care_providers); | |
} | |
long stop = System.currentTimeMillis(); | |
long seconds = (stop - start) / 1000; | |
System.out.println("Runtime(s): " + seconds); | |
} | |
}; | |
public MongoCollection<Document> connectToDatabaseCollection(String db, | |
String myCollection) { | |
MongoClient client = new MongoClient(); | |
MongoDatabase development_database = client.getDatabase(db); | |
return development_database.getCollection(myCollection); | |
} | |
public static void lineExtractor(String line, String[] relevantCodes, | |
MongoDBInterface myInterface, MongoCollection myCollection) { | |
String[] cells = line.split(","); | |
if (cells[1].compareTo("\"2\"") == 0) { | |
boolean relevantRow = false; | |
for (int i = 47; i < 58; i++) { | |
if (Arrays.asList(relevantCodes).contains( | |
cells[i].replace("\"", ""))) { | |
relevantRow = true; | |
break; | |
} | |
} | |
if (relevantRow) { | |
try { | |
String address = new String(); | |
for (int i = 20; i < 27; i++) { | |
if (!cells[i].trim().isEmpty()) | |
address += (cells[i] + ", "); | |
} | |
address = address.replace("\"", ""); | |
cells[0] = cells[0].replace("\"", ""); | |
cells[4] = cells[4].replace("\'", "").replace("\"", ""); | |
String documentString = "{'npi':'" + cells[0] | |
+ "','name':'" + cells[4] + "','address' :'" | |
+ address + "'}"; | |
Document newFacility = Document.parse(documentString); | |
myInterface.insertIntoCollection(myCollection, newFacility); | |
} catch (JsonParseException e) { | |
System.out.println(cells[0] + " " + cells[4]); | |
} | |
} | |
} | |
} | |
public void insertIntoCollection(MongoCollection<Document> collection, | |
Document document) { | |
collection.insertOne(document); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment