Created
August 15, 2017 14:27
-
-
Save satendrakumar/0214ba32e0e12ae548232b8566418cd1 to your computer and use it in GitHub Desktop.
Convert excel file into csv file
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import org.apache.http.annotation.NotThreadSafe | |
import org.apache.poi.ss.usermodel._ | |
import scala.collection.JavaConversions._ | |
import scala.util.control.NonFatal | |
object IXLS2CSV { | |
private val df = new DataFormatter(true) | |
@NotThreadSafe | |
def convert(xlsPath: String): List[String] = { | |
val file = new File(xlsPath) | |
val parentFolder = file.getParent | |
val fileName = file.getName.split("\\.")(0) | |
val workbook = WorkbookFactory.create(file) | |
val formulaEvaluator: FormulaEvaluator = workbook.getCreationHelper().createFormulaEvaluator() | |
val outputFilePaths = workbook.zipWithIndex.map { case (sheet, index) => | |
val outputPath = Option(parentFolder).fold(s"""$fileName-$index.csv""")(folder => s"""$folder/$fileName-$index.csv""") | |
convert(sheet, outputPath, formulaEvaluator) | |
}.toList | |
workbook.close() | |
outputFilePaths | |
} | |
private def convert(sheet: Sheet, outputPath: String, formulaEvaluator: FormulaEvaluator): String = { | |
var headersLength = 0 | |
var isHeadersWritten = false | |
val csvWriter = new CSVWriter(outputPath) | |
println("Start CSV writing......... ") | |
sheet.foreach { case row => | |
if (Option(row).isDefined) { | |
try { | |
val rowData = (0 until row.getLastCellNum).toArray | |
.map { index => removeLineBreakingChars(df.formatCellValue(row.getCell(index, Row.CREATE_NULL_AS_BLANK), formulaEvaluator)) } | |
if (isHeadersWritten) { | |
if (headersLength == rowData.length) { | |
println(s"writing row number ${row.getRowNum}") | |
csvWriter.write(rowData) | |
} else { | |
println(s"Invalid row [Row no - ${row.getRowNum}] [Headers count = $headersLength and current row columns count = ${rowData.length}]") | |
} | |
} else { | |
csvWriter.writeHeaders(rowData) | |
isHeadersWritten = true | |
headersLength = rowData.length | |
} | |
} catch { | |
case NonFatal(th) => | |
th.printStackTrace() | |
} | |
} | |
} | |
csvWriter.close() | |
outputPath | |
} | |
private def removeLineBreakingChars(cell: String): String = cell.replaceAll("[\\t\\n\\r]", " ") | |
} | |
//CSVWriter | |
import java.io.{BufferedWriter, FileOutputStream, OutputStreamWriter} | |
import com.univocity.parsers.csv.{CsvWriter, CsvWriterSettings} | |
class CSVWriter(path: String) { | |
private val writer = new BufferedWriter((new OutputStreamWriter((new FileOutputStream(path))))) | |
private val csvSettings = new CsvWriterSettings() | |
csvSettings.getFormat.setDelimiter('\t') | |
csvSettings.getFormat.setQuoteEscape('\\') | |
csvSettings.setQuoteAllFields(true) | |
private val csvWriter = new CsvWriter(writer, csvSettings) | |
def writeHeaders(headers: Array[String]): Unit = csvWriter.writeHeaders(headers: _ *) | |
def write(row: Array[String]): Unit = | |
csvWriter.writeRow(row) | |
def close(): Unit = { | |
csvWriter.flush() | |
csvWriter.close() | |
} | |
} | |
//dependencies | |
"org.apache.poi" % "poi" % "3.15" | |
"org.apache.poi" % "poi-ooxml" % "3.15" | |
"com.univocity" % "univocity-parsers" % "2.4.1" | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment