|
|
@@ -18,6 +18,7 @@
|
|
|
|
|
|
package org.apache.ambari.view.hive20.resources.uploads;
|
|
|
|
|
|
+import com.google.common.annotations.VisibleForTesting;
|
|
|
import com.google.common.base.Optional;
|
|
|
import com.sun.jersey.core.header.FormDataContentDisposition;
|
|
|
import com.sun.jersey.multipart.FormDataParam;
|
|
|
@@ -46,6 +47,8 @@ import org.apache.ambari.view.hive20.resources.uploads.query.InsertFromQueryInpu
|
|
|
import org.apache.ambari.view.hive20.utils.ServiceFormattedException;
|
|
|
import org.apache.ambari.view.hive20.utils.SharedObjectsFactory;
|
|
|
import org.apache.ambari.view.utils.ambari.AmbariApi;
|
|
|
+import org.apache.commons.io.ByteOrderMark;
|
|
|
+import org.apache.commons.io.input.BOMInputStream;
|
|
|
import org.apache.commons.io.input.ReaderInputStream;
|
|
|
import org.apache.hadoop.fs.FSDataInputStream;
|
|
|
import org.apache.hadoop.fs.FSDataOutputStream;
|
|
|
@@ -195,6 +198,19 @@ public class UploadService extends BaseService {
|
|
|
}
|
|
|
}
|
|
|
|
|
|
+ private Reader getInputStreamReader(InputStream is) throws IOException {
|
|
|
+ BOMInputStream bomInputStream = new BOMInputStream(is,
|
|
|
+ ByteOrderMark.UTF_8, ByteOrderMark.UTF_16LE, ByteOrderMark.UTF_16BE,
|
|
|
+ ByteOrderMark.UTF_32LE, ByteOrderMark.UTF_32BE
|
|
|
+ );
|
|
|
+ if (bomInputStream.hasBOM()) {
|
|
|
+ String charSetName = bomInputStream.getBOMCharsetName();
|
|
|
+ return new InputStreamReader(bomInputStream, charSetName); // return with the encoded charset encoding.
|
|
|
+ } else {
|
|
|
+ return new InputStreamReader(bomInputStream); //return with default charset
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
private CSVParams getCsvParams(String csvDelimiter, String csvQuote, String csvEscape) {
|
|
|
char csvq = CSVParams.DEFAULT_QUOTE_CHAR;
|
|
|
char csvd = CSVParams.DEFAULT_DELIMITER_CHAR;
|
|
|
@@ -455,7 +471,8 @@ public class UploadService extends BaseService {
|
|
|
else return e.getMessage();
|
|
|
}
|
|
|
|
|
|
- private PreviewData generatePreview(Boolean isFirstRowHeader, String inputFileType, CSVParams csvParams, InputStream uploadedInputStream) throws Exception {
|
|
|
+ @VisibleForTesting
|
|
|
+ PreviewData generatePreview(Boolean isFirstRowHeader, String inputFileType, CSVParams csvParams, InputStream uploadedInputStream) throws Exception {
|
|
|
ParseOptions parseOptions = new ParseOptions();
|
|
|
parseOptions.setOption(ParseOptions.OPTIONS_FILE_TYPE, inputFileType);
|
|
|
if (inputFileType.equals(ParseOptions.InputFileType.CSV.toString())){
|
|
|
@@ -473,7 +490,8 @@ public class UploadService extends BaseService {
|
|
|
|
|
|
LOG.info("isFirstRowHeader : {}, inputFileType : {}", isFirstRowHeader, inputFileType);
|
|
|
|
|
|
- DataParser dataParser = new DataParser(new InputStreamReader(uploadedInputStream), parseOptions);
|
|
|
+ Reader reader = getInputStreamReader(uploadedInputStream);
|
|
|
+ DataParser dataParser = new DataParser(reader, parseOptions);
|
|
|
|
|
|
return dataParser.parsePreview();
|
|
|
}
|
|
|
@@ -519,7 +537,8 @@ public class UploadService extends BaseService {
|
|
|
parseOptions.setOption(ParseOptions.OPTIONS_CSV_QUOTE, csvParams.getCsvQuote());
|
|
|
}
|
|
|
|
|
|
- DataParser dataParser = new DataParser(new InputStreamReader(uploadedInputStream), parseOptions);
|
|
|
+ Reader reader = getInputStreamReader(uploadedInputStream);
|
|
|
+ DataParser dataParser = new DataParser(reader, parseOptions);
|
|
|
|
|
|
Reader csvReader = new TableDataReader(dataParser.iterator(), header, containsEndlines); // encode column values into HEX so that \n etc dont appear in the hive table data
|
|
|
String path = uploadIntoTable(csvReader, databaseName, tableName);
|