Add new API to upload CSV, table.yaml, and query into HDFS #48
base: dev
Changes from all commits
2b06bf8
d579b21
5084736
0a55ff0
1c90fd5
NativeDataWriter.java

@@ -66,15 +66,15 @@ public class NativeDataWriter implements DataWriter {
   private DataOutputStream out = null;

   @Override
-  public boolean Initialize() throws IOException {
+  public boolean Initialize(String... fileName) throws IOException {
     if (initalized) return true;
     this.userKeyIndex = tableSchema.getUserKeyField();
     // when there is no user key, using any field for the additional condition on chunk switch is ok.
     if (this.userKeyIndex == -1) this.userKeyIndex = 0;
     // cublet
     // create metaChunk instance, default offset to be 0, update offset when write later.
     this.metaChunk = MetaChunkWS.newMetaChunkWS(this.tableSchema, 0);
-    this.out = newCublet();
+    this.out = newCublet(fileName);
     // chunk
     this.tupleCount = 0;
     this.offset = 0;

@@ -137,8 +137,15 @@ private void finishCublet() throws IOException {
    * @return DataOutputStream
    * @throws IOException
    */
-  private DataOutputStream newCublet() throws IOException {
-    String fileName = Long.toHexString(System.currentTimeMillis()) + ".dz";
+  private DataOutputStream newCublet(String... fileNameParams) throws IOException {

Review comment: why do we need to give a file name?

+    // if file name is provided, then use it.
+    String fileName;
+    if (fileNameParams.length == 1){
+      fileName = fileNameParams[0];
+    }else{
+      fileName = Long.toHexString(System.currentTimeMillis()) + ".dz";
+    }
     System.out.println("[*] A new cublet "+ fileName + " is created!");
     File cublet = new File(outputDir, fileName);
     DataOutputStream out = new DataOutputStream(new FileOutputStream(cublet));
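The reviewer questions the optional file name here, and a later review comment asks for String[] rather than varargs. One middle ground, sketched below against the snippet above, is a pair of explicit overloads so existing callers keep the old zero-argument call; outputDir and the trailing header bookkeeping are assumed to be the untouched parts of NativeDataWriter.

// Sketch only: explicit overloads instead of a varargs file name.
private DataOutputStream newCublet() throws IOException {
  // default: keep the original timestamp-based cublet name
  return newCublet(Long.toHexString(System.currentTimeMillis()) + ".dz");
}

private DataOutputStream newCublet(String fileName) throws IOException {
  System.out.println("[*] A new cublet " + fileName + " is created!");
  File cublet = new File(outputDir, fileName);
  DataOutputStream out = new DataOutputStream(new FileOutputStream(cublet));
  // ... the rest of the original newCublet body (header bookkeeping) stays unchanged
  return out;
}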
LoadQuery.java

@@ -33,6 +33,10 @@ public class LoadQuery {
   private String outputPath;
   private String configPath;

+  // after writing dz, table file, records the path inside the server.
+  private String dzFilePath;

Review comment: Why do we need the dzFilePath?

+  private String TableFilePath;

Review comment: TableFilePath should be the same as the schemaPath.

   public boolean isValid() throws IOException {
     boolean f = true;
     if (dataFileType == "AVRO") f = isExist(configPath);
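A side note on the context line above rather than on the added fields: dataFileType == "AVRO" compares object references in Java, so the branch is only taken when the string happens to be interned. A minimal sketch of the intended check, keeping the rest of isValid() as in the original class:

// Sketch: value comparison instead of reference comparison for the file type.
public boolean isValid() throws IOException {
  boolean f = true;
  if ("AVRO".equals(dataFileType)) {   // null-safe value equality
    f = isExist(configPath);
  }
  // ... the remaining checks of the original method are unchanged
  return f;
}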
CoolLoader.java

@@ -40,12 +40,13 @@ public CoolLoader(DataLoaderConfig config){

   /**
    *
-   * @param dataSourceName output cube name. Need to be specified when loading from the repository
-   * @param schemaFileName path to the table.yaml
-   * @param dataFileName path to the data.csv
-   * @param cubeRepo the name of the output cube repository
+   * @param dataSourceName output cube name. Need to be specified when loading from the repository, eg, sogamo
+   * @param schemaFileName path to the table.yaml, eg. sogamo/table.yaml
+   * @param dataFileName path to the data.csv, eg. sogamo/test.csv
+   * @param cubeRepo the name of the output cube repository. eg. datasetSource

Review comment: It needs a @param for fileName

+   * @return dz file path, table.yaml path.
    */
-  public synchronized void load(String dataSourceName, String schemaFileName, String dataFileName, String cubeRepo) throws IOException{
+  public synchronized String[] load(String dataSourceName, String schemaFileName, String dataFileName, String cubeRepo, String... fileName) throws IOException{

     // check the existence of the data repository
     File root = new File(cubeRepo);

@@ -85,9 +86,19 @@ public synchronized void load(String dataSourceName, String schemaFileName, Stri
       System.out.println("[*] New version " + outputCubeVersionDir.getName() + " is created!");
     }
     DataLoader loader = DataLoader.builder(dataSourceName, schema, dataFile, outputCubeVersionDir, this.loaderConfig).build();
-    loader.load();
+    loader.load(fileName);
     // copy the table.yaml to new version folder
     Files.copy(schemaFile, new File(outputCubeVersionDir, "table.yaml"));

+    String dzPath;
+    String tablePath;
+    if (fileName.length == 1){
+      dzPath = dataSourceName+"/"+outputCubeVersionDir.getName()+"/"+fileName[0];
+      tablePath = dataSourceName+"/"+outputCubeVersionDir.getName()+"/table.yaml";
+      return new String[]{dzPath, tablePath};
+    }else{
+      return new String[]{};
+    }
   }
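To make the new return contract concrete, a caller could look roughly like the sketch below; the loaderConfig instance and the file names are assumptions for illustration. When an explicit .dz file name is passed, the method returns the cube-relative .dz and table.yaml paths, otherwise it returns an empty array.

// Hypothetical caller of the extended load(); names are illustrative only.
CoolLoader coolLoader = new CoolLoader(loaderConfig);
String[] paths = coolLoader.load("sogamo", "sogamo/table.yaml",
    "sogamo/test.csv", "datasetSource", "1a2b3c.dz");
if (paths.length == 2) {
  String dzPath = paths[0];     // e.g. sogamo/<version>/1a2b3c.dz
  String tablePath = paths[1];  // e.g. sogamo/<version>/table.yaml
}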
Main.java (new file)

@@ -0,0 +1,52 @@
package com.nus.cool.clientservice;

import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.impl.client.HttpClients;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URL;
import java.util.Properties;

public class Main {
  /**
   * Client package to send request to server. it will get broker's ip and send related execute request.
   * @param args query type,
   * @throws Exception Exception
   */
  public static void main(String[] args) throws Exception {

    if (args.length != 1) {
      System.err.println("Pass in query id (Example: q1)");

Review comment: It is hard coding.

      return;
    }

    CloseableHttpClient client = HttpClients.createDefault();
    String params;
    // todo, add more apis.
    if (args[0].equals("q1")) {
      params = "queryId=1&type=cohort";
    } else if (args[0].equals("q2")) {
      params = "queryId=2&type=iceberg";
    } else {
      System.err.println("Unrecognized query id");
      return;
    }
    String ip = "";
    try (InputStream input = new FileInputStream("conf/app.properties")) {
      Properties prop = new Properties();
      prop.load(input);
      ip = prop.getProperty("server.host");
    } catch (IOException ex) {
      ex.printStackTrace();
    }
    String request = "http://" + ip + ":9013/broker/execute?" + params;
    URL url = new URL(request);
    URI uri = new URI(url.getProtocol(), null, url.getHost(), url.getPort(), url.getPath(), url.getQuery(), null);
    HttpGet get = new HttpGet(uri);
    client.execute(get);
  }
}
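One more point a reviewer could raise on this file: the CloseableHttpClient and the response of client.execute(get) are never closed. A small sketch of the same call with try-with-resources, using the Apache HttpClient classes already imported above plus CloseableHttpResponse:

// Sketch: close the client and the response deterministically
// (needs org.apache.http.client.methods.CloseableHttpResponse).
try (CloseableHttpClient client = HttpClients.createDefault()) {
  HttpGet get = new HttpGet(uri);
  try (CloseableHttpResponse response = client.execute(get)) {
    System.out.println("[*] Broker replied: " + response.getStatusLine());
  }
}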
BrokerController.java

@@ -1,23 +1,99 @@
 package com.nus.cool.queryserver.handler;

 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.nus.cool.core.iceberg.query.IcebergQuery;
 import com.nus.cool.loader.LoadQuery;
 import com.nus.cool.queryserver.model.QueryInfo;
 import com.nus.cool.queryserver.model.QueryServerModel;
 import com.nus.cool.queryserver.singleton.HDFSConnection;
 import com.nus.cool.queryserver.singleton.QueryIndex;
 import com.nus.cool.queryserver.singleton.TaskQueue;
 import com.nus.cool.queryserver.utils.Util;
 import org.springframework.http.HttpHeaders;
 import org.springframework.http.MediaType;
 import org.springframework.http.ResponseEntity;
 import org.springframework.web.bind.annotation.*;
 import com.nus.cool.queryserver.model.Parameter;
 import org.springframework.web.multipart.MultipartFile;

 import java.io.File;
 import java.io.IOException;
 import java.net.URISyntaxException;
 import java.util.List;
 import java.util.Map;

 @RestController
 @RequestMapping("/broker")
 public class BrokerController {

-  @PostMapping(value = "/load-dfs")
-  public ResponseEntity<String> loadToDfs(){
+  /**
+   * Assume the csv file already at the server side. this
+   * Load the local CSV file and upload it to hdfs
+   * eg. input: '{"dataFileType": "CSV", "cubeName": "sogamo", "schemaPath": "sogamo/table.yaml",
+   * "dimPath": "sogamo/dim.csv", "dataPath": "sogamo/test.csv", "outputPath": "datasetSource"}'
+   * @param req request parsed from json.
+   * @return response
+   * @throws URISyntaxException exception
+   * @throws IOException exception
+   */
+  @PostMapping(value = "/load-data-to-hdfs",
+      produces = MediaType.APPLICATION_JSON_VALUE,
+      consumes = MediaType.APPLICATION_JSON_VALUE)
+  public ResponseEntity<String> loadDataToDfs(@RequestBody LoadQuery req) throws URISyntaxException, IOException {
+
+    Util.getTimeClock();
+
+    // file name of the .dz
+    String fileName = Long.toHexString(System.currentTimeMillis()) + ".dz";
+
+    QueryServerModel.loadCube(req, fileName);
+
+    // 1. connect to hdfs, get data Source Name, cohort or iceberg
+    HDFSConnection fs = HDFSConnection.getInstance();
+
+    String localPath1 = req.getOutputPath() + "/" + req.getDzFilePath();;
+    String dfsPath1 = "/cube/" + req.getDzFilePath();
+
+    fs.uploadToDfs(localPath1, dfsPath1);
+
+    String localPath2 = req.getOutputPath() + "/" + req.getTableFilePath();
+    String dfsPath2 = "/cube/" + req.getTableFilePath();
+    fs.uploadToDfs(localPath2, dfsPath2);
+
+    System.out.println("[*] Data and file loaded");
+    return null;
+  }
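loadDataToDfs currently ends with return null, so a successful upload produces an empty 200 response. If the caller should learn where the files landed, the method could end like the sketch below instead; the JSON field names are only an assumption:

// Sketch: report the uploaded HDFS locations instead of returning null
// (needs java.util.HashMap in addition to the existing imports).
Map<String, String> body = new HashMap<>();
body.put("dzFile", dfsPath1);
body.put("tableFile", dfsPath2);
return ResponseEntity.ok(new ObjectMapper().writeValueAsString(body));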
+  /**
+   * Receive query file from client, and store to local as temp_query.json, and then upload to hdfs.
+   * @param queryFile query file
+   * @return response
+   * @throws URISyntaxException exception
+   * @throws IOException exception
+   */
+  @PostMapping(value = "/load-query-to-hdfs",
+      produces = MediaType.APPLICATION_JSON_VALUE,
+      consumes = MediaType.MULTIPART_FORM_DATA_VALUE)
+  public ResponseEntity<String> loadQueryToDfs(@RequestParam("queryFile") MultipartFile queryFile) throws URISyntaxException, IOException {
+
+    // 1. connect to hdfs, get data Source Name, cohort or iceberg
+    HDFSConnection fs = HDFSConnection.getInstance();
+
+    Util.getTimeClock();
+    System.out.println("[*] This query is for iceberg query: " + queryFile);
+    String queryContent = new String(queryFile.getBytes());
+    ObjectMapper mapper = new ObjectMapper();
+    IcebergQuery q = mapper.readValue(queryContent, IcebergQuery.class);
+
+    try {
+      // Writing to a file
+      mapper.writeValue(new File("temp_query.json"), q );
+      String localPath3 = "temp_query.json";
+      String dfsPath3 = "/tmp/1/query.json";
+      fs.uploadToDfs(localPath3, dfsPath3);
+
+    } catch (IOException e) {
+      e.printStackTrace();
+    }
+    return null;
+  }
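For completeness, a client such as Main.java could call the new multipart endpoint as sketched below; the part name must match @RequestParam("queryFile"). The MultipartEntityBuilder class from the Apache httpmime module and the brokerHost variable are assumptions here, not part of the PR.

// Sketch: POST a query file to /broker/load-query-to-hdfs.
try (CloseableHttpClient client = HttpClients.createDefault()) {
  HttpPost post = new HttpPost("http://" + brokerHost + ":9013/broker/load-query-to-hdfs");
  HttpEntity entity = MultipartEntityBuilder.create()
      .addBinaryBody("queryFile", new File("query.json"),
          ContentType.APPLICATION_JSON, "query.json")
      .build();
  post.setEntity(entity);
  try (CloseableHttpResponse response = client.execute(post)) {
    System.out.println(response.getStatusLine());
  }
}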
Review comment: Please use String[] instead of the String...