diff --git a/client/src/main/java/org/apache/uniffle/client/impl/ShuffleWriteClientImpl.java b/client/src/main/java/org/apache/uniffle/client/impl/ShuffleWriteClientImpl.java
index b078d42f80..f5434d63d1 100644
--- a/client/src/main/java/org/apache/uniffle/client/impl/ShuffleWriteClientImpl.java
+++ b/client/src/main/java/org/apache/uniffle/client/impl/ShuffleWriteClientImpl.java
@@ -393,7 +393,8 @@ public ShuffleAssignmentsInfo getShuffleAssignments(String appId, int shuffleId,
       }
     }
     String msg = "Error happened when getShuffleAssignments with appId[" + appId + "], shuffleId[" + shuffleId
-        + "], numMaps[" + partitionNum + "], partitionNumPerRange[" + partitionNumPerRange + "] to coordinator";
+        + "], numMaps[" + partitionNum + "], partitionNumPerRange[" + partitionNumPerRange + "] to coordinator. "
+        + "Error message: " + response.getMessage();
     throwExceptionIfNecessary(response, msg);
 
     return new ShuffleAssignmentsInfo(response.getPartitionToServers(), response.getServerToPartitionRanges());
diff --git a/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorGrpcService.java b/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorGrpcService.java
index b3bbf728cf..ce14458e39 100644
--- a/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorGrpcService.java
+++ b/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorGrpcService.java
@@ -123,8 +123,15 @@ public void getShuffleAssignments(
       logAssignmentResult(appId, shuffleId, pra);
       responseObserver.onNext(response);
     } catch (Exception e) {
-      LOG.error(e.getMessage());
-      response = GetShuffleAssignmentsResponse.newBuilder().setStatus(StatusCode.INTERNAL_ERROR).build();
+      LOG.error("Errors on getting shuffle assignments for app: {}, shuffleId: {}, partitionNum: {}, "
+          + "partitionNumPerRange: {}, replica: {}, requiredTags: {}",
+          appId, shuffleId, partitionNum, partitionNumPerRange, replica, requiredTags, e);
+      response = GetShuffleAssignmentsResponse
+          .newBuilder()
+          .setStatus(StatusCode.INTERNAL_ERROR)
+          // Protobuf string setters reject null, and an exception may carry no message.
+          .setRetMsg(e.getMessage() == null ? e.toString() : e.getMessage())
+          .build();
       responseObserver.onNext(response);
     } finally {
       responseObserver.onCompleted();
diff --git a/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java b/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java
index 05fb64ba6f..1c8bafd12f 100644
--- a/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java
+++ b/coordinator/src/main/java/org/apache/uniffle/coordinator/CoordinatorServer.java
@@ -49,8 +49,12 @@ public class CoordinatorServer {
 
   public CoordinatorServer(CoordinatorConf coordinatorConf) throws Exception {
     this.coordinatorConf = coordinatorConf;
-    initialization();
-
+    try {
+      initialization();
+    } catch (Exception e) {
+      LOG.error("Errors on initializing coordinator server.", e);
+      throw e;
+    }
   }
 
   public static void main(String[] args) throws Exception {
diff --git a/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/CoordinatorGrpcClient.java b/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/CoordinatorGrpcClient.java
index 41b2a86f57..dc1fa47fe9 100644
--- a/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/CoordinatorGrpcClient.java
+++ b/internal-client/src/main/java/org/apache/uniffle/client/impl/grpc/CoordinatorGrpcClient.java
@@ -238,7 +238,7 @@ public RssGetShuffleAssignmentsResponse getShuffleAssignments(RssGetShuffleAssig
         response = new RssGetShuffleAssignmentsResponse(ResponseStatusCode.TIMEOUT);
         break;
       default:
-        response = new RssGetShuffleAssignmentsResponse(ResponseStatusCode.INTERNAL_ERROR);
+        response = new RssGetShuffleAssignmentsResponse(ResponseStatusCode.INTERNAL_ERROR, rpcResponse.getRetMsg());
     }
 
     return response;
diff --git a/internal-client/src/main/java/org/apache/uniffle/client/response/RssGetShuffleAssignmentsResponse.java b/internal-client/src/main/java/org/apache/uniffle/client/response/RssGetShuffleAssignmentsResponse.java
index 605ec79731..f916a85b43 100644
--- a/internal-client/src/main/java/org/apache/uniffle/client/response/RssGetShuffleAssignmentsResponse.java
+++ b/internal-client/src/main/java/org/apache/uniffle/client/response/RssGetShuffleAssignmentsResponse.java
@@ -32,6 +32,10 @@ public RssGetShuffleAssignmentsResponse(ResponseStatusCode statusCode) {
     super(statusCode);
   }
 
+  public RssGetShuffleAssignmentsResponse(ResponseStatusCode statusCode, String message) {
+    super(statusCode, message);
+  }
+
   public Map<Integer, List<ShuffleServerInfo>> getPartitionToServers() {
     return partitionToServers;
   }
diff --git a/proto/src/main/proto/Rss.proto b/proto/src/main/proto/Rss.proto
index 491316d87b..647430d1c2 100644
--- a/proto/src/main/proto/Rss.proto
+++ b/proto/src/main/proto/Rss.proto
@@ -307,6 +307,7 @@ message PartitionRangeAssignment {
 message GetShuffleAssignmentsResponse {
   StatusCode status = 1;
   repeated PartitionRangeAssignment assignments = 2;
+  string retMsg = 3;
 }
 
 message ReportShuffleClientOpRequest {
diff --git a/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java b/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
index 8ddf6fc5ff..0cae0cf530 100644
--- a/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
+++ b/server/src/main/java/org/apache/uniffle/server/ShuffleServer.java
@@ -67,7 +67,12 @@ public class ShuffleServer {
 
   public ShuffleServer(ShuffleServerConf shuffleServerConf) throws Exception {
     this.shuffleServerConf = shuffleServerConf;
-    initialization();
+    try {
+      initialization();
+    } catch (Exception e) {
+      LOG.error("Errors on initializing shuffle server.", e);
+      throw e;
+    }
   }
 
   /**