Skip to content

Commit

Permalink
Release tensorboard port before tensorflow process starts regardless …
Browse files Browse the repository at this point in the history
…of port reuse (#461)

#456 introduced port reuse, relying on the fact that TensorFlow's gRPC server supports a corresponding port-reuse option. TensorBoard (TB) has no such option, so TB's port cannot be reused and must be released before the TensorFlow process starts, regardless of whether the user has enabled port reuse.

Co-authored-by: Cheng Ren <1428327+chengren311@users.noreply.github.com>
  • Loading branch information
burgerkingeater and burgerkingeater authored Aug 28, 2020
1 parent aa50e74 commit b6c1f0a
Showing 1 changed file with 18 additions and 11 deletions.
29 changes: 18 additions & 11 deletions tony-core/src/main/java/com/linkedin/tony/TaskExecutor.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,25 +87,28 @@ private void setupPorts() throws IOException, InterruptedException {
// With Estimator API, there is a separate lone "chief" task that runs TensorBoard.
// With the low-level distributed API, worker 0 runs TensorBoard.
if (isChief) {
this.tbPort = requireNonNull(allocatePort());
this.tbPort = requireNonNull(EphemeralPort.create());
this.registerTensorBoardUrl();
this.shellEnv.put(Constants.TB_PORT, String.valueOf(this.tbPort.getPort()));
LOG.info("Reserved tbPort: " + this.tbPort.getPort());
}
}


/**
 * Releases a single reserved port by closing it; does nothing when {@code port} is null.
 *
 * @param port the reserved port to close, or null if none was reserved
 * @throws Exception if {@code port.close()} fails
 */
private void releasePort(ServerPort port) throws Exception {
// A null port means nothing was reserved (e.g. tbPort is only assigned for the chief task).
if (port != null) {
port.close();
}
}

/**
* Releases the reserved ports if any. This method has to be invoked after ports are created.
*/
private void releasePorts() throws Exception {
try {
if (this.rpcPort != null) {
this.rpcPort.close();
}
this.releasePort(this.rpcPort);
} finally {
if (this.tbPort != null) {
this.tbPort.close();
}
this.releasePort(this.tbPort);
}
}

Expand Down Expand Up @@ -221,9 +224,13 @@ public static void main(String[] unused) throws Exception {
// If not reusing port, then reserve them up until before the underlying TF process is
// launched. See <a href="https://github.com/linkedin/TonY/issues/365">this issue</a> for
// details.
if (executor != null && !executor.isReusingPort()) {
LOG.info("Releasing reserved ports before launching tensorflow process.");
executor.releasePorts();
if (executor != null) {
LOG.info("Releasing reserved port(s) before launching tensorflow process.");
if (executor.isReusingPort()) {
executor.releasePort(executor.tbPort);
} else {
executor.releasePorts();
}
}

try {
Expand All @@ -237,7 +244,7 @@ public static void main(String[] unused) throws Exception {
} finally {
if (executor.isReusingPort()) {
LOG.info("Tensorflow process exited, releasing reserved ports.");
executor.releasePorts();
executor.releasePort(executor.rpcPort);
}
}
}
Expand Down

0 comments on commit b6c1f0a

Please sign in to comment.