Add “DRAINING” State to Buildserver

When running a set of buildservers behind a load balancer there is a
bunching effect that can happen when one buildserver winds up with a
large number of simultaneous builds while other buildservers are
idle. This happens when a buildserver becomes a little busy and as a
result runs slower than the other servers. This increases the likelihood
that it will get more work to do while it is still working on existing
jobs, which slows it even further, which results in more jobs until it
hits its maximum and rejects jobs (while the other buildservers are
idle!).

This change results in the buildserver returning an unhealthy status to
the load balancer (most load balancers can be configured to check a
buildserver's health periodically) once it exceeds 2/3 of its maximum
load. It then stays in this “DRAINING” state until the load becomes less
than 1/3 of the maximum, at which point the state becomes “UP” again.

Change-Id: Id6be52475151a0208000ced17fe1f2d3f15e7b94
parent 631b5bdc
...@@ -155,7 +155,17 @@ public class BuildServer { ...@@ -155,7 +155,17 @@ public class BuildServer {
private static String shutdownToken = null; private static String shutdownToken = null;
private enum ShutdownState { UP, SHUTTING, DOWN }; // ShutdownState: UP: We are up and running
// SHUTTING: We have been told to shutdown, with a time delay
// In this state we return bad health, but accept jobs
// DOWN: We return bad health and reject jobs
// DRAINING: We have reached > 2/3 of max permitted jobs
// We return bad health (but accept jobs) until
// the number of active jobs is < 1/3 of max
private enum ShutdownState { UP, SHUTTING, DOWN, DRAINING };
private static volatile boolean draining = false; // We have exceeded 2/3 max load, waiting for
// the load to become < 1/3 max load
@GET @GET
@Path("health") @Path("health")
...@@ -168,6 +178,9 @@ public class BuildServer { ...@@ -168,6 +178,9 @@ public class BuildServer {
} else if (shut == ShutdownState.DOWN) { } else if (shut == ShutdownState.DOWN) {
LOG.info("Healthcheck: DOWN"); LOG.info("Healthcheck: DOWN");
return Response.status(Response.Status.FORBIDDEN).type(MediaType.TEXT_PLAIN_TYPE).entity("Build Server is shutdown").build(); return Response.status(Response.Status.FORBIDDEN).type(MediaType.TEXT_PLAIN_TYPE).entity("Build Server is shutdown").build();
} else if (shut == ShutdownState.DRAINING) {
LOG.info("Healthcheck: DRAINING");
return Response.status(Response.Status.FORBIDDEN).type(MediaType.TEXT_PLAIN_TYPE).entity("Build Server is draining").build();
} else { } else {
LOG.info("Healthcheck: SHUTTING"); LOG.info("Healthcheck: SHUTTING");
return Response.status(Response.Status.FORBIDDEN).type(MediaType.TEXT_PLAIN_TYPE).entity("Build Server is shutting down").build(); return Response.status(Response.Status.FORBIDDEN).type(MediaType.TEXT_PLAIN_TYPE).entity("Build Server is shutting down").build();
...@@ -648,7 +661,26 @@ public class BuildServer { ...@@ -648,7 +661,26 @@ public class BuildServer {
private ShutdownState getShutdownState() { private ShutdownState getShutdownState() {
if (shuttingTime == 0) { if (shuttingTime == 0) {
int max = buildExecutor.getMaxActiveTasks();
if (max < 10) { // Only do this scheme if we are not unlimited
// (unlimited == 0) and allow more then 10 max builds
return ShutdownState.UP; return ShutdownState.UP;
}
int active = buildExecutor.getActiveTaskCount();
if (draining) {
if (active < max/3) {
draining = false;
}
} else {
if (active > max*2/3) {
draining = true;
}
}
if (draining) {
return ShutdownState.DRAINING;
} else {
return ShutdownState.UP;
}
} else if (System.currentTimeMillis() > shuttingTime) { } else if (System.currentTimeMillis() > shuttingTime) {
return ShutdownState.DOWN; return ShutdownState.DOWN;
} else { } else {
......
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment