Skip to content

Commit

Permalink
queue-runner: release machine reservation while copying outputs
Browse files Browse the repository at this point in the history
This allows for better builder usage when the queue runner is busy. To
avoid running into uncontrollable imbalances between builder/queue
runner, we only release the machine reservation after the local
throttler has found a slot to start copying the outputs for that build.
  • Loading branch information
delroth authored and mweinelt committed Oct 19, 2024
1 parent 10282dc commit 1c951d9
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 9 deletions.
10 changes: 10 additions & 0 deletions src/hydra-queue-runner/build-remote.cc
Original file line number Diff line number Diff line change
Expand Up @@ -424,6 +424,7 @@ class SemaphoreReleaser {
};

void State::buildRemote(ref<Store> destStore,
MachineReservation::ptr & reservation,
::Machine::ptr machine, Step::ptr step,
const ServeProto::BuildOptions & buildOptions,
RemoteResult & result, std::shared_ptr<ActiveStep> activeStep,
Expand Down Expand Up @@ -569,6 +570,15 @@ void State::buildRemote(ref<Store> destStore,
}
SemaphoreReleaser releaser(&localWorkThrottler);

/* Once we've started copying outputs, release the machine reservation
* so further builds can happen. We do not release the machine earlier
* to avoid situations where the queue runner is bottlenecked on
* copying outputs and we end up building too many things that we
* haven't been able to allow copy slots for. */
assert(reservation.unique());
reservation = 0;
wakeDispatcher();

StorePathSet outputs;
for (auto & [_, realisation] : buildResult.builtOutputs)
outputs.insert(realisation.outPath);
Expand Down
18 changes: 10 additions & 8 deletions src/hydra-queue-runner/builder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,10 +46,12 @@ void State::builder(MachineReservation::ptr reservation)
}
}

/* Release the machine and wake up the dispatcher. */
assert(reservation.unique());
reservation = 0;
wakeDispatcher();
/* If the machine hasn't been released yet, release and wake up the dispatcher. */
if (reservation) {
assert(reservation.unique());
reservation = 0;
wakeDispatcher();
}

/* If there was a temporary failure, retry the step after an
exponentially increasing interval. */
Expand All @@ -72,11 +74,11 @@ void State::builder(MachineReservation::ptr reservation)


State::StepResult State::doBuildStep(nix::ref<Store> destStore,
MachineReservation::ptr reservation,
MachineReservation::ptr & reservation,
std::shared_ptr<ActiveStep> activeStep)
{
auto & step(reservation->step);
auto & machine(reservation->machine);
auto step(reservation->step);
auto machine(reservation->machine);

{
auto step_(step->state.lock());
Expand Down Expand Up @@ -211,7 +213,7 @@ State::StepResult State::doBuildStep(nix::ref<Store> destStore,

try {
/* FIXME: referring builds may have conflicting timeouts. */
buildRemote(destStore, machine, step, buildOptions, result, activeStep, updateStep, narMembers);
buildRemote(destStore, reservation, machine, step, buildOptions, result, activeStep, updateStep, narMembers);
} catch (Error & e) {
if (activeStep->state_.lock()->cancelled) {
printInfo("marking step %d of build %d as cancelled", stepNr, buildId);
Expand Down
3 changes: 2 additions & 1 deletion src/hydra-queue-runner/state.hh
Original file line number Diff line number Diff line change
Expand Up @@ -562,10 +562,11 @@ private:
retried. */
enum StepResult { sDone, sRetry, sMaybeCancelled };
StepResult doBuildStep(nix::ref<nix::Store> destStore,
MachineReservation::ptr reservation,
MachineReservation::ptr & reservation,
std::shared_ptr<ActiveStep> activeStep);

void buildRemote(nix::ref<nix::Store> destStore,
MachineReservation::ptr & reservation,
Machine::ptr machine, Step::ptr step,
const nix::ServeProto::BuildOptions & buildOptions,
RemoteResult & result, std::shared_ptr<ActiveStep> activeStep,
Expand Down

0 comments on commit 1c951d9

Please sign in to comment.