mirror of
https://github.com/SquidDev-CC/CC-Tweaked
synced 2025-01-28 09:54:47 +00:00
Rethink how computer timeouts are handled
Previously we would compute the current timeout flags every 128 instructions of the Lua machine. While computing these flags is _relatively_ cheap (just get the current time), it still all adds up. Instead we now set the timeout flags from the computer monitor/watchdog thread. This does mean that the monitor thread needs to wake up more often[^1] _if the queue is full_, otherwise we can sleep for 100ms as before. This does mean that pausing is a little less accurate (can technically take up 2*period instead). This isn't great, but in practice it seems fine - I've not noticed any playability issues. This offers a small (but measurable!) boost to computer performance. [^1]: We currently sleep for scaledPeriod, but we could choose to do less.
This commit is contained in:
parent
6322e72110
commit
65a7370db1
@ -13,6 +13,7 @@ import javax.annotation.Nullable;
|
||||
import java.util.TreeSet;
|
||||
import java.util.concurrent.ThreadFactory;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.concurrent.locks.Condition;
|
||||
import java.util.concurrent.locks.LockSupport;
|
||||
@ -49,11 +50,11 @@ import static dan200.computercraft.core.computer.TimeoutState.TIMEOUT;
|
||||
public final class ComputerThread
|
||||
{
|
||||
/**
|
||||
* How often the computer thread monitor should run, in milliseconds.
|
||||
* How often the computer thread monitor should run.
|
||||
*
|
||||
* @see Monitor
|
||||
*/
|
||||
private static final int MONITOR_WAKEUP = 100;
|
||||
private static final long MONITOR_WAKEUP = TimeUnit.MILLISECONDS.toNanos( 100 );
|
||||
|
||||
/**
|
||||
* The target latency between executing two tasks on a single machine.
|
||||
@ -76,6 +77,13 @@ public final class ComputerThread
|
||||
*/
|
||||
private static final long LATENCY_MAX_TASKS = DEFAULT_LATENCY / DEFAULT_MIN_PERIOD;
|
||||
|
||||
/**
|
||||
* Time difference between reporting crashed threads.
|
||||
*
|
||||
* @see TaskRunner#reportTimeout(ComputerExecutor, long)
|
||||
*/
|
||||
private static final long REPORT_DEBOUNCE = TimeUnit.SECONDS.toNanos( 1 );
|
||||
|
||||
/**
|
||||
* Lock used for modifications to the array of current threads.
|
||||
*/
|
||||
@ -102,6 +110,8 @@ public final class ComputerThread
|
||||
private static final ReentrantLock computerLock = new ReentrantLock();
|
||||
|
||||
private static final Condition hasWork = computerLock.newCondition();
|
||||
private static final AtomicInteger idleWorkers = new AtomicInteger( 0 );
|
||||
private static final Condition monitorWakeup = computerLock.newCondition();
|
||||
|
||||
/**
|
||||
* Active queues to execute.
|
||||
@ -135,7 +145,7 @@ public final class ComputerThread
|
||||
|
||||
if( runners == null )
|
||||
{
|
||||
// TODO: Change the runners length on config reloads
|
||||
// TODO: Update this on config reloads. Or possibly on world restarts?
|
||||
runners = new TaskRunner[ComputerCraft.computerThreads];
|
||||
|
||||
// latency and minPeriod are scaled by 1 + floor(log2(threads)). We can afford to execute tasks for
|
||||
@ -227,9 +237,14 @@ public final class ComputerThread
|
||||
|
||||
executor.virtualRuntime = Math.max( newRuntime, executor.virtualRuntime );
|
||||
|
||||
boolean wasBusy = isBusy();
|
||||
// Add to the queue, and signal the workers.
|
||||
computerQueue.add( executor );
|
||||
hasWork.signal();
|
||||
|
||||
// If we've transitioned into a busy state, notify the monitor. This will cause it to sleep for scaledPeriod
|
||||
// instead of the longer wakeup duration.
|
||||
if( !wasBusy && isBusy() ) monitorWakeup.signal();
|
||||
}
|
||||
finally
|
||||
{
|
||||
@ -346,6 +361,17 @@ public final class ComputerThread
|
||||
return !computerQueue.isEmpty();
|
||||
}
|
||||
|
||||
/**
|
||||
* Check if we have more work queued than we have capacity for. Effectively a more fine-grained version of
|
||||
* {@link #hasPendingWork()}.
|
||||
*
|
||||
* @return If the computer threads are busy.
|
||||
*/
|
||||
private static boolean isBusy()
|
||||
{
|
||||
return computerQueue.size() > idleWorkers.get();
|
||||
}
|
||||
|
||||
/**
|
||||
* Observes all currently active {@link TaskRunner}s and terminates their tasks once they have exceeded the hard
|
||||
* abort limit.
|
||||
@ -357,76 +383,93 @@ public final class ComputerThread
|
||||
@Override
|
||||
public void run()
|
||||
{
|
||||
try
|
||||
while( true )
|
||||
{
|
||||
while( true )
|
||||
computerLock.lock();
|
||||
try
|
||||
{
|
||||
Thread.sleep( MONITOR_WAKEUP );
|
||||
// If we've got more work than we have capacity for it, then we'll need to pause a task soon, so
|
||||
// sleep for a single pause duration. Otherwise we only need to wake up to set the soft/hard abort
|
||||
// flags, which are far less granular.
|
||||
monitorWakeup.awaitNanos( isBusy() ? scaledPeriod() : MONITOR_WAKEUP );
|
||||
}
|
||||
catch( InterruptedException e )
|
||||
{
|
||||
ComputerCraft.log.error( "Monitor thread interrupted. Computers may behave very badly!", e );
|
||||
break;
|
||||
}
|
||||
finally
|
||||
{
|
||||
computerLock.unlock();
|
||||
}
|
||||
|
||||
TaskRunner[] currentRunners = ComputerThread.runners;
|
||||
if( currentRunners != null )
|
||||
checkRunners();
|
||||
}
|
||||
}
|
||||
|
||||
private static void checkRunners()
|
||||
{
|
||||
TaskRunner[] currentRunners = ComputerThread.runners;
|
||||
if( currentRunners == null ) return;
|
||||
|
||||
for( int i = 0; i < currentRunners.length; i++ )
|
||||
{
|
||||
TaskRunner runner = currentRunners[i];
|
||||
// If we've no runner, skip.
|
||||
if( runner == null || runner.owner == null || !runner.owner.isAlive() )
|
||||
{
|
||||
if( !running ) continue;
|
||||
|
||||
// Mark the old runner as dead and start a new one.
|
||||
ComputerCraft.log.warn( "Previous runner ({}) has crashed, restarting!",
|
||||
runner != null && runner.owner != null ? runner.owner.getName() : runner );
|
||||
if( runner != null ) runner.running = false;
|
||||
runnerFactory.newThread( runners[i] = new TaskRunner() ).start();
|
||||
}
|
||||
|
||||
// If the runner has no work, skip
|
||||
ComputerExecutor executor = runner.currentExecutor.get();
|
||||
if( executor == null ) continue;
|
||||
|
||||
// Refresh the timeout state. Will set the pause/soft timeout flags as appropriate.
|
||||
executor.timeout.refresh();
|
||||
|
||||
// If we're still within normal execution times (TIMEOUT) or soft abort (ABORT_TIMEOUT),
|
||||
// then we can let the Lua machine do its work.
|
||||
long afterStart = executor.timeout.nanoCumulative();
|
||||
long afterHardAbort = afterStart - TIMEOUT - ABORT_TIMEOUT;
|
||||
if( afterHardAbort < 0 ) continue;
|
||||
|
||||
// Set the hard abort flag.
|
||||
executor.timeout.hardAbort();
|
||||
executor.abort();
|
||||
|
||||
if( afterHardAbort >= ABORT_TIMEOUT * 2 )
|
||||
{
|
||||
// If we've hard aborted and interrupted, and we're still not dead, then mark the runner
|
||||
// as dead, finish off the task, and spawn a new runner.
|
||||
runner.reportTimeout( executor, afterStart );
|
||||
runner.running = false;
|
||||
runner.owner.interrupt();
|
||||
|
||||
ComputerExecutor thisExecutor = runner.currentExecutor.getAndSet( null );
|
||||
if( thisExecutor != null ) afterWork( runner, executor );
|
||||
|
||||
synchronized( threadLock )
|
||||
{
|
||||
for( int i = 0; i < currentRunners.length; i++ )
|
||||
if( running && runners.length > i && runners[i] == runner )
|
||||
{
|
||||
TaskRunner runner = currentRunners[i];
|
||||
// If we've no runner, skip.
|
||||
if( runner == null || runner.owner == null || !runner.owner.isAlive() )
|
||||
{
|
||||
if( !running ) continue;
|
||||
|
||||
// Mark the old runner as dead and start a new one.
|
||||
ComputerCraft.log.warn( "Previous runner ({}) has crashed, restarting!",
|
||||
runner != null && runner.owner != null ? runner.owner.getName() : runner );
|
||||
if( runner != null ) runner.running = false;
|
||||
runnerFactory.newThread( runners[i] = new TaskRunner() ).start();
|
||||
}
|
||||
|
||||
// If the runner has no work, skip
|
||||
ComputerExecutor executor = runner.currentExecutor.get();
|
||||
if( executor == null ) continue;
|
||||
|
||||
// If we're still within normal execution times (TIMEOUT) or soft abort (ABORT_TIMEOUT),
|
||||
// then we can let the Lua machine do its work.
|
||||
long afterStart = executor.timeout.nanoCumulative();
|
||||
long afterHardAbort = afterStart - TIMEOUT - ABORT_TIMEOUT;
|
||||
if( afterHardAbort < 0 ) continue;
|
||||
|
||||
// Set the hard abort flag.
|
||||
executor.timeout.hardAbort();
|
||||
executor.abort();
|
||||
|
||||
if( afterHardAbort >= ABORT_TIMEOUT * 2 )
|
||||
{
|
||||
// If we've hard aborted and interrupted, and we're still not dead, then mark the runner
|
||||
// as dead, finish off the task, and spawn a new runner.
|
||||
timeoutTask( executor, runner.owner, afterStart );
|
||||
runner.running = false;
|
||||
runner.owner.interrupt();
|
||||
|
||||
ComputerExecutor thisExecutor = runner.currentExecutor.getAndSet( null );
|
||||
if( thisExecutor != null ) afterWork( runner, executor );
|
||||
|
||||
synchronized( threadLock )
|
||||
{
|
||||
if( running && runners.length > i && runners[i] == runner )
|
||||
{
|
||||
runnerFactory.newThread( currentRunners[i] = new TaskRunner() ).start();
|
||||
}
|
||||
}
|
||||
}
|
||||
else if( afterHardAbort >= ABORT_TIMEOUT )
|
||||
{
|
||||
// If we've hard aborted but we're still not dead, dump the stack trace and interrupt
|
||||
// the task.
|
||||
timeoutTask( executor, runner.owner, afterStart );
|
||||
runner.owner.interrupt();
|
||||
}
|
||||
runnerFactory.newThread( currentRunners[i] = new TaskRunner() ).start();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
catch( InterruptedException ignored )
|
||||
{
|
||||
else if( afterHardAbort >= ABORT_TIMEOUT )
|
||||
{
|
||||
// If we've hard aborted but we're still not dead, dump the stack trace and interrupt
|
||||
// the task.
|
||||
runner.reportTimeout( executor, afterStart );
|
||||
runner.owner.interrupt();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -441,6 +484,7 @@ public final class ComputerThread
|
||||
private static final class TaskRunner implements Runnable
|
||||
{
|
||||
Thread owner;
|
||||
long lastReport = Long.MIN_VALUE;
|
||||
volatile boolean running = true;
|
||||
|
||||
final AtomicReference<ComputerExecutor> currentExecutor = new AtomicReference<>();
|
||||
@ -460,6 +504,7 @@ public final class ComputerThread
|
||||
computerLock.lockInterruptibly();
|
||||
try
|
||||
{
|
||||
idleWorkers.incrementAndGet();
|
||||
while( computerQueue.isEmpty() ) hasWork.await();
|
||||
executor = computerQueue.pollFirst();
|
||||
assert executor != null : "hasWork should ensure we never receive null work";
|
||||
@ -467,6 +512,7 @@ public final class ComputerThread
|
||||
finally
|
||||
{
|
||||
computerLock.unlock();
|
||||
idleWorkers.decrementAndGet();
|
||||
}
|
||||
}
|
||||
catch( InterruptedException ignored )
|
||||
@ -516,27 +562,32 @@ public final class ComputerThread
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void timeoutTask( ComputerExecutor executor, Thread thread, long time )
|
||||
{
|
||||
if( !ComputerCraft.logComputerErrors ) return;
|
||||
|
||||
StringBuilder builder = new StringBuilder()
|
||||
.append( "Terminating computer #" ).append( executor.getComputer().getID() )
|
||||
.append( " due to timeout (running for " ).append( time * 1e-9 )
|
||||
.append( " seconds). This is NOT a bug, but may mean a computer is misbehaving. " )
|
||||
.append( thread.getName() )
|
||||
.append( " is currently " )
|
||||
.append( thread.getState() );
|
||||
Object blocking = LockSupport.getBlocker( thread );
|
||||
if( blocking != null ) builder.append( "\n on " ).append( blocking );
|
||||
|
||||
for( StackTraceElement element : thread.getStackTrace() )
|
||||
private void reportTimeout( ComputerExecutor executor, long time )
|
||||
{
|
||||
builder.append( "\n at " ).append( element );
|
||||
}
|
||||
if( !ComputerCraft.logComputerErrors ) return;
|
||||
|
||||
ComputerCraft.log.warn( builder.toString() );
|
||||
// Attempt to debounce stack trace reporting, limiting ourselves to one every second.
|
||||
long now = System.nanoTime();
|
||||
if( lastReport != Long.MIN_VALUE && now - lastReport - REPORT_DEBOUNCE <= 0 ) return;
|
||||
lastReport = now;
|
||||
|
||||
StringBuilder builder = new StringBuilder()
|
||||
.append( "Terminating computer #" ).append( executor.getComputer().getID() )
|
||||
.append( " due to timeout (running for " ).append( time * 1e-9 )
|
||||
.append( " seconds). This is NOT a bug, but may mean a computer is misbehaving. " )
|
||||
.append( owner.getName() )
|
||||
.append( " is currently " )
|
||||
.append( owner.getState() );
|
||||
Object blocking = LockSupport.getBlocker( owner );
|
||||
if( blocking != null ) builder.append( "\n on " ).append( blocking );
|
||||
|
||||
for( StackTraceElement element : owner.getStackTrace() )
|
||||
{
|
||||
builder.append( "\n at " ).append( element );
|
||||
}
|
||||
|
||||
ComputerCraft.log.warn( builder.toString() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -86,7 +86,7 @@ public final class TimeoutState
|
||||
/**
|
||||
* Recompute the {@link #isSoftAborted()} and {@link #isPaused()} flags.
|
||||
*/
|
||||
public void refresh()
|
||||
public synchronized void refresh()
|
||||
{
|
||||
// Important: The weird arithmetic here is important, as nanoTime may return negative values, and so we
|
||||
// need to handle overflow.
|
||||
@ -153,7 +153,7 @@ public final class TimeoutState
|
||||
*
|
||||
* @see #nanoCumulative()
|
||||
*/
|
||||
void pauseTimer()
|
||||
synchronized void pauseTimer()
|
||||
{
|
||||
// We set the cumulative time to difference between current time and "nominal start time".
|
||||
cumulativeElapsed = System.nanoTime() - cumulativeStart;
|
||||
@ -163,7 +163,7 @@ public final class TimeoutState
|
||||
/**
|
||||
* Resets the cumulative time and resets the abort flags.
|
||||
*/
|
||||
void stopTimer()
|
||||
synchronized void stopTimer()
|
||||
{
|
||||
cumulativeElapsed = 0;
|
||||
paused = softAbort = hardAbort = false;
|
||||
|
@ -452,24 +452,9 @@ public class CobaltLuaMachine implements ILuaMachine
|
||||
// We check our current pause/abort state every 128 instructions.
|
||||
if( (count = (count + 1) & 127) == 0 )
|
||||
{
|
||||
// If we've been hard aborted or closed then abort.
|
||||
if( timeout.isHardAborted() || state == null ) throw HardAbortError.INSTANCE;
|
||||
|
||||
timeout.refresh();
|
||||
if( timeout.isPaused() )
|
||||
{
|
||||
// Preserve the current state
|
||||
isPaused = true;
|
||||
oldInHook = ds.inhook;
|
||||
oldFlags = di.flags;
|
||||
|
||||
// Suspend the state. This will probably throw, but we need to handle the case where it won't.
|
||||
di.flags |= FLAG_HOOKYIELD | FLAG_HOOKED;
|
||||
LuaThread.suspend( ds.getLuaState() );
|
||||
resetPaused( ds, di );
|
||||
}
|
||||
|
||||
handleSoftAbort();
|
||||
if( timeout.isPaused() ) handlePause( ds, di );
|
||||
if( timeout.isSoftAborted() ) handleSoftAbort();
|
||||
}
|
||||
|
||||
super.onInstruction( ds, di, pc );
|
||||
@ -478,13 +463,10 @@ public class CobaltLuaMachine implements ILuaMachine
|
||||
@Override
|
||||
public void poll() throws LuaError
|
||||
{
|
||||
// If we've been hard aborted or closed then abort.
|
||||
LuaState state = CobaltLuaMachine.this.state;
|
||||
if( timeout.isHardAborted() || state == null ) throw HardAbortError.INSTANCE;
|
||||
|
||||
timeout.refresh();
|
||||
if( timeout.isPaused() ) LuaThread.suspendBlocking( state );
|
||||
handleSoftAbort();
|
||||
if( timeout.isSoftAborted() ) handleSoftAbort();
|
||||
}
|
||||
|
||||
private void resetPaused( DebugState ds, DebugFrame di )
|
||||
@ -498,11 +480,24 @@ public class CobaltLuaMachine implements ILuaMachine
|
||||
private void handleSoftAbort() throws LuaError
|
||||
{
|
||||
// If we already thrown our soft abort error then don't do it again.
|
||||
if( !timeout.isSoftAborted() || thrownSoftAbort ) return;
|
||||
if( thrownSoftAbort ) return;
|
||||
|
||||
thrownSoftAbort = true;
|
||||
throw new LuaError( TimeoutState.ABORT_MESSAGE );
|
||||
}
|
||||
|
||||
private void handlePause( DebugState ds, DebugFrame di ) throws LuaError, UnwindThrowable
|
||||
{
|
||||
// Preserve the current state
|
||||
isPaused = true;
|
||||
oldInHook = ds.inhook;
|
||||
oldFlags = di.flags;
|
||||
|
||||
// Suspend the state. This will probably throw, but we need to handle the case where it won't.
|
||||
di.flags |= FLAG_HOOKYIELD | FLAG_HOOKED;
|
||||
LuaThread.suspend( ds.getLuaState() );
|
||||
resetPaused( ds, di );
|
||||
}
|
||||
}
|
||||
|
||||
private static final class HardAbortError extends Error
|
||||
|
@ -33,10 +33,7 @@ public class ComputerThreadTest
|
||||
FakeComputerManager.enqueue( computer, timeout -> {
|
||||
assertFalse( timeout.isSoftAborted(), "Should not start soft-aborted" );
|
||||
|
||||
long delay = ConcurrentHelpers.waitUntil( () -> {
|
||||
timeout.refresh();
|
||||
return timeout.isSoftAborted();
|
||||
} );
|
||||
long delay = ConcurrentHelpers.waitUntil( timeout::isSoftAborted );
|
||||
assertThat( "Should be soft aborted", delay * 1e-9, closeTo( 7, 0.5 ) );
|
||||
ComputerCraft.log.info( "Slept for {}", delay );
|
||||
|
||||
@ -69,10 +66,7 @@ public class ComputerThreadTest
|
||||
{
|
||||
Computer computer = FakeComputerManager.create();
|
||||
FakeComputerManager.enqueue( computer, timeout -> {
|
||||
boolean didPause = ConcurrentHelpers.waitUntil( () -> {
|
||||
timeout.refresh();
|
||||
return timeout.isPaused();
|
||||
}, 5, TimeUnit.SECONDS );
|
||||
boolean didPause = ConcurrentHelpers.waitUntil( timeout::isPaused, 5, TimeUnit.SECONDS );
|
||||
assertFalse( didPause, "Machine shouldn't have paused within 5s" );
|
||||
|
||||
computer.shutdown();
|
||||
@ -90,11 +84,8 @@ public class ComputerThreadTest
|
||||
long budget = ComputerThread.scaledPeriod();
|
||||
assertEquals( budget, TimeUnit.MILLISECONDS.toNanos( 25 ), "Budget should be 25ms" );
|
||||
|
||||
long delay = ConcurrentHelpers.waitUntil( () -> {
|
||||
timeout.refresh();
|
||||
return timeout.isPaused();
|
||||
} );
|
||||
assertThat( "Paused within 25ms", delay * 1e-9, closeTo( 0.025, 0.01 ) );
|
||||
long delay = ConcurrentHelpers.waitUntil( timeout::isPaused );
|
||||
assertThat( "Paused within 25ms", delay * 1e-9, closeTo( 0.03, 0.015 ) );
|
||||
|
||||
computer.shutdown();
|
||||
return MachineResult.OK;
|
||||
|
Loading…
Reference in New Issue
Block a user