mirror of
https://github.com/SquidDev-CC/CC-Tweaked
synced 2025-01-28 18:04:47 +00:00
Attempt to fix race condition in ComputerThread
- Runners would set their active executor before starting resetting the time, meaning it would be judged as running and terminated. - Similarly, the cumulative time start was reset to 0, meaning the computer had been judged to run for an impossibly long time. - If a computer hit the terminate threshold, but not the hard abort one, then we'd print the stack trace of the terminated thread - we now do it before interrupting. There's still race conditions here when terminating a computer, but hopefully these changes will mean they never occur under normal operations (only when a computer has run for far too long).
This commit is contained in:
parent
e34e833d3d
commit
f7cb526793
@ -29,7 +29,7 @@ import java.util.ArrayDeque;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
/**
|
||||
@ -153,9 +153,12 @@ final class ComputerExecutor
|
||||
private IWritableMount rootMount;
|
||||
|
||||
/**
|
||||
* {@code true} when inside {@link #work()}. We use this to ensure we're only doing one bit of work at one time.
|
||||
* The thread the executor is running on. This is non-null when performing work. We use this to ensure we're only
|
||||
* doing one bit of work at one time.
|
||||
*
|
||||
* @see ComputerThread
|
||||
*/
|
||||
private final AtomicBoolean isExecuting = new AtomicBoolean( false );
|
||||
final AtomicReference<Thread> executingThread = new AtomicReference<>();
|
||||
|
||||
ComputerExecutor( Computer computer )
|
||||
{
|
||||
@ -536,84 +539,72 @@ final class ComputerExecutor
|
||||
*/
|
||||
void work() throws InterruptedException
|
||||
{
|
||||
if( isExecuting.getAndSet( true ) )
|
||||
if( interruptedEvent )
|
||||
{
|
||||
throw new IllegalStateException( "Multiple threads running on computer the same time" );
|
||||
interruptedEvent = false;
|
||||
if( machine != null )
|
||||
{
|
||||
resumeMachine( null, null );
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
try
|
||||
StateCommand command;
|
||||
Event event = null;
|
||||
synchronized( queueLock )
|
||||
{
|
||||
if( interruptedEvent )
|
||||
command = this.command;
|
||||
this.command = null;
|
||||
|
||||
// If we've no command, pull something from the event queue instead.
|
||||
if( command == null )
|
||||
{
|
||||
interruptedEvent = false;
|
||||
if( machine != null )
|
||||
if( !isOn )
|
||||
{
|
||||
resumeMachine( null, null );
|
||||
// We're not on and had no command, but we had work queued. This should never happen, so clear
|
||||
// the event queue just in case.
|
||||
eventQueue.clear();
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
StateCommand command;
|
||||
Event event = null;
|
||||
synchronized( queueLock )
|
||||
{
|
||||
command = this.command;
|
||||
this.command = null;
|
||||
|
||||
// If we've no command, pull something from the event queue instead.
|
||||
if( command == null )
|
||||
{
|
||||
if( !isOn )
|
||||
{
|
||||
// We're not on and had no command, but we had work queued. This should never happen, so clear
|
||||
// the event queue just in case.
|
||||
eventQueue.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
event = eventQueue.poll();
|
||||
}
|
||||
}
|
||||
|
||||
if( command != null )
|
||||
{
|
||||
switch( command )
|
||||
{
|
||||
case TURN_ON:
|
||||
if( isOn ) return;
|
||||
turnOn();
|
||||
break;
|
||||
|
||||
case SHUTDOWN:
|
||||
|
||||
if( !isOn ) return;
|
||||
computer.getTerminal().reset();
|
||||
shutdown();
|
||||
break;
|
||||
|
||||
case REBOOT:
|
||||
if( !isOn ) return;
|
||||
computer.getTerminal().reset();
|
||||
shutdown();
|
||||
|
||||
computer.turnOn();
|
||||
break;
|
||||
|
||||
case ABORT:
|
||||
if( !isOn ) return;
|
||||
displayFailure( "Error running computer", TimeoutState.ABORT_MESSAGE );
|
||||
shutdown();
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if( event != null )
|
||||
{
|
||||
resumeMachine( event.name, event.args );
|
||||
event = eventQueue.poll();
|
||||
}
|
||||
}
|
||||
finally
|
||||
|
||||
if( command != null )
|
||||
{
|
||||
isExecuting.set( false );
|
||||
switch( command )
|
||||
{
|
||||
case TURN_ON:
|
||||
if( isOn ) return;
|
||||
turnOn();
|
||||
break;
|
||||
|
||||
case SHUTDOWN:
|
||||
|
||||
if( !isOn ) return;
|
||||
computer.getTerminal().reset();
|
||||
shutdown();
|
||||
break;
|
||||
|
||||
case REBOOT:
|
||||
if( !isOn ) return;
|
||||
computer.getTerminal().reset();
|
||||
shutdown();
|
||||
|
||||
computer.turnOn();
|
||||
break;
|
||||
|
||||
case ABORT:
|
||||
if( !isOn ) return;
|
||||
displayFailure( "Error running computer", TimeoutState.ABORT_MESSAGE );
|
||||
shutdown();
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if( event != null )
|
||||
{
|
||||
resumeMachine( event.name, event.args );
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -260,10 +260,21 @@ public class ComputerThread
|
||||
/**
|
||||
* Re-add this task to the queue
|
||||
*
|
||||
* @param runner The runner this task was on.
|
||||
* @param executor The executor to requeue
|
||||
*/
|
||||
private static void afterWork( ComputerExecutor executor )
|
||||
private static void afterWork( TaskRunner runner, ComputerExecutor executor )
|
||||
{
|
||||
// Clear the executor's thread.
|
||||
Thread currentThread = executor.executingThread.getAndSet( null );
|
||||
if( currentThread != runner.owner )
|
||||
{
|
||||
ComputerCraft.log.error(
|
||||
"Expected computer #{} to be running on {}, but already running on {}. This is a SERIOUS bug, please report with your debug.log.",
|
||||
executor.getComputer().getID(), runner.owner.getName(), currentThread == null ? "nothing" : currentThread.getName()
|
||||
);
|
||||
}
|
||||
|
||||
computerLock.lock();
|
||||
try
|
||||
{
|
||||
@ -346,15 +357,23 @@ public class ComputerThread
|
||||
executor.timeout.hardAbort();
|
||||
executor.abort();
|
||||
|
||||
if( afterHardAbort >= ABORT_TIMEOUT * 2 )
|
||||
if( afterHardAbort >= ABORT_TIMEOUT )
|
||||
{
|
||||
// If we've hard aborted but we're still not dead, dump the stack trace and interrupt
|
||||
// the task.
|
||||
timeoutTask( executor, runner.owner, afterStart );
|
||||
runner.owner.interrupt();
|
||||
}
|
||||
else if( afterHardAbort >= ABORT_TIMEOUT * 2 )
|
||||
{
|
||||
// If we've hard aborted and interrupted, and we're still not dead, then mark the runner
|
||||
// as dead, finish off the task, and spawn a new runner.
|
||||
// Note, we'll do the actual interruption of the thread in the next block.
|
||||
timeoutTask( executor, runner.owner, afterStart );
|
||||
runner.running = false;
|
||||
runner.owner.interrupt();
|
||||
|
||||
ComputerExecutor thisExecutor = runner.currentExecutor.getAndSet( null );
|
||||
if( thisExecutor != null ) afterWork( executor );
|
||||
if( thisExecutor != null ) afterWork( runner, executor );
|
||||
|
||||
synchronized( threadLock )
|
||||
{
|
||||
@ -364,14 +383,6 @@ public class ComputerThread
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( afterHardAbort >= ABORT_TIMEOUT )
|
||||
{
|
||||
// If we've hard aborted but we're still not dead, dump the stack trace and interrupt
|
||||
// the task.
|
||||
timeoutTask( executor, runner.owner );
|
||||
runner.owner.interrupt();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -401,6 +412,7 @@ public class ComputerThread
|
||||
{
|
||||
owner = Thread.currentThread();
|
||||
|
||||
tasks:
|
||||
while( running && ComputerThread.running )
|
||||
{
|
||||
// Wait for an active queue to execute
|
||||
@ -426,11 +438,29 @@ public class ComputerThread
|
||||
continue;
|
||||
}
|
||||
|
||||
// Pull a task from this queue, and set what we're currently executing.
|
||||
// If we're trying to executing some task on this computer while someone else is doing work, something
|
||||
// is seriously wrong.
|
||||
while( !executor.executingThread.compareAndSet( null, owner ) )
|
||||
{
|
||||
Thread existing = executor.executingThread.get();
|
||||
if( existing != null )
|
||||
{
|
||||
ComputerCraft.log.error(
|
||||
"Trying to run computer #{} on thread {}, but already running on {}. This is a SERIOUS bug, please report with your debug.log.",
|
||||
executor.getComputer().getID(), owner.getName(), existing.getName()
|
||||
);
|
||||
continue tasks;
|
||||
}
|
||||
}
|
||||
|
||||
// Reset the timers
|
||||
executor.beforeWork();
|
||||
|
||||
// And then set the current executor. It's important to do it afterwards, as otherwise we introduce
|
||||
// race conditions with the monitor.
|
||||
currentExecutor.set( executor );
|
||||
|
||||
// Execute the task
|
||||
executor.beforeWork();
|
||||
try
|
||||
{
|
||||
executor.work();
|
||||
@ -442,19 +472,19 @@ public class ComputerThread
|
||||
finally
|
||||
{
|
||||
ComputerExecutor thisExecutor = currentExecutor.getAndSet( null );
|
||||
if( thisExecutor != null ) afterWork( executor );
|
||||
if( thisExecutor != null ) afterWork( this, executor );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void timeoutTask( ComputerExecutor executor, Thread thread )
|
||||
private static void timeoutTask( ComputerExecutor executor, Thread thread, long time )
|
||||
{
|
||||
if( !ComputerCraft.logPeripheralErrors ) return;
|
||||
|
||||
StringBuilder builder = new StringBuilder()
|
||||
.append( "Terminating computer #" ).append( executor.getComputer().getID() )
|
||||
.append( " due to timeout (running for " ).append( executor.timeout.nanoCumulative() * 1e-9 )
|
||||
.append( " due to timeout (running for " ).append( time * 1e-9 )
|
||||
.append( " seconds). This is NOT a bug, but may mean a computer is misbehaving. " )
|
||||
.append( thread.getName() )
|
||||
.append( " is currently " )
|
||||
|
@ -46,18 +46,34 @@ public final class TimeoutState
|
||||
private boolean softAbort;
|
||||
private volatile boolean hardAbort;
|
||||
|
||||
private long nanoCumulative;
|
||||
private long nanoCurrent;
|
||||
private long nanoDeadline;
|
||||
/**
|
||||
* When the cumulative time would have started had the whole event been processed in one go.
|
||||
*/
|
||||
private long cumulativeStart;
|
||||
|
||||
/**
|
||||
* How much cumulative time has elapsed. This is effectively {@code cumulativeStart - currentStart}.
|
||||
*/
|
||||
private long cumulativeElapsed;
|
||||
|
||||
/**
|
||||
* When this execution round started.
|
||||
*/
|
||||
private long currentStart;
|
||||
|
||||
/**
|
||||
* When this execution round should look potentially be paused.
|
||||
*/
|
||||
private long currentDeadline;
|
||||
|
||||
long nanoCumulative()
|
||||
{
|
||||
return System.nanoTime() - nanoCumulative;
|
||||
return System.nanoTime() - cumulativeStart;
|
||||
}
|
||||
|
||||
long nanoCurrent()
|
||||
{
|
||||
return System.nanoTime() - nanoCurrent;
|
||||
return System.nanoTime() - currentStart;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -66,8 +82,8 @@ public final class TimeoutState
|
||||
public void refresh()
|
||||
{
|
||||
long now = System.nanoTime();
|
||||
if( !paused ) paused = now >= nanoDeadline && ComputerThread.hasPendingWork();
|
||||
if( !softAbort ) softAbort = (now - nanoCumulative) >= TIMEOUT;
|
||||
if( !paused ) paused = now >= currentDeadline && ComputerThread.hasPendingWork();
|
||||
if( !softAbort ) softAbort = (now - cumulativeStart) >= TIMEOUT;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -113,10 +129,10 @@ public final class TimeoutState
|
||||
void startTimer()
|
||||
{
|
||||
long now = System.nanoTime();
|
||||
nanoCurrent = now;
|
||||
nanoDeadline = now + ComputerThread.scaledPeriod();
|
||||
currentStart = now;
|
||||
currentDeadline = now + ComputerThread.scaledPeriod();
|
||||
// Compute the "nominal start time".
|
||||
nanoCumulative = now - nanoCumulative;
|
||||
cumulativeStart = now - cumulativeElapsed;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -127,7 +143,7 @@ public final class TimeoutState
|
||||
void pauseTimer()
|
||||
{
|
||||
// We set the cumulative time to difference between current time and "nominal start time".
|
||||
nanoCumulative = System.nanoTime() - nanoCumulative;
|
||||
cumulativeElapsed = System.nanoTime() - cumulativeStart;
|
||||
paused = false;
|
||||
}
|
||||
|
||||
@ -136,7 +152,7 @@ public final class TimeoutState
|
||||
*/
|
||||
void stopTimer()
|
||||
{
|
||||
nanoCumulative = 0;
|
||||
cumulativeElapsed = 0;
|
||||
paused = softAbort = hardAbort = false;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user