1
0
mirror of https://github.com/SquidDev-CC/CC-Tweaked synced 2025-02-11 00:20:05 +00:00

Attempt to fix race condition in ComputerThread

- Runners would set their active executor before starting resetting the
   time, meaning it would be judged as running and terminated.
 - Similarly, the cumulative time start was reset to 0, meaning the
   computer had been judged to run for an impossibly long time.
 - If a computer hit the terminate threshold, but not the hard abort
   one, then we'd print the stack trace of the terminated thread - we
   now do it before interrupting.

There's still race conditions here when terminating a computer, but
hopefully these changes will mean they never occur under normal
operations (only when a computer has run for far too long).
This commit is contained in:
SquidDev 2019-03-04 09:22:14 +00:00
parent e34e833d3d
commit f7cb526793
3 changed files with 135 additions and 98 deletions

View File

@ -29,7 +29,7 @@ import java.util.ArrayDeque;
import java.util.ArrayList; import java.util.ArrayList;
import java.util.List; import java.util.List;
import java.util.Queue; import java.util.Queue;
import java.util.concurrent.atomic.AtomicBoolean; import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReentrantLock; import java.util.concurrent.locks.ReentrantLock;
/** /**
@ -153,9 +153,12 @@ final class ComputerExecutor
private IWritableMount rootMount; private IWritableMount rootMount;
/** /**
* {@code true} when inside {@link #work()}. We use this to ensure we're only doing one bit of work at one time. * The thread the executor is running on. This is non-null when performing work. We use this to ensure we're only
* doing one bit of work at one time.
*
* @see ComputerThread
*/ */
private final AtomicBoolean isExecuting = new AtomicBoolean( false ); final AtomicReference<Thread> executingThread = new AtomicReference<>();
ComputerExecutor( Computer computer ) ComputerExecutor( Computer computer )
{ {
@ -535,13 +538,6 @@ final class ComputerExecutor
* @see #eventQueue * @see #eventQueue
*/ */
void work() throws InterruptedException void work() throws InterruptedException
{
if( isExecuting.getAndSet( true ) )
{
throw new IllegalStateException( "Multiple threads running on computer the same time" );
}
try
{ {
if( interruptedEvent ) if( interruptedEvent )
{ {
@ -611,11 +607,6 @@ final class ComputerExecutor
resumeMachine( event.name, event.args ); resumeMachine( event.name, event.args );
} }
} }
finally
{
isExecuting.set( false );
}
}
private void displayFailure( String message, String extra ) private void displayFailure( String message, String extra )
{ {

View File

@ -260,10 +260,21 @@ public class ComputerThread
/** /**
* Re-add this task to the queue * Re-add this task to the queue
* *
* @param runner The runner this task was on.
* @param executor The executor to requeue * @param executor The executor to requeue
*/ */
private static void afterWork( ComputerExecutor executor ) private static void afterWork( TaskRunner runner, ComputerExecutor executor )
{ {
// Clear the executor's thread.
Thread currentThread = executor.executingThread.getAndSet( null );
if( currentThread != runner.owner )
{
ComputerCraft.log.error(
"Expected computer #{} to be running on {}, but already running on {}. This is a SERIOUS bug, please report with your debug.log.",
executor.getComputer().getID(), runner.owner.getName(), currentThread == null ? "nothing" : currentThread.getName()
);
}
computerLock.lock(); computerLock.lock();
try try
{ {
@ -346,15 +357,23 @@ public class ComputerThread
executor.timeout.hardAbort(); executor.timeout.hardAbort();
executor.abort(); executor.abort();
if( afterHardAbort >= ABORT_TIMEOUT * 2 ) if( afterHardAbort >= ABORT_TIMEOUT )
{
// If we've hard aborted but we're still not dead, dump the stack trace and interrupt
// the task.
timeoutTask( executor, runner.owner, afterStart );
runner.owner.interrupt();
}
else if( afterHardAbort >= ABORT_TIMEOUT * 2 )
{ {
// If we've hard aborted and interrupted, and we're still not dead, then mark the runner // If we've hard aborted and interrupted, and we're still not dead, then mark the runner
// as dead, finish off the task, and spawn a new runner. // as dead, finish off the task, and spawn a new runner.
// Note, we'll do the actual interruption of the thread in the next block. timeoutTask( executor, runner.owner, afterStart );
runner.running = false; runner.running = false;
runner.owner.interrupt();
ComputerExecutor thisExecutor = runner.currentExecutor.getAndSet( null ); ComputerExecutor thisExecutor = runner.currentExecutor.getAndSet( null );
if( thisExecutor != null ) afterWork( executor ); if( thisExecutor != null ) afterWork( runner, executor );
synchronized( threadLock ) synchronized( threadLock )
{ {
@ -364,14 +383,6 @@ public class ComputerThread
} }
} }
} }
if( afterHardAbort >= ABORT_TIMEOUT )
{
// If we've hard aborted but we're still not dead, dump the stack trace and interrupt
// the task.
timeoutTask( executor, runner.owner );
runner.owner.interrupt();
}
} }
} }
} }
@ -401,6 +412,7 @@ public class ComputerThread
{ {
owner = Thread.currentThread(); owner = Thread.currentThread();
tasks:
while( running && ComputerThread.running ) while( running && ComputerThread.running )
{ {
// Wait for an active queue to execute // Wait for an active queue to execute
@ -426,11 +438,29 @@ public class ComputerThread
continue; continue;
} }
// Pull a task from this queue, and set what we're currently executing. // If we're trying to executing some task on this computer while someone else is doing work, something
// is seriously wrong.
while( !executor.executingThread.compareAndSet( null, owner ) )
{
Thread existing = executor.executingThread.get();
if( existing != null )
{
ComputerCraft.log.error(
"Trying to run computer #{} on thread {}, but already running on {}. This is a SERIOUS bug, please report with your debug.log.",
executor.getComputer().getID(), owner.getName(), existing.getName()
);
continue tasks;
}
}
// Reset the timers
executor.beforeWork();
// And then set the current executor. It's important to do it afterwards, as otherwise we introduce
// race conditions with the monitor.
currentExecutor.set( executor ); currentExecutor.set( executor );
// Execute the task // Execute the task
executor.beforeWork();
try try
{ {
executor.work(); executor.work();
@ -442,19 +472,19 @@ public class ComputerThread
finally finally
{ {
ComputerExecutor thisExecutor = currentExecutor.getAndSet( null ); ComputerExecutor thisExecutor = currentExecutor.getAndSet( null );
if( thisExecutor != null ) afterWork( executor ); if( thisExecutor != null ) afterWork( this, executor );
} }
} }
} }
} }
private static void timeoutTask( ComputerExecutor executor, Thread thread ) private static void timeoutTask( ComputerExecutor executor, Thread thread, long time )
{ {
if( !ComputerCraft.logPeripheralErrors ) return; if( !ComputerCraft.logPeripheralErrors ) return;
StringBuilder builder = new StringBuilder() StringBuilder builder = new StringBuilder()
.append( "Terminating computer #" ).append( executor.getComputer().getID() ) .append( "Terminating computer #" ).append( executor.getComputer().getID() )
.append( " due to timeout (running for " ).append( executor.timeout.nanoCumulative() * 1e-9 ) .append( " due to timeout (running for " ).append( time * 1e-9 )
.append( " seconds). This is NOT a bug, but may mean a computer is misbehaving. " ) .append( " seconds). This is NOT a bug, but may mean a computer is misbehaving. " )
.append( thread.getName() ) .append( thread.getName() )
.append( " is currently " ) .append( " is currently " )

View File

@ -46,18 +46,34 @@ public final class TimeoutState
private boolean softAbort; private boolean softAbort;
private volatile boolean hardAbort; private volatile boolean hardAbort;
private long nanoCumulative; /**
private long nanoCurrent; * When the cumulative time would have started had the whole event been processed in one go.
private long nanoDeadline; */
private long cumulativeStart;
/**
* How much cumulative time has elapsed. This is effectively {@code cumulativeStart - currentStart}.
*/
private long cumulativeElapsed;
/**
* When this execution round started.
*/
private long currentStart;
/**
* When this execution round should look potentially be paused.
*/
private long currentDeadline;
long nanoCumulative() long nanoCumulative()
{ {
return System.nanoTime() - nanoCumulative; return System.nanoTime() - cumulativeStart;
} }
long nanoCurrent() long nanoCurrent()
{ {
return System.nanoTime() - nanoCurrent; return System.nanoTime() - currentStart;
} }
/** /**
@ -66,8 +82,8 @@ public final class TimeoutState
public void refresh() public void refresh()
{ {
long now = System.nanoTime(); long now = System.nanoTime();
if( !paused ) paused = now >= nanoDeadline && ComputerThread.hasPendingWork(); if( !paused ) paused = now >= currentDeadline && ComputerThread.hasPendingWork();
if( !softAbort ) softAbort = (now - nanoCumulative) >= TIMEOUT; if( !softAbort ) softAbort = (now - cumulativeStart) >= TIMEOUT;
} }
/** /**
@ -113,10 +129,10 @@ public final class TimeoutState
void startTimer() void startTimer()
{ {
long now = System.nanoTime(); long now = System.nanoTime();
nanoCurrent = now; currentStart = now;
nanoDeadline = now + ComputerThread.scaledPeriod(); currentDeadline = now + ComputerThread.scaledPeriod();
// Compute the "nominal start time". // Compute the "nominal start time".
nanoCumulative = now - nanoCumulative; cumulativeStart = now - cumulativeElapsed;
} }
/** /**
@ -127,7 +143,7 @@ public final class TimeoutState
void pauseTimer() void pauseTimer()
{ {
// We set the cumulative time to difference between current time and "nominal start time". // We set the cumulative time to difference between current time and "nominal start time".
nanoCumulative = System.nanoTime() - nanoCumulative; cumulativeElapsed = System.nanoTime() - cumulativeStart;
paused = false; paused = false;
} }
@ -136,7 +152,7 @@ public final class TimeoutState
*/ */
void stopTimer() void stopTimer()
{ {
nanoCumulative = 0; cumulativeElapsed = 0;
paused = softAbort = hardAbort = false; paused = softAbort = hardAbort = false;
} }
} }