mirror of
https://github.com/SquidDev-CC/CC-Tweaked
synced 2025-03-23 11:56:58 +00:00
Merge pull request #119 from SquidDev-CC/feature/computer-thread-schedule
- CobaltLuaMachine/ComputerExecutor can now be paused - this suspends the machine via a debug hook. When doing work again, we resume the machine, rather than starting a new task. - TimeoutState keeps track of how long the current execution of this task has gone on for, when its deadline is, and the cumulative execution time of this task. - ComputerThread now uses a CFS based scheduler in order to determine which computer to next run.
This commit is contained in:
commit
72d079ef61
@ -29,7 +29,7 @@ import java.util.ArrayDeque;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
import java.util.Queue;
|
||||
import java.util.concurrent.atomic.AtomicBoolean;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
/**
|
||||
@ -95,10 +95,28 @@ final class ComputerExecutor
|
||||
private final Object queueLock = new Object();
|
||||
|
||||
/**
|
||||
* Determines if this executer is present within {@link ComputerThread}.
|
||||
* Determines if this executor is present within {@link ComputerThread}.
|
||||
*
|
||||
* @see #queueLock
|
||||
* @see #enqueue()
|
||||
* @see #afterWork()
|
||||
*/
|
||||
volatile boolean onComputerQueue = false;
|
||||
|
||||
/**
|
||||
* The amount of time this computer has used on a theoretical machine which shares work evenly amongst computers.
|
||||
*
|
||||
* @see ComputerThread
|
||||
*/
|
||||
long virtualRuntime = 0;
|
||||
|
||||
/**
|
||||
* The last time at which we updated {@link #virtualRuntime}.
|
||||
*
|
||||
* @see ComputerThread
|
||||
*/
|
||||
long vRuntimeStart;
|
||||
|
||||
/**
|
||||
* The command that {@link #work()} should execute on the computer thread.
|
||||
*
|
||||
@ -117,6 +135,14 @@ final class ComputerExecutor
|
||||
*/
|
||||
private final Queue<Event> eventQueue = new ArrayDeque<>();
|
||||
|
||||
/**
|
||||
* Whether we interrupted an event and so should resume it instead of executing another task.
|
||||
*
|
||||
* @see #work()
|
||||
* @see #resumeMachine(String, Object[])
|
||||
*/
|
||||
private boolean interruptedEvent = false;
|
||||
|
||||
/**
|
||||
* Whether this executor has been closed, and will no longer accept any incoming commands or events.
|
||||
*
|
||||
@ -127,9 +153,12 @@ final class ComputerExecutor
|
||||
private IWritableMount rootMount;
|
||||
|
||||
/**
|
||||
* {@code true} when inside {@link #work()}. We use this to ensure we're only doing one bit of work at one time.
|
||||
* The thread the executor is running on. This is non-null when performing work. We use this to ensure we're only
|
||||
* doing one bit of work at one time.
|
||||
*
|
||||
* @see ComputerThread
|
||||
*/
|
||||
private final AtomicBoolean isExecuting = new AtomicBoolean( false );
|
||||
final AtomicReference<Thread> executingThread = new AtomicReference<>();
|
||||
|
||||
ComputerExecutor( Computer computer )
|
||||
{
|
||||
@ -268,9 +297,7 @@ final class ComputerExecutor
|
||||
{
|
||||
synchronized( queueLock )
|
||||
{
|
||||
if( onComputerQueue ) return;
|
||||
onComputerQueue = true;
|
||||
ComputerThread.queue( this );
|
||||
if( !onComputerQueue ) ComputerThread.queue( this );
|
||||
}
|
||||
}
|
||||
|
||||
@ -391,6 +418,7 @@ final class ComputerExecutor
|
||||
{
|
||||
// Reset the terminal and event queue
|
||||
computer.getTerminal().reset();
|
||||
interruptedEvent = false;
|
||||
synchronized( queueLock )
|
||||
{
|
||||
eventQueue.clear();
|
||||
@ -432,6 +460,7 @@ final class ComputerExecutor
|
||||
try
|
||||
{
|
||||
isOn = false;
|
||||
interruptedEvent = false;
|
||||
synchronized( queueLock )
|
||||
{
|
||||
eventQueue.clear();
|
||||
@ -468,27 +497,34 @@ final class ComputerExecutor
|
||||
*/
|
||||
void beforeWork()
|
||||
{
|
||||
timeout.reset();
|
||||
vRuntimeStart = System.nanoTime();
|
||||
timeout.startTimer();
|
||||
}
|
||||
|
||||
/**
|
||||
* Called after executing {@link #work()}. Adds this back to the {@link ComputerThread} if we have more work,
|
||||
* otherwise remove it.
|
||||
* Called after executing {@link #work()}.
|
||||
*
|
||||
* @return If we have more work to do.
|
||||
*/
|
||||
void afterWork()
|
||||
boolean afterWork()
|
||||
{
|
||||
Tracking.addTaskTiming( getComputer(), timeout.nanoSinceStart() );
|
||||
if( interruptedEvent )
|
||||
{
|
||||
timeout.pauseTimer();
|
||||
}
|
||||
else
|
||||
{
|
||||
timeout.stopTimer();
|
||||
}
|
||||
|
||||
Tracking.addTaskTiming( getComputer(), timeout.nanoCurrent() );
|
||||
|
||||
if( interruptedEvent ) return true;
|
||||
|
||||
synchronized( queueLock )
|
||||
{
|
||||
if( eventQueue.isEmpty() && command == null )
|
||||
{
|
||||
onComputerQueue = false;
|
||||
}
|
||||
else
|
||||
{
|
||||
ComputerThread.queue( this );
|
||||
}
|
||||
if( eventQueue.isEmpty() && command == null ) return onComputerQueue = false;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
@ -503,74 +539,72 @@ final class ComputerExecutor
|
||||
*/
|
||||
void work() throws InterruptedException
|
||||
{
|
||||
if( isExecuting.getAndSet( true ) )
|
||||
if( interruptedEvent )
|
||||
{
|
||||
throw new IllegalStateException( "Multiple threads running on computer the same time" );
|
||||
}
|
||||
|
||||
try
|
||||
{
|
||||
StateCommand command;
|
||||
Event event = null;
|
||||
synchronized( queueLock )
|
||||
interruptedEvent = false;
|
||||
if( machine != null )
|
||||
{
|
||||
command = this.command;
|
||||
this.command = null;
|
||||
|
||||
// If we've no command, pull something from the event queue instead.
|
||||
if( command == null )
|
||||
{
|
||||
if( !isOn )
|
||||
{
|
||||
// We're not on and had no command, but we had work queued. This should never happen, so clear
|
||||
// the event queue just in case.
|
||||
eventQueue.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
event = eventQueue.poll();
|
||||
}
|
||||
}
|
||||
|
||||
if( command != null )
|
||||
{
|
||||
switch( command )
|
||||
{
|
||||
case TURN_ON:
|
||||
if( isOn ) return;
|
||||
turnOn();
|
||||
break;
|
||||
|
||||
case SHUTDOWN:
|
||||
|
||||
if( !isOn ) return;
|
||||
computer.getTerminal().reset();
|
||||
shutdown();
|
||||
break;
|
||||
|
||||
case REBOOT:
|
||||
if( !isOn ) return;
|
||||
computer.getTerminal().reset();
|
||||
shutdown();
|
||||
|
||||
computer.turnOn();
|
||||
break;
|
||||
|
||||
case ABORT:
|
||||
if( !isOn ) return;
|
||||
displayFailure( "Error running computer", TimeoutState.ABORT_MESSAGE );
|
||||
shutdown();
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if( event != null )
|
||||
{
|
||||
resumeMachine( event.name, event.args );
|
||||
resumeMachine( null, null );
|
||||
return;
|
||||
}
|
||||
}
|
||||
finally
|
||||
|
||||
StateCommand command;
|
||||
Event event = null;
|
||||
synchronized( queueLock )
|
||||
{
|
||||
isExecuting.set( false );
|
||||
command = this.command;
|
||||
this.command = null;
|
||||
|
||||
// If we've no command, pull something from the event queue instead.
|
||||
if( command == null )
|
||||
{
|
||||
if( !isOn )
|
||||
{
|
||||
// We're not on and had no command, but we had work queued. This should never happen, so clear
|
||||
// the event queue just in case.
|
||||
eventQueue.clear();
|
||||
return;
|
||||
}
|
||||
|
||||
event = eventQueue.poll();
|
||||
}
|
||||
}
|
||||
|
||||
if( command != null )
|
||||
{
|
||||
switch( command )
|
||||
{
|
||||
case TURN_ON:
|
||||
if( isOn ) return;
|
||||
turnOn();
|
||||
break;
|
||||
|
||||
case SHUTDOWN:
|
||||
|
||||
if( !isOn ) return;
|
||||
computer.getTerminal().reset();
|
||||
shutdown();
|
||||
break;
|
||||
|
||||
case REBOOT:
|
||||
if( !isOn ) return;
|
||||
computer.getTerminal().reset();
|
||||
shutdown();
|
||||
|
||||
computer.turnOn();
|
||||
break;
|
||||
|
||||
case ABORT:
|
||||
if( !isOn ) return;
|
||||
displayFailure( "Error running computer", TimeoutState.ABORT_MESSAGE );
|
||||
shutdown();
|
||||
break;
|
||||
}
|
||||
}
|
||||
else if( event != null )
|
||||
{
|
||||
resumeMachine( event.name, event.args );
|
||||
}
|
||||
}
|
||||
|
||||
@ -601,6 +635,7 @@ final class ComputerExecutor
|
||||
private void resumeMachine( String event, Object[] args ) throws InterruptedException
|
||||
{
|
||||
MachineResult result = machine.handleEvent( event, args );
|
||||
interruptedEvent = result.isPause();
|
||||
if( !result.isError() ) return;
|
||||
|
||||
displayFailure( "Error running computer", result.getMessage() );
|
||||
|
@ -10,11 +10,13 @@ import dan200.computercraft.ComputerCraft;
|
||||
import dan200.computercraft.shared.util.ThreadUtils;
|
||||
|
||||
import javax.annotation.Nonnull;
|
||||
import java.util.concurrent.BlockingQueue;
|
||||
import java.util.concurrent.LinkedBlockingQueue;
|
||||
import java.util.TreeSet;
|
||||
import java.util.concurrent.ThreadFactory;
|
||||
import java.util.concurrent.TimeUnit;
|
||||
import java.util.concurrent.atomic.AtomicReference;
|
||||
import java.util.concurrent.locks.Condition;
|
||||
import java.util.concurrent.locks.LockSupport;
|
||||
import java.util.concurrent.locks.ReentrantLock;
|
||||
|
||||
import static dan200.computercraft.core.computer.TimeoutState.ABORT_TIMEOUT;
|
||||
import static dan200.computercraft.core.computer.TimeoutState.TIMEOUT;
|
||||
@ -23,7 +25,26 @@ import static dan200.computercraft.core.computer.TimeoutState.TIMEOUT;
|
||||
* Responsible for running all tasks from a {@link Computer}.
|
||||
*
|
||||
* This is split into two components: the {@link TaskRunner}s, which pull an executor from the queue and execute it, and
|
||||
* a single {@link Monitor} which observes all runners and kills them if they are behaving badly.
|
||||
* a single {@link Monitor} which observes all runners and kills them if they have not been terminated by
|
||||
* {@link TimeoutState#isSoftAborted()}.
|
||||
*
|
||||
* Computers are executed using a priority system, with those who have spent less time executing having a higher
|
||||
* priority than those hogging the thread. This, combined with {@link TimeoutState#isPaused()} means we can reduce the
|
||||
* risk of badly behaved computers stalling execution for everyone else.
|
||||
*
|
||||
* This is done using an implementation of Linux's Completely Fair Scheduler. When a computer executes, we compute what
|
||||
* share of execution time it has used (time executed/number of tasks). We then pick the computer who has the least
|
||||
* "virtual execution time" (aka {@link ComputerExecutor#virtualRuntime}).
|
||||
*
|
||||
* When adding a computer to the queue, we make sure its "virtual runtime" is at least as big as the smallest runtime.
|
||||
* This means that adding computers which have slept a lot do not then have massive priority over everyone else. See
|
||||
* {@link #queue(ComputerExecutor)} for how this is implemented.
|
||||
*
|
||||
* In reality, it's unlikely that more than a few computers are waiting to execute at once, so this will not have much
|
||||
* effect unless you have a computer hogging execution time. However, it is pretty effective in those situations.
|
||||
*
|
||||
* @see TimeoutState For how hard timeouts are handled.
|
||||
* @see ComputerExecutor For how computers actually do execution.
|
||||
*/
|
||||
public class ComputerThread
|
||||
{
|
||||
@ -34,16 +55,32 @@ public class ComputerThread
|
||||
*/
|
||||
private static final int MONITOR_WAKEUP = 100;
|
||||
|
||||
/**
|
||||
* The target latency between executing two tasks on a single machine.
|
||||
*
|
||||
* An average tick takes 50ms, and so we ideally need to have handled a couple of events within that window in order
|
||||
* to have a perceived low latency.
|
||||
*/
|
||||
private static final long DEFAULT_LATENCY = TimeUnit.MILLISECONDS.toNanos( 50 );
|
||||
|
||||
/**
|
||||
* The minimum value that {@link #DEFAULT_LATENCY} can have when scaled.
|
||||
*
|
||||
* From statistics gathered on SwitchCraft, almost all machines will execute under 15ms, 75% under 1.5ms, with the
|
||||
* mean being about 3ms. Most computers shouldn't be too impacted with having such a short period to execute in.
|
||||
*/
|
||||
private static final long DEFAULT_MIN_PERIOD = TimeUnit.MILLISECONDS.toNanos( 5 );
|
||||
|
||||
/**
|
||||
* The maximum number of tasks before we have to start scaling latency linearly.
|
||||
*/
|
||||
private static final long LATENCY_MAX_TASKS = DEFAULT_LATENCY / DEFAULT_MIN_PERIOD;
|
||||
|
||||
/**
|
||||
* Lock used for modifications to the array of current threads.
|
||||
*/
|
||||
private static final Object threadLock = new Object();
|
||||
|
||||
/**
|
||||
* Active executors to run
|
||||
*/
|
||||
private static final BlockingQueue<ComputerExecutor> computersActive = new LinkedBlockingQueue<>();
|
||||
|
||||
/**
|
||||
* Whether the computer thread system is currently running
|
||||
*/
|
||||
@ -59,6 +96,29 @@ public class ComputerThread
|
||||
*/
|
||||
private static TaskRunner[] runners;
|
||||
|
||||
private static long latency;
|
||||
private static long minPeriod;
|
||||
|
||||
private static final ReentrantLock computerLock = new ReentrantLock();
|
||||
|
||||
private static final Condition hasWork = computerLock.newCondition();
|
||||
|
||||
/**
|
||||
* Active queues to execute
|
||||
*/
|
||||
private static final TreeSet<ComputerExecutor> computerQueue = new TreeSet<>( ( a, b ) -> {
|
||||
if( a == b ) return 0; // Should never happen, but let's be consistent here
|
||||
|
||||
long at = a.virtualRuntime, bt = b.virtualRuntime;
|
||||
if( at == bt ) return Integer.compare( a.hashCode(), b.hashCode() );
|
||||
return Long.compare( at, bt );
|
||||
} );
|
||||
|
||||
/**
|
||||
* The minimum {@link ComputerExecutor#virtualRuntime} time on the tree.
|
||||
*/
|
||||
private static long minimumVirtualRuntime = 0;
|
||||
|
||||
private static final ThreadFactory monitorFactory = ThreadUtils.factory( "Computer-Monitor" );
|
||||
private static final ThreadFactory runnerFactory = ThreadUtils.factory( "Computer-Runner" );
|
||||
|
||||
@ -72,10 +132,16 @@ public class ComputerThread
|
||||
running = true;
|
||||
if( monitor == null || !monitor.isAlive() ) (monitor = monitorFactory.newThread( new Monitor() )).start();
|
||||
|
||||
if( runners == null || runners.length != ComputerCraft.computer_threads )
|
||||
if( runners == null )
|
||||
{
|
||||
// TODO: Resize this + kill old runners and start new ones.
|
||||
// TODO: Change the runners length on config reloads
|
||||
runners = new TaskRunner[ComputerCraft.computer_threads];
|
||||
|
||||
// latency and minPeriod are scaled by 1 + floor(log2(threads)). We can afford to execute tasks for
|
||||
// longer when executing on more than one thread.
|
||||
long factor = 64 - Long.numberOfLeadingZeros( runners.length );
|
||||
latency = DEFAULT_LATENCY * factor;
|
||||
minPeriod = DEFAULT_MIN_PERIOD * factor;
|
||||
}
|
||||
|
||||
for( int i = 0; i < runners.length; i++ )
|
||||
@ -112,18 +178,164 @@ public class ComputerThread
|
||||
}
|
||||
}
|
||||
|
||||
computersActive.clear();
|
||||
computerLock.lock();
|
||||
try
|
||||
{
|
||||
computerQueue.clear();
|
||||
}
|
||||
finally
|
||||
{
|
||||
computerLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Mark a computer as having work, enqueuing it on the thread.
|
||||
* Mark a computer as having work, enqueuing it on the thread
|
||||
*
|
||||
* @param computer The computer to execute work on.
|
||||
* You must be holding {@link ComputerExecutor}'s {@code queueLock} when calling this method - it should only
|
||||
* be called from {@code enqueue}.
|
||||
*
|
||||
* @param executor The computer to execute work on.
|
||||
*/
|
||||
static void queue( @Nonnull ComputerExecutor computer )
|
||||
static void queue( @Nonnull ComputerExecutor executor )
|
||||
{
|
||||
if( !computer.onComputerQueue ) throw new IllegalStateException( "Computer must be on queue" );
|
||||
computersActive.add( computer );
|
||||
computerLock.lock();
|
||||
try
|
||||
{
|
||||
if( executor.onComputerQueue ) throw new IllegalStateException( "Cannot queue already queued executor" );
|
||||
executor.onComputerQueue = true;
|
||||
|
||||
updateRuntimes();
|
||||
|
||||
// We're not currently on the queue, so update its current execution time to
|
||||
// ensure its at least as high as the minimum.
|
||||
long newRuntime = minimumVirtualRuntime;
|
||||
|
||||
if( executor.virtualRuntime == 0 )
|
||||
{
|
||||
// Slow down new computers a little bit.
|
||||
newRuntime += scaledPeriod();
|
||||
}
|
||||
else
|
||||
{
|
||||
// Give a small boost to computers which have slept a little.
|
||||
newRuntime -= latency / 2;
|
||||
}
|
||||
|
||||
executor.virtualRuntime = Math.max( newRuntime, executor.virtualRuntime );
|
||||
|
||||
// Add to the queue, and signal the workers.
|
||||
computerQueue.add( executor );
|
||||
hasWork.signal();
|
||||
}
|
||||
finally
|
||||
{
|
||||
computerLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/**
|
||||
* Update the {@link ComputerExecutor#virtualRuntime}s of all running tasks, and then update the
|
||||
* {@link #minimumVirtualRuntime} based on the current tasks.
|
||||
*
|
||||
* This is called before queueing tasks, to ensure that {@link #minimumVirtualRuntime} is up-to-date.
|
||||
*/
|
||||
private static void updateRuntimes()
|
||||
{
|
||||
long minRuntime = Long.MAX_VALUE;
|
||||
|
||||
// If we've a task on the queue, use that as our base time.
|
||||
if( !computerQueue.isEmpty() ) minRuntime = computerQueue.first().virtualRuntime;
|
||||
|
||||
// Update all the currently executing tasks
|
||||
TaskRunner[] currentRunners = runners;
|
||||
if( currentRunners != null )
|
||||
{
|
||||
long now = System.nanoTime();
|
||||
int tasks = 1 + computerQueue.size();
|
||||
|
||||
for( TaskRunner runner : currentRunners )
|
||||
{
|
||||
if( runner == null ) continue;
|
||||
ComputerExecutor executor = runner.currentExecutor.get();
|
||||
if( executor == null ) continue;
|
||||
|
||||
// We do two things here: first we update the task's virtual runtime based on when we
|
||||
// last checked, and then we check the minimum.
|
||||
minRuntime = Math.min( minRuntime, executor.virtualRuntime += (now - executor.vRuntimeStart) / tasks );
|
||||
executor.vRuntimeStart = now;
|
||||
}
|
||||
}
|
||||
|
||||
if( minRuntime > minimumVirtualRuntime && minRuntime < Long.MAX_VALUE )
|
||||
{
|
||||
minimumVirtualRuntime = minRuntime;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Ensure the "currently working" state of the executor is reset, the timings are updated, and then requeue the
|
||||
* executor if needed.
|
||||
*
|
||||
* @param runner The runner this task was on.
|
||||
* @param executor The executor to requeue
|
||||
*/
|
||||
private static void afterWork( TaskRunner runner, ComputerExecutor executor )
|
||||
{
|
||||
// Clear the executor's thread.
|
||||
Thread currentThread = executor.executingThread.getAndSet( null );
|
||||
if( currentThread != runner.owner )
|
||||
{
|
||||
ComputerCraft.log.error(
|
||||
"Expected computer #{} to be running on {}, but already running on {}. This is a SERIOUS bug, please report with your debug.log.",
|
||||
executor.getComputer().getID(), runner.owner.getName(), currentThread == null ? "nothing" : currentThread.getName()
|
||||
);
|
||||
}
|
||||
|
||||
computerLock.lock();
|
||||
try
|
||||
{
|
||||
// Update the virtual runtime of this task.
|
||||
long now = System.nanoTime();
|
||||
executor.virtualRuntime += (now - executor.vRuntimeStart) / (1 + computerQueue.size());
|
||||
|
||||
// If we've no more tasks, just return.
|
||||
if( !executor.afterWork() ) return;
|
||||
|
||||
// Otherwise, add to the queue, and signal any waiting workers.
|
||||
computerQueue.add( executor );
|
||||
hasWork.signal();
|
||||
}
|
||||
finally
|
||||
{
|
||||
computerLock.unlock();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* The scaled period for a single task
|
||||
*
|
||||
* @return The scaled period for the task
|
||||
* @see #DEFAULT_LATENCY
|
||||
* @see #DEFAULT_MIN_PERIOD
|
||||
* @see #LATENCY_MAX_TASKS
|
||||
*/
|
||||
static long scaledPeriod()
|
||||
{
|
||||
// +1 to include the current task
|
||||
int count = 1 + computerQueue.size();
|
||||
return count < LATENCY_MAX_TASKS ? latency / count : minPeriod;
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine if the thread has computers queued up
|
||||
*
|
||||
* @return If we have work queued up.
|
||||
*/
|
||||
static boolean hasPendingWork()
|
||||
{
|
||||
return computerQueue.size() > 0;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -158,7 +370,7 @@ public class ComputerThread
|
||||
|
||||
// If we're still within normal execution times (TIMEOUT) or soft abort (ABORT_TIMEOUT),
|
||||
// then we can let the Lua machine do its work.
|
||||
long afterStart = executor.timeout.nanoSinceStart();
|
||||
long afterStart = executor.timeout.nanoCumulative();
|
||||
long afterHardAbort = afterStart - TIMEOUT - ABORT_TIMEOUT;
|
||||
if( afterHardAbort < 0 ) continue;
|
||||
|
||||
@ -166,15 +378,23 @@ public class ComputerThread
|
||||
executor.timeout.hardAbort();
|
||||
executor.abort();
|
||||
|
||||
if( afterHardAbort >= ABORT_TIMEOUT * 2 )
|
||||
if( afterHardAbort >= ABORT_TIMEOUT )
|
||||
{
|
||||
// If we've hard aborted but we're still not dead, dump the stack trace and interrupt
|
||||
// the task.
|
||||
timeoutTask( executor, runner.owner, afterStart );
|
||||
runner.owner.interrupt();
|
||||
}
|
||||
else if( afterHardAbort >= ABORT_TIMEOUT * 2 )
|
||||
{
|
||||
// If we've hard aborted and interrupted, and we're still not dead, then mark the runner
|
||||
// as dead, finish off the task, and spawn a new runner.
|
||||
// Note, we'll do the actual interruption of the thread in the next block.
|
||||
timeoutTask( executor, runner.owner, afterStart );
|
||||
runner.running = false;
|
||||
runner.owner.interrupt();
|
||||
|
||||
ComputerExecutor thisExecutor = runner.currentExecutor.getAndSet( null );
|
||||
if( thisExecutor != null ) executor.afterWork();
|
||||
if( thisExecutor != null ) afterWork( runner, executor );
|
||||
|
||||
synchronized( threadLock )
|
||||
{
|
||||
@ -184,14 +404,6 @@ public class ComputerThread
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if( afterHardAbort >= ABORT_TIMEOUT )
|
||||
{
|
||||
// If we've hard aborted but we're still not dead, dump the stack trace and interrupt
|
||||
// the task.
|
||||
timeoutTask( executor, runner.owner, afterStart );
|
||||
runner.owner.interrupt();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -203,7 +415,7 @@ public class ComputerThread
|
||||
}
|
||||
|
||||
/**
|
||||
* Pulls tasks from the {@link #computersActive} queue and runs them.
|
||||
* Pulls tasks from the {@link #computerQueue} queue and runs them.
|
||||
*
|
||||
* This is responsible for running the {@link ComputerExecutor#work()}, {@link ComputerExecutor#beforeWork()} and
|
||||
* {@link ComputerExecutor#afterWork()} functions. Everything else is either handled by the executor, timeout
|
||||
@ -221,13 +433,24 @@ public class ComputerThread
|
||||
{
|
||||
owner = Thread.currentThread();
|
||||
|
||||
tasks:
|
||||
while( running && ComputerThread.running )
|
||||
{
|
||||
// Wait for an active queue to execute
|
||||
ComputerExecutor executor;
|
||||
try
|
||||
{
|
||||
executor = computersActive.take();
|
||||
computerLock.lockInterruptibly();
|
||||
try
|
||||
{
|
||||
while( computerQueue.isEmpty() ) hasWork.await();
|
||||
executor = computerQueue.pollFirst();
|
||||
assert executor != null : "hasWork should ensure we never receive null work";
|
||||
}
|
||||
finally
|
||||
{
|
||||
computerLock.unlock();
|
||||
}
|
||||
}
|
||||
catch( InterruptedException ignored )
|
||||
{
|
||||
@ -236,11 +459,29 @@ public class ComputerThread
|
||||
continue;
|
||||
}
|
||||
|
||||
// Pull a task from this queue, and set what we're currently executing.
|
||||
// If we're trying to executing some task on this computer while someone else is doing work, something
|
||||
// is seriously wrong.
|
||||
while( !executor.executingThread.compareAndSet( null, owner ) )
|
||||
{
|
||||
Thread existing = executor.executingThread.get();
|
||||
if( existing != null )
|
||||
{
|
||||
ComputerCraft.log.error(
|
||||
"Trying to run computer #{} on thread {}, but already running on {}. This is a SERIOUS bug, please report with your debug.log.",
|
||||
executor.getComputer().getID(), owner.getName(), existing.getName()
|
||||
);
|
||||
continue tasks;
|
||||
}
|
||||
}
|
||||
|
||||
// Reset the timers
|
||||
executor.beforeWork();
|
||||
|
||||
// And then set the current executor. It's important to do it afterwards, as otherwise we introduce
|
||||
// race conditions with the monitor.
|
||||
currentExecutor.set( executor );
|
||||
|
||||
// Execute the task
|
||||
executor.beforeWork();
|
||||
try
|
||||
{
|
||||
executor.work();
|
||||
@ -252,19 +493,19 @@ public class ComputerThread
|
||||
finally
|
||||
{
|
||||
ComputerExecutor thisExecutor = currentExecutor.getAndSet( null );
|
||||
if( thisExecutor != null ) executor.afterWork();
|
||||
if( thisExecutor != null ) afterWork( this, executor );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private static void timeoutTask( ComputerExecutor executor, Thread thread, long nanotime )
|
||||
private static void timeoutTask( ComputerExecutor executor, Thread thread, long time )
|
||||
{
|
||||
if( !ComputerCraft.logPeripheralErrors ) return;
|
||||
|
||||
StringBuilder builder = new StringBuilder()
|
||||
.append( "Terminating computer #" ).append( executor.getComputer().getID() )
|
||||
.append( " due to timeout (running for " ).append( nanotime / 1e9 )
|
||||
.append( " due to timeout (running for " ).append( time * 1e-9 )
|
||||
.append( " seconds). This is NOT a bug, but may mean a computer is misbehaving. " )
|
||||
.append( thread.getName() )
|
||||
.append( " is currently " )
|
||||
|
@ -6,6 +6,9 @@
|
||||
|
||||
package dan200.computercraft.core.computer;
|
||||
|
||||
import dan200.computercraft.core.lua.ILuaMachine;
|
||||
import dan200.computercraft.core.lua.MachineResult;
|
||||
|
||||
import java.util.concurrent.TimeUnit;
|
||||
|
||||
/**
|
||||
@ -22,8 +25,13 @@ import java.util.concurrent.TimeUnit;
|
||||
* abort ({@link #ABORT_TIMEOUT}), we trigger a hard abort (note, this is done from the computer thread manager). This
|
||||
* will destroy the entire Lua runtime and shut the computer down.
|
||||
*
|
||||
* The Lua runtime is also allowed to pause execution if there are other computers contesting for work. All computers
|
||||
* are allowed to run for {@link ComputerThread#scaledPeriod()} nanoseconds (see {@link #currentDeadline}). After that
|
||||
* period, if any computers are waiting to be executed then we'll set the paused flag to true ({@link #isPaused()}.
|
||||
*
|
||||
* @see ComputerThread
|
||||
* @see dan200.computercraft.core.lua.ILuaMachine
|
||||
* @see ILuaMachine
|
||||
* @see MachineResult#isPause()
|
||||
*/
|
||||
public final class TimeoutState
|
||||
{
|
||||
@ -37,16 +45,68 @@ public final class TimeoutState
|
||||
*/
|
||||
static final long ABORT_TIMEOUT = TimeUnit.MILLISECONDS.toNanos( 1500 );
|
||||
|
||||
/**
|
||||
* The error message to display when we trigger an abort.
|
||||
*/
|
||||
public static final String ABORT_MESSAGE = "Too long without yielding";
|
||||
|
||||
private volatile boolean softAbort;
|
||||
private boolean paused;
|
||||
private boolean softAbort;
|
||||
private volatile boolean hardAbort;
|
||||
|
||||
private long nanoTime;
|
||||
/**
|
||||
* When the cumulative time would have started had the whole event been processed in one go.
|
||||
*/
|
||||
private long cumulativeStart;
|
||||
|
||||
long nanoSinceStart()
|
||||
/**
|
||||
* How much cumulative time has elapsed. This is effectively {@code cumulativeStart - currentStart}.
|
||||
*/
|
||||
private long cumulativeElapsed;
|
||||
|
||||
/**
|
||||
* When this execution round started.
|
||||
*/
|
||||
private long currentStart;
|
||||
|
||||
/**
|
||||
* When this execution round should look potentially be paused.
|
||||
*/
|
||||
private long currentDeadline;
|
||||
|
||||
long nanoCumulative()
|
||||
{
|
||||
return System.nanoTime() - nanoTime;
|
||||
return System.nanoTime() - cumulativeStart;
|
||||
}
|
||||
|
||||
long nanoCurrent()
|
||||
{
|
||||
return System.nanoTime() - currentStart;
|
||||
}
|
||||
|
||||
/**
|
||||
* Recompute the {@link #isSoftAborted()} and {@link #isPaused()} flags.
|
||||
*/
|
||||
public void refresh()
|
||||
{
|
||||
// Important: The weird arithmetic here is important, as nanoTime may return negative values, and so we
|
||||
// need to handle overflow.
|
||||
long now = System.nanoTime();
|
||||
if( !paused ) paused = currentDeadline - now <= 0 && ComputerThread.hasPendingWork(); // now >= currentDeadline
|
||||
if( !softAbort ) softAbort = (now - cumulativeStart - TIMEOUT) >= 0; // now - cumulativeStart >= TIMEOUT
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether we should pause execution of this machine.
|
||||
*
|
||||
* This is determined by whether we've consumed our time slice, and if there are other computers waiting to perform
|
||||
* work.
|
||||
*
|
||||
* @return Whether we should pause execution.
|
||||
*/
|
||||
public boolean isPaused()
|
||||
{
|
||||
return paused;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -54,7 +114,7 @@ public final class TimeoutState
|
||||
*/
|
||||
public boolean isSoftAborted()
|
||||
{
|
||||
return softAbort || (softAbort = (System.nanoTime() - nanoTime) >= TIMEOUT);
|
||||
return softAbort;
|
||||
}
|
||||
|
||||
/**
|
||||
@ -74,11 +134,35 @@ public final class TimeoutState
|
||||
}
|
||||
|
||||
/**
|
||||
* Reset all abort flags and start the abort timer.
|
||||
* Start the current and cumulative timers again.
|
||||
*/
|
||||
void reset()
|
||||
void startTimer()
|
||||
{
|
||||
softAbort = hardAbort = false;
|
||||
nanoTime = System.nanoTime();
|
||||
long now = System.nanoTime();
|
||||
currentStart = now;
|
||||
currentDeadline = now + ComputerThread.scaledPeriod();
|
||||
// Compute the "nominal start time".
|
||||
cumulativeStart = now - cumulativeElapsed;
|
||||
}
|
||||
|
||||
/**
|
||||
* Pauses the cumulative time, to be resumed by {@link #startTimer()}
|
||||
*
|
||||
* @see #nanoCumulative()
|
||||
*/
|
||||
void pauseTimer()
|
||||
{
|
||||
// We set the cumulative time to difference between current time and "nominal start time".
|
||||
cumulativeElapsed = System.nanoTime() - cumulativeStart;
|
||||
paused = false;
|
||||
}
|
||||
|
||||
/**
|
||||
* Resets the cumulative time and resets the abort flags.
|
||||
*/
|
||||
void stopTimer()
|
||||
{
|
||||
cumulativeElapsed = 0;
|
||||
paused = softAbort = hardAbort = false;
|
||||
}
|
||||
}
|
||||
|
@ -40,6 +40,8 @@ import java.util.concurrent.TimeUnit;
|
||||
import static org.squiddev.cobalt.Constants.NONE;
|
||||
import static org.squiddev.cobalt.ValueFactory.valueOf;
|
||||
import static org.squiddev.cobalt.ValueFactory.varargsOf;
|
||||
import static org.squiddev.cobalt.debug.DebugFrame.FLAG_HOOKED;
|
||||
import static org.squiddev.cobalt.debug.DebugFrame.FLAG_HOOKYIELD;
|
||||
|
||||
public class CobaltLuaMachine implements ILuaMachine
|
||||
{
|
||||
@ -165,6 +167,7 @@ public class CobaltLuaMachine implements ILuaMachine
|
||||
}
|
||||
|
||||
// If the soft abort has been cleared then we can reset our flag.
|
||||
timeout.refresh();
|
||||
if( !timeout.isSoftAborted() ) debug.thrownSoftAbort = false;
|
||||
|
||||
try
|
||||
@ -175,8 +178,13 @@ public class CobaltLuaMachine implements ILuaMachine
|
||||
resumeArgs = varargsOf( valueOf( eventName ), toValues( arguments ) );
|
||||
}
|
||||
|
||||
Varargs results = LuaThread.run( m_mainRoutine, resumeArgs );
|
||||
// Resume the current thread, or the main one when first starting off.
|
||||
LuaThread thread = m_state.getCurrentThread();
|
||||
if( thread == null || thread == m_state.getMainThread() ) thread = m_mainRoutine;
|
||||
|
||||
Varargs results = LuaThread.run( thread, resumeArgs );
|
||||
if( timeout.isHardAborted() ) throw HardAbortError.INSTANCE;
|
||||
if( results == null ) return MachineResult.PAUSE;
|
||||
|
||||
LuaValue filter = results.first();
|
||||
m_eventFilter = filter.isString() ? filter.toString() : null;
|
||||
@ -620,6 +628,10 @@ public class CobaltLuaMachine implements ILuaMachine
|
||||
private int count = 0;
|
||||
boolean thrownSoftAbort;
|
||||
|
||||
private boolean isPaused;
|
||||
private int oldFlags;
|
||||
private boolean oldInHook;
|
||||
|
||||
TimeoutDebugHandler()
|
||||
{
|
||||
this.timeout = CobaltLuaMachine.this.timeout;
|
||||
@ -628,8 +640,32 @@ public class CobaltLuaMachine implements ILuaMachine
|
||||
@Override
|
||||
public void onInstruction( DebugState ds, DebugFrame di, int pc ) throws LuaError, UnwindThrowable
|
||||
{
|
||||
// We check our current abort state every 128 instructions.
|
||||
if( (count = (count + 1) & 127) == 0 ) handleAbort();
|
||||
di.pc = pc;
|
||||
|
||||
if( isPaused ) resetPaused( ds, di );
|
||||
|
||||
// We check our current pause/abort state every 128 instructions.
|
||||
if( (count = (count + 1) & 127) == 0 )
|
||||
{
|
||||
// If we've been hard aborted or closed then abort.
|
||||
if( timeout.isHardAborted() || m_state == null ) throw HardAbortError.INSTANCE;
|
||||
|
||||
timeout.refresh();
|
||||
if( timeout.isPaused() )
|
||||
{
|
||||
// Preserve the current state
|
||||
isPaused = true;
|
||||
oldInHook = ds.inhook;
|
||||
oldFlags = di.flags;
|
||||
|
||||
// Suspend the state. This will probably throw, but we need to handle the case where it won't.
|
||||
di.flags |= FLAG_HOOKYIELD | FLAG_HOOKED;
|
||||
LuaThread.suspend( ds.getLuaState() );
|
||||
resetPaused( ds, di );
|
||||
}
|
||||
|
||||
handleSoftAbort();
|
||||
}
|
||||
|
||||
super.onInstruction( ds, di, pc );
|
||||
}
|
||||
@ -637,14 +673,25 @@ public class CobaltLuaMachine implements ILuaMachine
|
||||
@Override
|
||||
public void poll() throws LuaError
|
||||
{
|
||||
handleAbort();
|
||||
// If we've been hard aborted or closed then abort.
|
||||
LuaState state = m_state;
|
||||
if( timeout.isHardAborted() || state == null ) throw HardAbortError.INSTANCE;
|
||||
|
||||
timeout.refresh();
|
||||
if( timeout.isPaused() ) LuaThread.suspendBlocking( state );
|
||||
handleSoftAbort();
|
||||
}
|
||||
|
||||
private void handleAbort() throws LuaError
|
||||
private void resetPaused( DebugState ds, DebugFrame di )
|
||||
{
|
||||
// If we've been hard aborted or closed then abort.
|
||||
if( timeout.isHardAborted() || m_state == null ) throw HardAbortError.INSTANCE;
|
||||
// Restore the previous paused state
|
||||
isPaused = false;
|
||||
ds.inhook = oldInHook;
|
||||
di.flags = oldFlags;
|
||||
}
|
||||
|
||||
private void handleSoftAbort() throws LuaError
|
||||
{
|
||||
// If we already thrown our soft abort error then don't do it again.
|
||||
if( !timeout.isSoftAborted() || thrownSoftAbort ) return;
|
||||
|
||||
|
@ -23,37 +23,44 @@ import java.io.InputStream;
|
||||
public final class MachineResult
|
||||
{
|
||||
/**
|
||||
* Represents a successful execution
|
||||
* A successful complete execution.
|
||||
*/
|
||||
public static final MachineResult OK = new MachineResult( false, null );
|
||||
public static final MachineResult OK = new MachineResult( false, false, null );
|
||||
|
||||
/**
|
||||
* A successful paused execution.
|
||||
*/
|
||||
public static final MachineResult PAUSE = new MachineResult( false, true, null );
|
||||
|
||||
/**
|
||||
* An execution which timed out.
|
||||
*/
|
||||
public static final MachineResult TIMEOUT = new MachineResult( true, TimeoutState.ABORT_MESSAGE );
|
||||
public static final MachineResult TIMEOUT = new MachineResult( true, false, TimeoutState.ABORT_MESSAGE );
|
||||
|
||||
/**
|
||||
* An error with no user-friendly error message
|
||||
*/
|
||||
public static final MachineResult GENERIC_ERROR = new MachineResult( true, null );
|
||||
public static final MachineResult GENERIC_ERROR = new MachineResult( true, false, null );
|
||||
|
||||
private final boolean error;
|
||||
private final boolean pause;
|
||||
private final String message;
|
||||
|
||||
private MachineResult( boolean error, String message )
|
||||
private MachineResult( boolean error, boolean pause, String message )
|
||||
{
|
||||
this.pause = pause;
|
||||
this.message = message;
|
||||
this.error = error;
|
||||
}
|
||||
|
||||
public static MachineResult error( @Nonnull String error )
|
||||
{
|
||||
return new MachineResult( true, error );
|
||||
return new MachineResult( true, false, error );
|
||||
}
|
||||
|
||||
public static MachineResult error( @Nonnull Exception error )
|
||||
{
|
||||
return new MachineResult( true, error.getMessage() );
|
||||
return new MachineResult( true, false, error.getMessage() );
|
||||
}
|
||||
|
||||
public boolean isError()
|
||||
@ -61,6 +68,11 @@ public final class MachineResult
|
||||
return error;
|
||||
}
|
||||
|
||||
public boolean isPause()
|
||||
{
|
||||
return pause;
|
||||
}
|
||||
|
||||
@Nullable
|
||||
public String getMessage()
|
||||
{
|
||||
|
@ -106,7 +106,7 @@ public class Config
|
||||
computerThreads = config.get( CATEGORY_GENERAL, "computer_threads", ComputerCraft.computer_threads );
|
||||
computerThreads
|
||||
.setMinValue( 1 )
|
||||
.setRequiresWorldRestart( true )
|
||||
.setRequiresMcRestart( true )
|
||||
.setComment( "Set the number of threads computers can run on. A higher number means more computers can run at once, but may induce lag.\n" +
|
||||
"Please note that some mods may not work with a thread count higher than 1. Use with caution." );
|
||||
|
||||
|
@ -84,10 +84,10 @@ public class ComputerBootstrap
|
||||
|
||||
if( computer.isOn() || !api.didAssert )
|
||||
{
|
||||
StringBuilder builder = new StringBuilder().append( "Did not correctly" );
|
||||
StringBuilder builder = new StringBuilder().append( "Did not correctly [" );
|
||||
if( !api.didAssert ) builder.append( " assert" );
|
||||
if( computer.isOn() ) builder.append( " shutdown" );
|
||||
builder.append( "\n" );
|
||||
builder.append( " ]\n" );
|
||||
|
||||
for( int line = 0; line < 19; line++ )
|
||||
{
|
||||
|
Loading…
x
Reference in New Issue
Block a user