Compare commits

..

No commits in common. "master" and "master" have entirely different histories.

6 changed files with 93 additions and 226 deletions

View File

@ -1,7 +0,0 @@
Copyright © 2021 osmarks
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -1 +1 @@
-d:ssl --threads:on -d:ssl

View File

@ -12,5 +12,4 @@ bin = @["onstat"]
requires "nim >= 1.4.2" requires "nim >= 1.4.2"
requires "https://github.com/GULPF/tiny_sqlite#8fe760d9" requires "https://github.com/GULPF/tiny_sqlite#8fe760d9"
requires "karax >= 1.2.1" requires "karax >= 1.2.1"
requires "cligen >= 1" requires "cligen >= 1"
requires "imageman >= 0.8"

View File

@ -3,34 +3,21 @@ import options
let migrations: seq[string] = @[ let migrations: seq[string] = @[
""" """
CREATE TABLE sites ( CREATE TABLE sites (
sid INTEGER PRIMARY KEY, sid INTEGER PRIMARY KEY,
url TEXT NOT NULL url TEXT NOT NULL
); );
CREATE TABLE reqs ( CREATE TABLE reqs (
rid INTEGER PRIMARY KEY, rid INTEGER PRIMARY KEY,
site INTEGER NOT NULL REFERENCES sites(sid), site INTEGER NOT NULL REFERENCES sites(sid),
timestamp INTEGER NOT NULL, timestamp INTEGER NOT NULL,
status INTEGER NOT NULL, status INTEGER NOT NULL,
latency INTEGER NOT NULL latency INTEGER NOT NULL
); );
""", """,
""" """
CREATE INDEX req_ts_idx ON reqs (timestamp); CREATE INDEX req_ts_idx ON reqs (timestamp);
""",
# Rolling per-site counters: total pings, successful pings and summed latency.
# rc_data_since holds the older end of the interval the counters are from.
# This slightly horribly migrates the existing data using a hardcoded 1 week window.
# SUM() yields NULL when a site has no rows inside the window, which would
# violate the NOT NULL constraints below, so both sums are wrapped in
# COALESCE(..., 0). COUNT(*) never returns NULL and needs no guard.
"""
ALTER TABLE sites ADD COLUMN rc_total INTEGER NOT NULL DEFAULT 0;
ALTER TABLE sites ADD COLUMN rc_success INTEGER NOT NULL DEFAULT 0;
ALTER TABLE sites ADD COLUMN rc_latency INTEGER NOT NULL DEFAULT 0;
ALTER TABLE sites ADD COLUMN rc_data_since INTEGER;
UPDATE sites SET rc_total = (SELECT COUNT(*) FROM reqs WHERE site = sid AND timestamp >= (strftime('%s') - (86400*7)) * 1000000);
UPDATE sites SET rc_success = (SELECT COALESCE(SUM(status <= 0), 0) FROM reqs WHERE site = sid AND timestamp >= (strftime('%s') - (86400*7)) * 1000000);
UPDATE sites SET rc_latency = (SELECT COALESCE(SUM(latency), 0) FROM reqs WHERE site = sid AND timestamp >= (strftime('%s') - (86400*7)) * 1000000);
UPDATE sites SET rc_data_since = (strftime('%s') - (86400*7)) * 1000000;
""" """
] ]

View File

@ -10,12 +10,8 @@ import sugar
import net import net
import sequtils import sequtils
import strformat import strformat
import strutils import std/exitprocs
import cligen import cligen
import imageman
import math
import hashes
import tables
import ./db import ./db
@ -23,6 +19,19 @@ macro includeFile(x: string): string = newStrLitNode(readFile(x.strVal))
const css = includeFile("./src/style.css") const css = includeFile("./src/style.css")
var threadDB {.threadvar.}: Option[DbConn]
proc getDB(): DbConn {.gcsafe.} =
  ## Returns the calling thread's SQLite connection, opening it on first use.
  ##
  ## The connection is cached in the `threadDB` thread-local. When it is
  ## created, WAL journalling is enabled and a close hook is registered to run
  ## at process exit (and at thread teardown where `onThreadDestroy` exists).
  if isNone threadDB:
    let x = openDatabase("./monitoring.sqlite3")
    x.exec("PRAGMA journal_mode=WAL")
    proc closeDB() =
      # Best-effort close: a failure while shutting down is not actionable,
      # but only recoverable errors are ignored so that Defects still surface.
      try: close(x)
      except CatchableError: discard
    addExitProc(closeDB)
    when declared(onThreadDestroy): onThreadDestroy(closeDB)
    threadDB = some x
  get threadDB
func timeToTimestamp*(t: Time): int64 = toUnix(t) * 1000000 + (nanosecond(t) div 1000) func timeToTimestamp*(t: Time): int64 = toUnix(t) * 1000000 + (nanosecond(t) div 1000)
func timestampToTime*(ts: int64): Time = initTime(ts div 1000000, (ts mod 1000000) * 1000) func timestampToTime*(ts: int64): Time = initTime(ts div 1000000, (ts mod 1000000) * 1000)
@ -30,48 +39,42 @@ proc toDbValue(t: Time): DbValue = DbValue(kind: sqliteInteger, intVal: timeToTi
proc fromDbValue(value: DbValue, T: typedesc[Time]): Time = timestampToTime(value.intVal) proc fromDbValue(value: DbValue, T: typedesc[Time]): Time = timestampToTime(value.intVal)
type type
ResponseType {.pure.} = enum ResponseType = enum
HttpTeapot = -1 rtHttpTeapot = -1
Ok = 0 rtOk = 0
HttpError = 1 rtHttpError = 1
Timeout = 2 rtTimeout = 2
FetchError = 3 rtFetchError = 3
Response = object Response = object
rtype: ResponseType rtype: ResponseType
latency: int64 # microseconds latency: int64 # microseconds
SiteStatus = object SiteStatus = object
id: int
url: string url: string
lastPing: Time lastPing: Time
lastResponse: ResponseType lastResponse: ResponseType
lastLatency: float lastLatency: float
uptimePercent: float uptimePercent: float
averageLatency: float
Ctx = object
db: DbConn
dbPath: string
images: TableRef[int, (seq[byte], int)]
interval: int
proc fetchLatest(ctx: Ctx, row: ResultRow): Option[SiteStatus] = proc uptimeSince(sid: int, time: Time): float =
let okPings = fromDbValue(get getDB().value("SELECT COUNT(*) FROM reqs WHERE site = ? AND status <= 0", sid), int)
let totalPings = fromDbValue(get getDB().value("SELECT COUNT(*) FROM reqs WHERE site = ?", sid), int)
okPings / totalPings
proc fetchLatest(row: ResultRow): Option[SiteStatus] =
let weekAgo = getTime() + initTimeInterval(weeks= -1) let weekAgo = getTime() + initTimeInterval(weeks= -1)
let (site, url, rollingTotalPings, rollingSuccessfulPings, rollingLatency, rollingDataSince) = row.unpack((int, string, int64, int64, int64, Option[int64])) let (site, url) = row.unpack((int, string))
# work around bizarre SQLite query planner issue - it appears that if it has a literal value to compare site against it generates very fast VM code let row = getDB().one("SELECT timestamp, status, latency FROM reqs WHERE site = ? ORDER BY timestamp DESC LIMIT 1", site)
# but if it has a prepared state parameter it somehow refuses to use the index
let row = ctx.db.one("SELECT timestamp, status, latency FROM reqs WHERE site = -1 OR site = ? ORDER BY timestamp DESC LIMIT 1", site)
if isNone row: return none(SiteStatus) if isNone row: return none(SiteStatus)
let (ts, status, latency) = (get row).unpack((Time, int, int)) let (ts, status, latency) = (get row).unpack((Time, int, int))
some SiteStatus(url: url, lastPing: ts, lastResponse: ResponseType(status), lastLatency: float(latency) / 1e3, id: site, some SiteStatus(url: url, lastPing: ts, lastResponse: ResponseType(status), lastLatency: float64(latency) / 1e3, uptimePercent: uptimeSince(site, weekAgo))
uptimePercent: float(rollingSuccessfulPings) / float(rollingTotalPings), averageLatency: float(rollingLatency) / float(rollingTotalPings) / 1e3)
proc mainPage(ctx: Ctx): string = proc mainPage(): string =
let sites = ctx.db.all("SELECT * FROM sites ORDER BY sid").map(x => ctx.fetchLatest(x)).filter(x => x.isSome).map(x => x.get) let sites = getDB().all("SELECT * FROM sites ORDER BY sid").map(fetchLatest).filter(x => isSome x).map(x => get x)
let up = sites.filter(x => int(x.lastResponse) <= 0).len() let up = sites.filter(x => int(x.lastResponse) <= 0).len()
let vnode = buildHtml(html()): let vnode = buildHtml(html()):
head: head:
meta(charset="utf8") meta(charset="utf8")
meta(http-equiv="refresh", content="60") meta(http-equiv="refresh", content="60")
meta(name="viewport", content="width=device-width, initial-scale=1")
title: text &"{up}/{sites.len} up - OnStat" title: text &"{up}/{sites.len} up - OnStat"
style: text css style: text css
body: body:
@ -79,25 +82,23 @@ proc mainPage(ctx: Ctx): string =
h2(class="title"): text &"{up}/{sites.len} up" h2(class="title"): text &"{up}/{sites.len} up"
for site in sites: for site in sites:
tdiv(class="card " & $site.lastResponse): tdiv(class="card " & $site.lastResponse):
tdiv(class="left"): h2:
h2: case site.lastResponse
case site.lastResponse of rtOk: text ""
of ResponseType.Ok: text "" of rtHttpError: text ""
of ResponseType.HttpError: text "" of rtTimeout: text ""
of ResponseType.Timeout: text "" of rtFetchError: text ""
of ResponseType.FetchError: text "" of rtHttpTeapot: text "🫖 "
of ResponseType.HttpTeapot: text "🫖 " text site.url
text site.url tdiv: text("Last pinged " & format(site.lastPing, "HH:mm:ss dd-MM-yyyy"))
tdiv: text("Last pinged " & format(site.lastPing, "HH:mm:ss dd-MM-yyyy")) tdiv:
tdiv: case site.lastResponse
case site.lastResponse of rtOk: text &"Latency {site.lastLatency}ms"
of ResponseType.Ok: text &"Latency {site.lastLatency}ms" of rtHttpError: text "HTTP error"
of ResponseType.HttpError: text "HTTP error" of rtHttpTeapot: text &"Teapot, latency {site.lastLatency}ms"
of ResponseType.HttpTeapot: text &"Teapot, latency {site.lastLatency:.5f}ms" of rtTimeout: text "Timed out"
of ResponseType.Timeout: text "Timed out" of rtFetchError: text "Fetch failed"
of ResponseType.FetchError: text "Fetch failed" tdiv: text &"{site.uptimePercent * 100}% up in last week"
tdiv: text &"{site.uptimePercent * 100:.5f}% up, {site.averageLatency:.5f}ms latency in last week"
if site.id in ctx.images: img(src= &"/vis/{site.id}", class="right", title= &"{site.url} 12-week status visualization")
hr() hr()
small: small:
text "made by " text "made by "
@ -107,158 +108,53 @@ proc mainPage(ctx: Ctx): string =
text "." text "."
$vnode $vnode
var imageReturnChannel: Channel[(int, seq[byte])] proc onRequest(req: Request) {.async.} =
if req.reqMethod == HttpGet:
case req.url.path
of "/": await req.respond(Http200, mainPage(), headers=newHttpHeaders([("Content-Type", "text/html")]))
else: await req.respond(Http404, "not found")
else:
await req.respond(Http404, "not found")
proc readIntoContext(ctx: Ctx) = proc pollTarget(s: string): Future[Response] {.async.} =
# this is a horrible workaround to avoid having to something something shared hash table
var available = true
while available:
let (av, data) = imageReturnChannel.tryRecv()
available = av
if available:
let (id, image) = data
ctx.images[id] = (image, image.hash)
proc onRequest(ctx: Ctx): (proc(req: Request): Future[void] {.gcsafe.}) =
  ## Builds the HTTP request handler closure bound to `ctx`.
  ##
  ## Only GET is served; any other method is answered with 405.
  ## * `/` responds with the HTML page produced by `mainPage`.
  ## * `/vis/<id>` responds with the image stored in `ctx.images` under the
  ##   integer `<id>`, tagged with an `ETag` built from the stored hash. A
  ##   request whose `If-None-Match` equals that tag gets an empty 304.
  ## Any other path, a non-integer id, or an id with no stored image gives 404.
  result = proc(req: Request) {.async.} =
    # Pick up any images that have arrived since the last request.
    readIntoContext(ctx)
    if req.reqMethod == HttpGet:
      var path = req.url.path
      if path == "/":
        await req.respond(Http200, mainPage(ctx),
          headers=newHttpHeaders([("Content-Type", "text/html")]))
      elif path.startsWith("/vis/"):
        path.removePrefix("/vis/")
        var id = 0
        try:
          id = parseInt path
        except ValueError:
          # parseInt only raises ValueError; anything else is a real bug and
          # must not be turned into a 404.
          await req.respond(Http404, "not found")
          return
        if id in ctx.images:
          let (image, digest) = ctx.images[id]
          let etag = &"\"{digest}\""
          if etag == req.headers.getOrDefault("if-none-match"):
            await req.respond(Http304, "")
          else:
            await req.respond(Http200, cast[string](image),
              headers=newHttpHeaders([
                ("Content-Type", "image/png"), ("ETag", etag)]))
        else: await req.respond(Http404, "not found")
      else: await req.respond(Http404, "not found")
    else:
      await req.respond(Http405, "GET only")
proc pollTarget(ctx: Ctx, s: string): Future[Response] {.async.} =
var client = newAsyncHttpClient() var client = newAsyncHttpClient()
var x = Response(rtype: ResponseType.Timeout, latency: 0) var x = Response(rtype: rtTimeout, latency: 0)
proc doFetch() {.async.} = proc doFetch() {.async.} =
let ts = now().utc let ts = now().utc
let res = await client.get(s) let res = await client.get(s)
let latency = (now().utc - ts).inMicroseconds let latency = (now().utc - ts).inMicroseconds
if res.code.int == 418: x = Response(rtype: ResponseType.HttpTeapot, latency: latency) if res.code.int == 418: x = Response(rtype: rtHttpTeapot, latency: latency)
elif res.code.is4xx or res.code.is5xx: x = Response(rtype: ResponseType.HttpError, latency: latency) elif res.code.is4xx or res.code.is5xx: x = Response(rtype: rtHttpError, latency: latency)
else: x = Response(rtype: ResponseType.Ok, latency: latency) else: x = Response(rtype: rtOk, latency: latency)
try: try:
discard await withTimeout(doFetch(), 10000) discard await withTimeout(doFetch(), 10000)
except: except:
x = Response(rtype: ResponseType.FetchError, latency: 0) x = Response(rtype: rtFetchError, latency: 0)
client.close()
return x return x
proc pollTargets(ctx: Ctx) {.async.} = proc pollTargets() {.async.} =
for row in ctx.db.all("SELECT * FROM sites"): for row in getDB().all("SELECT * FROM sites"):
var (id, url, rollingTotalPings, rollingSuccessfulPings, rollingLatency, rollingDataSince) = row.unpack((int64, string, int64, int64, int64, Option[Time])) let (id, url) = row.unpack((int64, string))
let res = await ctx.pollTarget(url) let res = await pollTarget(url)
let threshold = getTime() + initTimeInterval(weeks= -1) getDB().exec("INSERT INTO reqs (site, timestamp, status, latency) VALUES (?, ?, ?, ?)", id, getTime(), int(res.rtype), res.latency)
# drop old data from rolling counters proc timerCallback(fd: AsyncFD): bool =
if rollingDataSince.isSome: asyncCheck pollTargets()
for row in ctx.db.iterate("SELECT status, latency FROM reqs WHERE timestamp >= ? AND timestamp <= ? AND site = ?", rollingDataSince.get, threshold, id): false
let (statusRaw, latency) = row.unpack((int, int))
rollingTotalPings -= 1
rollingLatency -= latency
if statusRaw <= 0:
rollingSuccessfulPings -= 1
# add new data
rollingTotalPings += 1
rollingLatency += res.latency
if int(res.rtype) <= 0:
rollingSuccessfulPings += 1
ctx.db.transaction:
ctx.db.exec("UPDATE sites SET rc_total = ?, rc_success = ?, rc_latency = ?, rc_data_since = ? WHERE sid = ?", rollingTotalPings, rollingSuccessfulPings, rollingLatency, threshold, id)
ctx.db.exec("INSERT INTO reqs (site, timestamp, status, latency) VALUES (?, ?, ?, ?)", id, getTime(), int(res.rtype), res.latency)
proc drawLatencyImage(db: DbConn, site: int, interval: int): seq[byte] =
## Draws the stored ping history of `site` as an image and returns it
## PNG-encoded.
##
## Rows are read from `reqs` newest first, at most `width * height` of them,
## and pixels are written into the image's data buffer starting at index 0.
## `interval` is used below as a number of milliseconds.
# Image dimensions in pixels; their product is also the row limit of the query.
const width = 120 * 6
const height = 168 * 2
var image = initImage[ColorRGBU](width, height)
# Index of the next pixel to write in image.data.
var count = 0
# Timestamp of the previously handled row; starts at the current time.
var lastTs = getTime()
for row in db.iterate("SELECT timestamp, status, latency FROM reqs WHERE site = ? ORDER BY timestamp DESC LIMIT ?", site, width * height):
let (ts, statusRaw, latency) = row.unpack((Time, int, int))
let timeGap = lastTs - ts
# A gap of more than interval + 10000 ms between consecutive rows is filled
# with one purple pixel per whole interval that fits into the gap.
if timeGap > initDuration(milliseconds = interval + 10000):
let pixels = timeGap.inMilliseconds div interval
for _ in 1..pixels:
image.data[count] = ColorRGBU([0x7Eu8, 0x1E, 0x9C])
count += 1
# Stop filling once the pixel buffer is full.
if count >= image.data.len: break
else:
let status = ResponseType(statusRaw)
case status
of ResponseType.HttpError:
# orange
image.data[count] = ColorRGBU([255u8, 127, 0])
of ResponseType.Timeout:
# black
image.data[count] = ColorRGBU([0u8, 0, 0])
of ResponseType.FetchError:
# red
image.data[count] = ColorRGBU([255u8, 0, 0])
else:
# Remaining statuses: green, scaled by 10^1.1 / latency^0.25 and clamped
# to the range 0.2..1.0, so higher latency gives a darker green.
let latencyMultiplier = max(min(pow(10.0, 1.1) / pow(float(latency), 0.25), 1.0), 0.2)
image.data[count] = ColorRGBU([0u8, uint8(latencyMultiplier * 255.0), 0])
# NOTE(review): indentation is lost in this view, so the nesting of the next
# three statements is not certain - presumably the increment belongs to the
# status branch and the bounds check / lastTs update to the row loop; confirm.
count += 1
if count >= image.data.len: break
lastTs = ts
writePNG(image, compression=6)
proc generateImages(args: (string, int)) =
  ## Renders the status image of every site and sends each
  ## `(site id, image bytes)` pair over `imageReturnChannel`.
  ##
  ## `args` is `(dbPath, interval)`, packed into one tuple because the proc is
  ## started through `createThread`. The proc opens its own connection to
  ## `dbPath` and closes it before returning.
  let (dbPath, interval) = args
  let db = openDatabase(dbPath)
  # Release the connection even if a query or the image encoder raises.
  defer: close(db)
  db.exec("PRAGMA journal_mode = WAL")
  for row in db.all("SELECT sid FROM sites"):
    let id = row[0].fromDbValue(int)
    imageReturnChannel.send((id, drawLatencyImage(db, id, interval)))
proc run(dbPath="./monitoring.sqlite3", port=7800, interval=30000, urls: seq[string]) = proc run(dbPath="./monitoring.sqlite3", port=7800, interval=30000, urls: seq[string]) =
## Run onstat. Note that the URLs you configure will be persisted in the monitoring database. To remove them, you must manually update this. ## Run onstat. Note that the URLs you configure will be persisted in the monitoring database. To remove them, you must manually update this.
let database = openDatabase(dbPath) let database = openDatabase(dbPath)
database.exec("PRAGMA journal_mode = WAL")
migrate(database) migrate(database)
for url in urls: for url in urls:
echo &"Adding {url}" echo &"Adding {url}"
database.exec("INSERT INTO sites (url) VALUES (?)", url) database.exec("INSERT INTO sites (url) VALUES (?)", url)
close(database)
var ctx = Ctx(db: database, dbPath: dbPath, images: newTable[int, (seq[byte], int)](), interval: interval)
echo "Starting up" echo "Starting up"
asyncCheck pollTargets(ctx) asyncCheck pollTargets()
imageReturnChannel.open() addTimer(interval, false, timerCallback)
var thread: Thread[(string, int)]
createThread(thread, generateImages, (dbPath, interval))
echo "Ready"
addTimer(interval, false, proc(fd: AsyncFD): bool =
asyncCheck pollTargets(ctx)
false)
addTimer(interval * 60, false, proc(fd: AsyncFD): bool =
createThread(thread, generateImages, (dbPath, interval))
let fut = sleepAsync(10000)
fut.addCallback(() => readIntoContext(ctx))
asyncCheck fut
false)
var server = newAsyncHttpServer() var server = newAsyncHttpServer()
waitFor server.serve(Port(port), onRequest(ctx)) waitFor server.serve(Port(port), onRequest)
dispatch(run, help={ dispatch(run, help={
"dbPath": "path to SQLite3 database for historical data logging", "dbPath": "path to SQLite3 database for historical data logging",
"port": "port to serve HTTP on", "port": "port to serve HTTP on",

View File

@ -1,4 +1,5 @@
body { body {
max-width: 40em;
font-family: sans-serif; font-family: sans-serif;
} }
@ -17,29 +18,20 @@ h1 {
.card { .card {
margin-bottom: 1em; margin-bottom: 1em;
display: flex;
justify-content: space-between;
flex-wrap: wrap;
} }
.card.Ok h2 { .card.rtOk h2 {
color: green; color: green;
} }
.card.HttpError h2 { .card.rtHttpError h2 {
color: orange; color: orange;
} }
.card.HttpTeapot h2 { .card.rtHttpTeapot h2 {
color: blue; color: blue;
} }
.card.FetchError h2 { .card.rtFetchError h2 {
color: red; color: red;
} }
.card.Timeout h2 { .card.rtTimeout h2 {
color: red; color: red;
} }
img {
image-rendering: pixelated;
-ms-interpolation-mode: nearest-neighbor;
image-rendering: crisp-edges;
}