Compare commits

...

8 Commits

Author SHA1 Message Date
osmarks 511a8e62e9 hotfix for new sites 2021-04-25 22:28:36 +01:00
osmarks 30474d55cf fix image title 2021-04-25 22:15:02 +01:00
osmarks 65b19dbd96 Big refactors, performance, status images 2021-04-25 22:10:52 +01:00
osmarks dfbda82731 fix FD leaks 2021-01-29 11:28:01 +00:00
osmarks 7136b827d3 make uptimeSince actually filter by timestamps 2021-01-28 22:29:17 +00:00
osmarks bc2cb0de45 licenseify 2021-01-28 21:24:53 +00:00
osmarks 78673b1319 viewport tag for mobile phones 2021-01-28 21:17:21 +00:00
osmarks 2d7dc14936 Merge pull request 'add footer thing' (#2) from ubq323/onstat:master into master
Reviewed-on: osmarks/onstat#2
It might be better to make this configurable, but I can probably do that later and I think the only active instance is the official one anyway.
2021-01-28 21:15:11 +00:00
6 changed files with 226 additions and 93 deletions

7
LICENSE Normal file
View File

@ -0,0 +1,7 @@
Copyright © 2021 osmarks
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

View File

@ -1 +1 @@
-d:ssl
-d:ssl --threads:on

View File

@ -12,4 +12,5 @@ bin = @["onstat"]
requires "nim >= 1.4.2"
requires "https://github.com/GULPF/tiny_sqlite#8fe760d9"
requires "karax >= 1.2.1"
requires "cligen >= 1"
requires "cligen >= 1"
requires "imageman >= 0.8"

View File

@ -3,21 +3,34 @@ import options
let migrations: seq[string] = @[
"""
CREATE TABLE sites (
sid INTEGER PRIMARY KEY,
url TEXT NOT NULL
);
CREATE TABLE sites (
sid INTEGER PRIMARY KEY,
url TEXT NOT NULL
);
CREATE TABLE reqs (
rid INTEGER PRIMARY KEY,
site INTEGER NOT NULL REFERENCES sites(sid),
timestamp INTEGER NOT NULL,
status INTEGER NOT NULL,
latency INTEGER NOT NULL
);
CREATE TABLE reqs (
rid INTEGER PRIMARY KEY,
site INTEGER NOT NULL REFERENCES sites(sid),
timestamp INTEGER NOT NULL,
status INTEGER NOT NULL,
latency INTEGER NOT NULL
);
""",
"""
CREATE INDEX req_ts_idx ON reqs (timestamp);
CREATE INDEX req_ts_idx ON reqs (timestamp);
""",
# rolling total/successful ping and latency count
# rc_data_since holds the older end of the interval the counters are from
# this slightly horribly migrates the existing data using a hardcoded 1 week window
# SUM() yields NULL when a site has no pings inside that window; rc_success and rc_latency
# are NOT NULL, so the sums are wrapped in COALESCE to keep the migration from failing on such sites
"""
ALTER TABLE sites ADD COLUMN rc_total INTEGER NOT NULL DEFAULT 0;
ALTER TABLE sites ADD COLUMN rc_success INTEGER NOT NULL DEFAULT 0;
ALTER TABLE sites ADD COLUMN rc_latency INTEGER NOT NULL DEFAULT 0;
ALTER TABLE sites ADD COLUMN rc_data_since INTEGER;
UPDATE sites SET rc_total = (SELECT COUNT(*) FROM reqs WHERE site = sid AND timestamp >= (strftime('%s') - (86400*7)) * 1000000);
UPDATE sites SET rc_success = (SELECT COALESCE(SUM(status <= 0), 0) FROM reqs WHERE site = sid AND timestamp >= (strftime('%s') - (86400*7)) * 1000000);
UPDATE sites SET rc_latency = (SELECT COALESCE(SUM(latency), 0) FROM reqs WHERE site = sid AND timestamp >= (strftime('%s') - (86400*7)) * 1000000);
UPDATE sites SET rc_data_since = (strftime('%s') - (86400*7)) * 1000000;
"""
]

View File

@ -10,8 +10,12 @@ import sugar
import net
import sequtils
import strformat
import std/exitprocs
import strutils
import cligen
import imageman
import math
import hashes
import tables
import ./db
@ -19,19 +23,6 @@ macro includeFile(x: string): string = newStrLitNode(readFile(x.strVal))
const css = includeFile("./src/style.css")
var threadDB {.threadvar.}: Option[DbConn]
proc getDB(): DbConn {.gcsafe.} =
if isNone threadDB:
let x = openDatabase("./monitoring.sqlite3")
x.exec("PRAGMA journal_mode=WAL")
proc closeDB() =
try: close(x)
except: discard
addExitProc(closeDB)
when declared(onThreadDestroy): onThreadDestroy(closeDB)
threadDB = some x
get threadDB
func timeToTimestamp*(t: Time): int64 =
  ## Converts `t` to an integer count of microseconds since the Unix epoch.
  ## Sub-microsecond precision is truncated.
  let wholeSeconds = t.toUnix
  let microPart = t.nanosecond div 1000
  result = wholeSeconds * 1000000 + microPart
func timestampToTime*(ts: int64): Time =
  ## Converts a microseconds-since-Unix-epoch timestamp back into a `Time`.
  let seconds = ts div 1000000
  # the leftover microseconds become the nanosecond component
  let nanos = (ts mod 1000000) * 1000
  result = initTime(seconds, nanos)
@ -39,42 +30,48 @@ proc toDbValue(t: Time): DbValue = DbValue(kind: sqliteInteger, intVal: timeToTi
proc fromDbValue(value: DbValue, T: typedesc[Time]): Time =
  ## Reads a `Time` out of a database value by passing its integer payload
  ## (`intVal`) through `timestampToTime`.
  result = value.intVal.timestampToTime
type
ResponseType = enum
rtHttpTeapot = -1
rtOk = 0
rtHttpError = 1
rtTimeout = 2
rtFetchError = 3
ResponseType {.pure.} = enum
HttpTeapot = -1
Ok = 0
HttpError = 1
Timeout = 2
FetchError = 3
Response = object
rtype: ResponseType
latency: int64 # microseconds
SiteStatus = object
id: int
url: string
lastPing: Time
lastResponse: ResponseType
lastLatency: float
uptimePercent: float
averageLatency: float
Ctx = object
db: DbConn
dbPath: string
images: TableRef[int, (seq[byte], int)]
interval: int
proc uptimeSince(sid: int, time: Time): float =
let okPings = fromDbValue(get getDB().value("SELECT COUNT(*) FROM reqs WHERE site = ? AND status <= 0", sid), int)
let totalPings = fromDbValue(get getDB().value("SELECT COUNT(*) FROM reqs WHERE site = ?", sid), int)
okPings / totalPings
proc fetchLatest(row: ResultRow): Option[SiteStatus] =
proc fetchLatest(ctx: Ctx, row: ResultRow): Option[SiteStatus] =
let weekAgo = getTime() + initTimeInterval(weeks= -1)
let (site, url) = row.unpack((int, string))
let row = getDB().one("SELECT timestamp, status, latency FROM reqs WHERE site = ? ORDER BY timestamp DESC LIMIT 1", site)
let (site, url, rollingTotalPings, rollingSuccessfulPings, rollingLatency, rollingDataSince) = row.unpack((int, string, int64, int64, int64, Option[int64]))
# work around bizarre SQLite query planner issue - it appears that if it has a literal value to compare site against it generates very fast VM code
# but if it has a prepared state parameter it somehow refuses to use the index
let row = ctx.db.one("SELECT timestamp, status, latency FROM reqs WHERE site = -1 OR site = ? ORDER BY timestamp DESC LIMIT 1", site)
if isNone row: return none(SiteStatus)
let (ts, status, latency) = (get row).unpack((Time, int, int))
some SiteStatus(url: url, lastPing: ts, lastResponse: ResponseType(status), lastLatency: float64(latency) / 1e3, uptimePercent: uptimeSince(site, weekAgo))
some SiteStatus(url: url, lastPing: ts, lastResponse: ResponseType(status), lastLatency: float(latency) / 1e3, id: site,
uptimePercent: float(rollingSuccessfulPings) / float(rollingTotalPings), averageLatency: float(rollingLatency) / float(rollingTotalPings) / 1e3)
proc mainPage(): string =
let sites = getDB().all("SELECT * FROM sites ORDER BY sid").map(fetchLatest).filter(x => isSome x).map(x => get x)
proc mainPage(ctx: Ctx): string =
let sites = ctx.db.all("SELECT * FROM sites ORDER BY sid").map(x => ctx.fetchLatest(x)).filter(x => x.isSome).map(x => x.get)
let up = sites.filter(x => int(x.lastResponse) <= 0).len()
let vnode = buildHtml(html()):
head:
meta(charset="utf8")
meta(http-equiv="refresh", content="60")
meta(name="viewport", content="width=device-width, initial-scale=1")
title: text &"{up}/{sites.len} up - OnStat"
style: text css
body:
@ -82,23 +79,25 @@ proc mainPage(): string =
h2(class="title"): text &"{up}/{sites.len} up"
for site in sites:
tdiv(class="card " & $site.lastResponse):
h2:
case site.lastResponse
of rtOk: text ""
of rtHttpError: text ""
of rtTimeout: text ""
of rtFetchError: text ""
of rtHttpTeapot: text "🫖 "
text site.url
tdiv: text("Last pinged " & format(site.lastPing, "HH:mm:ss dd-MM-yyyy"))
tdiv:
case site.lastResponse
of rtOk: text &"Latency {site.lastLatency}ms"
of rtHttpError: text "HTTP error"
of rtHttpTeapot: text &"Teapot, latency {site.lastLatency}ms"
of rtTimeout: text "Timed out"
of rtFetchError: text "Fetch failed"
tdiv: text &"{site.uptimePercent * 100}% up in last week"
tdiv(class="left"):
h2:
case site.lastResponse
of ResponseType.Ok: text ""
of ResponseType.HttpError: text ""
of ResponseType.Timeout: text ""
of ResponseType.FetchError: text ""
of ResponseType.HttpTeapot: text "🫖 "
text site.url
tdiv: text("Last pinged " & format(site.lastPing, "HH:mm:ss dd-MM-yyyy"))
tdiv:
case site.lastResponse
of ResponseType.Ok: text &"Latency {site.lastLatency}ms"
of ResponseType.HttpError: text "HTTP error"
of ResponseType.HttpTeapot: text &"Teapot, latency {site.lastLatency:.5f}ms"
of ResponseType.Timeout: text "Timed out"
of ResponseType.FetchError: text "Fetch failed"
tdiv: text &"{site.uptimePercent * 100:.5f}% up, {site.averageLatency:.5f}ms latency in last week"
if site.id in ctx.images: img(src= &"/vis/{site.id}", class="right", title= &"{site.url} 12-week status visualization")
hr()
small:
text "made by "
@ -108,53 +107,158 @@ proc mainPage(): string =
text "."
$vnode
proc onRequest(req: Request) {.async.} =
if req.reqMethod == HttpGet:
case req.url.path
of "/": await req.respond(Http200, mainPage(), headers=newHttpHeaders([("Content-Type", "text/html")]))
else: await req.respond(Http404, "not found")
else:
await req.respond(Http404, "not found")
var imageReturnChannel: Channel[(int, seq[byte])]
proc pollTarget(s: string): Future[Response] {.async.} =
proc readIntoContext(ctx: Ctx) =
  ## Drains `imageReturnChannel` without blocking and stores every received
  ## image in `ctx.images`, keyed by id, together with the hash of its bytes.
  # workaround: images arrive over a channel so that no hash table has to be shared
  while true:
    let (gotMessage, payload) = imageReturnChannel.tryRecv()
    if not gotMessage: break
    let (siteId, imageBytes) = payload
    ctx.images[siteId] = (imageBytes, imageBytes.hash)
proc onRequest(ctx: Ctx): (proc(req: Request): Future[void] {.gcsafe.}) =
  ## Builds the HTTP request handler bound to `ctx`.
  ##
  ## Only GET is served; any other method is answered with 405.
  ## * `/` returns the HTML page produced by `mainPage`.
  ## * `/vis/<id>` returns the image cached in `ctx.images` for `<id>` with an
  ##   ETag built from the stored hash, or 304 when the request's
  ##   `If-None-Match` header equals that ETag.
  ## * Anything else (including a non-numeric or unknown id) returns 404.
  result = proc(req: Request) {.async.} =
    # pick up any images delivered through the channel since the last request
    readIntoContext(ctx)
    if req.reqMethod != HttpGet:
      await req.respond(Http405, "GET only")
      return
    var path = req.url.path
    if path == "/":
      await req.respond(Http200, mainPage(ctx), headers=newHttpHeaders([("Content-Type", "text/html")]))
    elif path.startsWith("/vis/"):
      path.removePrefix("/vis/")
      # A malformed id is the only failure expected from parseInt, so catch just
      # ValueError (a bare `except:` would also hide defects) and respond
      # outside of the exception handler.
      var id = none(int)
      try:
        id = some(parseInt(path))
      except ValueError:
        discard
      if id.isSome and id.get in ctx.images:
        let (image, hash) = ctx.images[id.get]
        let etag = &"\"{hash}\""
        if etag == req.headers.getOrDefault("if-none-match"):
          # a 304 repeats the validator the 200 response would have carried
          await req.respond(Http304, "", headers=newHttpHeaders([("ETag", etag)]))
        else:
          # the image bytes are reinterpreted as a string for the response body
          await req.respond(Http200, cast[string](image), headers=newHttpHeaders([
            ("Content-Type", "image/png"), ("ETag", etag)]))
      else:
        await req.respond(Http404, "not found")
    else:
      await req.respond(Http404, "not found")
proc pollTarget(ctx: Ctx, s: string): Future[Response] {.async.} =
var client = newAsyncHttpClient()
var x = Response(rtype: rtTimeout, latency: 0)
var x = Response(rtype: ResponseType.Timeout, latency: 0)
proc doFetch() {.async.} =
let ts = now().utc
let res = await client.get(s)
let latency = (now().utc - ts).inMicroseconds
if res.code.int == 418: x = Response(rtype: rtHttpTeapot, latency: latency)
elif res.code.is4xx or res.code.is5xx: x = Response(rtype: rtHttpError, latency: latency)
else: x = Response(rtype: rtOk, latency: latency)
if res.code.int == 418: x = Response(rtype: ResponseType.HttpTeapot, latency: latency)
elif res.code.is4xx or res.code.is5xx: x = Response(rtype: ResponseType.HttpError, latency: latency)
else: x = Response(rtype: ResponseType.Ok, latency: latency)
try:
discard await withTimeout(doFetch(), 10000)
except:
x = Response(rtype: rtFetchError, latency: 0)
x = Response(rtype: ResponseType.FetchError, latency: 0)
client.close()
return x
proc pollTargets() {.async.} =
for row in getDB().all("SELECT * FROM sites"):
let (id, url) = row.unpack((int64, string))
let res = await pollTarget(url)
getDB().exec("INSERT INTO reqs (site, timestamp, status, latency) VALUES (?, ?, ?, ?)", id, getTime(), int(res.rtype), res.latency)
proc pollTargets(ctx: Ctx) {.async.} =
for row in ctx.db.all("SELECT * FROM sites"):
var (id, url, rollingTotalPings, rollingSuccessfulPings, rollingLatency, rollingDataSince) = row.unpack((int64, string, int64, int64, int64, Option[Time]))
let res = await ctx.pollTarget(url)
let threshold = getTime() + initTimeInterval(weeks= -1)
proc timerCallback(fd: AsyncFD): bool =
asyncCheck pollTargets()
false
# drop old data from rolling counters
if rollingDataSince.isSome:
for row in ctx.db.iterate("SELECT status, latency FROM reqs WHERE timestamp >= ? AND timestamp <= ? AND site = ?", rollingDataSince.get, threshold, id):
let (statusRaw, latency) = row.unpack((int, int))
rollingTotalPings -= 1
rollingLatency -= latency
if statusRaw <= 0:
rollingSuccessfulPings -= 1
# add new data
rollingTotalPings += 1
rollingLatency += res.latency
if int(res.rtype) <= 0:
rollingSuccessfulPings += 1
ctx.db.transaction:
ctx.db.exec("UPDATE sites SET rc_total = ?, rc_success = ?, rc_latency = ?, rc_data_since = ? WHERE sid = ?", rollingTotalPings, rollingSuccessfulPings, rollingLatency, threshold, id)
ctx.db.exec("INSERT INTO reqs (site, timestamp, status, latency) VALUES (?, ?, ?, ?)", id, getTime(), int(res.rtype), res.latency)
proc drawLatencyImage(db: DbConn, site: int, interval: int): seq[byte] =
## Renders the most recent pings of `site` into a 720x336 image and returns
## the output of `writePNG` for it.
##
## Rows are read newest-first from `reqs` (at most width * height of them) and
## written to consecutive entries of `image.data`.
## `interval` is treated as milliseconds: it is compared against the time
## between rows via `initDuration(milliseconds = ...)`.
# NOTE(review): 120 * 6 by 168 * 2 presumably means 120 pings/hour (30 s interval) x 168 hours/week, scaled to 12 weeks - confirm
const width = 120 * 6
const height = 168 * 2
var image = initImage[ColorRGBU](width, height)
# index of the next entry of image.data to write
var count = 0
# timestamp of the previously visited (newer) row; starts at the current time
var lastTs = getTime()
for row in db.iterate("SELECT timestamp, status, latency FROM reqs WHERE site = ? ORDER BY timestamp DESC LIMIT ?", site, width * height):
let (ts, statusRaw, latency) = row.unpack((Time, int, int))
let timeGap = lastTs - ts
# a gap of more than one interval plus 10 s counts as missing data
if timeGap > initDuration(milliseconds = interval + 10000):
# number of whole intervals that fit into the gap
let pixels = timeGap.inMilliseconds div interval
for _ in 1..pixels:
# missing data is drawn in purple (0x7E1E9C)
image.data[count] = ColorRGBU([0x7Eu8, 0x1E, 0x9C])
count += 1
if count >= image.data.len: break
else:
let status = ResponseType(statusRaw)
case status
of ResponseType.HttpError:
# orange
image.data[count] = ColorRGBU([255u8, 127, 0])
of ResponseType.Timeout:
# black
image.data[count] = ColorRGBU([0u8, 0, 0])
of ResponseType.FetchError:
# red
image.data[count] = ColorRGBU([255u8, 0, 0])
else:
# remaining statuses are drawn green; brightness is 10^1.1 / latency^0.25 clamped to [0.2, 1.0],
# so higher latency gives a darker pixel
let latencyMultiplier = max(min(pow(10.0, 1.1) / pow(float(latency), 0.25), 1.0), 0.2)
image.data[count] = ColorRGBU([0u8, uint8(latencyMultiplier * 255.0), 0])
count += 1
# stop once the image buffer is full
if count >= image.data.len: break
lastTs = ts
writePNG(image, compression=6)
proc generateImages(args: (string, int)) =
  ## Opens its own connection to the database at `dbPath`, renders the image
  ## of every site with `drawLatencyImage` and sends each `(sid, image)` pair
  ## over `imageReturnChannel`.
  ##
  ## `args` is `(dbPath, interval)`; `interval` is forwarded unchanged to
  ## `drawLatencyImage`.
  let (dbPath, interval) = args
  let db = openDatabase(dbPath)
  # close the connection even when a query or the rendering raises, so a
  # failed run does not leak the database handle
  defer: close(db)
  db.exec("PRAGMA journal_mode = WAL")
  for row in db.all("SELECT sid FROM sites"):
    let id = row[0].fromDbValue(int)
    imageReturnChannel.send((id, drawLatencyImage(db, id, interval)))
proc run(dbPath="./monitoring.sqlite3", port=7800, interval=30000, urls: seq[string]) =
## Run onstat. Note that the URLs you configure will be persisted in the monitoring database. To remove them, you must manually update this.
let database = openDatabase(dbPath)
database.exec("PRAGMA journal_mode = WAL")
migrate(database)
for url in urls:
echo &"Adding {url}"
database.exec("INSERT INTO sites (url) VALUES (?)", url)
close(database)
var ctx = Ctx(db: database, dbPath: dbPath, images: newTable[int, (seq[byte], int)](), interval: interval)
echo "Starting up"
asyncCheck pollTargets()
addTimer(interval, false, timerCallback)
asyncCheck pollTargets(ctx)
imageReturnChannel.open()
var thread: Thread[(string, int)]
createThread(thread, generateImages, (dbPath, interval))
echo "Ready"
addTimer(interval, false, proc(fd: AsyncFD): bool =
asyncCheck pollTargets(ctx)
false)
addTimer(interval * 60, false, proc(fd: AsyncFD): bool =
createThread(thread, generateImages, (dbPath, interval))
let fut = sleepAsync(10000)
fut.addCallback(() => readIntoContext(ctx))
asyncCheck fut
false)
var server = newAsyncHttpServer()
waitFor server.serve(Port(port), onRequest)
waitFor server.serve(Port(port), onRequest(ctx))
dispatch(run, help={
"dbPath": "path to SQLite3 database for historical data logging",
"port": "port to serve HTTP on",

View File

@ -1,5 +1,4 @@
body {
max-width: 40em;
font-family: sans-serif;
}
@ -18,20 +17,29 @@ h1 {
/* Site card: a flex row that wraps when space runs out, with its children pushed to opposite ends. */
.card {
margin-bottom: 1em;
display: flex;
justify-content: space-between;
flex-wrap: wrap;
}
.card.rtOk h2 {
.card.Ok h2 {
color: green;
}
.card.rtHttpError h2 {
.card.HttpError h2 {
color: orange;
}
.card.rtHttpTeapot h2 {
.card.HttpTeapot h2 {
color: blue;
}
.card.rtFetchError h2 {
.card.FetchError h2 {
color: red;
}
.card.rtTimeout h2 {
.card.Timeout h2 {
color: red;
}
/* Ask for unsmoothed scaling of images; the same intent is declared three ways. */
/* NOTE(review): where both image-rendering values are supported the later one (crisp-edges) wins - confirm that is intended. */
img {
image-rendering: pixelated;
-ms-interpolation-mode: nearest-neighbor;
image-rendering: crisp-edges;
}