forked from osmarks/onstat
Compare commits
15 Commits
Author | SHA1 | Date
---|---|---
| 511a8e62e9 |
| 30474d55cf |
| 65b19dbd96 |
| dfbda82731 |
| 7136b827d3 |
| bc2cb0de45 |
| 78673b1319 |
| 2d7dc14936 |
| b63b675e95 |
| 6f9043bd1d |
| 1406232927 |
| c1138fba8d |
| 013c072bd7 |
| 07f5d22082 |
| a0eabc3990 |
LICENSE (new file, 7 lines)
@@ -0,0 +1,7 @@
+Copyright © 2021 osmarks
+
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
onstat.nimble

@@ -12,3 +12,5 @@ bin = @["onstat"]
 requires "nim >= 1.4.2"
 requires "https://github.com/GULPF/tiny_sqlite#8fe760d9"
 requires "karax >= 1.2.1"
+requires "cligen >= 1"
+requires "imageman >= 0.8"
src/db.nim (38 lines changed)

@@ -1,24 +1,36 @@
 import tiny_sqlite
 import options
 import times

 let migrations: seq[string] = @[
     """
     CREATE TABLE sites (
         sid INTEGER PRIMARY KEY,
         url TEXT NOT NULL
     );

     CREATE TABLE reqs (
         rid INTEGER PRIMARY KEY,
         site INTEGER NOT NULL REFERENCES sites(sid),
         timestamp INTEGER NOT NULL,
         status INTEGER NOT NULL,
         latency INTEGER NOT NULL
     );
     """,
     """
     CREATE INDEX req_ts_idx ON reqs (timestamp);
     """,
+    # rolling total/successful ping and latency count
+    # rc_data_since holds the older end of the interval the counters are from
+    # this slightly horribly migrates the existing data using a hardcoded 1 week window
+    """
+    ALTER TABLE sites ADD COLUMN rc_total INTEGER NOT NULL DEFAULT 0;
+    ALTER TABLE sites ADD COLUMN rc_success INTEGER NOT NULL DEFAULT 0;
+    ALTER TABLE sites ADD COLUMN rc_latency INTEGER NOT NULL DEFAULT 0;
+    ALTER TABLE sites ADD COLUMN rc_data_since INTEGER;
+    UPDATE sites SET rc_total = (SELECT COUNT(*) FROM reqs WHERE site = sid AND timestamp >= (strftime('%s') - (86400*7)) * 1000000);
+    UPDATE sites SET rc_success = (SELECT SUM(status <= 0) FROM reqs WHERE site = sid AND timestamp >= (strftime('%s') - (86400*7)) * 1000000);
+    UPDATE sites SET rc_latency = (SELECT SUM(latency) FROM reqs WHERE site = sid AND timestamp >= (strftime('%s') - (86400*7)) * 1000000);
+    UPDATE sites SET rc_data_since = (strftime('%s') - (86400*7)) * 1000000;
+    """
 ]
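Neither side of this diff shows the migrate proc that consumes the migrations list (it is called from src/onstat.nim below). As a reading aid only, here is a minimal sketch of how such a runner is often written with tiny_sqlite, assuming the applied-migration count is tracked in SQLite's user_version pragma; the actual onstat implementation may differ:

    import tiny_sqlite
    import options

    proc migrate*(db: DbConn) =
        # number of migrations already applied, tracked in user_version (assumed convention)
        let applied = fromDbValue(get db.value("PRAGMA user_version"), int)
        for i in applied ..< migrations.len:
            db.transaction:
                db.execScript(migrations[i])  # each entry holds several statements
                db.exec("PRAGMA user_version = " & $(i + 1))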
src/onstat.nim (272 lines changed)
@@ -10,22 +10,18 @@ import sugar
 import net
 import sequtils
 import strformat
 import strutils
+import cligen
+import imageman
+import math
+import hashes
+import tables

 import ./db

 macro includeFile(x: string): string = newStrLitNode(readFile(x.strVal))

 const css = includeFile("./src/style.css")
-let database = openDatabase("./monitoring.sqlite3")
-migrate(database)
-
-var threadDB {.threadvar.}: Option[DbConn]
-proc getDB(): DbConn =
-    if isNone threadDB:
-        let x = openDatabase("./monitoring.sqlite3")
-        x.exec("PRAGMA journal_mode=WAL")
-        threadDB = some x
-    get threadDB

 func timeToTimestamp*(t: Time): int64 = toUnix(t) * 1000000 + (nanosecond(t) div 1000)
 func timestampToTime*(ts: int64): Time = initTime(ts div 1000000, (ts mod 1000000) * 1000)
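For orientation (not part of the diff): these helpers encode times as integer microseconds since the Unix epoch, which is the same convention the migration SQL above follows when it multiplies strftime('%s') seconds by 1000000. A quick round-trip check:

    import times

    let t = fromUnix(1622505600)  # 2021-06-01T00:00:00Z
    assert timeToTimestamp(t) == 1_622_505_600_000_000'i64
    assert timestampToTime(timeToTimestamp(t)) == t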
@@ -34,41 +30,48 @@ proc toDbValue(t: Time): DbValue = DbValue(kind: sqliteInteger, intVal: timeToTi
 proc fromDbValue(value: DbValue, T: typedesc[Time]): Time = timestampToTime(value.intVal)

 type
-    ResponseType = enum
-        rtOk = 0
-        rtHttpError = 1
-        rtHttpTeapot = 2
-        rtTimeout = 3
-        rtFetchError = 4
+    ResponseType {.pure.} = enum
+        HttpTeapot = -1
+        Ok = 0
+        HttpError = 1
+        Timeout = 2
+        FetchError = 3
     Response = object
         rtype: ResponseType
         latency: int64 # microseconds
     SiteStatus = object
+        id: int
         url: string
         lastPing: Time
        lastResponse: ResponseType
         lastLatency: float
         uptimePercent: float
+        averageLatency: float
+    Ctx = object
+        db: DbConn
+        dbPath: string
+        images: TableRef[int, (seq[byte], int)]
+        interval: int

-proc uptimeSince(sid: int, time: Time): float =
-    let okPings = fromDbValue(get getDB().value("SELECT COUNT(*) FROM reqs WHERE site = ? AND (status = 0 OR status = 2)", sid), int)
-    let totalPings = fromDbValue(get getDB().value("SELECT COUNT(*) FROM reqs WHERE site = ?", sid), int)
-    okPings / totalPings
-
-proc fetchLatest(row: ResultRow): Option[SiteStatus] =
-    let weekAgo = getTime() + initTimeInterval(weeks= -1)
-    let (site, url) = row.unpack((int, string))
-    let row = getDB().one("SELECT timestamp, status, latency FROM reqs WHERE site = ? ORDER BY timestamp DESC LIMIT 1", site)
+proc fetchLatest(ctx: Ctx, row: ResultRow): Option[SiteStatus] =
+    let (site, url, rollingTotalPings, rollingSuccessfulPings, rollingLatency, rollingDataSince) = row.unpack((int, string, int64, int64, int64, Option[int64]))
+    # work around bizarre SQLite query planner issue - it appears that if it has a literal value to compare site against it generates very fast VM code
+    # but if it has a prepared statement parameter it somehow refuses to use the index
+    let row = ctx.db.one("SELECT timestamp, status, latency FROM reqs WHERE site = -1 OR site = ? ORDER BY timestamp DESC LIMIT 1", site)
     if isNone row: return none(SiteStatus)
     let (ts, status, latency) = (get row).unpack((Time, int, int))
-    some SiteStatus(url: url, lastPing: ts, lastResponse: ResponseType(status), lastLatency: float64(latency) / 1e3, uptimePercent: uptimeSince(site, weekAgo))
+    some SiteStatus(url: url, lastPing: ts, lastResponse: ResponseType(status), lastLatency: float(latency) / 1e3, id: site,
+        uptimePercent: float(rollingSuccessfulPings) / float(rollingTotalPings), averageLatency: float(rollingLatency) / float(rollingTotalPings) / 1e3)

-proc mainPage(): string =
-    let sites = getDB().all("SELECT * FROM sites ORDER BY sid").map(fetchLatest).filter(x => isSome x).map(x => get x)
-    let up = sites.filter(x => (x.lastResponse == rtOk) or (x.lastResponse == rtHttpTeapot)).len()
+proc mainPage(ctx: Ctx): string =
+    let sites = ctx.db.all("SELECT * FROM sites ORDER BY sid").map(x => ctx.fetchLatest(x)).filter(x => x.isSome).map(x => x.get)
+    let up = sites.filter(x => int(x.lastResponse) <= 0).len()
     let vnode = buildHtml(html()):
         head:
             meta(charset="utf8")
             meta(http-equiv="refresh", content="60")
             meta(name="viewport", content="width=device-width, initial-scale=1")
             title: text &"{up}/{sites.len} up - OnStat"
             style: text css
         body:
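The planner comment above is checkable: SQLite's EXPLAIN QUERY PLAN reports whether an index is used for each form of the query. A hedged sketch of such a comparison with tiny_sqlite, assuming an existing monitoring.sqlite3; output details vary by SQLite version:

    import tiny_sqlite

    let db = openDatabase("./monitoring.sqlite3")
    for query in ["SELECT timestamp, status, latency FROM reqs WHERE site = ? ORDER BY timestamp DESC LIMIT 1",
                  "SELECT timestamp, status, latency FROM reqs WHERE site = -1 OR site = ? ORDER BY timestamp DESC LIMIT 1"]:
        echo query
        # EXPLAIN QUERY PLAN rows are (id, parent, notused, detail)
        for row in db.iterate("EXPLAIN QUERY PLAN " & query, 1):
            echo "  ", row[3].fromDbValue(string)
    close(db)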
@@ -76,61 +79,188 @@ proc mainPage(): string =
         h2(class="title"): text &"{up}/{sites.len} up"
         for site in sites:
             tdiv(class="card " & $site.lastResponse):
-                h2:
-                    case site.lastResponse
-                    of rtOk: text "✓ "
-                    of rtHttpError: text "⚠ "
-                    of rtTimeout: text "✕ "
-                    of rtFetchError: text "✕ "
-                    of rtHttpTeapot: text "🫖 "
-                    text site.url
-                tdiv: text("Last pinged " & format(site.lastPing, "HH:mm:ss dd-MM-yyyy"))
-                tdiv:
-                    case site.lastResponse
-                    of rtOk: text &"Latency {site.lastLatency}ms"
-                    of rtHttpError: text "HTTP error"
-                    of rtHttpTeapot: text &"Teapot, latency {site.lastLatency}ms"
-                    of rtTimeout: text "Timed out"
-                    of rtFetchError: text "Fetch failed"
-                tdiv: text &"{site.uptimePercent * 100}% up in last week"
+                tdiv(class="left"):
+                    h2:
+                        case site.lastResponse
+                        of ResponseType.Ok: text "✓ "
+                        of ResponseType.HttpError: text "⚠ "
+                        of ResponseType.Timeout: text "✕ "
+                        of ResponseType.FetchError: text "✕ "
+                        of ResponseType.HttpTeapot: text "🫖 "
+                        text site.url
+                    tdiv: text("Last pinged " & format(site.lastPing, "HH:mm:ss dd-MM-yyyy"))
+                    tdiv:
+                        case site.lastResponse
+                        of ResponseType.Ok: text &"Latency {site.lastLatency}ms"
+                        of ResponseType.HttpError: text "HTTP error"
+                        of ResponseType.HttpTeapot: text &"Teapot, latency {site.lastLatency:.5f}ms"
+                        of ResponseType.Timeout: text "Timed out"
+                        of ResponseType.FetchError: text "Fetch failed"
+                    tdiv: text &"{site.uptimePercent * 100:.5f}% up, {site.averageLatency:.5f}ms latency in last week"
+                if site.id in ctx.images: img(src= &"/vis/{site.id}", class="right", title= &"{site.url} 12-week status visualization")
         hr()
         small:
             text "made by "
             a(href="https://osmarks.net"): text "gollark"
             text ", currently hosted by "
             a(href="https://ubq323.website"): text "ubq323"
             text "."
     $vnode
-proc onRequest(req: Request) {.async.} =
-    if req.reqMethod == HttpGet:
-        case req.url.path
-        of "/": await req.respond(Http200, mainPage(), headers=newHttpHeaders([("Content-Type", "text/html")]))
-        else: await req.respond(Http404, "not found")
-    else:
-        await req.respond(Http404, "not found")
+var imageReturnChannel: Channel[(int, seq[byte])]
+
+proc readIntoContext(ctx: Ctx) =
+    # this is a horrible workaround to avoid having to something something shared hash table
+    var available = true
+    while available:
+        let (av, data) = imageReturnChannel.tryRecv()
+        available = av
+        if available:
+            let (id, image) = data
+            ctx.images[id] = (image, image.hash)
+
+proc onRequest(ctx: Ctx): (proc(req: Request): Future[void] {.gcsafe.}) =
+    result = proc(req: Request) {.async.} =
+        readIntoContext(ctx)
+        if req.reqMethod == HttpGet:
+            var path = req.url.path
+            if path == "/":
+                await req.respond(Http200, mainPage(ctx), headers=newHttpHeaders([("Content-Type", "text/html")]))
+            elif path.startsWith("/vis/"):
+                path.removePrefix("/vis/")
+                var id = 0
+                try:
+                    id = parseInt path
+                except:
+                    await req.respond(Http404, "not found")
+                    return
+                if id in ctx.images:
+                    let (image, hash) = ctx.images[id]
+                    let etag = &"\"{hash}\""
+                    if etag == req.headers.getOrDefault("if-none-match"):
+                        await req.respond(Http304, "")
+                    else:
+                        await req.respond(Http200, cast[string](image), headers=newHttpHeaders([
+                            ("Content-Type", "image/png"), ("ETag", etag)]))
+                else: await req.respond(Http404, "not found")
+            else: await req.respond(Http404, "not found")
+        else:
+            await req.respond(Http405, "GET only")
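readIntoContext is the receiving half of a standard Nim Channel handoff: a worker thread sends finished PNGs and the async main loop drains them non-blockingly with tryRecv, avoiding a shared table. A self-contained illustration of the same pattern (not onstat code; compile with --threads:on):

    import std/os

    var chan: Channel[(int, string)]

    proc worker() {.thread.} =
        chan.send((1, "payload"))  # runs on the worker thread

    chan.open()
    var t: Thread[void]
    createThread(t, worker)
    sleep(50)  # give the worker a moment to send

    # drain without blocking, exactly as readIntoContext does
    while true:
        let (ok, msg) = chan.tryRecv()
        if not ok: break
        echo "received ", msg

    joinThread(t)
    chan.close()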
-proc pollTarget(s: string): Future[Response] {.async.} =
+proc pollTarget(ctx: Ctx, s: string): Future[Response] {.async.} =
     var client = newAsyncHttpClient()
-    var x = Response(rtype: rtTimeout, latency: 0)
+    var x = Response(rtype: ResponseType.Timeout, latency: 0)
     proc doFetch() {.async.} =
         let ts = now().utc
         let res = await client.get(s)
         let latency = (now().utc - ts).inMicroseconds
-        if res.code.int == 418: x = Response(rtype: rtHttpTeapot, latency: latency)
-        elif res.code.is4xx or res.code.is5xx: x = Response(rtype: rtHttpError, latency: latency)
-        else: x = Response(rtype: rtOk, latency: latency)
+        if res.code.int == 418: x = Response(rtype: ResponseType.HttpTeapot, latency: latency)
+        elif res.code.is4xx or res.code.is5xx: x = Response(rtype: ResponseType.HttpError, latency: latency)
+        else: x = Response(rtype: ResponseType.Ok, latency: latency)
     try:
         discard await withTimeout(doFetch(), 10000)
     except:
-        x = Response(rtype: rtFetchError, latency: 0)
+        x = Response(rtype: ResponseType.FetchError, latency: 0)
     client.close()
     return x
-proc pollTargets() {.async.} =
-    for row in getDB().all("SELECT * FROM sites"):
-        let (id, url) = row.unpack((int64, string))
-        let res = await pollTarget(url)
-        getDB().exec("INSERT INTO reqs (site, timestamp, status, latency) VALUES (?, ?, ?, ?)", id, getTime(), int(res.rtype), res.latency)
-
-proc timerCallback(fd: AsyncFD): bool =
-    asyncCheck pollTargets()
-    false
-
-echo "Starting up"
-asyncCheck pollTargets()
-addTimer(5000, false, timerCallback)
-var server = newAsyncHttpServer()
-waitFor server.serve(Port(7800), onRequest)
+proc pollTargets(ctx: Ctx) {.async.} =
+    for row in ctx.db.all("SELECT * FROM sites"):
+        var (id, url, rollingTotalPings, rollingSuccessfulPings, rollingLatency, rollingDataSince) = row.unpack((int64, string, int64, int64, int64, Option[Time]))
+        let res = await ctx.pollTarget(url)
+        let threshold = getTime() + initTimeInterval(weeks= -1)
+
+        # drop old data from rolling counters
+        if rollingDataSince.isSome:
+            for row in ctx.db.iterate("SELECT status, latency FROM reqs WHERE timestamp >= ? AND timestamp <= ? AND site = ?", rollingDataSince.get, threshold, id):
+                let (statusRaw, latency) = row.unpack((int, int))
+                rollingTotalPings -= 1
+                rollingLatency -= latency
+                if statusRaw <= 0:
+                    rollingSuccessfulPings -= 1
+
+        # add new data
+        rollingTotalPings += 1
+        rollingLatency += res.latency
+        if int(res.rtype) <= 0:
+            rollingSuccessfulPings += 1
+
+        ctx.db.transaction:
+            ctx.db.exec("UPDATE sites SET rc_total = ?, rc_success = ?, rc_latency = ?, rc_data_since = ? WHERE sid = ?", rollingTotalPings, rollingSuccessfulPings, rollingLatency, threshold, id)
+            ctx.db.exec("INSERT INTO reqs (site, timestamp, status, latency) VALUES (?, ?, ?, ?)", id, getTime(), int(res.rtype), res.latency)
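The rolling counters replace the old per-request uptimeSince scan with O(1) bookkeeping: rows that age out of the one-week window are subtracted, the new ping is added, and fetchLatest only divides rc_success and rc_latency by rc_total. A worked check with illustrative numbers:

    # counters carried in the sites row
    var rcTotal = 20160'i64            # one week of pings at the default 30 s interval
    var rcSuccess = 20100'i64
    var rcLatency = 1_512_000_000'i64  # summed latency in µs (mean 75 ms)

    # two rows age out of the window: one successful, 150 ms of summed latency
    rcTotal -= 2; rcSuccess -= 1; rcLatency -= 150_000
    # the new poll succeeded in 80 ms
    rcTotal += 1; rcSuccess += 1; rcLatency += 80_000

    echo rcSuccess.float / rcTotal.float * 100  # uptimePercent * 100 ≈ 99.71
    echo rcLatency.float / rcTotal.float / 1e3  # averageLatency in ms ≈ 75.0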
+proc drawLatencyImage(db: DbConn, site: int, interval: int): seq[byte] =
+    const width = 120 * 6
+    const height = 168 * 2
+    var image = initImage[ColorRGBU](width, height)
+    var count = 0
+    var lastTs = getTime()
+    for row in db.iterate("SELECT timestamp, status, latency FROM reqs WHERE site = ? ORDER BY timestamp DESC LIMIT ?", site, width * height):
+        let (ts, statusRaw, latency) = row.unpack((Time, int, int))
+        let timeGap = lastTs - ts
+        if timeGap > initDuration(milliseconds = interval + 10000):
+            let pixels = timeGap.inMilliseconds div interval
+            for _ in 1..pixels:
+                image.data[count] = ColorRGBU([0x7Eu8, 0x1E, 0x9C])
+                count += 1
+                if count >= image.data.len: break
+        else:
+            let status = ResponseType(statusRaw)
+            case status
+            of ResponseType.HttpError:
+                image.data[count] = ColorRGBU([255u8, 127, 0])
+            of ResponseType.Timeout:
+                image.data[count] = ColorRGBU([0u8, 0, 0])
+            of ResponseType.FetchError:
+                image.data[count] = ColorRGBU([255u8, 0, 0])
+            else:
+                let latencyMultiplier = max(min(pow(10.0, 1.1) / pow(float(latency), 0.25), 1.0), 0.2)
+                image.data[count] = ColorRGBU([0u8, uint8(latencyMultiplier * 255.0), 0])
+
+            count += 1
+        if count >= image.data.len: break
+        lastTs = ts
+    writePNG(image, compression=6)
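At the default 30000 ms interval the 720×336 pixel buffer spans exactly 12 weeks (720 × 336 × 30 s = 84 days), matching the img title in mainPage. For OK pings the green channel fades with latency, clamped to the range 0.2 to 1.0; plugging sample latencies (in µs, as stored) into the same formula:

    import math

    proc mult(latencyUs: float): float =
        # same expression as drawLatencyImage's latencyMultiplier
        max(min(pow(10.0, 1.1) / pow(latencyUs, 0.25), 1.0), 0.2)

    assert abs(mult(10_000.0) - 1.0) < 1e-9       # 10 ms: 12.59 / 10 clamps to full-brightness green
    assert abs(mult(1_000_000.0) - 0.398) < 0.001 # 1 s: green channel ≈ 0.398 * 255 ≈ 101
    assert mult(1e9) == 0.2                       # pathological latency still stays visible at the 0.2 floor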
+proc generateImages(args: (string, int)) =
+    let (dbPath, interval) = args
+    let db = openDatabase(dbPath)
+    db.exec("PRAGMA journal_mode = WAL")
+    for row in db.all("SELECT sid FROM sites"):
+        let id = row[0].fromDbValue(int)
+        imageReturnChannel.send((id, drawLatencyImage(db, id, interval)))
+    close(db)
+proc run(dbPath="./monitoring.sqlite3", port=7800, interval=30000, urls: seq[string]) =
+    ## Run onstat. Note that the URLs you configure will be persisted in the monitoring database. To remove them, you must manually update the database.
+    let database = openDatabase(dbPath)
+    database.exec("PRAGMA journal_mode = WAL")
+    migrate(database)
+    for url in urls:
+        echo &"Adding {url}"
+        database.exec("INSERT INTO sites (url) VALUES (?)", url)
+
+    var ctx = Ctx(db: database, dbPath: dbPath, images: newTable[int, (seq[byte], int)](), interval: interval)
+
+    echo "Starting up"
+    asyncCheck pollTargets(ctx)
+    imageReturnChannel.open()
+    var thread: Thread[(string, int)]
+    createThread(thread, generateImages, (dbPath, interval))
+    echo "Ready"
+    addTimer(interval, false, proc(fd: AsyncFD): bool =
+        asyncCheck pollTargets(ctx)
+        false)
+    addTimer(interval * 60, false, proc(fd: AsyncFD): bool =
+        createThread(thread, generateImages, (dbPath, interval))
+        let fut = sleepAsync(10000)
+        fut.addCallback(() => readIntoContext(ctx))
+        asyncCheck fut
+        false)
+    var server = newAsyncHttpServer()
+    waitFor server.serve(Port(port), onRequest(ctx))
+
+dispatch(run, help={
+    "dbPath": "path to SQLite3 database for historical data logging",
+    "port": "port to serve HTTP on",
+    "interval": "interval at which to poll other services (milliseconds)"
+})
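dispatch(run, help=...) is cligen's CLI generator: run's named parameters become command-line options and the trailing positional arguments become urls. A plausible invocation (illustrative; flag spellings follow cligen defaults, and --help is derived from the table above):

    ./onstat --port=7800 --interval=30000 https://osmarks.net https://example.com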
src/style.css

@@ -1,5 +1,4 @@
 body {
     max-width: 40em;
     font-family: sans-serif;
 }

@@ -18,20 +17,29 @@ h1 {

 .card {
     margin-bottom: 1em;
+    display: flex;
+    justify-content: space-between;
+    flex-wrap: wrap;
 }

-.card.rtOk h2 {
+.card.Ok h2 {
     color: green;
 }
-.card.rtHttpError h2 {
+.card.HttpError h2 {
     color: orange;
 }
-.card.rtHttpTeapot h2 {
+.card.HttpTeapot h2 {
     color: blue;
 }
-.card.rtFetchError h2 {
+.card.FetchError h2 {
     color: red;
 }
-.card.rtTimeout h2 {
+.card.Timeout h2 {
     color: red;
 }

+img {
+    image-rendering: pixelated;
+    -ms-interpolation-mode: nearest-neighbor;
+    image-rendering: crisp-edges;
+}