mirror of
https://github.com/osmarks/random-stuff
synced 2025-12-19 20:38:09 +00:00
misc python
This commit is contained in:
93
lesswrong_to_tiktok.py
Normal file
93
lesswrong_to_tiktok.py
Normal file
@@ -0,0 +1,93 @@
|
||||
import wave, sys
|
||||
import nltk
|
||||
from balacoon_tts import TTS
|
||||
from collections import namedtuple
|
||||
import struct
|
||||
from PIL import Image, ImageDraw
|
||||
import cv2
|
||||
import numpy
|
||||
import subprocess
|
||||
|
||||
# Side length, in pixels, of the square caption frames.
WIDTH = 400

# Working files: the narration track, the caption video, the background
# footage handed to ffmpeg as its first input, and the final output.
AUDIO = "/tmp/x.wav"
VIDEO = "/tmp/x.avi"
BACKDROP = "/tmp/x.mkv"
OUTPUT = "/tmp/x.mp4"
|
||||
|
||||
def render_text(text: str):
    """Render *text* as word-wrapped black text on a white strip.

    The text is split on whitespace and packed greedily into lines that are
    narrower than ``WIDTH`` pixels. A white rectangle is drawn from the top of
    the image down to the bottom of the last line, and the lines are drawn on
    top of it. The rest of the image keeps Pillow's default fill.

    A single word wider than ``WIDTH`` is placed on a line of its own (and
    will overflow the frame) instead of being pushed back forever.

    Args:
        text: The caption to render; may be empty.

    Returns:
        A ``WIDTH`` x ``WIDTH`` RGBA ``PIL.Image.Image``.
    """
    render_params = {"font_size": 24}
    im = Image.new("RGBA", (WIDTH, WIDTH))
    draw = ImageDraw.Draw(im)
    loc = [0, 0]
    # Reversed so that pop() hands the words out in reading order.
    toks = text.split()
    toks.reverse()
    text_commands = []
    while toks:
        chunk = []
        while draw.textbbox(loc, " ".join(chunk), **render_params)[2] < WIDTH:
            if not toks:
                break
            chunk.append(toks.pop())
        else:
            # The last word made the line too wide: defer it to the next line.
            # A lone word is kept, otherwise the chunk would end up empty and
            # the outer loop would never make progress.
            if len(chunk) > 1:
                toks.append(chunk.pop())
        line = " ".join(chunk)
        bbox = draw.textbbox(loc, line, **render_params)
        text_commands.append((tuple(loc), line))
        # The next line starts where this one ends.
        loc[1] = bbox[3]
    # The background has to be painted before the text, which is why the
    # draw calls were queued instead of issued while measuring.
    draw.rectangle([0, 0, WIDTH, loc[1]], fill="white")
    for position, line in text_commands:
        draw.text(position, line, fill="black", **render_params)
    return im
|
||||
|
||||
# Marker for a stretch of silence; `length` is its duration in seconds.
Pause = namedtuple("Pause", "length")
|
||||
|
||||
# Text to narrate. Read inside a context manager so the file handle is closed
# straight away, and decoded explicitly as UTF-8 rather than with whatever the
# locale default happens to be.
with open("/home/osmarks/Downloads/seq1.txt", encoding="utf-8") as source_file:
    text = source_file.read()

# Balacoon TTS engine loaded from a local addon file.
tts = TTS("/home/osmarks/Downloads/en_us_hifi_jets_cpu.addon")
# supported_speakers = tts.get_speakers()
# Speaker id passed to tts.synthesize().
speaker = "6670"
|
||||
|
||||
def chunks(text: str) -> list[str | Pause]:
    """Split *text* into sentences interleaved with pauses.

    Each non-empty line is split into sentences with ``nltk.sent_tokenize``.
    Every sentence is followed by ``Pause(0.5)``, and every non-empty line is
    closed with an additional ``Pause(1)``. Empty lines produce nothing.

    Args:
        text: The full text to narrate.

    Returns:
        Sentences (``str``) and ``Pause`` markers in narration order.
    """
    pieces = []
    for paragraph in text.splitlines():
        if not paragraph:
            continue
        for sentence in nltk.sent_tokenize(paragraph):
            pieces += [sentence, Pause(0.5)]
        # NOTE(review): the longer pause is taken to apply once per non-empty
        # line - confirm against the intended pacing.
        pieces.append(Pause(1))
    return pieces
|
||||
|
||||
RATE = tts.get_sampling_rate()
FPS = 30
# Bytes per audio sample; this is the sample width handed to setparams().
SAMPLE_WIDTH = 2


def wavblank(seconds):
    """Return *seconds* of silence as raw 16-bit mono PCM bytes at ``RATE``.

    One zero sample is produced per audio frame, so the result is
    ``SAMPLE_WIDTH * round(seconds * RATE)`` bytes long and plays for the
    requested duration when written to the 16-bit mono WAV file.
    """
    return struct.pack("<h", 0) * round(seconds * RATE)


# Frame shown while nothing is being spoken.
blank_frame = render_text("")

fourcc = cv2.VideoWriter_fourcc(*"MJPG")
video_writer = cv2.VideoWriter(VIDEO, fourcc, FPS, (WIDTH, WIDTH))
total_dur = 0
frames_written = 0
try:
    with wave.open(AUDIO, "w") as fp:
        # Mono, 16-bit, RATE Hz; the frame count is patched in on close.
        fp.setparams((1, SAMPLE_WIDTH, RATE, 0, "NONE", "NONE"))
        for chunk in chunks(text):
            if isinstance(chunk, str):
                samples = tts.synthesize(chunk, speaker)
                image = render_text(chunk)
            elif isinstance(chunk, Pause):
                samples = wavblank(chunk.length)
                image = blank_frame
            else:
                raise TypeError(f"unexpected chunk type: {type(chunk).__name__}")
            # Measure the duration from the number of audio frames the wave
            # writer actually stored. len(samples) is a byte count for the
            # silence buffers but an element count for synthesized audio, so
            # it cannot be used for both.
            start_frame = fp.tell()
            fp.writeframes(samples)
            duration = (fp.tell() - start_frame) / RATE
            total_dur += duration
            frame = cv2.cvtColor(numpy.array(image), cv2.COLOR_RGBA2BGR)
            # Write frames up to the cumulative target so that per-chunk
            # rounding does not accumulate into audio/video drift.
            target_frames = round(total_dur * FPS)
            for _ in range(target_frames - frames_written):
                video_writer.write(frame)
            frames_written = target_frames
            print(chunk, duration)
finally:
    # Finalize the video file even if synthesis or rendering fails.
    video_writer.release()
|
||||
# Overlay the caption video on the backdrop at (200, 200), cut the result at
# the narration length and encode it with the VAAPI H.264 encoder. A failing
# ffmpeg run raises subprocess.CalledProcessError.
ffmpeg_command = [
    "ffmpeg",
    "-i", BACKDROP, "-i", AUDIO, "-i", VIDEO,
    "-filter_complex", "overlay=x=200:y=200,format=nv12,hwupload",
    "-to", str(total_dur),
    "-y",
    "-vaapi_device", "/dev/dri/renderD128", "-c:v", "h264_vaapi",
    OUTPUT,
]
subprocess.run(ffmpeg_command, check=True)
|
||||
Reference in New Issue
Block a user