import os
import subprocess
import threading
import time

from flask import Flask, Response
# Flask uses the import name to locate resources relative to this module;
# it must be the module's own ``__name__``, not an undefined ``name``.
app = Flask(__name__)
# NOTE(review): the route pattern and the enclosing view's ``def`` line were
# lost from this file (the text breaks off after ``@app.route("/``). They are
# reconstructed from what the body requires: a ``prompt`` variable and a
# function around the final ``return``. The view name ``textgen`` and the
# ``<prompt>`` URL converter are assumptions -- confirm against the deployed
# routing before relying on them.
@app.route("/<prompt>")
def textgen(prompt):
    """Stream the model's completion of *prompt* back as plain text.

    Args:
        prompt: Prompt text taken from the URL. A blank line and the
            ``### Response:`` marker are appended before it is passed to the
            llama.cpp binary.

    Returns:
        A streaming ``text/plain`` response carrying the child process's raw
        stdout.
    """

    def generate():
        """Yield the child's stdout in chunks as soon as they are available."""
        # The prompt is a single argv element and no shell is involved, so it
        # cannot inject extra command-line arguments.
        command = [
            "/opt/llama.cpp/main",
            "-ngl",
            "32",
            "-m",
            "/opt/llama.cpp/models/wizardLM-7B.ggmlv3.q4_0.bin",
            "-n",
            "1024",
            "-p",
            f"{prompt}\n\n### Response:",
        ]
        # Spawn outside the try block: if Popen itself fails there is no
        # process to kill, and the real error must not be masked by an
        # UnboundLocalError. The context manager closes the pipe and waits
        # for the child on exit, so it is always reaped.
        with subprocess.Popen(command, stdout=subprocess.PIPE) as process:
            try:
                # read1() returns whatever is currently buffered (blocking
                # only until some data arrives), so output still streams
                # incrementally without one read call per byte.
                for chunk in iter(lambda: process.stdout.read1(4096), b""):
                    yield chunk
            finally:
                # Stop generation when the stream ends or the client
                # disconnects mid-response (the generator is closed).
                process.kill()

    return Response(generate(), mimetype="text/plain")
# Filesystem path of the Unix domain socket the server is bound to.
path = "/srv/http/pages/textgen"


def fixperms():
    """Set the mode of the socket file at ``path`` to rw-rw----.

    Runs on a helper thread because ``app.run`` blocks the main thread. The
    short sleep presumably gives the server time to create the socket file
    first -- NOTE(review): this is a best-effort delay, not a guarantee.
    """
    time.sleep(0.1)
    # The mode must be an octal literal: decimal 660 is 0o1224, which would
    # set the sticky bit and -w--w-r-- instead of rw-rw----.
    os.chmod(path, 0o660)


threading.Thread(target=fixperms).start()
app.run(host="unix://" + path)