# Start llama-server in background
cd /llama.cpp/build
./bin/llama-server --host 0.0.0.0 --port 8080 --model /models/model.q8_0.gguf --ctx-size 32768 &
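
# Optional sketch (an addition, not part of the original script): remember the
# background server's PID so it is shut down when this script exits.
LLAMA_PID=$!
trap 'kill "$LLAMA_PID"' EXIT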
# Wait for the server to initialize; -f makes curl fail on HTTP error codes,
# so the loop keeps waiting while the server is still loading the model
echo "Waiting for server to start..."
until curl -sf "http://localhost:8080/v1/models" >/dev/null; do
  sleep 1
done
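
# Note (depends on the llama-server build): builds that expose a /health
# endpoint return 200 only once the model has finished loading, which can be
# used as a stricter readiness probe, e.g.:
#   until curl -sf "http://localhost:8080/health" >/dev/null; do sleep 1; done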
echo "Server is ready. Starting Gradio app." | |
# Start Gradio UI | |
cd / | |
python3 app.py |
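
# Example (assumes the host/port defaults above): once both services are up,
# the model can also be queried directly through llama-server's
# OpenAI-compatible API, e.g.:
#   curl http://localhost:8080/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -d '{"messages":[{"role":"user","content":"Hello"}]}'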