---
title: Advanced Agentic System
emoji: 🤖
colorFrom: indigo
colorTo: purple
sdk: gradio
sdk_version: latest
app_file: startup.sh
pinned: true
license: apache-2.0
duplicated_from: nananie143/agentic-system
python_version: "3.10"
cuda: "11.8"
hardware: t4-medium

# System requirements
compute:
  instance: t4-medium
  storage: large

# Environment setup (read at runtime; see the sketch below the config block)
env:
  - MODEL_BACKEND=groq
  - GROQ_API_KEY  # This will be loaded from repository secrets
  - ENABLE_LOCAL_FALLBACK=true
  - CACHE_MODELS=false
  - GRADIO_SERVER_PORT=7860
  - GRADIO_SERVER_NAME=0.0.0.0
  - MAX_PARALLEL_REQUESTS=10
  - REQUEST_TIMEOUT=30
  - BATCH_SIZE=4
  - GRADIO_ANALYTICS_ENABLED=false
  - PYTHONUNBUFFERED=1
  - SPACE_CACHE_DIR=/data/models
  - TORCH_CUDA_ARCH_LIST="7.5"
  - CUDA_VISIBLE_DEVICES=0

# Model configurations (GGUF checkpoints; loading sketch below the config block)
models:
  - rrbale/pruned-qwen-moe/model-Q6_K.gguf
  - YorkieOH10/deepseek-coder-6.7B-kexer-Q8_0-GGUF/model.gguf
  - Nidum-Llama-3.2-3B-Uncensored-GGUF/model-Q6_K.gguf
  - deepseek-ai/JanusFlow-1.3B/model.gguf
  - prithivMLmods/QwQ-4B-Instruct/model.gguf
  - gpt-omni/mini-omni2/mini-omni2.gguf

# Dependencies
dependencies:
  python:
    - "gradio>=4.44.1"
    - "groq>=0.4.1"
    - "fastapi>=0.68.0"
    - "uvicorn>=0.15.0"
    - "pydantic>=2.0.0"
    - "python-dotenv>=0.19.0"
    - "aiohttp>=3.8.0"
- "asyncio>=3.4.3" | |
- "numpy>=1.24.0" | |
- "pandas>=2.1.0" | |
- "scikit-learn>=1.3.2" | |
- "plotly>=5.18.0" | |
- "networkx>=3.2.1" | |
- "llama-cpp-python>=0.2.23" # Added for local LLM support | |
  system:
    - git-lfs
    - cmake
    - ninja-build  # For faster builds
    - build-essential  # Required for compilation
    - cuda-toolkit-11-8
    - nvidia-cuda-toolkit
    - libcudnn8

# Inference settings (backend routing sketch below the config block)
inference:
  model_backend: groq
  models:
    - name: mixtral-8x7b-32768
      provider: groq
      max_tokens: 32768
    - name: llama2-70b-4096
      provider: groq
      max_tokens: 4096
  fallback:
    enabled: true
    provider: huggingface
    model: mistral-7b-instruct-v0.2

# Resource limits
resources:
  memory: 16      # GB of system RAM
  cpu: 4          # vCPUs
  gpu: 1
  gpu_memory: 16  # GB of VRAM (one T4)
  disk: 50        # GB of storage

# Monitoring (logging/launch sketch below the config block)
monitoring:
  enable_logging: true
  log_level: INFO
  metrics_enabled: true

# Build configuration
build:
  system_packages:
    - cmake
    - build-essential
    - cuda-toolkit-11-8
    - nvidia-cuda-toolkit
    - libcudnn8
  python_packages:
    - --upgrade pip
    - -r requirements.txt
    - torch --index-url https://download.pytorch.org/whl/cu118
    - llama-cpp-python --no-cache-dir

# Runtime configuration
runtime:
  build:
    cuda: "11.8"
    python: "3.10"
  env:
    - PYTHONUNBUFFERED=1
    - GRADIO_SERVER_NAME=0.0.0.0
    - TORCH_CUDA_ARCH_LIST="7.5"
    - CUDA_VISIBLE_DEVICES=0
    - GRADIO_ANALYTICS_ENABLED=false
---
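
The `env` entries in the config are exported as plain `KEY=VALUE` pairs into the container. A minimal sketch of how the app can read them at startup, with defaults mirroring the declared values; this is illustrative, not taken from the Space's own code:

```python
import os

# Defaults mirror the values declared in the env block of the config.
MODEL_BACKEND = os.getenv("MODEL_BACKEND", "groq")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # injected from repository secrets
ENABLE_LOCAL_FALLBACK = os.getenv("ENABLE_LOCAL_FALLBACK", "true").lower() == "true"
MAX_PARALLEL_REQUESTS = int(os.getenv("MAX_PARALLEL_REQUESTS", "10"))
REQUEST_TIMEOUT = int(os.getenv("REQUEST_TIMEOUT", "30"))

# Fail fast if the primary backend is selected but its secret is missing.
if MODEL_BACKEND == "groq" and not GROQ_API_KEY:
    raise RuntimeError("GROQ_API_KEY is not set; add it to the Space secrets")
```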
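Each entry in `models` points at a GGUF file, which `llama-cpp-python` (pinned in the dependencies) loads directly. A sketch assuming the files are cached under `SPACE_CACHE_DIR`; the filename here is a placeholder:

```python
import os
from llama_cpp import Llama

CACHE_DIR = os.getenv("SPACE_CACHE_DIR", "/data/models")

# n_gpu_layers=-1 offloads every layer to the T4; use 0 for CPU-only runs.
llm = Llama(
    model_path=os.path.join(CACHE_DIR, "model-Q6_K.gguf"),  # placeholder filename
    n_ctx=4096,
    n_gpu_layers=-1,
)

out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Describe what an agentic system does."}]
)
print(out["choices"][0]["message"]["content"])
```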
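The `inference` block names Groq as the primary backend with a fallback when it is unavailable. A hedged sketch of that routing using the `groq` SDK from the dependency list; for simplicity the fallback below reuses the local GGUF path rather than the hosted `mistral-7b-instruct-v0.2` the config declares:

```python
import os
from groq import Groq
from llama_cpp import Llama

def complete(prompt: str, model: str = "mixtral-8x7b-32768", max_tokens: int = 1024) -> str:
    """Route to Groq first; fall back to a local GGUF model on failure."""
    try:
        client = Groq(api_key=os.environ["GROQ_API_KEY"])
        resp = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
        )
        return resp.choices[0].message.content
    except Exception:
        if os.getenv("ENABLE_LOCAL_FALLBACK", "true").lower() != "true":
            raise
        # Placeholder path; any GGUF from the models list would work here.
        llm = Llama(model_path="/data/models/model-Q6_K.gguf", n_ctx=4096)
        out = llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
        )
        return out["choices"][0]["message"]["content"]
```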
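`GRADIO_SERVER_NAME`, `GRADIO_SERVER_PORT`, and the `monitoring` settings map onto a launch sequence like this sketch; `respond` is a stand-in for the real app logic, and `LOG_LEVEL` is an assumed env name mirroring `monitoring.log_level`:

```python
import logging
import os
import gradio as gr

# LOG_LEVEL is an assumed env var; logging.basicConfig accepts level names as strings.
logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))

def respond(message: str) -> str:
    return f"echo: {message}"  # placeholder handler

demo = gr.Interface(fn=respond, inputs="text", outputs="text")
demo.launch(
    server_name=os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
    server_port=int(os.getenv("GRADIO_SERVER_PORT", "7860")),
)
```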