---
title: Advanced Agentic System
emoji: 🤖
colorFrom: indigo
colorTo: purple
sdk: gradio
sdk_version: latest
app_file: startup.sh
pinned: true
license: apache-2.0
duplicated_from: nananie143/agentic-system
python_version: "3.10"
cuda: "11.8"
hardware: t4-medium

# System requirements
compute:
  instance: t4-medium
  storage: large

# Environment setup (read at runtime; see the sketch below the config block)
env:
  - MODEL_BACKEND=groq
  - GROQ_API_KEY  # This will be loaded from repository secrets
  - ENABLE_LOCAL_FALLBACK=true
  - CACHE_MODELS=false
  - GRADIO_SERVER_PORT=7860
  - GRADIO_SERVER_NAME=0.0.0.0
  - MAX_PARALLEL_REQUESTS=10
  - REQUEST_TIMEOUT=30
  - BATCH_SIZE=4
  - GRADIO_ANALYTICS_ENABLED=false
  - PYTHONUNBUFFERED=1
  - SPACE_CACHE_DIR=/data/models
  - TORCH_CUDA_ARCH_LIST="7.5"
  - CUDA_VISIBLE_DEVICES=0

# Model configurations (GGUF checkpoints; loading sketch below the config block)
models:
  - rrbale/pruned-qwen-moe/model-Q6_K.gguf
  - YorkieOH10/deepseek-coder-6.7B-kexer-Q8_0-GGUF/model.gguf
  - Nidum-Llama-3.2-3B-Uncensored-GGUF/model-Q6_K.gguf
  - deepseek-ai/JanusFlow-1.3B/model.gguf
  - prithivMLmods/QwQ-4B-Instruct/model.gguf
  - gpt-omni/mini-omni2/mini-omni2.gguf

# Dependencies
dependencies:
  python:
    - "gradio>=4.44.1"
    - "groq>=0.4.1"
    - "fastapi>=0.68.0"
    - "uvicorn>=0.15.0"
    - "pydantic>=2.0.0"
    - "python-dotenv>=0.19.0"
    - "aiohttp>=3.8.0"
- "asyncio>=3.4.3" | |
- "numpy>=1.24.0" | |
- "pandas>=2.1.0" | |
- "scikit-learn>=1.3.2" | |
- "plotly>=5.18.0" | |
- "networkx>=3.2.1" | |
- "llama-cpp-python>=0.2.23" # Added for local LLM support | |
  system:
    - git-lfs
    - cmake
    - ninja-build  # For faster builds
    - build-essential  # Required for compilation
    - cuda-toolkit-11-8
    - nvidia-cuda-toolkit
    - libcudnn8

# Inference settings (backend routing sketch below the config block)
inference:
  model_backend: groq
  models:
    - name: mixtral-8x7b-32768
      provider: groq
      max_tokens: 32768
    - name: llama2-70b-4096
      provider: groq
      max_tokens: 4096
  fallback:
    enabled: true
    provider: huggingface
    model: mistral-7b-instruct-v0.2

# Resource limits
resources:
  memory: 16      # GB of system RAM
  cpu: 4          # vCPUs
  gpu: 1
  gpu_memory: 16  # GB of VRAM (one T4)
  disk: 50        # GB of storage

# Monitoring (logging/launch sketch below the config block)
monitoring:
  enable_logging: true
  log_level: INFO
  metrics_enabled: true

# Build configuration
build:
  system_packages:
    - cmake
    - build-essential
    - cuda-toolkit-11-8
    - nvidia-cuda-toolkit
    - libcudnn8
  python_packages:
    - --upgrade pip
    - -r requirements.txt
    - torch --index-url https://download.pytorch.org/whl/cu118
    - llama-cpp-python --no-cache-dir

# Runtime configuration
runtime:
  build:
    cuda: "11.8"
    python: "3.10"
  env:
    - PYTHONUNBUFFERED=1
    - GRADIO_SERVER_NAME=0.0.0.0
    - TORCH_CUDA_ARCH_LIST="7.5"
    - CUDA_VISIBLE_DEVICES=0
    - GRADIO_ANALYTICS_ENABLED=false
---
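
The `env` entries in the config are exported as plain `KEY=VALUE` pairs into the container. A minimal sketch of how the app can read them at startup, with defaults mirroring the declared values; this is illustrative, not taken from the Space's own code:

```python
import os

# Defaults mirror the values declared in the env block of the config.
MODEL_BACKEND = os.getenv("MODEL_BACKEND", "groq")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")  # injected from repository secrets
ENABLE_LOCAL_FALLBACK = os.getenv("ENABLE_LOCAL_FALLBACK", "true").lower() == "true"
MAX_PARALLEL_REQUESTS = int(os.getenv("MAX_PARALLEL_REQUESTS", "10"))
REQUEST_TIMEOUT = int(os.getenv("REQUEST_TIMEOUT", "30"))

# Fail fast if the primary backend is selected but its secret is missing.
if MODEL_BACKEND == "groq" and not GROQ_API_KEY:
    raise RuntimeError("GROQ_API_KEY is not set; add it to the Space secrets")
```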
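Each entry in `models` points at a GGUF file, which `llama-cpp-python` (pinned in the dependencies) loads directly. A sketch assuming the files are cached under `SPACE_CACHE_DIR`; the filename here is a placeholder:

```python
import os
from llama_cpp import Llama

CACHE_DIR = os.getenv("SPACE_CACHE_DIR", "/data/models")

# n_gpu_layers=-1 offloads every layer to the T4; use 0 for CPU-only runs.
llm = Llama(
    model_path=os.path.join(CACHE_DIR, "model-Q6_K.gguf"),  # placeholder filename
    n_ctx=4096,
    n_gpu_layers=-1,
)

out = llm.create_chat_completion(
    messages=[{"role": "user", "content": "Describe what an agentic system does."}]
)
print(out["choices"][0]["message"]["content"])
```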
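The `inference` block names Groq as the primary backend with a fallback when it is unavailable. A hedged sketch of that routing using the `groq` SDK from the dependency list; for simplicity the fallback below reuses the local GGUF path rather than the hosted `mistral-7b-instruct-v0.2` the config declares:

```python
import os
from groq import Groq
from llama_cpp import Llama

def complete(prompt: str, model: str = "mixtral-8x7b-32768", max_tokens: int = 1024) -> str:
    """Route to Groq first; fall back to a local GGUF model on failure."""
    try:
        client = Groq(api_key=os.environ["GROQ_API_KEY"])
        resp = client.chat.completions.create(
            model=model,
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
        )
        return resp.choices[0].message.content
    except Exception:
        if os.getenv("ENABLE_LOCAL_FALLBACK", "true").lower() != "true":
            raise
        # Placeholder path; any GGUF from the models list would work here.
        llm = Llama(model_path="/data/models/model-Q6_K.gguf", n_ctx=4096)
        out = llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            max_tokens=max_tokens,
        )
        return out["choices"][0]["message"]["content"]
```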
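`GRADIO_SERVER_NAME`, `GRADIO_SERVER_PORT`, and the `monitoring` settings map onto a launch sequence like this sketch; `respond` is a stand-in for the real app logic, and `LOG_LEVEL` is an assumed env name mirroring `monitoring.log_level`:

```python
import logging
import os
import gradio as gr

# LOG_LEVEL is an assumed env var; logging.basicConfig accepts level names as strings.
logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))

def respond(message: str) -> str:
    return f"echo: {message}"  # placeholder handler

demo = gr.Interface(fn=respond, inputs="text", outputs="text")
demo.launch(
    server_name=os.getenv("GRADIO_SERVER_NAME", "0.0.0.0"),
    server_port=int(os.getenv("GRADIO_SERVER_PORT", "7860")),
)
```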