# Configure bash error handling: abort on a failing command (-e), on use of
# an unset variable (-u), and when any stage of a pipeline fails (pipefail).
set -euo pipefail

# Configuration.
# API_HOST, API_PORT and API_VERSION may be supplied through the environment;
# when unset or empty they fall back to the defaults below. BASE_URL is the
# common prefix of every endpoint this script calls.
API_HOST="${API_HOST:-localhost}"
API_PORT="${API_PORT:-8000}"
API_VERSION="${API_VERSION:-v1}"
BASE_URL="http://${API_HOST}:${API_PORT}/api/${API_VERSION}"
# Generate test embedding data.
#
# Feeds an inline Python program to python3 on stdin (requires numpy). The
# program draws a random 4096-element float32 vector, divides it by its L2
# norm, and prints it as a JSON array.
#
# Outputs: the JSON array on stdout, without a trailing newline.
# Returns: the exit status of python3.
generate_test_embedding() {
  # The heredoc delimiter is quoted so the shell hands the Python source over
  # verbatim, with no parameter or command expansion applied to it.
  python3 - <<'EOF'
import json

import numpy as np

# Generate a 4096-dimensional embedding vector (correct dimension for model)
embedding = np.random.randn(4096).astype(np.float32)
# Normalize the embedding
embedding = embedding / np.linalg.norm(embedding)
print(json.dumps(embedding.tolist()), end="")
EOF
}
# Test the health endpoint.
#
# Sends a GET request to ${BASE_URL}/health and writes the response body to
# stdout.
#
# Globals: BASE_URL (read)
# Outputs: progress line and response body on stdout; diagnostics on stderr.
# Exits:   terminates the script with status 1 when the request fails.
test_health() {
  echo "Testing health endpoint..."
  # --fail makes curl exit non-zero on an HTTP error status (>= 400); without
  # it an error response would be reported as a passing health check.
  # --show-error keeps curl's own error message visible despite --silent.
  if ! curl --silent --show-error --fail "${BASE_URL}/health"; then
    echo "Health check failed" >&2
    exit 1
  fi
}
# Test the inference endpoint.
#
# Generates a test embedding with generate_test_embedding, POSTs it as
# {"embedding": [...]} to ${BASE_URL}/inference, and reports how many whole
# seconds the request took.
#
# Globals: BASE_URL (read)
# Outputs: progress lines and the response body on stdout; diagnostics on
#          stderr.
# Exits:   terminates the script with status 1 when the embedding cannot be
#          generated or the request fails.
test_inference() {
  local embedding_data start_time end_time duration

  echo
  echo "Testing inference endpoint..."

  # Assign separately from 'local': 'local var=$(cmd)' always succeeds and
  # would hide a generator failure, leading to an invalid request body.
  if ! embedding_data=$(generate_test_embedding); then
    echo "Failed to generate test embedding" >&2
    exit 1
  fi

  # Start the clock only now so the reported duration covers the HTTP request
  # and not the time spent generating the embedding.
  start_time=$(date +%s)

  # The body is streamed on stdin (--data-binary @-) instead of being passed
  # as a command-line argument, because the serialized vector is large and
  # arguments are subject to operating-system size limits.
  # --fail makes curl exit non-zero on an HTTP error status (>= 400).
  if ! printf '{"embedding": %s}' "${embedding_data}" \
    | curl --fail -X POST "${BASE_URL}/inference" \
      -H "Content-Type: application/json" \
      --data-binary @-; then
    echo "Inference request failed" >&2
    exit 1
  fi

  end_time=$(date +%s)
  duration=$((end_time - start_time))
  echo "Inference request completed in ${duration} seconds"
}
# Entry point: run the health check first, then the inference test.
# Arguments: accepted but not used.
main() {
  test_health
  test_inference
}

main "$@"