usag1e committed on
Commit 8e1f74f · 1 Parent(s): aefd37c

Update app.py with the latest changes

Files changed (1)
  1. app.py +17 -23
app.py CHANGED
@@ -1,31 +1,25 @@
- from fastapi import FastAPI, HTTPException, Request
+ from fastapi import FastAPI, HTTPException
  from pydantic import BaseModel
- import logging
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+ import torch
 
- # Set up logging
- logging.basicConfig(level=logging.INFO)
- logger = logging.getLogger("LLM-API")
+ # Load the model and tokenizer
+ MODEL_NAME = "deepseek-ai/DeepSeek-V3-Base"  # Change to the model you want
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
+ model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, device_map="auto").to(device)
 
  app = FastAPI()
 
- # Define the input schema
- class InputText(BaseModel):
+ class Query(BaseModel):
      input_text: str
 
- @app.get("/")
- def root():
-     logger.info("Root endpoint called.")
-     return {"message": "Welcome to the LLM API"}
-
  @app.post("/predict")
- async def predict(data: InputText, request: Request):
-     logger.info("Received request: %s", await request.body())
-     try:
-         # Log the received input
-         input_text = data.input_text
-         logger.info(f"Processing input: {input_text}")
-         # Return a mock response for now
-         return {"response": f"The input was: {input_text}"}
-     except Exception as e:
-         logger.error(f"Error occurred: {e}")
-         raise HTTPException(status_code=500, detail=str(e))
+ async def predict(query: Query):
+     input_text = query.input_text
+     if not input_text:
+         raise HTTPException(status_code=400, detail="Input text cannot be empty.")
+     inputs = tokenizer(input_text, return_tensors="pt").to(device)
+     outputs = model.generate(inputs["input_ids"], max_new_tokens=50, temperature=0.7)
+     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     return {"response": response}
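For reference, a minimal client sketch against the new /predict endpoint. The base URL and port are assumptions for a local run (e.g. started with "uvicorn app:app"); they are not part of this commit.

# client.py - hypothetical example request to the /predict endpoint
import requests

# Assumed local address; point this at wherever the app is actually served.
BASE_URL = "http://localhost:8000"

payload = {"input_text": "Hello, world!"}
resp = requests.post(f"{BASE_URL}/predict", json=payload)
resp.raise_for_status()
print(resp.json()["response"])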