Kastg committed on
Commit 9dd4f46 · verified · 1 Parent(s): 299dc4c

Update app.py

Files changed (1): app.py +34 -25

app.py CHANGED
@@ -1,30 +1,39 @@
  import gradio as gr
- from llama_cpp import Llama
-
- llm = Llama(model_path="model.gguf", n_ctx=4000, n_threads=2, chat_format="chatml")
-
- def generate(message, history, temperature=0.3, max_tokens=512):
-     system_prompt = "You are OpenChat, an userful AI assistant."
-     formatted_prompt = [{"role": "system", "content": system_prompt}]
-     for user_prompt, bot_response in history:
-         formatted_prompt.append({"role": "user", "content": user_prompt})
-         formatted_prompt.append({"role": "assistant", "content": bot_response})
-     formatted_prompt.append({"role": "user", "content": message})
-     stream_response = llm.create_chat_completion(messages=formatted_prompt, temperature=temperature, max_tokens=max_tokens, stream=True)
-     response = ""
-     for chunk in stream_response:
-         if len(chunk['choices'][0]["delta"]) != 0 and "content" in chunk['choices'][0]["delta"]:
-             response += chunk['choices'][0]["delta"]["content"]
-             yield response
-
- mychatbot = gr.Chatbot(
-     avatar_images=["user.png", "botoc.png"], bubble_full_width=False, show_label=False, show_copy_button=True, likeable=True,)
-
- iface = gr.ChatInterface(fn=generate, chatbot=mychatbot, retry_btn=None, undo_btn=None)
-
- with gr.Blocks() as demo:
-     gr.HTML("<center><h1>Tomoniai's Chat with OpenChat-3.5</h1></center>")
-     iface.render()
-
- demo.queue().launch(show_api=False, server_name="0.0.0.0")
-
+ from fastapi import FastAPI, HTTPException, Request
+ from fastapi.responses import JSONResponse
+ from llama_cpp import Llama
  import gradio as gr
+
+ app = FastAPI()
+
+ llm = Llama(model_path="model.gguf", n_ctx=4000, n_threads=2, chat_format="chatml")
+
+ @app.post("/api/v1/chat")
+ async def chat_post(request: Request):
+     data = await request.json()
+     message = data.get("message")
+     history = data.get("history", [])
+     temperature = data.get("temperature", 0.3)
+     max_tokens = data.get("max_tokens", 512)
+
+     async def generate():
+         system_prompt = "You are OpenChat, a useful AI assistant."
+         formatted_prompt = [{"role": "system", "content": system_prompt}]
+         for user_prompt, bot_response in history:
+             formatted_prompt.append({"role": "user", "content": user_prompt})
+             formatted_prompt.append({"role": "assistant", "content": bot_response})
+         formatted_prompt.append({"role": "user", "content": message})
+         stream_response = llm.create_chat_completion(messages=formatted_prompt, temperature=temperature, max_tokens=max_tokens, stream=True)
+         # Accumulate the streamed deltas into one string: JSONResponse
+         # cannot serialize a generator, so the full reply is returned at once.
+         response = ""
+         for chunk in stream_response:
+             delta = chunk["choices"][0]["delta"]
+             if "content" in delta:
+                 response += delta["content"]
+         return response
+
+     return JSONResponse(content={"response": await generate()})
+
+ @app.get("/api/v1/chat")
+ async def chat_get():
+     return {"message": "Send a POST request to this endpoint to chat."}
+
+ if __name__ == "__main__":
+     import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=8000)
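
Not part of the commit, but for reference, a minimal client-side smoke test for the endpoint added above. It assumes the app is running locally via `python app.py` (uvicorn on port 8000); the payload keys mirror what `chat_post` reads from the request body, and the `requests` library is an illustrative choice, not a dependency of this repo.

# Hypothetical smoke test for POST /api/v1/chat (not part of the commit).
import requests

payload = {
    "message": "Hello, who are you?",
    "history": [],          # prior turns as [user, assistant] pairs
    "temperature": 0.3,
    "max_tokens": 512,
}
resp = requests.post("http://localhost:8000/api/v1/chat", json=payload)
resp.raise_for_status()
print(resp.json()["response"])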