jina-embeddings-v3

Sleeping

LINYINGHAO committed on Nov 6, 2024

Commit

4d3f40f

verified ·

1 Parent(s): 0a567c1

Create app.py

Files changed (1) hide show

app.py ADDED Viewed

+import gradio as gr
+from transformers import AutoTokenizer, AutoModel
+import torch
+# 1. Load the model and tokenizer (runs once, at import time)
+model_name = "jinaai/jina-embeddings-v3"  # replace with the model name you actually use
+# NOTE(review): trust_remote_code=True lets from_pretrained run Python code
+# shipped in the model repository — confirm the repository is trusted.
+tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
+model = AutoModel.from_pretrained(model_name, trust_remote_code=True)
+# 2. 定义生成嵌入的函数
+def generate_embeddings(text):
+    # 使用分词器处理输入文本
+    inputs = tokenizer(text, return_tensors="pt")
+    # 禁用梯度计算，以减少资源消耗
+    with torch.no_grad():
+        # 获取最后一层隐藏状态并计算平均值作为嵌入
+        embeddings = model(**inputs).last_hidden_state.mean(dim=1)
+    # 将嵌入转换为Python列表，方便Gradio输出
+    return embeddings.numpy().tolist()
+# 3. Define the interface with Gradio
+iface = gr.Interface(
+    fn=generate_embeddings,  # call the embedding-generation function
+    inputs="text",           # input type is text
+    outputs="json",          # output is JSON, convenient for API calls
+    title="Text Embedding Generator",
+    description="Enter text to generate embeddings using the Jina model."
+)
+# 4. Launch the Gradio app (only when this file is run as a script)
+if __name__ == "__main__":
+    iface.launch()