# Copyright (c) Alibaba, Inc. and its affiliates.
import os
import secrets
import tempfile
from http import HTTPStatus
from pathlib import Path

import gradio as gr
import modelscope_studio.components.antd as antd
import modelscope_studio.components.base as ms
from PIL import Image
from urllib3.exceptions import HTTPError

# NOTE: the endpoint is exported *before* `dashscope` is imported; keep this
# ordering (presumably the SDK reads the variable at import time -- confirm
# before moving the import to the top of the file).
os.environ['DASHSCOPE_HTTP_BASE_URL'] = 'https://dashscope.aliyuncs.com/api/v1'
# os.environ['DASHSCOPE_WEBSOCKET_BASE_URL'] = 'https://poc-dashscope.aliyuncs.com/api-ws/v1/inference'

import dashscope
from dashscope import MultiModalConversation

API_KEY = os.environ.get('API_KEY')
dashscope.api_key = API_KEY

# True when the app runs with MODELSCOPE_ENVIRONMENT=studio; selects the
# Chinese UI strings and the zh_CN locale.
is_modelscope_studio = os.getenv('MODELSCOPE_ENVIRONMENT') == 'studio'


def get_text(text: str, cn_text: str):
    """Return ``cn_text`` in the ModelScope studio environment, else ``text``."""
    if is_modelscope_studio:
        return cn_text
    return text


def resolve_image(filename):
    """Return ``filename`` joined onto the directory that contains this file."""
    return os.path.join(os.path.dirname(__file__), filename)


# Example prompts rendered as clickable cards; clicking one fills the text
# box with "description" and the image input with "image".
DEMO_LIST = [
    {
        "description":
        "Evaluate the integral of the functions graphed using the formula for circles: ",
        "image": resolve_image("./examples/1.webp")
    },
    {
        "description": "请解答这道题",
        "image": resolve_image("./examples/5.png")
    },
    {
        "description": "图片中的滤液E是什么化学物质?",
        "image": resolve_image("./examples/3.png")
    },
    {
        "description": "How many pelicans are there in the picture",
        "image": resolve_image("./examples/6.png")
    },
]

# Pillow modes that carry an alpha band and therefore need flattening.
_ALPHA_MODES = ('RGBA', 'LA', 'PA')
# Pillow modes the JPEG writer accepts without conversion.
_JPEG_MODES = ('1', 'L', 'RGB', 'RGBX', 'CMYK', 'YCbCr')


def process_image(image, shouldConvert=False):
    """Save an uploaded PIL image as a JPEG temp file and return its path.

    The file is written to ``$GRADIO_TEMP_DIR`` when that variable is set,
    otherwise to ``<system temp dir>/gradio``; the directory is created if
    missing. The file name is random (``tmp<40 hex chars>.jpg``).

    JPEG cannot store transparency, so images with an alpha band (or a
    palette image with a transparency entry) are composited onto a white
    background first. Any other mode the JPEG writer does not accept is
    converted to RGB. Previously such uploads made ``image.save`` raise
    unless the caller remembered to pass ``shouldConvert=True``, and
    ``shouldConvert=True`` itself failed on images without an alpha band.

    Args:
        image: A ``PIL.Image.Image`` instance.
        shouldConvert: Force the flatten-onto-white path (the result is
            always RGB) even when no transparency is detected.

    Returns:
        The absolute path of the saved ``.jpg`` file. The caller owns the
        file; nothing in this function deletes it.
    """
    # Directory used for uploaded files.
    uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(
        Path(tempfile.gettempdir()) / "gradio")
    os.makedirs(uploaded_file_dir, exist_ok=True)

    # Random, collision-resistant temp file path.
    name = f"tmp{secrets.token_hex(20)}.jpg"
    filename = os.path.join(uploaded_file_dir, name)

    has_alpha = image.mode in _ALPHA_MODES or (
        image.mode == 'P' and 'transparency' in image.info)
    if shouldConvert or has_alpha:
        # Normalise to RGBA so the image can act as its own paste mask
        # regardless of the source mode, then flatten onto white.
        rgba = image.convert('RGBA')
        new_img = Image.new('RGB',
                            size=(rgba.width, rgba.height),
                            color=(255, 255, 255))
        new_img.paste(rgba, (0, 0), mask=rgba)
        image = new_img
    elif image.mode not in _JPEG_MODES:
        image = image.convert('RGB')

    # Save the (possibly converted) upload.
    image.save(filename)
    return filename


def on_clear():
    """Return Gradio updates that reset the text box and every image input.

    ``input`` and ``input_image`` are module-level names bound while the
    Blocks layout is built; ``input_image`` is a tuple of components, which
    is why it is iterated here.
    """
    return {
        input: gr.update(value=None),
        **{
            item: gr.update(value=None)
            for item in input_image
        },
    }


css = """
.output-markdown {
  overflow: unset !important;
}
"""
# Build the single-page UI: header, input column (image + question +
# examples), answer column, a guided tour, and the event wiring.
with gr.Blocks(css=css) as demo:
    with ms.Application() as app:
        with antd.ConfigProvider(
                locale="zh_CN" if is_modelscope_studio else None,
                theme=dict(token=dict(colorPrimary="#a855f7"))):
            # ---- Header: logo + model title ------------------------------
            with antd.Card(elem_style=dict(marginBottom=12),
                           styles=dict(body=dict(padding=4))):
                with antd.Flex(elem_style=dict(width="100%"),
                               justify="center",
                               align="center",
                               gap=14):
                    with ms.Div(elem_style=dict(flexShrink=0)):
                        antd.Image(resolve_image("./cutelogo.jpg"),
                                   preview=False,
                                   height=60,
                                   width=60)
                    with ms.Div():
                        antd.Typography.Title("QVQ-72B-Preview",
                                              elem_style=dict(margin=0,
                                                              fontSize=24),
                                              level=1)

            with ms.AutoLoading():
                with antd.Row(gutter=[8, 8], align="stretch"):
                    # ---- Left column: inputs and examples ----------------
                    with antd.Col(xs=24, md=8):
                        with antd.Space(direction="vertical",
                                        elem_style=dict(width="100%")):
                            with antd.Space(direction="vertical",
                                            elem_style=dict(width="100%"),
                                            elem_id="input-container"):
                                with ms.Fragment():
                                    # Deliberately a 1-tuple: on_clear()
                                    # iterates it and the event wiring
                                    # below unpacks it / indexes [0].
                                    input_image = (gr.Image(
                                        type="pil",
                                        label="Upload",
                                        sources=["upload"]), )
                                # NOTE: `input` shadows the builtin, but
                                # on_clear() refers to it by this name.
                                input = antd.Input.Textarea(
                                    placeholder=get_text(
                                        "Ask a question", "输入一个问题"),
                                    auto_size=dict(maxRows=6, minRows=2),
                                    allow_clear=True)

                                with antd.Flex(align="center",
                                               justify="space-between"):
                                    antd.Typography.Text(get_text(
                                        "Warning: This model only supports single-turn dialogue.",
                                        "注:当前模型只支持单轮对话,如需中文回答,提示词加“用中文回答”"),
                                                         type="warning")

                                    tour_btn = antd.Button(get_text(
                                        "Tour", "使用指引"),
                                                           variant="filled",
                                                           color="default")

                            with antd.Row(gutter=8):
                                with antd.Col(span=12):
                                    clear_btn = antd.Button(get_text(
                                        "Clear", "清除"),
                                                            block=True)
                                with antd.Col(span=12):
                                    submit_btn = antd.Button(
                                        get_text("Submit", "提交"),
                                        type="primary",
                                        block=True,
                                        elem_id="submit-btn")

                            antd.Divider(get_text("Examples", "示例"))

                            def bind_on_example(_item):
                                """Return a click handler bound to one example.

                                A factory is used so each card captures its
                                own item instead of the loop variable.
                                """

                                def on_example():
                                    return gr.update(
                                        value=_item['description']
                                    ), gr.update(value=_item['image'])

                                return on_example

                            with antd.Flex(gap="small", wrap=True):
                                for item in DEMO_LIST:
                                    with antd.Card(
                                            hoverable=True,
                                            elem_style=dict(
                                                width="100%")) as example:
                                        if "description" in item:
                                            antd.Typography.Text(
                                                item["description"])
                                        if "image" in item:
                                            antd.Image(item["image"],
                                                       preview=False)
                                    # Clicking a card fills the question
                                    # box and the image input.
                                    example.click(
                                        fn=bind_on_example(item),
                                        outputs=[input, input_image[0]])

                    # ---- Right column: streamed answer -------------------
                    with antd.Col(xs=24, md=16):
                        with antd.Card(title=get_text("Answer", "答案"),
                                       elem_style=dict(height="100%"),
                                       elem_id="output-container"):
                            with ms.Slot("extra"):
                                # Disabled until a generation is running.
                                cancel_btn = antd.Button(get_text(
                                    "Stop", "停止"),
                                                         elem_id="cancel-btn",
                                                         block=True,
                                                         disabled=True)
                            with ms.Div(elem_style=dict(
                                    maxHeight=1600,
                                    display="flex",
                                    flexDirection="column-reverse",
                                    overflow="auto")):
                                output = gr.Markdown(
                                    show_copy_button=True,
                                    elem_classes="output-markdown",
                                    latex_delimiters=[{
                                        "left": '$$',
                                        "right": '$$',
                                        "display": True
                                    }, {
                                        "left": '$',
                                        "right": '$',
                                        "display": False,
                                    }, {
                                        "left": '\\(',
                                        "right": '\\)',
                                        "display": False,
                                    }, {
                                        "left": '\\[',
                                        "right": '\\]',
                                        "display": True
                                    }])

                # ---- Guided tour -----------------------------------------
                with antd.Tour(open=False) as tour:
                    antd.Tour.Step(
                        title=get_text("Step 1", "步骤 1"),
                        description=get_text("Upload image and enter text",
                                             "传入图片和文本"),
                        get_target=
                        "() => document.querySelector('#input-container')")
                    antd.Tour.Step(
                        title=get_text("Step 2", "步骤 2"),
                        description=get_text("Click the submit button",
                                             "点击提交按钮"),
                        get_target="() => document.querySelector('#submit-btn')"
                    )
                    antd.Tour.Step(
                        title=get_text("Step 3", "步骤 3"),
                        description=get_text("Wait for the result", "等待结果返回"),
                        get_target=
                        "() => document.querySelector('#output-container')")
                    antd.Tour.Step(
                        title=get_text("Tips", "提示"),
                        description=get_text("Click here to end output early",
                                             "点击这里提前结束输出"),
                        get_target="() => document.querySelector('#cancel-btn')"
                    )

                tour_btn.click(fn=lambda: gr.update(open=True), outputs=[tour])
                gr.on([tour.finish, tour.close],
                      fn=lambda: gr.update(open=False),
                      outputs=[tour])

                def generate(image, query):
                    """Stream the model's answer for one image/question pair.

                    Args:
                        image: PIL image from the upload component, or None.
                        query: Question text, possibly empty.

                    Yields:
                        Dicts mapping components to updates: the Stop button
                        is enabled once the request has been issued, the
                        answer Markdown is replaced on every non-empty chunk,
                        and the Stop button is disabled again at the end.

                    Raises:
                        gr.Error: When both image and query are empty.
                        HTTPError: When a streamed response has a status
                            other than 200.
                    """
                    if image is None and not query:
                        raise gr.Error(
                            get_text("Error: Input is empty", "错误:输入内容为空"))

                    content = []
                    image_file = None
                    if image is not None:
                        image_file = process_image(image)
                        content.append({'image': f'file://{image_file}'})
                    if query:
                        content.append({'text': query})
                    print("image", image)
                    print("query", query)
                    messages = [
                        {
                            'role': 'user',
                            'content': content
                        },
                    ]
                    try:
                        responses = MultiModalConversation.call(
                            model='qvq-72b-preview',
                            messages=messages,
                            stream=True,
                        )
                        yield {cancel_btn: gr.update(disabled=False)}
                        for response in responses:
                            if response.status_code != HTTPStatus.OK:
                                raise HTTPError(
                                    f'response.code: {response.code}\nresponse.message: {response.message}'
                                )
                            parts = response.output.choices[0].message.content
                            # A chunk may carry no text part yet; skip it
                            # instead of failing on a missing key.
                            text = parts[0].get('text') if parts else None
                            if text:
                                print(text)
                                # The output is replaced, not appended --
                                # presumably each chunk holds the text so
                                # far; confirm against the DashScope docs.
                                yield {output: text}
                        yield {cancel_btn: gr.update(disabled=True)}
                    finally:
                        # Runs on completion, on error, and when the Stop
                        # button closes this generator: remove the temp
                        # JPEG so uploads do not pile up on disk.
                        if image_file is not None:
                            try:
                                os.remove(image_file)
                            except OSError:
                                # Best effort: the file may already be gone.
                                pass

                output_process = submit_btn.click(fn=generate,
                                                  inputs=[*input_image, input],
                                                  outputs=[output, cancel_btn])
                clear_btn.click(fn=on_clear, outputs=[*input_image, input])
                # Two listeners on Stop: one cancels the running generation,
                # the other disables the button again, since a cancelled
                # generator never reaches its final yield.
                cancel_btn.click(fn=None,
                                 inputs=None,
                                 outputs=None,
                                 cancels=[output_process])
                cancel_btn.click(fn=lambda: gr.update(disabled=True),
                                 inputs=None,
                                 outputs=[cancel_btn])

demo.queue(default_concurrency_limit=50).launch(ssr_mode=False)