Spaces:

Qwen
/

QVQ-72B-preview

Running

App Files Files Community

QVQ-72B-preview / app.py

littlebird13

Update app.py

51882e5 verified about 17 hours ago

raw

history blame contribute delete

13.6 kB

	# Copyright (c) Alibaba, Inc. and its affiliates.
	import os
	import secrets
	import tempfile
	from http import HTTPStatus
	from pathlib import Path

	import gradio as gr
	import modelscope_studio.components.antd as antd
	import modelscope_studio.components.base as ms
	from PIL import Image
	from urllib3.exceptions import HTTPError

	# Point the DashScope SDK at the public HTTP endpoint. This is set before
	# `dashscope` is imported below.
	# NOTE(review): presumably the SDK reads this variable at import time, which
	# would be why the import is placed after the assignment - confirm.
	os.environ['DASHSCOPE_HTTP_BASE_URL'] = 'https://dashscope.aliyuncs.com/api/v1'
	# os.environ['DASHSCOPE_WEBSOCKET_BASE_URL'] = 'https://poc-dashscope.aliyuncs.com/api-ws/v1/inference'

	import dashscope
	from dashscope import MultiModalConversation

	# The API key comes from the environment; it is None when API_KEY is unset.
	API_KEY = os.environ.get('API_KEY')
	dashscope.api_key = API_KEY

	# True only when MODELSCOPE_ENVIRONMENT is exactly 'studio'; used to switch
	# the UI to its Chinese strings and locale.
	is_modelscope_studio = os.getenv('MODELSCOPE_ENVIRONMENT') == 'studio'


	def get_text(text: str, cn_text: str):
	    """Pick the UI string for the current hosting environment.

	    Args:
	        text: String used by default.
	        cn_text: String used when ``is_modelscope_studio`` is true.

	    Returns:
	        ``cn_text`` when running in ModelScope Studio, otherwise ``text``.
	    """
	    return cn_text if is_modelscope_studio else text


	def resolve_image(filename):
	    """Return ``filename`` joined onto the directory containing this file."""
	    base_dir = os.path.dirname(__file__)
	    return os.path.join(base_dir, filename)


	# Example prompts rendered as clickable cards in the UI. Each entry pairs a
	# question ("description") with the path of a bundled example image ("image").
	DEMO_LIST = [
	    dict(
	        description=
	        "Evaluate the integral of the functions graphed using the formula for circles: ",
	        image=resolve_image("./examples/1.webp"),
	    ),
	    dict(
	        description="请解答这道题",
	        image=resolve_image("./examples/5.png"),
	    ),
	    dict(
	        description="图片中的滤液E是什么化学物质?",
	        image=resolve_image("./examples/3.png"),
	    ),
	    dict(
	        description="How many pelicans are there in the picture",
	        image=resolve_image("./examples/6.png"),
	    ),
	]


	def process_image(image, shouldConvert=False):
	    """Save an uploaded image as a temporary ``.jpg`` file and return its path.

	    The file is written to ``$GRADIO_TEMP_DIR`` when that variable is set,
	    otherwise to ``<system temp dir>/gradio``; the directory is created if
	    it does not exist.

	    Because the target is always a JPEG, images that carry transparency are
	    flattened onto a white background, and images in other modes the JPEG
	    writer does not accept are converted to RGB, before saving.

	    Args:
	        image: A PIL image (anything exposing ``mode``, ``info``,
	            ``convert`` and ``save`` in the PIL sense).
	        shouldConvert: Force flattening onto a white background even when
	            no transparency was detected.

	    Returns:
	        Path of the saved file.
	    """
	    # Directory used for uploaded files.
	    uploaded_file_dir = os.environ.get("GRADIO_TEMP_DIR") or str(
	        Path(tempfile.gettempdir()) / "gradio")
	    os.makedirs(uploaded_file_dir, exist_ok=True)

	    # Random temporary file name.
	    name = f"tmp{secrets.token_hex(20)}.jpg"
	    filename = os.path.join(uploaded_file_dir, name)

	    # JPEG has no alpha channel: detect transparency (alpha band, or a
	    # palette/greyscale image with a transparency entry) and flatten it.
	    has_alpha = (image.mode in ("RGBA", "LA", "PA")
	                 or "transparency" in image.info)
	    if shouldConvert or has_alpha:
	        # Going through RGBA guarantees an alpha band to use as paste mask,
	        # so forcing conversion of an opaque image no longer fails.
	        rgba = image.convert("RGBA")
	        flattened = Image.new('RGB',
	                              size=(rgba.width, rgba.height),
	                              color=(255, 255, 255))
	        flattened.paste(rgba, (0, 0), mask=rgba)
	        image = flattened
	    elif image.mode not in ("L", "RGB", "CMYK"):
	        # e.g. "P", "I;16", "F": not writable as JPEG as-is.
	        image = image.convert("RGB")

	    # Save the (possibly converted) image.
	    image.save(filename)

	    return filename


	def on_clear():
	    """Build the updates that reset the question box and every image input.

	    Returns:
	        A dict mapping the text input and each component in ``input_image``
	        to ``gr.update(value=None)``.
	    """
	    image_resets = {
	        component: gr.update(value=None)
	        for component in input_image
	    }
	    return {input: gr.update(value=None), **image_resets}


	# Custom CSS handed to gr.Blocks(css=...). It overrides the `overflow`
	# property of elements with the `output-markdown` class, which is the class
	# assigned to the answer Markdown component.
	css = """
	.output-markdown {
	overflow: unset !important;
	}
	"""

	# Page layout: a header card (logo + title), an input column and an answer
	# column, all inside a ConfigProvider that sets the locale and primary color.
	with gr.Blocks(css=css) as demo:
	with ms.Application() as app:
	with antd.ConfigProvider(
	locale="zh_CN" if is_modelscope_studio else None,
	theme=dict(token=dict(colorPrimary="#a855f7"))):
	# Header card: logo image next to the model title.
	with antd.Card(elem_style=dict(marginBottom=12),
	styles=dict(body=dict(padding=4))):
	with antd.Flex(elem_style=dict(width="100%"),
	justify="center",
	align="center",
	gap=14):
	with ms.Div(elem_style=dict(flexShrink=0)):
	antd.Image(resolve_image("./cutelogo.jpg"),
	preview=False,
	height=60,
	width=60)
	with ms.Div():
	antd.Typography.Title("QVQ-72B-Preview",
	elem_style=dict(margin=0,
	fontSize=24),
	level=1)
	with ms.AutoLoading():
	with antd.Row(gutter=[8, 8], align="stretch"):
	# Input column: image upload, question box, buttons and examples.
	with antd.Col(xs=24, md=8):
	with antd.Space(direction="vertical",
	elem_style=dict(width="100%")):
	with antd.Space(direction="vertical",
	elem_style=dict(width="100%"),
	elem_id="input-container"):
	with ms.Fragment():
	# NOTE: the trailing comma makes `input_image` a 1-tuple. The rest of the
	# file relies on that (`input_image[0]`, `*input_image`, iteration).
	input_image = gr.Image(type="pil",
	label="Upload",
	sources=["upload"]),
	# NOTE: this module-level name shadows the builtin `input`.
	input = antd.Input.Textarea(
	placeholder=get_text(
	"Ask a question", "输入一个问题"),
	auto_size=dict(maxRows=6, minRows=2),
	allow_clear=True)

	# Single-turn warning text and the button that opens the guided tour.
	with antd.Flex(align="center",
	justify="space-between"):
	antd.Typography.Text(get_text(
	"Warning: This model only supports single-turn dialogue.",
	"注：当前模型只支持单轮对话，如需中文回答，提示词加“用中文回答”"),
	type="warning")
	tour_btn = antd.Button(get_text(
	"Tour", "使用指引"),
	variant="filled",
	color="default")

	# Clear and Submit buttons, each in its own span=12 column.
	with antd.Row(gutter=8):
	with antd.Col(span=12):
	clear_btn = antd.Button(get_text(
	"Clear", "清除"),
	block=True)
	with antd.Col(span=12):
	submit_btn = antd.Button(
	get_text("Submit", "提交"),
	type="primary",
	block=True,
	elem_id="submit-btn")

	antd.Divider(get_text("Examples", "示例"))

	# One card per DEMO_LIST entry; clicking a card fills the question box and
	# the image input with that example.
	with antd.Flex(gap="small", wrap=True):
	for item in DEMO_LIST:

	# Factory that captures the current entry, so every card's click handler
	# returns its own description and image rather than the last loop value.
	def bind_on_example(_item):

	def on_example():
	return gr.update(
	value=_item['description']
	), gr.update(value=_item['image'])

	return on_example

	with antd.Card(
	hoverable=True,
	elem_style=dict(
	width="100%")) as example:
	if "description" in item:
	antd.Typography.Text(
	item["description"])
	if "image" in item:
	antd.Image(item["image"],
	preview=False)
	example.click(
	fn=bind_on_example(item),
	outputs=[input, input_image[0]])

	# Answer column: card with a Stop button in its "extra" slot and the
	# Markdown output.
	with antd.Col(xs=24, md=16):
	with antd.Card(title=get_text("Answer", "答案"),
	elem_style=dict(height="100%"),
	elem_id="output-container"):
	with ms.Slot("extra"):
	# The Stop button starts disabled; generate() enables it while streaming.
	cancel_btn = antd.Button(get_text(
	"Stop", "停止"),
	elem_id="cancel-btn",
	block=True,
	disabled=True)
	with ms.Div(elem_style=dict(
	maxHeight=1600,
	display="flex",
	flexDirection="column-reverse",
	overflow="auto")):
	# Markdown output configured with $$, $, \( \) and \[ \] LaTeX delimiters.
	output = gr.Markdown(
	show_copy_button=True,
	elem_classes="output-markdown",
	latex_delimiters=[{
	"left": '$$',
	"right": '$$',
	"display": True
	}, {
	"left": '$',
	"right": '$',
	"display": False,
	}, {
	"left": '\\(',
	"right": '\\)',
	"display": False,
	}, {
	"left": '\\[',
	"right": '\\]',
	"display": True
	}])
	# Guided tour: each entry is (title, description, JS getter of the element
	# the step points at).
	tour_steps = (
	    (
	        get_text("Step 1", "步骤 1"),
	        get_text("Upload image and enter text", "传入图片和文本"),
	        "() => document.querySelector('#input-container')",
	    ),
	    (
	        get_text("Step 2", "步骤 2"),
	        get_text("Click the submit button", "点击提交按钮"),
	        "() => document.querySelector('#submit-btn')",
	    ),
	    (
	        get_text("Step 3", "步骤 3"),
	        get_text("Wait for the result", "等待结果返回"),
	        "() => document.querySelector('#output-container')",
	    ),
	    (
	        get_text("Tips", "提示"),
	        get_text("Click here to end output early", "点击这里提前结束输出"),
	        "() => document.querySelector('#cancel-btn')",
	    ),
	)
	with antd.Tour(open=False) as tour:
	    for step_title, step_description, step_target in tour_steps:
	        antd.Tour.Step(title=step_title,
	                       description=step_description,
	                       get_target=step_target)

	# The Tour button opens the tour; finishing or closing it hides it again.
	tour_btn.click(
	    fn=lambda: gr.update(open=True),
	    outputs=[tour],
	)
	gr.on(
	    [tour.finish, tour.close],
	    fn=lambda: gr.update(open=False),
	    outputs=[tour],
	)

	def generate(image, query):
	    """Stream the model's answer for an image and/or a text question.

	    Generator used as the submit handler. It yields dicts keyed by output
	    component: first one enabling the Stop button, then the answer text of
	    each streamed response that carries text, and finally one disabling
	    the Stop button again.

	    Args:
	        image: Uploaded PIL image, or None when nothing was uploaded.
	        query: Question text; may be empty.

	    Raises:
	        gr.Error: If both ``image`` and ``query`` are empty.
	        HTTPError: If a streamed response has a non-OK status code.
	    """
	    has_image = image is not None
	    if not has_image and not query:
	        raise gr.Error(
	            get_text("Error: Input is empty", "错误：输入内容为空"))

	    content = []
	    image_file = None
	    if has_image:
	        image_file = process_image(image)
	        content.append({'image': f'file://{image_file}'})
	    if query:
	        content.append({'text': query})

	    print("image", image)
	    print("query", query)
	    messages = [
	        {
	            'role': 'user',
	            'content': content
	        },
	    ]

	    try:
	        responses = MultiModalConversation.call(
	            model='qvq-72b-preview',
	            messages=messages,
	            stream=True,
	        )
	        yield {cancel_btn: gr.update(disabled=False)}
	        for response in responses:
	            if response.status_code != HTTPStatus.OK:
	                raise HTTPError(
	                    f'response.code: {response.code}\nresponse.message: {response.message}'
	                )
	            parts = response.output.choices[0].message.content
	            # Only the first content item is shown; skip responses whose
	            # first item is missing or has no (non-empty) text.
	            text = parts[0].get('text') if len(parts) > 0 else None
	            if text:
	                print(text)
	                yield {output: text}
	    finally:
	        # The temporary image is only needed for this request. Remove it on
	        # normal completion, on errors, and when the generator is closed
	        # early, so files do not pile up in the temp directory.
	        if image_file is not None:
	            try:
	                os.remove(image_file)
	            except OSError:
	                # Best-effort cleanup: a file that is already gone or not
	                # removable must not mask the real outcome of the request.
	                pass
	    yield {cancel_btn: gr.update(disabled=True)}

	# Submit streams the answer into the Markdown output and toggles the Stop
	# button; the returned event handle is what the Stop button cancels.
	generate_inputs = [*input_image, input]
	generate_outputs = [output, cancel_btn]
	output_process = submit_btn.click(
	    fn=generate,
	    inputs=generate_inputs,
	    outputs=generate_outputs,
	)

	# Clear resets every image input and the question box.
	clear_btn.click(
	    fn=on_clear,
	    outputs=[*input_image, input],
	)

	# Stop has two listeners: one cancels the running generation, the other
	# disables the Stop button itself.
	cancel_btn.click(
	    fn=None,
	    inputs=None,
	    outputs=None,
	    cancels=[output_process],
	)
	cancel_btn.click(
	    fn=lambda: gr.update(disabled=True),
	    inputs=None,
	    outputs=[cancel_btn],
	)

	# Enable the request queue (default concurrency limit of 50 per event) and
	# start the app with SSR mode turned off.
	queued_demo = demo.queue(default_concurrency_limit=50)
	queued_demo.launch(ssr_mode=False)