asoria HF staff committed on
Commit
ad4860f
·
verified ·
1 Parent(s): ccd4941

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +19 -4
app.py CHANGED
@@ -6,6 +6,7 @@ from httpx import Client
6
  from huggingface_hub import HfApi
7
  from huggingface_hub.utils import logging
8
  from llama_cpp import Llama
 
9
 
10
  load_dotenv()
11
 
@@ -24,10 +25,13 @@ headers = {
24
  logger = logging.get_logger(__name__)
25
  client = Client(headers=headers)
26
  api = HfApi(token=HF_TOKEN)
 
 
27
  llama = Llama(
28
  model_path="DuckDB-NSQL-7B-v0.1-q8_0.gguf",
29
  n_ctx=2048,
30
  )
 
31
 
32
  def get_first_parquet(dataset: str):
33
  resp = client.get(f"{BASE_DATASETS_SERVER_URL}/parquet?dataset={dataset}")
@@ -59,9 +63,9 @@ def text2sql(dataset_name, query_input):
59
  print(first_parquet_url)
60
  con = duckdb.connect()
61
  con.execute("INSTALL 'httpfs'; LOAD httpfs;")
 
62
  con.execute(f"CREATE TABLE data as SELECT * FROM '{first_parquet_url}' LIMIT 1;")
63
  result = con.sql("SELECT sql FROM duckdb_tables() where table_name ='data';").df()
64
- con.close()
65
 
66
  ddl_create = result.iloc[0,0]
67
  text = f"""### Instruction:
@@ -73,7 +77,7 @@ def text2sql(dataset_name, query_input):
73
  ### Question:
74
  {query_input}
75
 
76
- ### Response (use duckdb shorthand if possible):
77
  """
78
 
79
  print(text)
@@ -81,7 +85,17 @@ def text2sql(dataset_name, query_input):
81
  # sql_output = query_remote_model(text)
82
 
83
  sql_output = query_local_model(text)
84
- return sql_output
 
 
 
 
 
 
 
 
 
 
85
 
86
 
87
  with gr.Blocks() as demo:
@@ -92,5 +106,6 @@ with gr.Blocks() as demo:
92
  query_input = gr.Textbox("How many rows there are?", label="Ask something about your data")
93
  btn = gr.Button("Generate SQL")
94
  query_output = gr.Textbox(label="Output SQL", interactive= False)
95
- btn.click(text2sql, inputs=[dataset_name, query_input], outputs=query_output)
 
96
  demo.launch()
 
6
  from huggingface_hub import HfApi
7
  from huggingface_hub.utils import logging
8
  from llama_cpp import Llama
9
+ import pandas as pd
10
 
11
  load_dotenv()
12
 
 
25
  logger = logging.get_logger(__name__)
26
  client = Client(headers=headers)
27
  api = HfApi(token=HF_TOKEN)
28
+
29
+ print("About to load DuckDB-NSQL-7B model")
30
  llama = Llama(
31
  model_path="DuckDB-NSQL-7B-v0.1-q8_0.gguf",
32
  n_ctx=2048,
33
  )
34
+ print("DuckDB-NSQL-7B model has been loaded")
35
 
36
  def get_first_parquet(dataset: str):
37
  resp = client.get(f"{BASE_DATASETS_SERVER_URL}/parquet?dataset={dataset}")
 
63
  print(first_parquet_url)
64
  con = duckdb.connect()
65
  con.execute("INSTALL 'httpfs'; LOAD httpfs;")
66
+ # could get from parquet instead?
67
  con.execute(f"CREATE TABLE data as SELECT * FROM '{first_parquet_url}' LIMIT 1;")
68
  result = con.sql("SELECT sql FROM duckdb_tables() where table_name ='data';").df()
 
69
 
70
  ddl_create = result.iloc[0,0]
71
  text = f"""### Instruction:
 
77
  ### Question:
78
  {query_input}
79
 
80
+ ### Response (use duckdb shorthand if possible) replace table name with {first_parquet_url} in the generated sql query:
81
  """
82
 
83
  print(text)
 
85
  # sql_output = query_remote_model(text)
86
 
87
  sql_output = query_local_model(text)
88
+
89
+ try:
90
+ query_result = con.sql(sql_output).df()
91
+ except Exception as error:
92
+ query_result = pd.DataFrame([{"error": f"❌ Could not execute SQL query {error=}"}])
93
+ finally:
94
+ con.close()
95
+ return {
96
+ query_output:sql_output,
97
+ df:query_result
98
+ }
99
 
100
 
101
  with gr.Blocks() as demo:
 
106
  query_input = gr.Textbox("How many rows there are?", label="Ask something about your data")
107
  btn = gr.Button("Generate SQL")
108
  query_output = gr.Textbox(label="Output SQL", interactive= False)
109
+ df = gr.DataFrame(datatype="markdown")
110
+ btn.click(text2sql, inputs=[dataset_name, query_input], outputs=[query_output,df])
111
  demo.launch()