asoria HF staff committed on
Commit
140f5d3
·
verified ·
1 Parent(s): 49e03fb

Disable local models

Browse files
Files changed (1) hide show
  1. app.py +19 -33
app.py CHANGED
@@ -4,10 +4,9 @@ import gradio as gr
4
  from dotenv import load_dotenv
5
  from httpx import Client
6
  from huggingface_hub import HfApi
7
- from huggingface_hub.utils import logging
8
- from llama_cpp import Llama
9
  import pandas as pd
10
- from transformers import pipeline
11
 
12
  load_dotenv()
13
 
@@ -23,19 +22,22 @@ headers = {
23
  "Content-Type": "application/json"
24
  }
25
 
26
- logger = logging.get_logger(__name__)
27
  client = Client(headers=headers)
28
  api = HfApi(token=HF_TOKEN)
29
 
30
- print("About to load DuckDB-NSQL-7B model")
31
- """
32
- llama = Llama(
33
- model_path="DuckDB-NSQL-7B-v0.1-q8_0.gguf",
34
- n_ctx=2048,
35
- )
36
- """
37
- pipe = pipeline("text-generation", model="motherduckdb/DuckDB-NSQL-7B-v0.1")
38
- print("DuckDB-NSQL-7B model has been loaded")
 
 
 
 
39
 
40
  def get_first_parquet(dataset: str):
41
  resp = client.get(f"{BASE_DATASETS_SERVER_URL}/parquet?dataset={dataset}")
@@ -51,17 +53,6 @@ def query_remote_model(text):
51
  pred = response.json()
52
  return pred[0]["generated_text"]
53
 
54
- def query_local_model_transformers(text):
55
- pred = pipe(text, max_length=1000)
56
- print(type(pred))
57
- print(pred)
58
- return pred[0]["generated_text"]
59
-
60
-
61
- def query_local_model(text):
62
- pred = llama(text, temperature=0.1, max_tokens=500)
63
- return pred["choices"][0]["text"]
64
-
65
 
66
  def text2sql(dataset_name, query_input):
67
  print(f"start text2sql for {dataset_name}")
@@ -73,10 +64,9 @@ def text2sql(dataset_name, query_input):
73
  print(first_parquet_url)
74
  con = duckdb.connect()
75
  con.execute("INSTALL 'httpfs'; LOAD httpfs;")
76
- # could get from parquet instead?
77
  con.execute(f"CREATE TABLE data as SELECT * FROM '{first_parquet_url}' LIMIT 1;")
78
  result = con.sql("SELECT sql FROM duckdb_tables() where table_name ='data';").df()
79
-
80
  ddl_create = result.iloc[0,0]
81
 
82
  text = f"""### Instruction:
@@ -92,12 +82,8 @@ def text2sql(dataset_name, query_input):
92
  ### Response (use duckdb shorthand if possible) replace table name with {first_parquet_url} in the generated sql query:
93
  """
94
 
95
- print(text)
96
-
97
  sql_output = query_remote_model(text)
98
 
99
- # sql_output = query_local_model_transformers(text)
100
-
101
  try:
102
  query_result = con.sql(sql_output).df()
103
  except Exception as error:
@@ -111,9 +97,9 @@ def text2sql(dataset_name, query_input):
111
 
112
 
113
  with gr.Blocks() as demo:
114
- gr.Markdown("# Talk to your dataset")
115
- gr.Markdown("This space shows how to talk to your datasets: Get a brief description, create SQL queries, and get results.")
116
- gr.Markdown("Generate SQL queries'")
117
  dataset_name = gr.Textbox("sksayril/medicine-info", label="Dataset Name")
118
  query_input = gr.Textbox("How many rows there are?", label="Ask something about your data")
119
  btn = gr.Button("Generate SQL")
 
4
  from dotenv import load_dotenv
5
  from httpx import Client
6
  from huggingface_hub import HfApi
7
+ #from llama_cpp import Llama
 
8
  import pandas as pd
9
+ #from transformers import pipeline
10
 
11
  load_dotenv()
12
 
 
22
  "Content-Type": "application/json"
23
  }
24
 
 
25
  client = Client(headers=headers)
26
  api = HfApi(token=HF_TOKEN)
27
 
28
+ # First approach: Use llama.cpp
29
+ #llama = Llama(model_path="DuckDB-NSQL-7B-v0.1-q8_0.gguf", n_ctx=2048)
30
+ #def query_local_model(text):
31
+ # pred = llama(text, temperature=0.1, max_tokens=500)
32
+ # return pred["choices"][0]["text"]
33
+
34
+
35
+ # Second approach: Use transformers -> Took too much time
36
+ #pipe = pipeline("text-generation", model="motherduckdb/DuckDB-NSQL-7B-v0.1")
37
+ #def query_local_model_transformers(text):
38
+ # pred = pipe(text, max_length=1000)
39
+ # return pred[0]["generated_text"]
40
+
41
 
42
  def get_first_parquet(dataset: str):
43
  resp = client.get(f"{BASE_DATASETS_SERVER_URL}/parquet?dataset={dataset}")
 
53
  pred = response.json()
54
  return pred[0]["generated_text"]
55
 
 
 
 
 
 
 
 
 
 
 
 
56
 
57
  def text2sql(dataset_name, query_input):
58
  print(f"start text2sql for {dataset_name}")
 
64
  print(first_parquet_url)
65
  con = duckdb.connect()
66
  con.execute("INSTALL 'httpfs'; LOAD httpfs;")
67
+ # could get from Parquet instead?
68
  con.execute(f"CREATE TABLE data as SELECT * FROM '{first_parquet_url}' LIMIT 1;")
69
  result = con.sql("SELECT sql FROM duckdb_tables() where table_name ='data';").df()
 
70
  ddl_create = result.iloc[0,0]
71
 
72
  text = f"""### Instruction:
 
82
  ### Response (use duckdb shorthand if possible) replace table name with {first_parquet_url} in the generated sql query:
83
  """
84
 
 
 
85
  sql_output = query_remote_model(text)
86
 
 
 
87
  try:
88
  query_result = con.sql(sql_output).df()
89
  except Exception as error:
 
97
 
98
 
99
  with gr.Blocks() as demo:
100
+ gr.Markdown("# Generate SQL queries based on a given text for your dataset")
101
+ gr.Markdown("This space showcase how to generate a SQL query from a text and get the result.")
102
+ gr.Markdown("Tech stack: duckdb and DuckDB-NSQL-7B model")
103
  dataset_name = gr.Textbox("sksayril/medicine-info", label="Dataset Name")
104
  query_input = gr.Textbox("How many rows there are?", label="Ask something about your data")
105
  btn = gr.Button("Generate SQL")