Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Miaoran000
committed on
Commit
·
d4bf693
1
Parent(s):
1557ad2
minor fix
Browse files
- requirements.txt +4 -2
- src/backend/model_operations.py +24 -16
requirements.txt
CHANGED
@@ -12,11 +12,13 @@ pandas==2.0.0
|
|
12 |
python-dateutil==2.8.2
|
13 |
requests==2.28.2
|
14 |
tqdm==4.65.0
|
15 |
-
transformers
|
16 |
tokenizers>=0.15.0
|
17 |
sentence-transformers==2.2.2
|
18 |
google-generativeai
|
19 |
replicate
|
20 |
anthropic
|
21 |
openai
|
22 |
-
cohere
|
|
|
|
|
|
12 |
python-dateutil==2.8.2
|
13 |
requests==2.28.2
|
14 |
tqdm==4.65.0
|
15 |
+
transformers
|
16 |
tokenizers>=0.15.0
|
17 |
sentence-transformers==2.2.2
|
18 |
google-generativeai
|
19 |
replicate
|
20 |
anthropic
|
21 |
openai
|
22 |
+
cohere
|
23 |
+
mistralai
|
24 |
+
peft
|
src/backend/model_operations.py
CHANGED
@@ -9,19 +9,17 @@ import json
|
|
9 |
import numpy as np
|
10 |
import pandas as pd
|
11 |
import spacy
|
12 |
-
from sentence_transformers import CrossEncoder
|
13 |
import litellm
|
14 |
from tqdm import tqdm
|
15 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForTokenClassification
|
|
|
16 |
import torch
|
17 |
import cohere
|
18 |
from openai import OpenAI
|
19 |
import anthropic
|
20 |
import replicate
|
21 |
-
# pip install -U google-generativeai
|
22 |
import google.generativeai as genai
|
23 |
-
from mistralai
|
24 |
-
from mistralai.models.chat_completion import ChatMessage
|
25 |
|
26 |
|
27 |
import src.backend.util as util
|
@@ -330,15 +328,21 @@ class SummaryGenerator:
|
|
330 |
|
331 |
elif 'mistral-large' in self.model_id.lower():
|
332 |
api_key = os.environ["MISTRAL_API_KEY"]
|
333 |
-
client =
|
334 |
|
335 |
messages = [
|
336 |
-
|
337 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
338 |
]
|
339 |
|
340 |
# No streaming
|
341 |
-
chat_response = client.chat(
|
342 |
model=self.model_id,
|
343 |
messages=messages,
|
344 |
)
|
@@ -373,6 +377,7 @@ class SummaryGenerator:
|
|
373 |
self.local_pipeline = pipeline(
|
374 |
"text-generation",
|
375 |
model=self.model_id,
|
|
|
376 |
model_kwargs={"torch_dtype": torch.bfloat16},
|
377 |
device_map="auto",
|
378 |
trust_remote_code=True
|
@@ -384,7 +389,8 @@ class SummaryGenerator:
|
|
384 |
self.local_model = AutoModelForCausalLM.from_pretrained(self.model_id,
|
385 |
torch_dtype=torch.bfloat16,
|
386 |
attn_implementation="flash_attention_2",
|
387 |
-
device_map="auto"
|
|
|
388 |
else:
|
389 |
self.local_model = AutoModelForCausalLM.from_pretrained(self.model_id, trust_remote_code=True, device_map="auto", torch_dtype="auto")
|
390 |
# print(self.local_model.device)
|
@@ -401,7 +407,7 @@ class SummaryGenerator:
|
|
401 |
outputs = self.local_pipeline(
|
402 |
messages,
|
403 |
max_new_tokens=250,
|
404 |
-
|
405 |
do_sample=False
|
406 |
)
|
407 |
result = outputs[0]["generated_text"][-1]['content']
|
@@ -434,15 +440,16 @@ class SummaryGenerator:
|
|
434 |
input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
|
435 |
with torch.no_grad():
|
436 |
outputs = self.local_model.generate(**input_ids, max_new_tokens=250, do_sample=True, temperature=0.01, pad_token_id=self.tokenizer.eos_token_id)
|
437 |
-
|
438 |
-
|
|
|
439 |
result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
440 |
if 'gemma-2' in self.model_id.lower():
|
441 |
result = result.split(user_prompt + '\nmodel')[-1].strip()
|
442 |
-
|
443 |
elif 'intel' in self.model_id.lower():
|
444 |
result = result.split("### Assistant:\n")[-1]
|
445 |
-
|
|
|
446 |
else:
|
447 |
# print(prompt)
|
448 |
# print('-'*50)
|
@@ -496,7 +503,8 @@ class EvaluationModel:
|
|
496 |
Args:
|
497 |
model_path (str): Path to the CrossEncoder model.
|
498 |
"""
|
499 |
-
|
|
|
500 |
self.device = device
|
501 |
self.model.to(self.device)
|
502 |
self.scores = []
|
|
|
9 |
import numpy as np
|
10 |
import pandas as pd
|
11 |
import spacy
|
|
|
12 |
import litellm
|
13 |
from tqdm import tqdm
|
14 |
+
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForTokenClassification, AutoConfig
|
15 |
+
from peft import PeftModel
|
16 |
import torch
|
17 |
import cohere
|
18 |
from openai import OpenAI
|
19 |
import anthropic
|
20 |
import replicate
|
|
|
21 |
import google.generativeai as genai
|
22 |
+
from mistralai import Mistral
|
|
|
23 |
|
24 |
|
25 |
import src.backend.util as util
|
|
|
328 |
|
329 |
elif 'mistral-large' in self.model_id.lower():
|
330 |
api_key = os.environ["MISTRAL_API_KEY"]
|
331 |
+
client = Mistral(api_key=api_key)
|
332 |
|
333 |
messages = [
|
334 |
+
{
|
335 |
+
"role":"system",
|
336 |
+
"content":system_prompt
|
337 |
+
},
|
338 |
+
{
|
339 |
+
"role":"user",
|
340 |
+
"content":user_prompt
|
341 |
+
}
|
342 |
]
|
343 |
|
344 |
# No streaming
|
345 |
+
chat_response = client.chat.complete(
|
346 |
model=self.model_id,
|
347 |
messages=messages,
|
348 |
)
|
|
|
377 |
self.local_pipeline = pipeline(
|
378 |
"text-generation",
|
379 |
model=self.model_id,
|
380 |
+
tokenizer=AutoTokenizer.from_pretrained(self.model_id),
|
381 |
model_kwargs={"torch_dtype": torch.bfloat16},
|
382 |
device_map="auto",
|
383 |
trust_remote_code=True
|
|
|
389 |
self.local_model = AutoModelForCausalLM.from_pretrained(self.model_id,
|
390 |
torch_dtype=torch.bfloat16,
|
391 |
attn_implementation="flash_attention_2",
|
392 |
+
device_map="auto",
|
393 |
+
use_mamba_kernels=False)
|
394 |
else:
|
395 |
self.local_model = AutoModelForCausalLM.from_pretrained(self.model_id, trust_remote_code=True, device_map="auto", torch_dtype="auto")
|
396 |
# print(self.local_model.device)
|
|
|
407 |
outputs = self.local_pipeline(
|
408 |
messages,
|
409 |
max_new_tokens=250,
|
410 |
+
# return_full_text=False,
|
411 |
do_sample=False
|
412 |
)
|
413 |
result = outputs[0]["generated_text"][-1]['content']
|
|
|
440 |
input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
|
441 |
with torch.no_grad():
|
442 |
outputs = self.local_model.generate(**input_ids, max_new_tokens=250, do_sample=True, temperature=0.01, pad_token_id=self.tokenizer.eos_token_id)
|
443 |
+
if 'glm' in self.model_id.lower():
|
444 |
+
outputs = outputs[:, input_ids['input_ids'].shape[1]:]
|
445 |
+
|
446 |
result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
|
447 |
if 'gemma-2' in self.model_id.lower():
|
448 |
result = result.split(user_prompt + '\nmodel')[-1].strip()
|
|
|
449 |
elif 'intel' in self.model_id.lower():
|
450 |
result = result.split("### Assistant:\n")[-1]
|
451 |
+
elif 'jamba' in self.model_id.lower():
|
452 |
+
result = result.split(messages[-1]['content'])[1].strip()
|
453 |
else:
|
454 |
# print(prompt)
|
455 |
# print('-'*50)
|
|
|
503 |
Args:
|
504 |
model_path (str): Path to the CrossEncoder model.
|
505 |
"""
|
506 |
+
config = AutoConfig.from_pretrained('google/flan-t5-large')
|
507 |
+
self.model = AutoModelForTokenClassification.from_pretrained(model_path, config=config)
|
508 |
self.device = device
|
509 |
self.model.to(self.device)
|
510 |
self.scores = []
|