Miaoran000 committed on
Commit
d4bf693
·
1 Parent(s): 1557ad2
Files changed (2) hide show
  1. requirements.txt +4 -2
  2. src/backend/model_operations.py +24 -16
requirements.txt CHANGED
@@ -12,11 +12,13 @@ pandas==2.0.0
12
  python-dateutil==2.8.2
13
  requests==2.28.2
14
  tqdm==4.65.0
15
- transformers==4.35.2
16
  tokenizers>=0.15.0
17
  sentence-transformers==2.2.2
18
  google-generativeai
19
  replicate
20
  anthropic
21
  openai
22
- cohere
 
 
 
12
  python-dateutil==2.8.2
13
  requests==2.28.2
14
  tqdm==4.65.0
15
+ transformers
16
  tokenizers>=0.15.0
17
  sentence-transformers==2.2.2
18
  google-generativeai
19
  replicate
20
  anthropic
21
  openai
22
+ cohere
23
+ mistralai
24
+ peft
src/backend/model_operations.py CHANGED
@@ -9,19 +9,17 @@ import json
9
  import numpy as np
10
  import pandas as pd
11
  import spacy
12
- from sentence_transformers import CrossEncoder
13
  import litellm
14
  from tqdm import tqdm
15
- from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForTokenClassification
 
16
  import torch
17
  import cohere
18
  from openai import OpenAI
19
  import anthropic
20
  import replicate
21
- # pip install -U google-generativeai
22
  import google.generativeai as genai
23
- from mistralai.client import MistralClient
24
- from mistralai.models.chat_completion import ChatMessage
25
 
26
 
27
  import src.backend.util as util
@@ -330,15 +328,21 @@ class SummaryGenerator:
330
 
331
  elif 'mistral-large' in self.model_id.lower():
332
  api_key = os.environ["MISTRAL_API_KEY"]
333
- client = MistralClient(api_key=api_key)
334
 
335
  messages = [
336
- ChatMessage(role="system", content=system_prompt),
337
- ChatMessage(role="user", content=user_prompt)
 
 
 
 
 
 
338
  ]
339
 
340
  # No streaming
341
- chat_response = client.chat(
342
  model=self.model_id,
343
  messages=messages,
344
  )
@@ -373,6 +377,7 @@ class SummaryGenerator:
373
  self.local_pipeline = pipeline(
374
  "text-generation",
375
  model=self.model_id,
 
376
  model_kwargs={"torch_dtype": torch.bfloat16},
377
  device_map="auto",
378
  trust_remote_code=True
@@ -384,7 +389,8 @@ class SummaryGenerator:
384
  self.local_model = AutoModelForCausalLM.from_pretrained(self.model_id,
385
  torch_dtype=torch.bfloat16,
386
  attn_implementation="flash_attention_2",
387
- device_map="auto")
 
388
  else:
389
  self.local_model = AutoModelForCausalLM.from_pretrained(self.model_id, trust_remote_code=True, device_map="auto", torch_dtype="auto")
390
  # print(self.local_model.device)
@@ -401,7 +407,7 @@ class SummaryGenerator:
401
  outputs = self.local_pipeline(
402
  messages,
403
  max_new_tokens=250,
404
- temperature=0.0,
405
  do_sample=False
406
  )
407
  result = outputs[0]["generated_text"][-1]['content']
@@ -434,15 +440,16 @@ class SummaryGenerator:
434
  input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
435
  with torch.no_grad():
436
  outputs = self.local_model.generate(**input_ids, max_new_tokens=250, do_sample=True, temperature=0.01, pad_token_id=self.tokenizer.eos_token_id)
437
- if 'glm' in self.model_id.lower():
438
- outputs = outputs[:, input_ids['input_ids'].shape[1]:]
 
439
  result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
440
  if 'gemma-2' in self.model_id.lower():
441
  result = result.split(user_prompt + '\nmodel')[-1].strip()
442
-
443
  elif 'intel' in self.model_id.lower():
444
  result = result.split("### Assistant:\n")[-1]
445
-
 
446
  else:
447
  # print(prompt)
448
  # print('-'*50)
@@ -496,7 +503,8 @@ class EvaluationModel:
496
  Args:
497
  model_path (str): Path to the CrossEncoder model.
498
  """
499
- self.model = AutoModelForTokenClassification.from_pretrained(model_path)
 
500
  self.device = device
501
  self.model.to(self.device)
502
  self.scores = []
 
9
  import numpy as np
10
  import pandas as pd
11
  import spacy
 
12
  import litellm
13
  from tqdm import tqdm
14
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline, AutoModelForTokenClassification, AutoConfig
15
+ from peft import PeftModel
16
  import torch
17
  import cohere
18
  from openai import OpenAI
19
  import anthropic
20
  import replicate
 
21
  import google.generativeai as genai
22
+ from mistralai import Mistral
 
23
 
24
 
25
  import src.backend.util as util
 
328
 
329
  elif 'mistral-large' in self.model_id.lower():
330
  api_key = os.environ["MISTRAL_API_KEY"]
331
+ client = Mistral(api_key=api_key)
332
 
333
  messages = [
334
+ {
335
+ "role":"system",
336
+ "content":system_prompt
337
+ },
338
+ {
339
+ "role":"user",
340
+ "content":user_prompt
341
+ }
342
  ]
343
 
344
  # No streaming
345
+ chat_response = client.chat.complete(
346
  model=self.model_id,
347
  messages=messages,
348
  )
 
377
  self.local_pipeline = pipeline(
378
  "text-generation",
379
  model=self.model_id,
380
+ tokenizer=AutoTokenizer.from_pretrained(self.model_id),
381
  model_kwargs={"torch_dtype": torch.bfloat16},
382
  device_map="auto",
383
  trust_remote_code=True
 
389
  self.local_model = AutoModelForCausalLM.from_pretrained(self.model_id,
390
  torch_dtype=torch.bfloat16,
391
  attn_implementation="flash_attention_2",
392
+ device_map="auto",
393
+ use_mamba_kernels=False)
394
  else:
395
  self.local_model = AutoModelForCausalLM.from_pretrained(self.model_id, trust_remote_code=True, device_map="auto", torch_dtype="auto")
396
  # print(self.local_model.device)
 
407
  outputs = self.local_pipeline(
408
  messages,
409
  max_new_tokens=250,
410
+ # return_full_text=False,
411
  do_sample=False
412
  )
413
  result = outputs[0]["generated_text"][-1]['content']
 
440
  input_ids = self.tokenizer(prompt, return_tensors="pt").to(self.device)
441
  with torch.no_grad():
442
  outputs = self.local_model.generate(**input_ids, max_new_tokens=250, do_sample=True, temperature=0.01, pad_token_id=self.tokenizer.eos_token_id)
443
+ if 'glm' in self.model_id.lower():
444
+ outputs = outputs[:, input_ids['input_ids'].shape[1]:]
445
+
446
  result = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
447
  if 'gemma-2' in self.model_id.lower():
448
  result = result.split(user_prompt + '\nmodel')[-1].strip()
 
449
  elif 'intel' in self.model_id.lower():
450
  result = result.split("### Assistant:\n")[-1]
451
+ elif 'jamba' in self.model_id.lower():
452
+ result = result.split(messages[-1]['content'])[1].strip()
453
  else:
454
  # print(prompt)
455
  # print('-'*50)
 
503
  Args:
504
  model_path (str): Path to the CrossEncoder model.
505
  """
506
+ config = AutoConfig.from_pretrained('google/flan-t5-large')
507
+ self.model = AutoModelForTokenClassification.from_pretrained(model_path, config=config)
508
  self.device = device
509
  self.model.to(self.device)
510
  self.scores = []