Add a pre-filter temperature (temperature_a); fix some bugs when top_p=1
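According to the new slider labels, temperature_a is a "过滤前温度" (pre-filter temperature): it rescales the logits before the softmax and before the top_p / top_k / typical_p filtering, while the existing temperature ("过滤后温度", post-filter temperature) is applied afterwards. A rough, self-contained sketch of that ordering follows. Only the temperature_a division and the "if top_p < 1" guard are taken from the utils.py hunks below; the final post-filter temperature step is an assumption about code not shown in this diff, and top_k / typical_p filtering are omitted for brevity.

import numpy as np
import torch
from torch.nn import functional as F

def sample_sketch(logits, temperature_a=1.0, temperature=1.0, top_p=0.85):
    # Illustrative ordering only, not the repository's sample_logits.
    if temperature_a != 1.0:
        logits = logits / temperature_a            # pre-filter temperature (new in this commit)
    probs = F.softmax(logits.float(), dim=-1).cpu().numpy()

    if top_p < 1:                                  # top_p = 1 now bypasses the nucleus cutoff
        sorted_probs = np.sort(probs)[::-1]
        cumulative = np.cumsum(sorted_probs)
        cutoff = float(sorted_probs[np.argmax(cumulative >= top_p)])
        probs[probs < cutoff] = 0

    if temperature != 1.0:                         # assumed post-filter temperature step
        probs = probs ** (1.0 / temperature)

    probs = probs.astype(np.float64)
    probs /= probs.sum()
    return int(np.random.choice(len(probs), p=probs))

# Example with the new slider defaults: pre-filter 0.6, post-filter 1.2, no nucleus cutoff.
# token = sample_sketch(torch.randn(65536), temperature_a=0.6, temperature=1.2, top_p=1.0)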
app.py
CHANGED
@@ -45,6 +45,7 @@ pipeline = PIPELINE(model, "20B_tokenizer.json")
 def infer(
         ctx,
         token_count=10,
+        temperature_a=1.0,
         temperature=0.7,
         top_p=1.0,
         top_k=50,
@@ -55,6 +56,7 @@ def infer(
     args = PIPELINE_ARGS(temperature = max(0.2, float(temperature)), top_p = float(top_p), top_k=int(top_k), typical_p=float(typical_p),
                      alpha_frequency = countPenalty,
                      alpha_presence = presencePenalty,
+                     temperature_a = max(0.2, float(temperature_a)),
                      token_ban = [0], # ban the generation of some tokens
                      token_stop = []) # stop generation whenever you see any token here

@@ -81,7 +83,7 @@ def infer(
         for n in occurrence:
             out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)

-        token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p, top_k=args.top_k, typical_p=args.typical_p)
+        token = pipeline.sample_logits(out, temperature=args.temperature, top_p=args.top_p, top_k=args.top_k, typical_p=args.typical_p, temperature_a=args.temperature_a)
         if token in args.token_stop:
             break
         all_tokens += [token]
@@ -106,7 +108,7 @@ examples = [

 女招待: 是吗。那真是太好了

-{我因为撰稿的需要,而造访了这间位于信州山间的温泉宿驿。}""", 200, 0.…
+{我因为撰稿的需要,而造访了这间位于信州山间的温泉宿驿。}""", 200, 0.6, 1.2, 1.0, 0, 0.4, 0.1, 0.1],
 ["""{我叫嘉祥,家里经营着一家点心店。
 为了追求独当一面的目标,我离开了老家,开了一家名为"La Soleil"的新糕点店。
 原本想独自一人打拼,却没想到,在搬家的箱子中发现了意想不到的人。
@@ -118,7 +120,7 @@ examples = [

 嘉祥: 昨天才在家里见过面不是吗。

-巧克力: 这个……话是这么说没错啦……啊哈哈……""", 200, 0.…
+巧克力: 这个……话是这么说没错啦……啊哈哈……""", 200, 0.6, 1.2, 1.0, 0, 0.4, 0.1, 0.1],
 ["""莲华: 你的目的,就是这个万华镜吧?

 {莲华拿出了万华镜。}
@@ -134,7 +136,7 @@ examples = [

 深见: 请让我好好看看……

-{我刚想把手伸过去,莲华就一下子把它收了回去。}""", 200, 0.…
+{我刚想把手伸过去,莲华就一下子把它收了回去。}""", 200, 0.6, 1.2, 1.0, 0, 0.4, 0.1, 0.1],
 ["""{我叫嘉祥,有两只可爱的猫娘,名字分别是巧克力和香草。}

 嘉祥: 偶尔来一次也不错。
@@ -153,14 +155,14 @@ examples = [

 {我摸摸各自占据住我左右两腿的两颗猫头。}

-嘉祥: 开心归开心,拜托你们俩别一直乱动啊,很危险的。""", 200, 0.…
+嘉祥: 开心归开心,拜托你们俩别一直乱动啊,很危险的。""", 200, 0.6, 1.2, 1.0, 0, 0.4, 0.1, 0.1],
 ["""{我叫嘉祥,在日本开了一家名为La Soleil的糕点店,同时也是猫娘巧克力的主人。
 巧克力是非常聪明的猫娘,她去国外留学了一段时间,向Alice教授学习,拿到了计算机博士学位。
 她会各种程序语言,对世界各地的风土人情都十分了解,也掌握了很多数学、物理知识。}

 嘉祥: 很棒啊,巧克力!你真是懂不少东西呢!

-巧克力: 因为巧克力是主人的最佳拍挡兼猫娘情人呀♪为了主人,巧克力会解决各种问题!""", 200, 0.…
+巧克力: 因为巧克力是主人的最佳拍挡兼猫娘情人呀♪为了主人,巧克力会解决各种问题!""", 200, 0.6, 1.2, 1.0, 0, 0.4, 0.1, 0.1],
 ]

 iface = gr.Interface(
@@ -176,10 +178,11 @@ iface = gr.Interface(

 巧克力: 因为巧克力是主人的最佳拍挡兼猫娘情人呀♪为了主人,巧克力会解决各种问题!"""), # prompt
         gr.Slider(10, 2000, step=10, value=200, label="token_count 每次生成的长度"), # token_count
-        gr.Slider(0.2, 2.0, step=0.1, value=0.…
+        gr.Slider(0.2, 2.0, step=0.1, value=0.6, label="temperature_a 过滤前温度,高则变化丰富,低则保守求稳"), # temperature_a
+        gr.Slider(0.2, 2.0, step=0.1, value=1.2, label="temperature 过滤后温度,高则变化丰富,低则保守求稳"), # temperature
         gr.Slider(0.0, 1.0, step=0.05, value=1.0, label="top_p 默认1.0,高则标新立异,低则循规蹈矩"), # top_p
         gr.Slider(0, 500, step=1, value=0, label="top_k 默认0(不过滤),0以上时高则标新立异,低则循规蹈矩"), # top_k
-        gr.Slider(0.05, 1.0, step=0.05, value=…
+        gr.Slider(0.05, 1.0, step=0.05, value=0.4, label="typical_p 默认0.4,高则保留模型天性,低则试图贴近人类典型习惯"), # typical_p
         gr.Slider(0.0, 1.0, step=0.1, value=0.1, label="presencePenalty 默认0.0,避免写过的类似字"), # presencePenalty
         gr.Slider(0.0, 1.0, step=0.1, value=0.1, label="countPenalty 默认0.0,额外避免写过多次的类似字"), # countPenalty
     ],
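Each example row now supplies its values in the same order as the updated list of gr.Interface inputs: prompt, token_count, temperature_a, temperature, top_p, top_k, typical_p, presencePenalty, countPenalty. A small sanity-check sketch of that mapping; the parameter names after top_k are inferred from the sliders and from PIPELINE_ARGS, since infer()'s full signature is not visible in this diff.

# Tail of one updated example row, matched against the new input order.
example_tail = dict(
    token_count=200,
    temperature_a=0.6,    # new pre-filter temperature slider
    temperature=1.2,      # post-filter temperature slider
    top_p=1.0,
    top_k=0,
    typical_p=0.4,
    presencePenalty=0.1,
    countPenalty=0.1,
)
assert list(example_tail.values()) == [200, 0.6, 1.2, 1.0, 0, 0.4, 0.1, 0.1]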
utils.py
CHANGED
@@ -4,13 +4,14 @@ import torch
 from torch.nn import functional as F

 class PIPELINE_ARGS():
-    def __init__(self, temperature=1.0, top_p=0.85, top_k=0, typical_p=1, alpha_frequency=0.2, alpha_presence=0.2, token_ban=[], token_stop=[], chunk_len=256):
+    def __init__(self, temperature=1.0, top_p=0.85, top_k=0, typical_p=1, alpha_frequency=0.2, alpha_presence=0.2, temperature_a=1.0, token_ban=[], token_stop=[], chunk_len=256):
         self.temperature = temperature
         self.top_p = top_p
         self.top_k = top_k
         self.typical_p = typical_p
         self.alpha_frequency = alpha_frequency # Frequency Penalty (as in GPT-3)
         self.alpha_presence = alpha_presence # Presence Penalty (as in GPT-3)
+        self.temperature_a = temperature_a
         self.token_ban = token_ban # ban the generation of some tokens
         self.token_stop = token_stop # stop generation whenever you see any token here
         self.chunk_len = chunk_len # split input into chunks to save VRAM (shorter -> slower)
@@ -44,7 +45,9 @@ class PIPELINE():
     def decode(self, x):
         return self.tokenizer.decode(x)

-    def sample_logits(self, logits, temperature=1.0, top_p=0.85, top_k=0, typical_p=1):
+    def sample_logits(self, logits, temperature=1.0, top_p=0.85, top_k=0, typical_p=1, temperature_a=1.0):
+        if temperature_a != 1.0:
+            logits = logits / temperature_a
         probs = F.softmax(logits.float(), dim=-1)
         top_k = int(top_k)
         if typical_p<1:
@@ -54,14 +57,15 @@ class PIPELINE():
             sorted_typical_scores = typical_scores[typical_sorted_ids]
             typical_sorted_probs = probs[typical_sorted_ids]
             cum_typical_sorted_probs = torch.cumsum(typical_sorted_probs, dim=-1).cpu().numpy()
-            typical_cutoff = float(sorted_typical_scores[np.argmax(cum_typical_sorted_probs …
+            typical_cutoff = float(sorted_typical_scores[np.argmax(cum_typical_sorted_probs >= typical_p)])
         if probs.device == torch.device('cpu'):
             probs = probs.numpy()
             sorted_ids = np.argsort(probs)
             sorted_probs = probs[sorted_ids][::-1]
             cumulative_probs = np.cumsum(sorted_probs)
-            cutoff = float(sorted_probs[np.argmax(cumulative_probs …
-            …
+            cutoff = float(sorted_probs[np.argmax(cumulative_probs >= top_p)])
+            if top_p < 1:
+                probs[probs < cutoff] = 0
             if top_k < len(probs) and top_k > 0:
                 probs[sorted_ids[:-top_k]] = 0
             if typical_p<1:
@@ -76,8 +80,9 @@ class PIPELINE():
             sorted_probs = probs[sorted_ids]
             sorted_probs = torch.flip(sorted_probs, dims=(0,))
             cumulative_probs = torch.cumsum(sorted_probs, dim=-1).cpu().numpy()
-            cutoff = float(sorted_probs[np.argmax(cumulative_probs …
-            …
+            cutoff = float(sorted_probs[np.argmax(cumulative_probs >= top_p)])
+            if top_p < 1:
+                probs[probs < cutoff] = 0
             if top_k < len(probs) and top_k > 0:
                 probs[sorted_ids[:-top_k]] = 0
             if typical_p<1:
@@ -106,7 +111,7 @@ class PIPELINE():
                 out[n] -= (args.alpha_presence + occurrence[n] * args.alpha_frequency)

             # sampler
-            token = self.sample_logits(out, temperature=args.temperature, top_p=args.top_p, top_k=args.top_k, typical_p=args.typical_p)
+            token = self.sample_logits(out, temperature=args.temperature, top_p=args.top_p, top_k=args.top_k, typical_p=args.typical_p, temperature_a=args.temperature_a)
             if token in args.token_stop:
                 break
             all_tokens += [token]
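On the top_p = 1 fix: the removed cutoff lines are truncated in this view, so the exact old comparison is unknown, but the guarded version points at a plausible failure mode. If the cumulative sum of the sorted probabilities never reaches top_p (floating-point rounding can keep it just below 1.0 even for a full softmax distribution), np.argmax over an all-False mask returns 0, the cutoff becomes the largest probability, and every other token gets zeroed, collapsing sampling to near-greedy. The new "if top_p < 1" guard skips the cutoff entirely in that case, in both the CPU and GPU branches of sample_logits. A small, deliberately exaggerated demonstration:

import numpy as np

# A distribution whose cumulative sum stays below 1.0; real softmax outputs can
# land slightly below 1.0 through rounding, this vector just makes it deterministic.
probs = np.array([0.5, 0.25, 0.125, 0.0624], dtype=np.float32)
sorted_probs = np.sort(probs)[::-1]
cumulative = np.cumsum(sorted_probs)          # [0.5, 0.75, 0.875, 0.9374]; never reaches 1.0

top_p = 1.0
cutoff = float(sorted_probs[np.argmax(cumulative >= top_p)])   # all-False mask -> index 0 -> cutoff = 0.5

unguarded = probs.copy()
unguarded[unguarded < cutoff] = 0             # without the guard: collapses to [0.5, 0, 0, 0]

guarded = probs.copy()
if top_p < 1:                                 # guard added in this commit
    guarded[guarded < cutoff] = 0             # skipped, distribution left intact

print(unguarded)                              # [0.5 0.  0.  0. ]
print(guarded)                                # [0.5    0.25   0.125  0.0624]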