Update README.md
README.md CHANGED
@@ -3,46 +3,16 @@ license: apache-2.0
 tags:
 - moe
 - merge
-- mergekit
-- lazymergekit
-- cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser
-- berkeley-nest/Starling-LM-7B-alpha
 ---

 # megatron_v1

-megatron_v1 is a Mixture of Experts (MoE) made …
-
-* [berkeley-nest/Starling-LM-7B-alpha](https://huggingface.co/berkeley-nest/Starling-LM-7B-alpha)
-
-## 🧩 Configuration
-
-```yaml
-base_model: openchat/openchat-3.5-0106
-gate_mode: hidden
-dtype: bfloat16
-experts:
-  - source_model: cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser
-    positive_prompts:
-    - "Mathematics"
-    - "Physics"
-    negative_prompts:
-    - "History"
-    - "Philosophy"
-  - source_model: berkeley-nest/Starling-LM-7B-alpha
-    positive_prompts:
-    - "retrieval"
-    - "life science"
-    negative_prompts:
-    - "Education"
-    - "Law"
-```
+megatron_v1 is a Mixture of Experts (MoE) made of Mistral models.
+

 ## 💻 Usage

 ```python
-!pip install -qU transformers bitsandbytes accelerate
-
 from transformers import AutoTokenizer
 import transformers
 import torch
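
For reference on the removed 🧩 Configuration block: that YAML is a mergekit MoE recipe, naming the shared base model (openchat/openchat-3.5-0106), the hidden-state gating mode, the dtype, and the positive/negative routing prompts for each expert. A minimal sketch of how such a recipe is typically turned into a merged model with mergekit follows; the file name, output directory, and exact command line are assumptions, not taken from this card, and may vary across mergekit versions.

```python
# Hypothetical sketch, not part of the original card: build the MoE from a
# mergekit recipe like the removed Configuration YAML. Assumes that YAML has
# been saved as config.yaml; the output directory name is arbitrary.
!pip install -qU mergekit

# mergekit's MoE entry point reads the recipe and writes the merged model.
!mergekit-moe config.yaml megatron_v1
```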
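
The 💻 Usage snippet is cut off at the hunk boundary in both versions, ending at the imports. A hedged continuation in the usual transformers text-generation style is sketched below; the Hub repo id is a placeholder and the prompt and generation settings are illustrative, not taken from the card.

```python
# Hedged continuation of the truncated Usage snippet. "<username>/megatron_v1"
# is a placeholder repo id, not the card's actual Hub path.
from transformers import AutoTokenizer
import transformers
import torch

model_id = "<username>/megatron_v1"
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Standard text-generation pipeline; bfloat16 matches the dtype in the
# removed merge recipe.
pipeline = transformers.pipeline(
    "text-generation",
    model=model_id,
    model_kwargs={"torch_dtype": torch.bfloat16},
)

# Build a chat-formatted prompt and generate a reply.
messages = [{"role": "user", "content": "What is a Mixture of Experts model?"}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7)
print(outputs[0]["generated_text"])
```

If the removed `bitsandbytes`/`accelerate` install line is kept, the same pipeline can instead load the weights in 4-bit by passing `{"load_in_4bit": True, "device_map": "auto"}` in `model_kwargs`.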