Llama-3-DARE-v3-8B / README.md
rmihaylov's picture
Update README.md
fe7f0a2 verified
metadata
# Model card metadata.
# Checkpoints this merge was built from (both also appear in the merge
# configuration further down in this README).
base_model:
  - meta-llama/Meta-Llama-3-8B
  - meta-llama/Meta-Llama-3-8B-Instruct
library_name: transformers
tags:
  - mergekit
  - merge
# The license is declared as "other" and then identified by name and link.
license: other
license_name: llama3
license_link: LICENSE
# NOTE(review): the prompt holds only the agreement heading — confirm whether
# the full license text is meant to follow it.
extra_gated_prompt: '### META LLAMA 3 COMMUNITY LICENSE AGREEMENT'

Llama-3-DARE-v3-8B

This is a merge of pre-trained language models created using mergekit.

Merge Details

Merge Method

This model was merged using the DARE TIES merge method, with meta-llama/Meta-Llama-3-8B as the base model.

Models Merged

The following models were included in the merge:

- meta-llama/Meta-Llama-3-8B-Instruct

Configuration

The following YAML configuration was used to produce this model:

# Top-level merge settings.
# Base checkpoint for the merge.
base_model:
  model:
    path: meta-llama/Meta-Llama-3-8B
# Data type named for the merge.
dtype: bfloat16
# DARE TIES merge method.
merge_method: dare_ties
parameters:
  # NOTE(review): these look like on/off flags written as floats
  # (1.0 = enabled, 0.0 = disabled) — confirm against the mergekit docs.
  int8_mask: 1.0
  normalize: 0.0
# A single slice; each of its sources covers layer_range [0, 32].
slices:
- sources:
  # Source: meta-llama/Meta-Llama-3-8B-Instruct.
  - layer_range: [0, 32]
    model:
      model:
        path: meta-llama/Meta-Llama-3-8B-Instruct
    parameters:
      # density: one value per filter — embed_token, model.norm, lm_head, then
      # layers.0. through layers.31. model.norm and lm_head share one value.
      # Presumably the fraction of delta parameters kept by DARE — TODO confirm.
      # Presumably the trailing dot keeps e.g. `layers.1.` from also matching
      # `layers.10.` — verify against mergekit's filter matching.
      density:
      - filter: embed_token
        value: 0.12392239047187575
      - filter: model.norm
        value: 0.9321540995757155
      - filter: lm_head
        value: 0.9321540995757155
      - filter: layers.0.
        value: 0.9790541113047215
      - filter: layers.1.
        value: 0.5837293662960215
      - filter: layers.2.
        value: 0.9412235995535374
      - filter: layers.3.
        value: 0.31233149627589435
      - filter: layers.4.
        value: 0.8429344053665633
      - filter: layers.5.
        value: 0.6736586892578483
      - filter: layers.6.
        value: 0.24511379602231775
      - filter: layers.7.
        value: 0.9579106307398759
      - filter: layers.8.
        value: 0.763438755789315
      - filter: layers.9.
        value: 0.9682444116383796
      - filter: layers.10.
        value: 0.08453321074167956
      - filter: layers.11.
        value: 0.7964240843030714
      - filter: layers.12.
        value: 0.33878902628372387
      - filter: layers.13.
        value: 0.8458690962458848
      - filter: layers.14.
        value: 0.1052429440590172
      - filter: layers.15.
        value: 0.7623565162481113
      - filter: layers.16.
        value: 0.9707532532287503
      - filter: layers.17.
        value: 0.12523916859700104
      - filter: layers.18.
        value: 0.8415224301240337
      - filter: layers.19.
        value: 0.12872802862625543
      - filter: layers.20.
        value: 0.5529482316673654
      - filter: layers.21.
        value: 0.09282157218446654
      - filter: layers.22.
        value: 0.8370536041906024
      - filter: layers.23.
        value: 0.9175102292532279
      - filter: layers.24.
        value: 0.8983418171724273
      - filter: layers.25.
        value: 0.8136717935920286
      - filter: layers.26.
        value: 0.05054222298359671
      - filter: layers.27.
        value: 0.869544796603939
      - filter: layers.28.
        value: 0.04716191274361657
      - filter: layers.29.
        value: 0.13032011470396976
      - filter: layers.30.
        value: 0.19116844757457122
      - filter: layers.31.
        value: 0.1455500526734667
      # weight: one value per filter, covering the same tensors as density.
      # Presumably this source's relative contribution to the merge — TODO confirm.
      # NOTE(review): the norm filter is spelled `model.norm.` here (trailing
      # dot) but `model.norm` under density — confirm this is intentional.
      weight:
      - filter: embed_token
        value: 0.12232308541622408
      - filter: model.norm.
        value: 0.7266901175725669
      - filter: lm_head
        value: 0.7266901175725669
      - filter: layers.0.
        value: 0.8207345096435786
      - filter: layers.1.
        value: 0.9504884225844141
      - filter: layers.2.
        value: 0.7328920145925348
      - filter: layers.3.
        value: 0.6736895869883676
      - filter: layers.4.
        value: 0.7970121175937948
      - filter: layers.5.
        value: 0.9789312914172503
      - filter: layers.6.
        value: 0.962551880054289
      - filter: layers.7.
        value: 0.9561739657469092
      - filter: layers.8.
        value: 0.8536201095014567
      - filter: layers.9.
        value: 0.9376890733815005
      - filter: layers.10.
        value: 0.9551398977410172
      - filter: layers.11.
        value: 0.9967262117722387
      - filter: layers.12.
        value: 0.7701592243202565
      - filter: layers.13.
        value: 0.6842573291853765
      - filter: layers.14.
        value: 0.798376050387875
      - filter: layers.15.
        value: 0.801001533828631
      - filter: layers.16.
        value: 0.14199137490635572
      - filter: layers.17.
        value: 0.7587521819162459
      - filter: layers.18.
        value: 0.9769968221517621
      - filter: layers.19.
        value: 0.5936888514834866
      - filter: layers.20.
        value: 0.979481555973458
      - filter: layers.21.
        value: 0.1362420472755318
      - filter: layers.22.
        value: 0.1451804836602873
      - filter: layers.23.
        value: 0.9319964347718136
      - filter: layers.24.
        value: 0.8814265997262563
      - filter: layers.25.
        value: 0.870638468633288
      - filter: layers.26.
        value: 0.06311119172889679
      - filter: layers.27.
        value: 0.902932718098389
      - filter: layers.28.
        value: 0.9174145551871369
      - filter: layers.29.
        value: 0.9048467992426628
      - filter: layers.30.
        value: 0.04929564345988049
      - filter: layers.31.
        value: 0.922707420329624
  # Source: meta-llama/Meta-Llama-3-8B over layer_range [0, 32].
  # NOTE(review): this path is the same as the top-level base_model path —
  # confirm whether density/weight values for the base itself affect the result.
  - layer_range: [0, 32]
    model:
      model:
        path: meta-llama/Meta-Llama-3-8B
    parameters:
      # density: one value per filter — embed_token, model.norm, lm_head, then
      # layers.0. through layers.31. model.norm and lm_head share one value.
      # Presumably the fraction of delta parameters kept by DARE — TODO confirm.
      density:
      - filter: embed_token
        value: 0.1479082895745973
      - filter: model.norm
        value: 0.18334257522610492
      - filter: lm_head
        value: 0.18334257522610492
      - filter: layers.0.
        value: 0.17476905394590242
      - filter: layers.1.
        value: 0.11161623400742576
      - filter: layers.2.
        value: 0.16109344344908105
      - filter: layers.3.
        value: 0.2735834275693588
      - filter: layers.4.
        value: 0.8258891898417566
      - filter: layers.5.
        value: 0.21085556872053604
      - filter: layers.6.
        value: 0.20766543320815006
      - filter: layers.7.
        value: 0.8947694253855037
      - filter: layers.8.
        value: 0.734275334571558
      - filter: layers.9.
        value: 0.1632311874735626
      - filter: layers.10.
        value: 0.940700711783812
      - filter: layers.11.
        value: 0.07148774488326176
      - filter: layers.12.
        value: 0.07541557340487534
      - filter: layers.13.
        value: 0.13833770311269455
      - filter: layers.14.
        value: 0.9612379711004643
      - filter: layers.15.
        value: 0.8090075125599039
      - filter: layers.16.
        value: 0.7255233959581611
      - filter: layers.17.
        value: 0.2634507144990253
      - filter: layers.18.
        value: 0.07135903934561608
      - filter: layers.19.
        value: 0.1180822729914722
      - filter: layers.20.
        value: 0.07751975543731829
      - filter: layers.21.
        value: 0.9990557487897024
      - filter: layers.22.
        value: 0.17045615586066107
      - filter: layers.23.
        value: 0.19588339382290734
      - filter: layers.24.
        value: 0.152313213824124
      - filter: layers.25.
        value: 0.8120646024357844
      - filter: layers.26.
        value: 0.6661112930033101
      - filter: layers.27.
        value: 0.7782416079783356
      - filter: layers.28.
        value: 0.24425477536875875
      - filter: layers.29.
        value: 0.05962906198631645
      - filter: layers.30.
        value: 0.023125010859717736
      - filter: layers.31.
        value: 0.9109899850283665
      # weight: one value per filter, covering the same tensors as density.
      # Presumably this source's relative contribution to the merge — TODO confirm.
      # NOTE(review): the norm filter is spelled `model.norm.` here (trailing
      # dot) but `model.norm` under density — confirm this is intentional.
      weight:
      - filter: embed_token
        value: 0.12126630242759481
      - filter: model.norm.
        value: 0.07734624352533248
      - filter: lm_head
        value: 0.07734624352533248
      - filter: layers.0.
        value: 0.16823028525905875
      - filter: layers.1.
        value: 0.9417449451303712
      - filter: layers.2.
        value: 0.7717519522673566
      - filter: layers.3.
        value: 0.7601040526349441
      - filter: layers.4.
        value: 0.0019090753772779204
      - filter: layers.5.
        value: 0.16032547702469566
      - filter: layers.6.
        value: 0.12224994873335546
      - filter: layers.7.
        value: 0.27695385066177564
      - filter: layers.8.
        value: 0.018799614691291815
      - filter: layers.9.
        value: 0.9759168818301882
      - filter: layers.10.
        value: 0.006525097827571269
      - filter: layers.11.
        value: 0.756537797885991
      - filter: layers.12.
        value: 0.8051453838823787
      - filter: layers.13.
        value: 0.8879631547059472
      - filter: layers.14.
        value: 0.713799746085261
      - filter: layers.15.
        value: 0.03862352880564701
      - filter: layers.16.
        value: 0.1837448681603537
      - filter: layers.17.
        value: 0.30267576939315943
      - filter: layers.18.
        value: 0.17332405807285195
      - filter: layers.19.
        value: 0.11686420946772721
      - filter: layers.20.
        value: 0.2826021601318976
      - filter: layers.21.
        value: 0.14782621450845335
      - filter: layers.22.
        value: 0.8764989337980243
      - filter: layers.23.
        value: 0.5836574402524565
      - filter: layers.24.
        value: 0.8579541606567384
      - filter: layers.25.
        value: 0.2310998812434597
      - filter: layers.26.
        value: 0.13443251834995432
      - filter: layers.27.
        value: 0.9754382468614297
      - filter: layers.28.
        value: 0.9406099007353652
      - filter: layers.29.
        value: 0.10203532427654999
      - filter: layers.30.
        value: 0.747420490316978
      - filter: layers.31.
        value: 0.06383831695667043