Update inference-cache-config/llama.json
Browse files
inference-cache-config/llama.json
CHANGED
@@ -66,5 +66,21 @@
|
|
66 |
"num_cores": 2,
|
67 |
"auto_cast_type": "bf16"
|
68 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
]
|
70 |
}
|
|
|
66 |
"num_cores": 2,
|
67 |
"auto_cast_type": "bf16"
|
68 |
}
|
69 |
+
],
|
70 |
+
"meta-llama/Llama-3.2-1B": [
|
71 |
+
{
|
72 |
+
"batch_size": 1,
|
73 |
+
"sequence_length": 4096,
|
74 |
+
"num_cores": 2,
|
75 |
+
"auto_cast_type": "bf16"
|
76 |
+
}
|
77 |
+
],
|
78 |
+
"meta-llama/Llama-3.2-3B": [
|
79 |
+
{
|
80 |
+
"batch_size": 1,
|
81 |
+
"sequence_length": 4096,
|
82 |
+
"num_cores": 2,
|
83 |
+
"auto_cast_type": "bf16"
|
84 |
+
}
|
85 |
]
|
86 |
}
|