Commit 70a0a5b
Parent(s): ed6a6f5
Initial commit
Files changed:
- app.py +125 -0
- src/__init__.py +0 -0
- src/__pycache__/__init__.cpython-310.pyc +0 -0
- src/__pycache__/model.cpython-310.pyc +0 -0
- src/__pycache__/utils.cpython-310.pyc +0 -0
- src/model.py +201 -0
- src/utils.py +85 -0
app.py
ADDED
@@ -0,0 +1,125 @@
import streamlit as st
import torch
import torch.nn as nn
from transformers import AutoTokenizer
import os
from dataclasses import dataclass
from huggingface_hub import hf_hub_download

from src.model import SmolLM


def greedy_decode(model, input_ids, max_length=100, tokenizer=None):
    current_ids = input_ids

    with torch.no_grad():
        for _ in range(max_length - current_ids.shape[1]):
            outputs = model(current_ids)
            last_token_logits = outputs[:, -1, :]
            next_token = torch.argmax(last_token_logits, dim=-1).unsqueeze(0)

            current_ids = torch.cat([current_ids, next_token], dim=1)

            if next_token.item() == tokenizer.eos_token_id:
                break

    return current_ids


def generate_prediction(model, prompt, max_length=100):
    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained("HuggingFaceTB/SmolLM2-135M")
    tokenizer.pad_token = tokenizer.eos_token
    device = next(model.parameters()).device

    input_ids = tokenizer.encode(prompt, return_tensors="pt").to(device)

    model.eval()
    with torch.no_grad():
        generated_ids = greedy_decode(
            model, input_ids, max_length=max_length, tokenizer=tokenizer
        )

    generated_text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
    return generated_text


def main():
    # Set page configuration
    st.set_page_config(page_title="SmolLM2-TextGen", page_icon="🤖")

    # Title and description
    st.title("SmolLM2-TextGen 🤖")
    st.write("Generate text using the SmolLM2 language model")

    # Build the model (cached by Streamlit so it is only constructed once)
    @st.cache_resource
    def load_model(config):
        model = SmolLM(config)
        return model

    # Try to load the model
    try:

        @dataclass
        class MainConfig:
            vocab_size: int = 49152
            emb_dim: int = 576
            intermediate_size: int = 1536
            num_layers: int = 30
            n_q_heads: int = 9
            n_kv_heads: int = 3
            max_seq_len: int = 1024
            dropout: float = 0.1
            rms_norm_eps: float = 1e-05
            init_std: float = 0.041666666666666664

        config = MainConfig()
        model = load_model(config)
        # load checkpoint
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        # checkpoint_path = "/Users/aditya/Documents/self_learning/ERA V3/week 13/artifacts/m1/smolLM-v2.pth"
        model_repo = "Adityak204/SmolLM2-135-cosmopedia-10k"
        model_filename = "smolLM-v2.pth"
        checkpoint_path = hf_hub_download(repo_id=model_repo, filename=model_filename)
        checkpoint = torch.load(checkpoint_path, map_location=device)[
            "model_state_dict"
        ]
        model.load_state_dict(checkpoint)

    except Exception as e:
        st.error(f"Error loading model: {e}")
        return

    # Input prompt
    prompt = st.text_input(
        "Enter your prompt:", placeholder="Type a sentence to generate text..."
    )

    # Max length slider
    max_length = st.slider(
        "Maximum Generation Length", min_value=10, max_value=200, value=100, step=10
    )

    # Generate button
    if st.button("Generate Text"):
        if not prompt:
            st.warning("Please enter a prompt.")
            return

        # Show loading spinner
        with st.spinner("Generating text..."):
            try:
                # Generate text
                generated_text = generate_prediction(model, prompt, max_length)

                # Display generated text
                st.subheader("Generated Text:")
                st.write(generated_text)

            except Exception as e:
                st.error(f"An error occurred during text generation: {e}")


if __name__ == "__main__":
    main()
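
The app is launched with `streamlit run app.py` from the repository root, so that the `src.model` import resolves. Note that `greedy_decode` only needs a callable mapping (batch, seq_len) token ids to (batch, seq_len, vocab) logits plus a tokenizer exposing `eos_token_id`, so it can be sanity-checked without downloading the checkpoint. A minimal sketch; the ToyLM and ToyTokenizer classes below are made up purely for illustration:

import torch
import torch.nn as nn

from app import greedy_decode  # importing app.py only defines functions, it does not launch the UI


class ToyLM(nn.Module):
    # Stand-in model: embeds tokens and projects straight back to the vocabulary.
    def __init__(self, vocab_size=32, emb_dim=16):
        super().__init__()
        self.emb = nn.Embedding(vocab_size, emb_dim)
        self.head = nn.Linear(emb_dim, vocab_size, bias=False)

    def forward(self, ids):
        return self.head(self.emb(ids))  # (B, T, vocab_size)


class ToyTokenizer:
    eos_token_id = 0  # greedy_decode stops early when this id is produced


ids = torch.randint(1, 32, (1, 4))  # a fake 4-token prompt; the decode loop assumes batch size 1
out = greedy_decode(ToyLM(), ids, max_length=10, tokenizer=ToyTokenizer())
print(out.shape)  # at most (1, 10); shorter if EOS is hit first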
src/__init__.py
ADDED
Empty file
src/__pycache__/__init__.cpython-310.pyc
ADDED
Binary file (173 Bytes)

src/__pycache__/model.cpython-310.pyc
ADDED
Binary file (5.78 kB)

src/__pycache__/utils.cpython-310.pyc
ADDED
Binary file (2.73 kB)
src/model.py
ADDED
@@ -0,0 +1,201 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import math
from src.utils import LlamaRotaryEmbedding, repeat_kv


class RMSNorm(nn.Module):
    def __init__(self, dim, eps=1e-6):
        super().__init__()
        self.eps = eps
        self.weight = nn.Parameter(torch.ones(dim))

    def forward(self, x):
        # Root Mean Square Layer Normalization
        rms = torch.rsqrt(x.pow(2).mean(-1, keepdim=True) + self.eps)
        return x * rms * self.weight


class Attention(nn.Module):
    """Multi-head attention module with support for GQA (Grouped Query Attention)."""

    def __init__(self, config):
        super(Attention, self).__init__()
        self.emb_dim = config.emb_dim
        self.n_q_heads = config.n_q_heads
        self.n_kv_heads = config.n_kv_heads
        self.head_dim = self.emb_dim // self.n_q_heads
        self.n_rep = self.n_q_heads // self.n_kv_heads

        # Projections for Q, K, V & O
        self.q_proj = nn.Linear(self.emb_dim, self.emb_dim, bias=False)
        self.k_proj = nn.Linear(
            self.emb_dim, self.head_dim * self.n_kv_heads, bias=False
        )
        self.v_proj = nn.Linear(
            self.emb_dim, self.head_dim * self.n_kv_heads, bias=False
        )
        self.o_proj = nn.Linear(self.emb_dim, self.emb_dim, bias=False)

        # Initialize rotary embeddings
        self.rotary_embedding = LlamaRotaryEmbedding(
            dim=self.head_dim, max_seq_len=config.max_seq_len
        )

        # Dropout layers
        self.attn_dropout = nn.Dropout(config.dropout)
        self.resid_dropout = nn.Dropout(config.dropout)

        # Causal mask
        self.register_buffer(
            "mask",
            torch.tril(torch.ones(config.max_seq_len, config.max_seq_len)).view(
                1, 1, config.max_seq_len, config.max_seq_len
            ),
        )

    def forward(self, x):
        B, T, C = x.size()  # batch_size, seq_len, emb_dim

        # Project Q, K, V
        q = self.q_proj(x)  # (B, T, emb_dim)
        k = self.k_proj(x)  # (B, T, n_kv_heads * head_dim)
        v = self.v_proj(x)  # (B, T, n_kv_heads * head_dim)

        # Reshape Q, K, V
        q = q.view(B, T, self.n_q_heads, self.head_dim)  # (B, T, n_q_heads, head_dim)
        k = k.view(B, T, self.n_kv_heads, self.head_dim)  # (B, T, n_kv_heads, head_dim)
        v = v.view(B, T, self.n_kv_heads, self.head_dim)  # (B, T, n_kv_heads, head_dim)

        # Reshape for attention computation
        q = q.transpose(1, 2)  # (B, n_q_heads, T, head_dim)
        k = k.transpose(1, 2)  # (B, n_kv_heads, T, head_dim)
        v = v.transpose(1, 2)  # (B, n_kv_heads, T, head_dim)

        # Apply rotary embeddings
        q, k = self.rotary_embedding(q, k)

        # Repeat K and V for GQA
        k = repeat_kv(k, self.n_rep)  # (B, n_q_heads, T, head_dim)
        v = repeat_kv(v, self.n_rep)  # (B, n_q_heads, T, head_dim)

        # Compute attention scores
        scale = 1.0 / math.sqrt(self.head_dim)
        att = (q @ k.transpose(-2, -1)) * scale  # (B, n_q_heads, T, T)
        att = att.masked_fill(self.mask[:, :, :T, :T] == 0, float("-inf"))
        att = F.softmax(att, dim=-1)
        att = self.attn_dropout(att)

        # Apply attention to values
        y = att @ v  # (B, n_q_heads, T, head_dim)

        # Reshape and project output
        y = y.transpose(1, 2).contiguous().view(B, T, C)  # (B, T, emb_dim)
        y = self.o_proj(y)
        y = self.resid_dropout(y)

        return y


class FeedForward(nn.Module):
    """Feed-forward module with SiLU activation."""

    def __init__(self, config):
        super(FeedForward, self).__init__()
        # Gate and up-projections project from hidden_size to intermediate_size
        self.gate_proj = nn.Linear(config.emb_dim, config.intermediate_size, bias=False)
        self.up_proj = nn.Linear(config.emb_dim, config.intermediate_size, bias=False)

        # Down projection brings the dimension back to hidden_size
        self.down_proj = nn.Linear(config.intermediate_size, config.emb_dim, bias=False)

        # SiLU activation function
        self.act_fn = F.silu

        # Dropout layer
        self.dropout = nn.Dropout(config.dropout)

    def forward(self, x):
        # Apply gate and up projections
        gate_output = self.act_fn(self.gate_proj(x))  # SiLU activation
        up_output = self.up_proj(x)

        # Element-wise multiplication of gate and up projections
        intermediate_output = gate_output * up_output

        # Project back to hidden size
        output = self.down_proj(intermediate_output)
        output = self.dropout(output)

        return output


class TransformerBlock(nn.Module):
    """Transformer block with attention and feed-forward modules."""

    def __init__(self, config):
        super(TransformerBlock, self).__init__()
        self.attention = Attention(config)
        self.feed_forward = FeedForward(config)
        self.input_layernorm = RMSNorm(config.emb_dim, config.rms_norm_eps)
        self.attention_layernorm = RMSNorm(config.emb_dim, config.rms_norm_eps)

    def forward(self, x):
        x = x + self.attention(self.input_layernorm(x))
        x = x + self.feed_forward(self.attention_layernorm(x))

        return x


class SmolLM(nn.Module):
    """Small language model with transformer blocks."""

    def __init__(self, config):
        super(SmolLM, self).__init__()
        self.config = config
        self.wte = nn.Embedding(config.vocab_size, config.emb_dim)
        self.transformer_blocks = nn.ModuleList(
            [TransformerBlock(config) for _ in range(config.num_layers)]
        )

        self.lm_head = nn.Linear(config.emb_dim, config.vocab_size, bias=False)
        self.apply(self._init_weights)
        self.layernorm = RMSNorm(config.emb_dim, config.rms_norm_eps)

        # weight sharing
        self.lm_head.weight = self.wte.weight

    def total_params(self):
        return sum(p.numel() for p in self.parameters() if p.requires_grad)

    def _init_weights(self, module):
        if isinstance(module, (nn.Linear, nn.Embedding)):
            module.weight.data.normal_(mean=0.0, std=self.config.init_std)
            if isinstance(module, nn.Linear) and module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.LayerNorm):
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)

    def forward(self, x):
        x = self.wte(x)
        for block in self.transformer_blocks:
            x = block(x)
        x = self.layernorm(x)
        logits = self.lm_head(x)
        return logits


# @dataclass
# class Config:
#     vocab_size: int = 49152
#     emb_dim: int = 576
#     intermediate_size: int = 1536
#     num_layers: int = 10
#     n_q_heads: int = 9
#     n_kv_heads: int = 3
#     max_seq_len: int = 8192
#     dropout: float = 0.1
#     rms_norm_eps: float = 1e-05
#     init_std: float = 0.041666666666666664
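
As a quick end-to-end check, the model can be built with the same configuration values the app uses and run on a dummy batch of token ids. A minimal sketch (CPU, random weights; the Config dataclass below simply mirrors MainConfig from app.py):

import torch
from dataclasses import dataclass

from src.model import SmolLM


@dataclass
class Config:  # same values as MainConfig in app.py
    vocab_size: int = 49152
    emb_dim: int = 576
    intermediate_size: int = 1536
    num_layers: int = 30
    n_q_heads: int = 9
    n_kv_heads: int = 3
    max_seq_len: int = 1024
    dropout: float = 0.1
    rms_norm_eps: float = 1e-05
    init_std: float = 0.041666666666666664


model = SmolLM(Config())
tokens = torch.randint(0, 49152, (2, 16))  # dummy batch: 2 sequences of 16 tokens
logits = model(tokens)
print(logits.shape)  # torch.Size([2, 16, 49152])
print(f"{model.total_params() / 1e6:.1f}M trainable parameters")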
src/utils.py
ADDED
@@ -0,0 +1,85 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class LlamaRotaryEmbedding(nn.Module):
    def __init__(
        self,
        dim: int = 64,  # Dimension per attention head
        max_seq_len: int = 2048,  # Maximum sequence length
        base: int = 10000,  # Base for the angle calculations
        device: str = None,
    ):
        super().__init__()
        self.dim = dim
        self.max_seq_len = max_seq_len
        self.base = base

        # Create cache for position frequencies
        inv_freq = 1.0 / (base ** (torch.arange(0, dim, 2).float() / dim))
        self.register_buffer("inv_freq", inv_freq)

        # Create position sequence
        self._seq_len_cached = 0
        self._cos_cached = None
        self._sin_cached = None

    def _update_cos_sin_tables(self, x: torch.Tensor, seq_len: int):
        # Return early if cache is valid
        if seq_len <= self._seq_len_cached:
            return

        # Update cache size
        self._seq_len_cached = seq_len

        # Create position sequence
        t = torch.arange(seq_len, device=x.device).type_as(self.inv_freq)
        # Calculate position frequencies
        freqs = torch.einsum("i,j->ij", t, self.inv_freq)

        # Calculate embeddings
        emb = torch.cat((freqs, freqs), dim=-1)
        self._cos_cached = emb.cos()  # [None, None, :, :]
        self._sin_cached = emb.sin()  # [None, None, :, :]

    def forward(
        self, q: torch.Tensor, k: torch.Tensor
    ) -> tuple[torch.Tensor, torch.Tensor]:
        batch, num_heads, seq_len, head_dim = q.shape

        # Update cos/sin tables if needed
        self._update_cos_sin_tables(q, seq_len)

        # Get cos and sin for current sequence
        cos = (
            self._cos_cached[:seq_len, :].unsqueeze(0).unsqueeze(0)
        )  # Shape: [1, 1, seq_len, dim]
        sin = (
            self._sin_cached[:seq_len, :].unsqueeze(0).unsqueeze(0)
        )  # Shape: [1, 1, seq_len, dim]

        def rotate_half(x):
            """Rotates half the hidden dims of the input."""
            x1, x2 = x[..., : x.shape[-1] // 2], x[..., x.shape[-1] // 2 :]
            return torch.cat((-x2, x1), dim=-1)

        # Apply rotary embeddings to q and k
        q_embed = (q * cos) + (rotate_half(q) * sin)
        k_embed = (k * cos) + (rotate_half(k) * sin)

        return q_embed, k_embed


def repeat_kv(hidden_states: torch.Tensor, n_rep: int) -> torch.Tensor:
    """
    This is the equivalent of torch.repeat_interleave(x, dim=1, repeats=n_rep). The hidden
    states go from (batch, num_key_value_heads, seqlen, head_dim) to
    (batch, num_attention_heads, seqlen, head_dim).
    """
    batch, num_key_value_heads, slen, head_dim = hidden_states.shape
    if n_rep == 1:
        return hidden_states
    hidden_states = hidden_states[:, :, None, :, :].expand(
        batch, num_key_value_heads, n_rep, slen, head_dim
    )
    return hidden_states.reshape(batch, num_key_value_heads * n_rep, slen, head_dim)
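
A short shape check of the two helpers, using the head layout implied by the app's config (9 query heads, 3 KV heads, head_dim 576 / 9 = 64); the remaining tensor sizes below are arbitrary and chosen only for illustration:

import torch

from src.utils import LlamaRotaryEmbedding, repeat_kv

# repeat_kv: each of the 3 KV heads is repeated 3x to match the 9 query heads
kv = torch.randn(2, 3, 16, 64)  # (batch, n_kv_heads, seq_len, head_dim)
print(repeat_kv(kv, 3).shape)  # torch.Size([2, 9, 16, 64])

# Rotary embedding leaves shapes (and per-position norms) unchanged
rope = LlamaRotaryEmbedding(dim=64, max_seq_len=128)
q = torch.randn(2, 9, 16, 64)
k = torch.randn(2, 3, 16, 64)
q_rot, k_rot = rope(q, k)
print(q_rot.shape, k_rot.shape)  # same shapes as q and k
print(torch.allclose(q.norm(dim=-1), q_rot.norm(dim=-1), atol=1e-4))  # True: pure rotation per pair of dims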