# token_test / app.py
# Author: Sebbe33
# Last commit: "Update app.py" (042f436, verified)
# File size: 721 bytes
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter
st.title("Understand Chunking")

# Upper bound on the length of each chunk. The splitter below measures
# length with ``len``, i.e. in characters.
chunk_size = st.number_input(
    label="Chunk Size",
    min_value=1,
    value=50,
)

# The overlap has to stay strictly below the chunk size, so the largest
# allowed value is ``chunk_size - 1`` (which is 0 when chunk_size == 1).
max_overlap = chunk_size - 1

# How much consecutive chunks may share.
# - ``min_value`` is 0 so that "no overlap" can be selected and so that the
#   range stays valid (min <= max) even for a chunk size of 1.
# - The default is clamped to ``max_overlap``: a fixed default of 10 would
#   fall outside the allowed range for any chunk size <= 10, which Streamlit
#   rejects with an exception instead of rendering the widget.
chunk_overlap = st.number_input(
    label="Chunk Overlap",
    min_value=0,
    max_value=max_overlap,
    value=min(10, max_overlap),
)
# Free-form text the user wants to see split into chunks.
docs = st.text_area("Put your text:")

# Only split when the button is clicked in the current script run.
if st.button("Split"):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,  # measure chunk length in characters
    )
    splits = splitter.split_text(docs)

    # Without this, clicking "Split" on empty input shows nothing at all,
    # which is indistinguishable from the button not working.
    if not splits:
        st.warning("No chunks were produced. Enter some text above and try again.")

    # Show every chunk in its own text area, numbered from 1.
    for idx, split in enumerate(splits, start=1):
        st.text_area(f"Chunk {idx}", split)