File size: 721 Bytes
86b7493
17b7b74
86b7493
042f436
86b7493
2737fb5
86b7493
 
042f436
2737fb5
86b7493
2737fb5
86b7493
 
 
042f436
2737fb5
86b7493
042f436
86b7493
042f436
17b7b74
 
86b7493
 
042f436
17b7b74
 
042f436
86b7493
 
042f436
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
import streamlit as st
from langchain.text_splitter import RecursiveCharacterTextSplitter

st.title("Understand Chunking")

# Target chunk size handed to the text splitter; must be at least 1.
chunk_size = st.number_input(
    label="Chunk Size",
    min_value=1,
    value=50,
)

# The overlap is kept strictly below the chunk size. Because the upper bound
# moves with ``chunk_size``, the default and the lower bound have to fit under
# it as well: a fixed default of 10 (or a minimum of 1) falls outside the
# allowed range for small chunk sizes and makes Streamlit raise instead of
# rendering the widget.
_max_overlap = chunk_size - 1

chunk_overlap = st.number_input(
    label="Chunk Overlap",
    min_value=0,  # zero overlap is a valid setting and the only one for size 1
    max_value=_max_overlap,
    value=min(10, _max_overlap),
)

# Free-form text the user wants to see broken into chunks.
docs = st.text_area("Put your text:")

# Streamlit reruns the script on every interaction; the button is only truthy
# on the run triggered by the click, so splitting happens on demand.
if st.button("Split"):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    splits = splitter.split_text(docs)

    if not splits:
        # Without this the page stays blank after the click when the input
        # is empty, which looks like the button did nothing.
        st.warning("No chunks were produced. Enter some text to split.")

    # One labelled text area per chunk, numbered from 1 for readability.
    for idx, split in enumerate(splits, start=1):
        st.text_area(f"Chunk {idx}", split)