{ "cells": [ { "cell_type": "code", "execution_count": null, "id": "585da432", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of parquet files 30\n", "Reading geclm-datasets/samples/c4/20230404_102105_00007_t8w9z_3085d601-45f1-443a-b50d-8eb4812dd227\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/bigcode_python_code/20230404_102116_00007_ajvns_4e5b2899-8640-4a4c-b0cd-758662178176\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/bigcode_python_github_issues/20230404_102127_00022_yv77i_982f928f-1431-4ea7-986d-c5c5cb0f4a3f\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/bigcode_python_jupyter_markdowned_clean_dedup/20230404_102137_00026_vwcg7_3167c932-87a1-4fec-ad01-215831d0bf6e\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/books3/20230404_102143_00027_t4kwf_198fc997-b871-4e4a-b88e-3776f1cf92fe\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/gutenberg_raw/20230404_102215_00007_x3ntt_30873bfe-c94c-439a-96e2-71165570dc99\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/reddit_threaded/20230404_102241_00049_xj4uk_d7612f5a-5107-46e1-b710-47e7db95a7e6\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/enwiki_data/20230404_102246_00007_ye63c_57166ca6-f0d2-40ef-8ae7-ed4bc7ecd28d\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/s2orc_dedup/20230404_102252_00080_6ce5q_330e23f7-1270-4a52-b277-af823baf1de6\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/stackexchange2/20230404_102308_00031_qvnh6_cec28e17-f163-4a04-9fbe-dc617d9ea03e\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/commoncrawl/20230404_124237_00026_sin5w_c2e65b68-2449-47fa-be8b-a6e6e83611d0\n", "Running on local URL: http://127.0.0.1:7860\n", "\n", "To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "