{ "cells": [ { "cell_type": "code", "execution_count": 11, "id": "8955cb73", "metadata": {}, "outputs": [], "source": [ "import os\n", "from getpass import getpass\n", "\n", "# Credentials must never be hardcoded in a notebook. The token is taken from\n", "# the `geclm_token` environment variable when it is already set; otherwise it\n", "# is requested interactively (getpass does not echo the typed value).\n", "# NOTE(review): the token literal previously committed in this cell (presumably\n", "# a Hugging Face access token, judging by its `hf_` prefix) remains in the file\n", "# history and should be revoked.\n", "if not os.environ.get(\"geclm_token\"):\n", "    os.environ[\"geclm_token\"] = getpass(\"geclm_token: \")" ] }, { "cell_type": "code", "execution_count": null, "id": "585da432", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of parquet files 30\n", "Reading geclm-datasets/samples/c4/20230404_102105_00007_t8w9z_5dddd9ff-0020-4e23-8621-614fe1c82cec\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/bigcode_python_code/20230404_102116_00007_ajvns_6d261b8b-12bb-4ca9-a406-1645f2e31af7\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/bigcode_python_github_issues/20230404_102127_00022_yv77i_2d0f6685-c3b8-4b16-b7bd-5b47e6938102\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/bigcode_python_jupyter_markdowned_clean_dedup/20230404_102137_00026_vwcg7_79f2fc1b-a99c-4ef2-9d73-690ee3157f7b\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/books3/20230404_102143_00027_t4kwf_326b263c-d184-42d3-a1bc-833e0c7cd8c6\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/gutenberg_raw/20230404_102215_00007_x3ntt_eb8e349d-2806-4bef-81dd-8f3b951eec1f\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/reddit_threaded/20230404_102241_00049_xj4uk_3c4761ee-2dbb-493b-ba2f-35a1da79cd45\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/enwiki_data/20230404_102246_00007_ye63c_dc22902c-9d73-426c-9091-4c93f22fee5d\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/s2orc_dedup/20230404_102252_00080_6ce5q_96d31fe2-9f5e-4632-9905-6d37a0c07ec3\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/stackexchange2/20230404_102308_00031_qvnh6_ebca5822-7684-47af-bdac-670001d5a92a\n", "Number of parquet files 30\n", "Reading geclm-datasets/samples/commoncrawl/20230404_124237_00026_sin5w_1278b6e7-4f3e-49b3-9a8e-9cea3f20eadb\n", 
"Running on local URL: http://127.0.0.1:7860\n", "\n", "To create a public link, set `share=True` in `launch()`.\n" ] }, { "data": { "text/html": [ "
" ], "text/plain": [ "