In [4]:
import sys

# Put the parent directory first on the import path; presumably this is what
# lets the `scripts` package imported in the next cell resolve.
sys.path.insert(0, "..")

In [5]:
from scripts.transformer_prediction_interface import TabPFNClassifier

In [56]:
# Print the kernel's working directory (IPython shell escape). Presumably a
# sanity check for the relative paths used in this notebook ('..' and
# './iris.csv') -- confirm before relying on it.
!pwd

/Users/samuelmueller/TabPFN/TabPFN


In [49]:
import numpy as np
import pandas as pd
import torch
import gradio as gr
import openml


def compute(table: np.ndarray):
    """Fill the empty cells of one column of ``table`` with TabPFN predictions.

    Rows without any content are dropped. Of the remaining rows, those with an
    empty cell are the rows to predict and all others are used for training.
    The column that holds the empty cells is treated as the label column;
    every other column must be convertible to ``float32``.

    Args:
        table: 2-D table of cells. Cells may be strings or numbers; ``None``
            and ``''`` both count as empty.

    Returns:
        A ``(message, table)`` pair. On unusable input this is
        ``(message, None)`` with a human-readable explanation. On success it
        is ``(None, out_table)``, where ``out_table`` is the table without
        blank rows and every formerly empty cell reads
        ``"<label> (p=<probability>)"``.
    """
    raw = np.asarray(table, dtype=object)
    if raw.ndim != 2 or raw.size == 0:
        return "Please provide a table with at least one filled row.", None

    # Normalise every cell to text so that "empty" has exactly one spelling
    # ('') regardless of whether the cell arrived as str, number or None.
    cells = np.array(
        [["" if cell is None else str(cell) for cell in row] for row in raw],
        dtype=str,
    )

    # Drop rows that contain nothing at all.
    row_has_content = (cells != "").any(axis=1)
    cells = cells[row_has_content]

    empty_rows, empty_cols = np.where(cells == "")
    if empty_rows.size == 0:
        return "Please leave at least one label cell empty so there is something to predict.", None
    if not np.all(empty_cols == empty_cols[0]):
        return "Only one column can have empty entries that are predicted.", None
    y_column = int(empty_cols[0])

    train_rows = np.delete(cells, empty_rows, axis=0)
    eval_rows = cells[empty_rows]
    if train_rows.shape[0] == 0:
        return "Please provide at least one row with a label to learn from.", None

    try:
        x_train = torch.tensor(np.delete(train_rows, y_column, axis=1).astype(np.float32))
        x_eval = torch.tensor(np.delete(eval_rows, y_column, axis=1).astype(np.float32))
    except ValueError:
        return "Please only add numbers (to the inputs) or leave fields empty.", None
    y_train = train_rows[:, y_column]

    classifier = TabPFNClassifier(base_path='..', device='cpu')
    classifier.fit(x_train, y_train)
    y_eval, p_eval = classifier.predict(x_eval, return_winning_probability=True)

    # Use an object array for the result: a fixed-width string array would
    # silently truncate the longer "label (p=...)" strings on assignment.
    out_table = cells.astype(object)
    out_table[empty_rows, y_column] = [f"{y_e} (p={p_e:.2f})" for y_e, p_e in zip(y_eval, p_eval)]
    return None, out_table


def upload_file(file):
    """Load an uploaded data file into a 2-D array for the input table.

    Args:
        file: Uploaded-file object exposing the on-disk path as ``file.name``,
            or ``None`` when no file is selected.

    Returns:
        The file contents as a NumPy array, or ``None`` when there is no file
        or its extension is not ``.arff``, ``.csv`` or ``.data`` (matched
        case-insensitively).
    """
    if file is None:
        # Nothing to load (e.g. the file selection was cleared).
        return None

    path = file.name
    lowered = path.lower()

    if lowered.endswith('.arff'):
        dataset = openml.datasets.OpenMLDataset('t', 'test', data_file=path)
        # get_data returns several values; only the data array is needed here.
        features, *_ = dataset.get_data(dataset_format="array")
        return features

    if lowered.endswith(('.csv', '.data')):
        # Files are expected to have no header row. Without header=None pandas
        # would consume the first data row as column names and drop it.
        # NOTE(review): confirm the bundled example files are header-less too.
        return pd.read_csv(path, header=None).to_numpy()

    return None


# Toy table shown when the demo starts. The last row leaves its final cell
# blank, which is the cell the demo is asked to predict.
example = [
    [1, 2, 1],
    [2, 1, 1],
    [1, 1, 1],
    [2, 2, 2],
    [3, 4, 2],
    [3, 2, 2],
    [2, 3, ''],
]

# Build the demo UI. Statement order inside the `with` block is kept as-is
# because the components are created in this order.
with gr.Blocks() as demo:
    # Introductory text shown to the user.
    gr.Markdown("""This demo allows you to play with the **TabPFN**.
    You can either change the table manually (we have filled it with a toy benchmark, sum up to 3 has label 1 and over that label 2).
    The network predicts fields you leave empty. Only one column can have empty entries that are predicted.
    Please, provide everything but the label column as numeric values. It is ok to encode classes as integers.
    """)
    # Editable input table, pre-filled with `example`; handed to callbacks as a NumPy array.
    inp_table = gr.DataFrame(type='numpy', value=example, headers=[''] * 3)
    # Optional file upload as an alternative way to fill the table.
    inp_file = gr.File(
        label='Drop either a .csv (without header, only numeric values for all but the labels) or a .arff file.')
    btn = gr.Button("Predict Empty Table Cells")

    # When the file input changes, load it with `upload_file` and write the result into the input table.
    inp_file.change(fn=upload_file, inputs=inp_file, outputs=inp_table)

    # Outputs of `compute`: first a text message, then the resulting table.
    out_text = gr.Textbox()
    out_table = gr.DataFrame()

    # Clicking the button runs `compute` on the current input table.
    btn.click(fn=compute, inputs=inp_table, outputs=[out_text, out_table])
    # Clickable example file, routed through `upload_file` into the input table.
    # With cache_examples=True the recorded run wrote a cache under
    # `gradio_cached_examples/` (see the cell output).
    examples = gr.Examples(examples=['./iris.csv'],
                           inputs=[inp_file],
                           outputs=[inp_table],
                           fn=upload_file,
                           cache_examples=True)

# Start the local web server for the demo.
demo.launch()

Caching examples at: '/Users/samuelmueller/TabPFN/TabPFN/gradio_cached_examples/670/log.csv'


  s = socket.socket()  # create a socket object
  s = socket.socket()  # create a socket object


Running on local URL:  http://127.0.0.1:7898/

To create a public link, set `share=True` in `launch()`.


(<gradio.routes.App at 0x7fa954c66a90>, 'http://127.0.0.1:7898/', None)

In [52]:
# Scratch frame with ints and a string in a single column, used in the next
# cell to inspect what the NumPy conversion yields for mixed content.
df = pd.DataFrame(data={"hi": [1, 2, "j"]})

In [59]:
# Inspect the nested-list form of the frame's NumPy conversion; the recorded
# output below shows the mixed int/str values are kept as they are.
df.to_numpy().tolist()

[[1], [2], ['j']]



In [None]:
# (Stray keystroke removed: this never-executed cell contained only the
# undefined name `k`, which would raise NameError on "Restart & Run All".)