|
import os |
|
import yaml |
|
import gdown |
|
import gradio as gr |
|
from predict import PredictTri |
|
|
|
def _download_if_missing(gdrive_id, destination):
    """Fetch a Google Drive file with gdown unless ``destination`` already exists.

    Args:
        gdrive_id: Google Drive file id handed to ``gdown.download``.
        destination: Local path that is checked first and written to on download.
    """
    if os.path.exists(destination):
        # Already on disk from a previous run; skip the network round trip.
        return
    gdown.download(id=gdrive_id, output=destination, quiet=False)


# Model checkpoint.
model_gdrive_id = "1FGelqImFkESbTyRsx_elkKIOZ9VbhRuo"
output_path = "tashkeela-d2.pt"
_download_if_missing(model_gdrive_id, output_path)

# Vocabulary vectors. ``output_path`` is deliberately rebound, so it ends up
# pointing at the vocab file once both assets have been handled.
vocab_gdrive_id = "1-0muGvcSYEf8RAVRcwXay4MRex6kmCii"
output_path = "vocab.vec"
_download_if_missing(vocab_gdrive_id, output_path)
|
|
|
def _load_config(path="config.yaml"):
    """Load the YAML configuration and tie the train limits to the predictor window.

    After parsing, two entries of the ``train`` section are overwritten:
    ``max-sent-len`` becomes ``predictor.window`` and ``max-token-count``
    becomes three times that value.

    Args:
        path: Location of the YAML file, opened as UTF-8 text.

    Returns:
        The parsed configuration mapping with the overrides applied.
    """
    with open(path, 'r', encoding="utf-8") as stream:
        # NOTE(review): FullLoader is kept as-is; this presumes config.yaml is
        # a trusted local file — confirm before pointing it at external input.
        settings = yaml.load(stream, Loader=yaml.FullLoader)

    window = settings["predictor"]["window"]
    train_section = settings["train"]
    train_section["max-sent-len"] = window
    train_section["max-token-count"] = window * 3
    return settings


config = _load_config()
|
|
|
def diacritze(text):
    """Run the predictor on ``text`` and return its output as one string.

    The input is echoed to stdout, a fresh ``PredictTri`` is built from the
    module-level ``config`` and the text, and the result of its
    ``predict_majority_vote()`` call is joined with newline characters.

    Args:
        text: Raw input passed through unchanged to ``PredictTri``.

    Returns:
        The predicted lines joined by ``'\\n'`` (presumably one diacritized
        line per input line — confirm against ``PredictTri``).
    """
    # Echo the request so it shows up in the server log.
    print(text)

    lines = PredictTri(config, text).predict_majority_vote()
    return '\n'.join(lines)
|
|
|
# Page layout: a heading, an input box, an output box and a button that
# sends the input text through ``diacritze`` and shows the result.
with gr.Blocks() as demo:
    gr.Markdown(
        """
        # Partial Diacritization
        TODO: put paper links here
        """)

    # Text entered by the user.
    input_txt = gr.Textbox(
        label="Input",
        type='text',
        lines=5,
        placeholder="اكتب هنا",
    )

    # Box that receives whatever ``diacritze`` returns.
    output_txt = gr.Textbox(
        label="Output",
        type='text',
        lines=5,
    )

    btn = gr.Button(value="Shakkel")
    btn.click(fn=diacritze, inputs=input_txt, outputs=output_txt)


# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()
|
|