Spaces:
Running
Running
AlirezaF138
committed on
Update app.py
Browse files
app.py
CHANGED
@@ -35,29 +35,62 @@ def process_text(text, operation, correct_spacing, remove_diacritics, remove_spe
|
|
35 |
pos_tags = pos_tagger.tag(tokens) # Generate POS tags
|
36 |
chunks = chunker.parse(pos_tags) # Pass tagged tokens to Chunker
|
37 |
result = str(chunks) # Show chunks as text
|
38 |
-
|
|
|
|
|
|
|
|
|
39 |
return result
|
40 |
|
|
|
|
|
|
|
|
|
|
|
41 |
# Define Gradio interface
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
gr.
|
48 |
-
|
49 |
-
|
50 |
-
gr.
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
if __name__ == "__main__":
|
63 |
-
|
|
|
35 |
pos_tags = pos_tagger.tag(tokens) # Generate POS tags
|
36 |
chunks = chunker.parse(pos_tags) # Pass tagged tokens to Chunker
|
37 |
result = str(chunks) # Show chunks as text
|
38 |
+
elif operation == "pos_tag":
|
39 |
+
tokens = word_tokenize(text)
|
40 |
+
pos_tags = pos_tagger.tag(tokens)
|
41 |
+
result = " ".join([f"{token}/{tag}" for token, tag in pos_tags]) # Format: token/POS
|
42 |
+
|
43 |
return result
|
44 |
|
45 |
+
def toggle_normalization_options(operation):
    """Show the normalization options only while 'normalize' is selected.

    Used as the ``operation.change`` callback, whose only declared output is
    the ``normalization_options`` column that wraps the normalization
    checkboxes. A listener must return one value per declared output, so a
    single visibility update is returned; hiding the column hides every
    checkbox inside it.

    Args:
        operation: The currently selected value of the operation radio group.

    Returns:
        A ``gr.update`` that makes the column visible when ``operation`` is
        ``"normalize"`` and hidden otherwise.
    """
    is_normalize = operation == "normalize"
    # One update for the one output component (previously a list of nine
    # updates was returned, which matched neither the single declared output
    # nor the eight checkboxes in the column).
    return gr.update(visible=is_normalize)
|
49 |
+
|
50 |
# Define Gradio interface
|
51 |
+
# Gradio UI: text input, operation selector, normalization toggles, read-only
# output box, and a button that runs process_text on the current settings.
# NOTE(review): block nesting was reconstructed from a flattened diff view --
# confirm the Row/Column membership against the real app.py.
with gr.Blocks() as demo:
    gr.Markdown("# Persian Text Processor with Hazm")
    gr.Markdown("Select an operation and, if applicable, adjust normalization parameters to process the input text using Hazm.")

    with gr.Row():
        input_text = gr.Textbox(label="Input Text", lines=10)

    with gr.Row():
        operation = gr.Radio(
            label="Select Operation",
            choices=['normalize', 'tokenize', 'lemmatize', 'chunk', 'pos_tag'],
            value='normalize',
        )

    # All normalization switches live in one column so they can be shown or
    # hidden together through the column itself.
    with gr.Column(visible=True) as normalization_options:
        correct_spacing = gr.Checkbox(label="Correct Spacing", value=True)
        remove_diacritics = gr.Checkbox(label="Remove Diacritics", value=True)
        remove_specials_chars = gr.Checkbox(label="Remove Special Characters", value=True)
        decrease_repeated_chars = gr.Checkbox(label="Decrease Repeated Characters", value=True)
        persian_style = gr.Checkbox(label="Persian Style", value=True)
        persian_numbers = gr.Checkbox(label="Persian Numbers", value=True)
        unicodes_replacement = gr.Checkbox(label="Unicodes Replacement", value=True)
        seperate_mi = gr.Checkbox(label="Separate 'می'", value=True)

    # Re-evaluate the column's visibility whenever the selected operation changes.
    operation.change(
        toggle_normalization_options,
        inputs=operation,
        outputs=normalization_options,
    )

    output_text = gr.Textbox(
        label="Processed Text",
        lines=10,
        interactive=False,
        show_copy_button=True,
    )

    submit_button = gr.Button("Process Text")

    # The order here must match process_text's positional parameters:
    # text, operation, then the normalization flags.
    process_inputs = [
        input_text,
        operation,
        correct_spacing,
        remove_diacritics,
        remove_specials_chars,
        decrease_repeated_chars,
        persian_style,
        persian_numbers,
        unicodes_replacement,
        seperate_mi,
    ]
    submit_button.click(process_text, inputs=process_inputs, outputs=output_text)

if __name__ == "__main__":
    demo.launch()
|