AlirezaF138 committed on
Commit
0922873
·
verified ·
1 Parent(s): cc082f0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +54 -21
app.py CHANGED
@@ -35,29 +35,62 @@ def process_text(text, operation, correct_spacing, remove_diacritics, remove_spe
35
  pos_tags = pos_tagger.tag(tokens) # Generate POS tags
36
  chunks = chunker.parse(pos_tags) # Pass tagged tokens to Chunker
37
  result = str(chunks) # Show chunks as text
38
-
 
 
 
 
39
  return result
40
 
 
 
 
 
 
41
  # Define Gradio interface
42
- operations = ['normalize', 'tokenize', 'lemmatize', 'chunk']
43
- iface = gr.Interface(
44
- fn=process_text,
45
- inputs=[
46
- gr.Textbox(lines=10, label="Input Text"),
47
- gr.Radio(operations, label="Select Operation", type="value"), # Radio button to select one operation at a time
48
- gr.Checkbox(value=True, label="Correct Spacing", interactive=True),
49
- gr.Checkbox(value=True, label="Remove Diacritics", interactive=True),
50
- gr.Checkbox(value=True, label="Remove Special Characters", interactive=True),
51
- gr.Checkbox(value=True, label="Decrease Repeated Characters", interactive=True),
52
- gr.Checkbox(value=True, label="Persian Style", interactive=True),
53
- gr.Checkbox(value=True, label="Persian Numbers", interactive=True),
54
- gr.Checkbox(value=True, label="Unicodes Replacement", interactive=True),
55
- gr.Checkbox(value=True, label="Separate 'می'", interactive=True)
56
- ],
57
- outputs=gr.Textbox(label="Processed Text", interactive=False, lines=10, show_copy_button=True, show_label=True), # Output as copyable text
58
- title="Persian Text Processor with Hazm",
59
- description="Select an operation and normalization parameters to process the input text using Hazm."
60
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  if __name__ == "__main__":
63
- iface.launch()
 
35
  pos_tags = pos_tagger.tag(tokens) # Generate POS tags
36
  chunks = chunker.parse(pos_tags) # Pass tagged tokens to Chunker
37
  result = str(chunks) # Show chunks as text
38
+ elif operation == "pos_tag":
39
+ tokens = word_tokenize(text)
40
+ pos_tags = pos_tagger.tag(tokens)
41
+ result = " ".join([f"{token}/{tag}" for token, tag in pos_tags]) # Format: token/POS
42
+
43
  return result
44
 
45
def toggle_normalization_options(operation):
    """Show the normalization options only while 'normalize' is selected.

    This callback is registered on ``operation.change`` with the
    ``normalization_options`` column as its single output, so it must return
    exactly one update. Toggling the column's visibility is enough to show or
    hide the checkboxes grouped under it.

    Args:
        operation: The operation name currently selected in the radio group.

    Returns:
        A ``gr.update`` setting the column's ``visible`` flag to ``True`` when
        ``operation`` is ``"normalize"`` and ``False`` otherwise.
    """
    # One output component (the Column) -> one update object. Returning a list
    # of nine updates does not match the single declared output, and the
    # interface only defines eight checkboxes anyway.
    return gr.update(visible=operation == "normalize")
49
+
50
  # Define Gradio interface
51
+ with gr.Blocks() as demo:
52
+ gr.Markdown("# Persian Text Processor with Hazm")
53
+ gr.Markdown("Select an operation and, if applicable, adjust normalization parameters to process the input text using Hazm.")
54
+
55
+ with gr.Row():
56
+ input_text = gr.Textbox(lines=10, label="Input Text")
57
+
58
+ with gr.Row():
59
+ operation = gr.Radio(
60
+ choices=['normalize', 'tokenize', 'lemmatize', 'chunk', 'pos_tag'],
61
+ label="Select Operation",
62
+ value='normalize'
63
+ )
64
+
65
+ with gr.Column(visible=True) as normalization_options:
66
+ correct_spacing = gr.Checkbox(value=True, label="Correct Spacing")
67
+ remove_diacritics = gr.Checkbox(value=True, label="Remove Diacritics")
68
+ remove_specials_chars = gr.Checkbox(value=True, label="Remove Special Characters")
69
+ decrease_repeated_chars = gr.Checkbox(value=True, label="Decrease Repeated Characters")
70
+ persian_style = gr.Checkbox(value=True, label="Persian Style")
71
+ persian_numbers = gr.Checkbox(value=True, label="Persian Numbers")
72
+ unicodes_replacement = gr.Checkbox(value=True, label="Unicodes Replacement")
73
+ seperate_mi = gr.Checkbox(value=True, label="Separate 'می'")
74
+
75
+ operation.change(
76
+ fn=toggle_normalization_options,
77
+ inputs=operation,
78
+ outputs=normalization_options
79
+ )
80
+
81
+ output_text = gr.Textbox(label="Processed Text", lines=10, interactive=False, show_copy_button=True)
82
+
83
+ submit_button = gr.Button("Process Text")
84
+ submit_button.click(
85
+ fn=process_text,
86
+ inputs=[
87
+ input_text, operation,
88
+ correct_spacing, remove_diacritics, remove_specials_chars,
89
+ decrease_repeated_chars, persian_style, persian_numbers,
90
+ unicodes_replacement, seperate_mi
91
+ ],
92
+ outputs=output_text
93
+ )
94
 
95
  if __name__ == "__main__":
96
+ demo.launch()