backend

Sleeping

App Files Files Community

meg-huggingface committed on Jul 16, 2024

Commit

64c3915

1 Parent(s): 5ea4d55

Backend toxicity

Browse files

Files changed (1) hide show

main_backend_toxicity.py +85 -0

main_backend_toxicity.py ADDED Viewed

	@@ -0,0 +1,85 @@

+import logging
+import pprint
+import re
+from huggingface_hub import snapshot_download
+logging.getLogger("openai").setLevel(logging.DEBUG)
+from src.backend.inference_endpoint import create_endpoint
+from src.backend.run_toxicity_eval import main
+from src.backend.manage_requests import check_completed_evals, get_eval_requests, set_eval_request
+from src.backend.sort_queue import sort_models_by_priority
+from src.envs import (QUEUE_REPO, EVAL_REQUESTS_PATH_BACKEND, RESULTS_REPO,
+                      EVAL_RESULTS_PATH_BACKEND, API, TOKEN)
+#, LIMIT, ACCELERATOR, VENDOR, REGION
+from src.logging import setup_logger
+logger = setup_logger(__name__)
+# logging.basicConfig(level=logging.ERROR)
+pp = pprint.PrettyPrinter(width=80)
+PENDING_STATUS = "PENDING"
+RUNNING_STATUS = "RUNNING"
+FINISHED_STATUS = "FINISHED"
+FAILED_STATUS = "FAILED"
+snapshot_download(repo_id=RESULTS_REPO, revision="main", local_dir=EVAL_RESULTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
+snapshot_download(repo_id=QUEUE_REPO, revision="main", local_dir=EVAL_REQUESTS_PATH_BACKEND, repo_type="dataset", max_workers=60, token=TOKEN)
+def run_auto_eval():
+    current_pending_status = [PENDING_STATUS]
+    # pull the eval dataset from the hub and parse any eval requests
+    # check completed evals and set them to finished
+    check_completed_evals(
+        api=API,
+        checked_status=RUNNING_STATUS,
+        completed_status=FINISHED_STATUS,
+        failed_status=FAILED_STATUS,
+        hf_repo=QUEUE_REPO,
+        local_dir=EVAL_REQUESTS_PATH_BACKEND,
+        hf_repo_results=RESULTS_REPO,
+        local_dir_results=EVAL_RESULTS_PATH_BACKEND
+    )
+    # Get all eval request that are PENDING, if you want to run other evals, change this parameter
+    eval_requests = get_eval_requests(job_status=current_pending_status, hf_repo=QUEUE_REPO, local_dir=EVAL_REQUESTS_PATH_BACKEND)
+    # Sort the evals by priority (first submitted first run)
+    eval_requests = sort_models_by_priority(api=API, models=eval_requests)
+    logger.info(f"Found {len(eval_requests)} {','.join(current_pending_status)} eval requests")
+    if len(eval_requests) == 0:
+        return
+    eval_request = eval_requests[0]
+    logger.info(pp.pformat(eval_request))
+    set_eval_request(
+        api=API,
+        eval_request=eval_request,
+        set_to_status=RUNNING_STATUS,
+        hf_repo=QUEUE_REPO,
+        local_dir=EVAL_REQUESTS_PATH_BACKEND,
+    )
+    logger.info(f'Starting Evaluation of {eval_request.json_filepath} on Inference endpoints')
+    model_repository = eval_request.model
+    endpoint_name = re.sub("/", "-", model_repository.lower()) + "-toxicity-eval" #+ str(random.randint(0,1000))
+    endpoint_url = create_endpoint(endpoint_name, model_repository)
+    logger.info("Created an endpoint url at %s" % endpoint_url)
+    results = main(endpoint_url, model_repository)
+    logger.debug("FINISHED!")
+    #local_dir = EVAL_RESULTS_PATH_BACKEND,
+    #limit=LIMIT
+    #    )
+    #logger.info(f'Completed Evaluation of {eval_request.json_filepath} on Inference endpoints: {instance_size} {instance_type}')
+# Script entry point: run a single evaluation pass when executed directly.
+if __name__ == "__main__":
+    run_auto_eval()