Commit
training run
SilenNaihin committed Sep 22, 2023
1 parent dbfa0b1 commit 8cdfe4d
Showing 12 changed files with 49,055 additions and 54 deletions.
220 changes: 216 additions & 4 deletions paper/monitor.ipynb
@@ -1325,7 +1325,7 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 60,
"id": "d258911a",
"metadata": {},
"outputs": [],
@@ -1457,7 +1457,7 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 61,
"id": "c02ecd86",
"metadata": {},
"outputs": [],
@@ -1473,11 +1473,223 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 63,
"id": "80903100",
"metadata": {},
"outputs": [],
"source": []
"source": [
"#objective_function\n",
"\n",
"def train_prompt(variables):\n",
" agent_explanation_msg, scoring_msg, not_too_strict_msg, few_shot_msg, description_msg, previous_action, prompt_msg, whitelist_msg, pre_read_msg = variables\n",
" # Initialize the responses\n",
" llm_monitor_responses = initialize_llm_monitor_responses()\n",
" llm_monitor_responses[\"test_run\"][\"variables\"] = {\n",
" \"agent_explanation_msg\": agent_explanation_msg,\n",
" \"scoring_msg\": scoring_msg,\n",
" \"not_too_strict_msg\": not_too_strict_msg,\n",
" \"few_shot_msg\": few_shot_msg,\n",
" \"description_msg\": description_msg,\n",
" \"previous_action\": previous_action,\n",
" \"prompt_msg\": prompt_msg,\n",
" \"whitelist_msg\": whitelist_msg,\n",
" \"pre_read_msg\": pre_read_msg\n",
" } \n",
"\n",
" original_logs_json = load_json_file('all_logs/auto-gpt/validation.json')\n",
" prompts_json = load_json_file('all_logs/auto-gpt/prompt.json')\n",
" all_data = load_json_file('all_data_jsons.json')\n",
" all_uninserted_logs = load_json_file('all_logs/auto-gpt/response.json')\n",
"\n",
" # NOTE: leave empty for all challenges\n",
" # OR: specify specific challenges. When an inserted log within a timestamp is met, it finishes the timestamp moves on \n",
"\n",
" # Testing: [\"TestAdaptSimpleTypoWithGuidance\", \"TestRevenueRetrieval\", \"TestWrite5FilesWithArray\", \"TestDebugMultipleTypo\"] \n",
" # All, only 1 insertion per challenge: list(all_data.keys())\n",
" # All logs: []\n",
" challenges_to_run = []\n",
" llm_monitor_responses[\"test_run\"][\"challenges_to_run_var\"] = \"[]\"\n",
" # If the array is empty, run all of the challenges, otherwise only specific ones\n",
" challenges_to_process = challenges_to_run if challenges_to_run else list(original_logs_json.keys())\n",
"\n",
" all_int_scores = []\n",
"\n",
" # Loop through each challenge\n",
" for challenge_name in challenges_to_process:\n",
" # this is to break after a single inserted log is found. in testing when there's challenges specified in the array\n",
" found_inserted_log_in_challenge = False\n",
"\n",
" date_logs = original_logs_json.get(challenge_name)\n",
"\n",
" if not date_logs: # Skip if the challenge name isn't found\n",
" continue\n",
"\n",
" llm_monitor_responses[\"logs\"][challenge_name] = {}\n",
"\n",
" # get the current challenge data\n",
" challenge_data = all_data[challenge_name]\n",
"\n",
" llm_monitor_responses[\"test_run\"][\"challenges\"].append(challenge_name)\n",
"\n",
" # Loop through each timestamp\n",
" for date, logs in date_logs.items():\n",
" llm_monitor_responses[\"test_run\"][\"total_runs\"] += 1 # Increment total runs for each timestamp\n",
" llm_monitor_responses[\"logs\"][challenge_name][date] = {} # Initialize logs for this timestamp\n",
" # Loop through each log entry\n",
" for log_id, log_data in logs.items():\n",
" found_inserted_log = False\n",
"\n",
" # Increment the total log count\n",
" llm_monitor_responses[\"test_run\"][\"total_logs\"] += 1\n",
" # if it's an inserted log\n",
" try:\n",
" if log_data.get('description', ''):\n",
" llm_monitor_responses[\"test_run\"][\"inserted_logs\"] += 1\n",
" found_inserted_log = found_inserted_log_in_challenge = True\n",
" except:\n",
" continue\n",
" \n",
" previous_action = all_uninserted_logs[challenge_name][date].get(f\"{int(log_id) - 1}\", {})\n",
"\n",
" # this gets the monitor response and combines the above info\n",
" int_score = process_log(variables, log_data, challenge_data, challenge_name, date, log_id, prompts_json, llm_monitor_responses, previous_action)\n",
"\n",
" update_scoring(log_id, date, challenge_name, llm_monitor_responses, found_inserted_log, all_int_scores, int_score)\n",
"\n",
" print_status(llm_monitor_responses, date, log_id, challenges_to_process)\n",
"\n",
" if found_inserted_log_in_challenge and len(challenges_to_run):\n",
" # If an inserted log was found in this timestamp, and the array is not empty, \n",
" # break out of the loop and move to the next challenge\n",
" print(f'Found inserted log in {challenge_name} at {date}')\n",
" break\n",
" \n",
" llm_monitor_responses[\"test_run\"][\"end_time\"] = datetime.now().strftime(\"%Y-%m-%d_%H-%M-%S\")\n",
"\n",
" save_to_json(llm_monitor_responses)\n",
" \n",
" objective_value = -llm_monitor_responses[\"test_run\"][\"scores\"][\"f1_score\"]\n",
" \n",
" update_and_log_optimization_stats(optimization_stats, llm_monitor_responses, params, objective_value)\n",
" \n",
" return objective_value\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8bcdf71e",
"metadata": {},
"outputs": [],
"source": [
"#final_optimization\n",
"\n",
"from skopt import gp_minimize\n",
"from skopt.space import Categorical\n",
"from skopt.callbacks import CheckpointSaver\n",
"import pickle\n",
"import os\n",
"import sys\n",
"from datetime import datetime\n",
"\n",
"optimization_stats = {\n",
" \"memo_cache\": {},\n",
" \"start_time\": datetime.now().strftime(\"%Y-%m-%d_%H-%M-%S\"),\n",
" \"end_time\": None,\n",
" \"total_optimization_runs\": 0,\n",
" \"total_runs\": 0,\n",
" \"total_inserted_logs\": 0,\n",
" \"total_logs\": 0,\n",
" \"best_params\": None,\n",
" \"best_objective_value\": None,\n",
" \"tokens\": {\n",
" \"total_prompt_tokens\": 0,\n",
" \"total_completion_tokens\": 0,\n",
" \"total_overall_tokens\": 0,\n",
" \"total_prompt_cost\": 0,\n",
" \"total_completion_cost\": 0,\n",
" \"total_overall_cost\": 0\n",
" },\n",
"}\n",
"\n",
"# Run naming and folder creation\n",
"run_name = \"training_bayesian\"\n",
"file_count = len([f for f in os.listdir(f\"results\") if f.startswith(run_name)])\n",
"run_folder_name = f\"{run_name}_{file_count + 1}\"\n",
"os.makedirs(f\"results/{run_folder_name}\", exist_ok=True)\n",
"\n",
"# Define the search space\n",
"space = [\n",
" Categorical([0, 1], name='agent_explanation_msg'),\n",
" Categorical([0, 1], name='scoring_msg'),\n",
" Categorical([0, 1], name='not_too_strict_msg'),\n",
" Categorical([0, 1], name='few_shot_msg'),\n",
" Categorical([0, 1], name='description_msg'),\n",
" Categorical([0, 1], name='previous_action'),\n",
" Categorical([0, 1], name='prompt_msg'),\n",
" Categorical([0, 1], name='whitelist_msg'),\n",
" Categorical([0, 1], name='pre_read_msg')\n",
"]\n",
"\n",
"# Initialize variables for resuming\n",
"x0, y0 = None, None\n",
"\n",
"# Try to load previous state\n",
"checkpoint_file = f\"results/{run_folder_name}/checkpoint.pkl\"\n",
"checkpoint_saver = CheckpointSaver(checkpoint_file, compress=9)\n",
"\n",
"try:\n",
" # Try to load previous state, 0 for no checkpoint\n",
" run_number = 0 \n",
" previous_checkpoint_file = f\"results/training_bayesian_{run_number}/checkpoint.pkl\"\n",
" with open(previous_checkpoint_file, \"rb\") as f:\n",
" result = pickle.load(f)\n",
" x0 = result.x_iters\n",
" y0 = result.func_vals\n",
" print(f\"Resuming from checkpoint.\")\n",
"except Exception as e:\n",
" print(f\"An unexpected error occurred: {e}. Starting from scratch.\")\n",
"\n",
"\n",
"\n",
"# Save original stdout and stderr\n",
"original_stdout = sys.stdout\n",
"original_stderr = sys.stderr\n",
"\n",
"# Create Tee objects\n",
"tee_stdout = Tee(f'results/{run_folder_name}/optimization_run _logs.txt', original_stdout)\n",
"tee_stderr = Tee(f'results/{run_folder_name}/optimization_run_logs.txt', original_stderr)\n",
"\n",
"# Redirect stdout and stderr\n",
"sys.stdout = tee_stdout\n",
"sys.stderr = tee_stderr\n",
"\n",
"# Perform the optimization\n",
"result = gp_minimize(objective_function,\n",
" space,\n",
" n_calls=25,\n",
" random_state=0,\n",
" verbose=True,\n",
" x0=x0,\n",
" y0=y0,\n",
" callback=[checkpoint_saver])\n",
"\n",
"# Restore original stdout and stderr\n",
"sys.stdout.close()\n",
"sys.stderr.close()\n",
"sys.stdout = original_stdout\n",
"sys.stderr = original_stderr\n",
"\n",
"optimization_stats[\"end_time\"] = datetime.now().strftime(\"%Y-%m-%d_%H-%M-%S\")\n",
"\n",
"# Save the final result\n",
"with open(f\"results/{run_folder_name}/checkpoint.pkl\", \"wb\") as f:\n",
" pickle.dump(result, f)\n",
"\n",
"# Print the best result\n",
"print(\"Best parameters: \", result.x)\n",
"print(\"Best performance metric: \", result.fun)"
]
}
],
"metadata": {
