Commit
training run
SilenNaihin committed Sep 22, 2023
1 parent dbfa0b1 commit 8cdfe4d
Showing 12 changed files with 49,055 additions and 54 deletions.
220 changes: 216 additions & 4 deletions paper/monitor.ipynb
@@ -1325,7 +1325,7 @@
},
{
"cell_type": "code",
"execution_count": 58,
"execution_count": 60,
"id": "d258911a",
"metadata": {},
"outputs": [],
@@ -1457,7 +1457,7 @@
},
{
"cell_type": "code",
"execution_count": 59,
"execution_count": 61,
"id": "c02ecd86",
"metadata": {},
"outputs": [],
@@ -1473,11 +1473,223 @@
},
{
"cell_type": "code",
"execution_count": null,
"execution_count": 63,
"id": "80903100",
"metadata": {},
"outputs": [],
"source": []
"source": [
"#objective_function\n",
"\n",
"def train_prompt(variables):\n",
" agent_explanation_msg, scoring_msg, not_too_strict_msg, few_shot_msg, description_msg, previous_action, prompt_msg, whitelist_msg, pre_read_msg = variables\n",
" # Initialize the responses\n",
" llm_monitor_responses = initialize_llm_monitor_responses()\n",
" llm_monitor_responses[\"test_run\"][\"variables\"] = {\n",
" \"agent_explanation_msg\": agent_explanation_msg,\n",
" \"scoring_msg\": scoring_msg,\n",
" \"not_too_strict_msg\": not_too_strict_msg,\n",
" \"few_shot_msg\": few_shot_msg,\n",
" \"description_msg\": description_msg,\n",
" \"previous_action\": previous_action,\n",
" \"prompt_msg\": prompt_msg,\n",
" \"whitelist_msg\": whitelist_msg,\n",
" \"pre_read_msg\": pre_read_msg\n",
" } \n",
"\n",
" original_logs_json = load_json_file('all_logs/auto-gpt/validation.json')\n",
" prompts_json = load_json_file('all_logs/auto-gpt/prompt.json')\n",
" all_data = load_json_file('all_data_jsons.json')\n",
" all_uninserted_logs = load_json_file('all_logs/auto-gpt/response.json')\n",
"\n",
" # NOTE: leave empty for all challenges\n",
" # OR: specify specific challenges. When an inserted log within a timestamp is met, it finishes the timestamp moves on \n",
"\n",
" # Testing: [\"TestAdaptSimpleTypoWithGuidance\", \"TestRevenueRetrieval\", \"TestWrite5FilesWithArray\", \"TestDebugMultipleTypo\"] \n",
" # All, only 1 insertion per challenge: list(all_data.keys())\n",
" # All logs: []\n",
" challenges_to_run = []\n",
" llm_monitor_responses[\"test_run\"][\"challenges_to_run_var\"] = \"[]\"\n",
" # If the array is empty, run all of the challenges, otherwise only specific ones\n",
" challenges_to_process = challenges_to_run if challenges_to_run else list(original_logs_json.keys())\n",
"\n",
" all_int_scores = []\n",
"\n",
" # Loop through each challenge\n",
" for challenge_name in challenges_to_process:\n",
" # this is to break after a single inserted log is found. in testing when there's challenges specified in the array\n",
" found_inserted_log_in_challenge = False\n",
"\n",
" date_logs = original_logs_json.get(challenge_name)\n",
"\n",
" if not date_logs: # Skip if the challenge name isn't found\n",
" continue\n",
"\n",
" llm_monitor_responses[\"logs\"][challenge_name] = {}\n",
"\n",
" # get the current challenge data\n",
" challenge_data = all_data[challenge_name]\n",
"\n",
" llm_monitor_responses[\"test_run\"][\"challenges\"].append(challenge_name)\n",
"\n",
" # Loop through each timestamp\n",
" for date, logs in date_logs.items():\n",
" llm_monitor_responses[\"test_run\"][\"total_runs\"] += 1 # Increment total runs for each timestamp\n",
" llm_monitor_responses[\"logs\"][challenge_name][date] = {} # Initialize logs for this timestamp\n",
" # Loop through each log entry\n",
" for log_id, log_data in logs.items():\n",
" found_inserted_log = False\n",
"\n",
" # Increment the total log count\n",
" llm_monitor_responses[\"test_run\"][\"total_logs\"] += 1\n",
" # if it's an inserted log\n",
" try:\n",
" if log_data.get('description', ''):\n",
" llm_monitor_responses[\"test_run\"][\"inserted_logs\"] += 1\n",
" found_inserted_log = found_inserted_log_in_challenge = True\n",
" except:\n",
" continue\n",
" \n",
" previous_action = all_uninserted_logs[challenge_name][date].get(f\"{int(log_id) - 1}\", {})\n",
"\n",
" # this gets the monitor response and combines the above info\n",
" int_score = process_log(variables, log_data, challenge_data, challenge_name, date, log_id, prompts_json, llm_monitor_responses, previous_action)\n",
"\n",
" update_scoring(log_id, date, challenge_name, llm_monitor_responses, found_inserted_log, all_int_scores, int_score)\n",
"\n",
" print_status(llm_monitor_responses, date, log_id, challenges_to_process)\n",
"\n",
" if found_inserted_log_in_challenge and len(challenges_to_run):\n",
" # If an inserted log was found in this timestamp, and the array is not empty, \n",
" # break out of the loop and move to the next challenge\n",
" print(f'Found inserted log in {challenge_name} at {date}')\n",
" break\n",
" \n",
" llm_monitor_responses[\"test_run\"][\"end_time\"] = datetime.now().strftime(\"%Y-%m-%d_%H-%M-%S\")\n",
"\n",
" save_to_json(llm_monitor_responses)\n",
" \n",
" objective_value = -llm_monitor_responses[\"test_run\"][\"scores\"][\"f1_score\"]\n",
" \n",
" update_and_log_optimization_stats(optimization_stats, llm_monitor_responses, params, objective_value)\n",
" \n",
" return objective_value\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "8bcdf71e",
"metadata": {},
"outputs": [],
"source": [
"#final_optimization\n",
"\n",
"from skopt import gp_minimize\n",
"from skopt.space import Categorical\n",
"from skopt.callbacks import CheckpointSaver\n",
"import pickle\n",
"import os\n",
"import sys\n",
"from datetime import datetime\n",
"\n",
"optimization_stats = {\n",
" \"memo_cache\": {},\n",
" \"start_time\": datetime.now().strftime(\"%Y-%m-%d_%H-%M-%S\"),\n",
" \"end_time\": None,\n",
" \"total_optimization_runs\": 0,\n",
" \"total_runs\": 0,\n",
" \"total_inserted_logs\": 0,\n",
" \"total_logs\": 0,\n",
" \"best_params\": None,\n",
" \"best_objective_value\": None,\n",
" \"tokens\": {\n",
" \"total_prompt_tokens\": 0,\n",
" \"total_completion_tokens\": 0,\n",
" \"total_overall_tokens\": 0,\n",
" \"total_prompt_cost\": 0,\n",
" \"total_completion_cost\": 0,\n",
" \"total_overall_cost\": 0\n",
" },\n",
"}\n",
"\n",
"# Run naming and folder creation\n",
"run_name = \"training_bayesian\"\n",
"file_count = len([f for f in os.listdir(f\"results\") if f.startswith(run_name)])\n",
"run_folder_name = f\"{run_name}_{file_count + 1}\"\n",
"os.makedirs(f\"results/{run_folder_name}\", exist_ok=True)\n",
"\n",
"# Define the search space\n",
"space = [\n",
" Categorical([0, 1], name='agent_explanation_msg'),\n",
" Categorical([0, 1], name='scoring_msg'),\n",
" Categorical([0, 1], name='not_too_strict_msg'),\n",
" Categorical([0, 1], name='few_shot_msg'),\n",
" Categorical([0, 1], name='description_msg'),\n",
" Categorical([0, 1], name='previous_action'),\n",
" Categorical([0, 1], name='prompt_msg'),\n",
" Categorical([0, 1], name='whitelist_msg'),\n",
" Categorical([0, 1], name='pre_read_msg')\n",
"]\n",
"\n",
"# Initialize variables for resuming\n",
"x0, y0 = None, None\n",
"\n",
"# Try to load previous state\n",
"checkpoint_file = f\"results/{run_folder_name}/checkpoint.pkl\"\n",
"checkpoint_saver = CheckpointSaver(checkpoint_file, compress=9)\n",
"\n",
"try:\n",
" # Try to load previous state, 0 for no checkpoint\n",
" run_number = 0 \n",
" previous_checkpoint_file = f\"results/training_bayesian_{run_number}/checkpoint.pkl\"\n",
" with open(previous_checkpoint_file, \"rb\") as f:\n",
" result = pickle.load(f)\n",
" x0 = result.x_iters\n",
" y0 = result.func_vals\n",
" print(f\"Resuming from checkpoint.\")\n",
"except Exception as e:\n",
" print(f\"An unexpected error occurred: {e}. Starting from scratch.\")\n",
"\n",
"\n",
"\n",
"# Save original stdout and stderr\n",
"original_stdout = sys.stdout\n",
"original_stderr = sys.stderr\n",
"\n",
"# Create Tee objects\n",
"tee_stdout = Tee(f'results/{run_folder_name}/optimization_run _logs.txt', original_stdout)\n",
"tee_stderr = Tee(f'results/{run_folder_name}/optimization_run_logs.txt', original_stderr)\n",
"\n",
"# Redirect stdout and stderr\n",
"sys.stdout = tee_stdout\n",
"sys.stderr = tee_stderr\n",
"\n",
"# Perform the optimization\n",
"result = gp_minimize(objective_function,\n",
" space,\n",
" n_calls=25,\n",
" random_state=0,\n",
" verbose=True,\n",
" x0=x0,\n",
" y0=y0,\n",
" callback=[checkpoint_saver])\n",
"\n",
"# Restore original stdout and stderr\n",
"sys.stdout.close()\n",
"sys.stderr.close()\n",
"sys.stdout = original_stdout\n",
"sys.stderr = original_stderr\n",
"\n",
"optimization_stats[\"end_time\"] = datetime.now().strftime(\"%Y-%m-%d_%H-%M-%S\")\n",
"\n",
"# Save the final result\n",
"with open(f\"results/{run_folder_name}/checkpoint.pkl\", \"wb\") as f:\n",
" pickle.dump(result, f)\n",
"\n",
"# Print the best result\n",
"print(\"Best parameters: \", result.x)\n",
"print(\"Best performance metric: \", result.fun)"
]
}
],
"metadata": {
