Create skill_update_dev.ipynb

skill update

Create skill_update_dev.ipynb
skill update
f217a8ea · Sewwandi W.M.C · 2ebfc3f8 · f217a8ea
Commit f217a8ea authored Mar 23, 2024 by Sewwandi W.M.C
Hide whitespace changes
Inline Side-by-side

Showing with 194 additions and 0 deletions

Function 01/Function 01/Function1/March_Update/SkillPredict/skill_update_dev.ipynb ...unction1/March_Update/SkillPredict/skill_update_dev.ipynb +194 -0

No files found.
--- a/Function 01/Function 01/Function1/March_Update/SkillPredict/skill_update_dev.ipynb
+++ b/Function 01/Function 01/Function1/March_Update/SkillPredict/skill_update_dev.ipynb
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import ast\n",
+    "from joblib import load"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 53,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Filtered Predictions: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]\n",
+      "Raw Predictions: [1.45033268e-09 1.45059805e-09 1.00111773e-06 1.39370268e-09\n",
+      " 2.59494512e-01 1.38548161e-09 1.65780186e-09 7.40504445e-01\n",
+      " 1.24871426e-09 1.53996961e-09 1.39266887e-09 1.19986235e-09\n",
+      " 1.68285852e-09 1.53427857e-09 1.31650985e-09 1.16009754e-09\n",
+      " 1.45727081e-09 1.63127721e-09 1.13805600e-09 1.28755564e-09\n",
+      " 1.51714170e-09 1.50479260e-09 1.40481462e-09 1.31366271e-09\n",
+      " 1.62404093e-09 1.63591925e-09 1.60837080e-09 1.48109022e-09\n",
+      " 1.58240510e-09 1.04227142e-09 1.33086116e-09 1.16848105e-09]\n",
+      "Top Labels: ['Python', 'Java', 'Angular', 'JavaScript', 'CSS', 'Problem Solving', 'UX', 'Project Management', 'Team Collaboration', 'Leadership']\n",
+      "Top Predictions:  ['Python', 'Java', 'Angular', 'JavaScript', 'CSS', 'Problem Solving', 'UX', 'Project Management', 'Team Collaboration', 'Leadership']\n"
+     ]
+    }
+   ],
+   "source": [
+    "# Load the trained model from disk (relative path, resolved against the kernel's working directory)\n",
+    "model = load('gb_model_skills.joblib')\n",
+    "\n",
+    "# A label is kept only when its raw probability is strictly greater than this value\n",
+    "threshold = 0.0\n",
+    "\n",
+    "# Label index -> skill name; the names are listed in label order and numbered from 1\n",
+    "label_mapping = dict(enumerate([\n",
+    "    'React',\n",
+    "    'React Native',\n",
+    "    'Angular',\n",
+    "    'PHP',\n",
+    "    'Java',\n",
+    "    'HTML/HTML5',\n",
+    "    'CSS',\n",
+    "    'Python',\n",
+    "    'Node.js',\n",
+    "    'Full-Stack Development',\n",
+    "    'Mobile App Development (iOS)',\n",
+    "    'Mobile App Development (Android)',\n",
+    "    'JavaScript',\n",
+    "    'TypeScript',\n",
+    "    'DevOps',\n",
+    "    'QA/Testing',\n",
+    "    'UI',\n",
+    "    'UX',\n",
+    "    'UI/UX',\n",
+    "    'Cloud (AWS, Google, Azure)',\n",
+    "    'Graphics Designing',\n",
+    "    'VFX Designing',\n",
+    "    'GitHub/Version Control',\n",
+    "    'Video Editing',\n",
+    "    'Project Management',\n",
+    "    'Problem Solving',\n",
+    "    'Team Collaboration',\n",
+    "    '3D Designing',\n",
+    "    'Leadership',\n",
+    "    'Animation Editing',\n",
+    "    'Time Management',\n",
+    "    'Digital Marketing',\n",
+    "], start=1))\n",
+    "\n",
+    "\n",
+    "# The keyword list is passed in as its string representation\n",
+    "def get_top_predictions(user_input):\n",
+    "    \"\"\"Return up to ten skill labels ranked by predicted probability.\n",
+    "\n",
+    "    Args:\n",
+    "        user_input: Text holding a Python list literal of keyword strings,\n",
+    "            for example ['Java', 'Python'] written out as a string.\n",
+    "\n",
+    "    Returns:\n",
+    "        Skill names taken from label_mapping, most probable first, limited\n",
+    "        to labels whose raw probability is greater than threshold.\n",
+    "\n",
+    "    Raises:\n",
+    "        ValueError or SyntaxError (among the errors ast.literal_eval can\n",
+    "        raise) when user_input is not a valid Python literal.\n",
+    "    \"\"\"\n",
+    "    # SECURITY: user_input is external text. It is parsed with ast.literal_eval,\n",
+    "    # which accepts literals only, instead of eval, which would execute any\n",
+    "    # expression embedded in the string.\n",
+    "    keywords_list = ast.literal_eval(user_input)\n",
+    "\n",
+    "    # A bare string literal would otherwise be joined character by character\n",
+    "    if isinstance(keywords_list, str):\n",
+    "        keywords_list = [keywords_list]\n",
+    "\n",
+    "    # Use the trained model to get raw prediction probabilities for all labels\n",
+    "    raw_predictions = model.predict_proba([' '.join(keywords_list)])[0]\n",
+    "\n",
+    "    # Keep the 1-based positions whose probability is greater than the threshold.\n",
+    "    # NOTE(review): assumes position i of predict_proba corresponds to label i + 1\n",
+    "    # in label_mapping - confirm against the model's class order.\n",
+    "    filtered_predictions = [idx + 1 for idx, value in enumerate(raw_predictions) if value > threshold]\n",
+    "\n",
+    "    # Debugging statements\n",
+    "    print(\"Filtered Predictions:\", filtered_predictions)\n",
+    "    print(\"Raw Predictions:\", raw_predictions)\n",
+    "\n",
+    "    # Rank the kept labels by probability, highest first, and keep at most 10\n",
+    "    top_indices = sorted(filtered_predictions, key=lambda idx: raw_predictions[idx - 1], reverse=True)[:10]\n",
+    "\n",
+    "    # Map the indices back to the original labels\n",
+    "    top_labels = [label_mapping[idx] for idx in top_indices]\n",
+    "\n",
+    "    # Debugging statement\n",
+    "    print(\"Top Labels:\", top_labels)\n",
+    "\n",
+    "    return top_labels\n",
+    "\n",
+    "\n",
+    "# Example input: a keyword list written out as its string representation\n",
+    "user_input = (\n",
+    "    \"['Dedication', 'learning', 'Passion', 'technology', 'innovation', \"\n",
+    "    \"'Strong', 'understanding', 'programming', 'language', 'Java', 'Python']\"\n",
+    ")\n",
+    "\n",
+    "# Rank the skills for the example keywords\n",
+    "top_predictions = get_top_predictions(user_input)\n",
+    "\n",
+    "# Show the ranked skill labels\n",
+    "print(\"Top Predictions: \", top_predictions)"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "-----------------------"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "### Get Raw Prediction from the Model"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 54,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# # Import necessary libraries\n",
+    "# import pandas as pd\n",
+    "# from joblib import load\n",
+    "# from sklearn.feature_extraction.text import TfidfVectorizer\n",
+    "\n",
+    "# # Load the trained model\n",
+    "# model = load('./MLModelDev/gb_model.joblib')\n",
+    "\n",
+    "# # Function to apply the model to each row\n",
+    "# def get_prediction(row):\n",
+    "#     # Assume 'unique_missing_keywords' is the column containing the input keywords as a list\n",
+    "#     keywords_list = row['unique_missing_keywords']\n",
+    "\n",
+    "#     # Check if 'unique_missing_keywords' is a list\n",
+    "#     if isinstance(keywords_list, list):\n",
+    "#         # Convert the list to a string by joining its elements\n",
+    "#         keywords_list = ' '.join(keywords_list)\n",
+    "\n",
+    "#     # Use the trained model to get raw prediction probabilities for all labels\n",
+    "#     raw_predictions = model.predict_proba([keywords_list])\n",
+    "\n",
+    "#     # Return the raw prediction probabilities\n",
+    "#     return raw_predictions[0]\n",
+    "\n",
+    "# # Apply the function to each row and create a new column 'raw_predictions'\n",
+    "# top_matched_jobs_df['raw_predictions'] = top_matched_jobs_df.apply(get_prediction, axis=1)\n",
+    "\n",
+    "# # Display the DataFrame with the new column\n",
+    "# top_matched_jobs_df[['unique_missing_keywords', 'raw_predictions']]\n",
+    "\n",
+    "# # save to a csv\n",
+    "# top_matched_jobs_df.to_csv(f'course_raw_predictions_{candidate_uid}.csv', index=False)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "resume-ranker",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.10.13"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}