Commit f217a8ea authored by Sewwandi W.M.C

Create skill_update_dev.ipynb

skill update
parent 2ebfc3f8
{
"cells": [
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import ast\n",
"from joblib import load"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Filtered Predictions: [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]\n",
"Raw Predictions: [1.45033268e-09 1.45059805e-09 1.00111773e-06 1.39370268e-09\n",
" 2.59494512e-01 1.38548161e-09 1.65780186e-09 7.40504445e-01\n",
" 1.24871426e-09 1.53996961e-09 1.39266887e-09 1.19986235e-09\n",
" 1.68285852e-09 1.53427857e-09 1.31650985e-09 1.16009754e-09\n",
" 1.45727081e-09 1.63127721e-09 1.13805600e-09 1.28755564e-09\n",
" 1.51714170e-09 1.50479260e-09 1.40481462e-09 1.31366271e-09\n",
" 1.62404093e-09 1.63591925e-09 1.60837080e-09 1.48109022e-09\n",
" 1.58240510e-09 1.04227142e-09 1.33086116e-09 1.16848105e-09]\n",
"Top Labels: ['Python', 'Java', 'Angular', 'JavaScript', 'CSS', 'Problem Solving', 'UX', 'Project Management', 'Team Collaboration', 'Leadership']\n",
"Top Predictions: ['Python', 'Java', 'Angular', 'JavaScript', 'CSS', 'Problem Solving', 'UX', 'Project Management', 'Team Collaboration', 'Leadership']\n"
]
}
],
"source": [
"# Load the trained skills classifier.\n",
"# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code,\n",
"# so only load model files that come from a trusted source.\n",
"model = load('gb_model_skills.joblib')\n",
"\n",
"# A label is kept by get_top_predictions only if its probability is strictly above this value.\n",
"# NOTE(review): at 0.0 every label with a non-zero probability passes, so the filter\n",
"# is effectively disabled - confirm this is intended.\n",
"threshold = 0.0\n",
"\n",
"# 1-based label id -> skill name. get_top_predictions maps column i of predict_proba\n",
"# to label id i + 1; presumably these are the ids used at training time - TODO confirm.\n",
"label_mapping = {\n",
"    1: 'React',\n",
"    2: 'React Native',\n",
"    3: 'Angular',\n",
"    4: 'PHP',\n",
"    5: 'Java',\n",
"    6: 'HTML/HTML5',\n",
"    7: 'CSS',\n",
"    8: 'Python',\n",
"    9: 'Node.js',\n",
"    10: 'Full-Stack Development',\n",
"    11: 'Mobile App Development (iOS)',\n",
"    12: 'Mobile App Development (Android)',\n",
"    13: 'JavaScript',\n",
"    14: 'TypeScript',\n",
"    15: 'DevOps',\n",
"    16: 'QA/Testing',\n",
"    17: 'UI',\n",
"    18: 'UX',\n",
"    19: 'UI/UX',\n",
"    20: 'Cloud (AWS, Google, Azure)',\n",
"    21: 'Graphics Designing',\n",
"    22: 'VFX Designing',\n",
"    23: 'GitHub/Version Control',\n",
"    24: 'Video Editing',\n",
"    25: 'Project Management',\n",
"    26: 'Problem Solving',\n",
"    27: 'Team Collaboration',\n",
"    28: '3D Designing',\n",
"    29: 'Leadership',\n",
"    30: 'Animation Editing',\n",
"    31: 'Time Management',\n",
"    32: 'Digital Marketing',\n",
"}\n",
"\n",
"\n",
"# Rank skill labels for a set of keywords using the loaded model\n",
"def get_top_predictions(user_input, top_n=10, min_probability=None, verbose=True):\n",
"    \"\"\"Return the highest-probability skill labels for a set of keywords.\n",
"\n",
"    Args:\n",
"        user_input: Keywords as a list/tuple, or a string holding a Python\n",
"            list literal such as \"['Java', 'Python']\".\n",
"        top_n: Maximum number of labels to return.\n",
"        min_probability: A label is kept only if its probability is strictly\n",
"            above this value. Defaults to the module-level `threshold`.\n",
"        verbose: When True, print the intermediate values (original behaviour).\n",
"\n",
"    Returns:\n",
"        List of skill names ordered from most to least probable.\n",
"\n",
"    Raises:\n",
"        ValueError, SyntaxError: If a string input is not a valid Python literal.\n",
"        TypeError: If the input is not (or does not parse to) a list or tuple.\n",
"        KeyError: If a predicted label id is missing from `label_mapping`.\n",
"    \"\"\"\n",
"    # ast.literal_eval only accepts literals; the eval() used previously would\n",
"    # execute arbitrary code embedded in the input string.\n",
"    keywords_list = ast.literal_eval(user_input) if isinstance(user_input, str) else user_input\n",
"    if not isinstance(keywords_list, (list, tuple)):\n",
"        # A bare string would otherwise be joined character by character.\n",
"        raise TypeError('user_input must be a list of keywords or its string representation')\n",
"\n",
"    cutoff = threshold if min_probability is None else min_probability\n",
"\n",
"    # The keywords are joined into one space-separated string and scored as a single sample.\n",
"    raw_predictions = model.predict_proba([' '.join(map(str, keywords_list))])[0]\n",
"\n",
"    # Column i of predict_proba is treated as label id i + 1.\n",
"    # NOTE(review): assumes the model's classes are the ids 1..N in order - TODO confirm.\n",
"    filtered_predictions = [idx + 1 for idx, value in enumerate(raw_predictions) if value > cutoff]\n",
"\n",
"    if verbose:\n",
"        print(\"Filtered Predictions:\", filtered_predictions)\n",
"        print(\"Raw Predictions:\", raw_predictions)\n",
"\n",
"    # Highest probability first, truncated to the requested number of labels.\n",
"    top_indices = sorted(filtered_predictions, key=lambda idx: raw_predictions[idx - 1], reverse=True)[:top_n]\n",
"    top_labels = [label_mapping[idx] for idx in top_indices]\n",
"\n",
"    if verbose:\n",
"        print(\"Top Labels:\", top_labels)\n",
"\n",
"    return top_labels\n",
"\n",
"\n",
"# Sample query: a stringified Python list of keywords\n",
"user_input = (\n",
"    \"['Dedication', 'learning', 'Passion', 'technology', 'innovation', \"\n",
"    \"'Strong', 'understanding', 'programming', 'language', 'Java', 'Python']\"\n",
")\n",
"\n",
"# Rank the skill labels for the sample query\n",
"top_predictions = get_top_predictions(user_input)\n",
"\n",
"# Show the ranked labels\n",
"print(\"Top Predictions: \", top_predictions)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"-----------------------"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Get Raw Prediction from the Model"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {},
"outputs": [],
"source": [
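"# NOTE: this whole cell is commented out and does not run. It refers to\n",
"# top_matched_jobs_df and candidate_uid, which no cell shown in this notebook defines,\n",
"# and it loads './MLModelDev/gb_model.joblib' rather than the 'gb_model_skills.joblib' loaded above.\n",
"\n",
"# # Import necessary libraries\n",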
"# import pandas as pd\n",
"# from joblib import load\n",
"# from sklearn.feature_extraction.text import TfidfVectorizer\n",
"\n",
"# # Load the trained model\n",
"# model = load('./MLModelDev/gb_model.joblib')\n",
"\n",
"# # Function to apply the model to each row\n",
"# def get_prediction(row):\n",
"# # Assume 'unique_missing_keywords' is the column containing the input keywords as a list\n",
"# keywords_list = row['unique_missing_keywords']\n",
"\n",
"# # Check if 'unique_missing_keywords' is a list\n",
"# if isinstance(keywords_list, list):\n",
"# # Convert the list to a string by joining its elements\n",
"# keywords_list = ' '.join(keywords_list)\n",
"\n",
"# # Use the trained model to get raw prediction probabilities for all labels\n",
"# raw_predictions = model.predict_proba([keywords_list])\n",
"\n",
"# # Return the raw prediction probabilities\n",
"# return raw_predictions[0]\n",
"\n",
"# # Apply the function to each row and create a new column 'raw_predictions'\n",
"# top_matched_jobs_df['raw_predictions'] = top_matched_jobs_df.apply(get_prediction, axis=1)\n",
"\n",
"# # Display the DataFrame with the new column\n",
"# top_matched_jobs_df[['unique_missing_keywords', 'raw_predictions']]\n",
"\n",
"# # save to a csv\n",
"# top_matched_jobs_df.to_csv(f'course_raw_predictions_{candidate_uid}.csv', index=False)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "resume-ranker",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.13"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment