changes

e6fda490 · Birahavi Kugathasan · bdf13466 · e6fda490 · e6fda490 · e6fda490
Commit e6fda490 authored Mar 15, 2023 by Birahavi Kugathasan
7 changed files
--- a/Resume_Analyzer/Candidates_score.csv
+++ b/Resume_Analyzer/Candidates_score.csv
 filename,name,mobile_number,email,company_names,college_name,experience,skills,experience_age,degree,words,primary_score,primary_match,secondary_score,secondary_match,no_of_pages,document_similarity,document_score,Score
-resumes/Dhaval_Thakkar_Resume.pdf,Dhaval Thakkar,9191729595,thakkar.dhaval.haresh@gmail.com,['UNIFYND TECHNOLOGIES PVT. LTD'],None,"['UNIFYND TECHNOLOGIES PVT. LTD. | Data Scientist', 'Mumbai, MH, India | June 2018 – Present', '• Led the development of a Templatized OCR Engine with GUI to onboard 2000+ retailers from different malls. The', 'microservice deployed is currently operating at an accuracy of 81%', '• Built a Customer Segmentation model to target customers with relevant coupons, rewards, and content resulting', 'in a 3x increase in revenue and 2x increase in coupon utilization', '• Built a Dynamic Coupon Pricing Engine for malls that led to a 5x increase in coupon consumption on the coupon', 'marketplace', '• Built a Pricing Engine and Customer Segmentation Model for a logistics company which saw a 32% reduction in', 'Customer Attrition and a 12% increase in Repeat Purchase Rate', '• Developed an Automated End to End Reporting system to track KPIs performance for 10 malls that saves 60', 'hours of manual labour each month', 'UNIFYND TECHNOLOGIES PVT. LTD. | Intern Data Scientist Mumbai, MH, India | Sept 2017 - June 2018', '• Built a Smart Cryptocurrency trading platform which used social data and historical prices to optimize current', 'portfolio. 
Boosted overall profit from the portfolio by 30%', '• Worked with Product and Marketing teams to identify the power users of an app which resulted in 43% increase in', 'activity and a 65% increase in revenue from these users', 'ZIFF, INC | Deep Learning Intern', 'Provo, UT, USA | May 2017 – Aug 2017', '• Demonstrated competency in Hyperparameter Optimization, Image Augmentation and Learning Rate decay', 'strategies using the Keras Library', '• Deployed a Multi-Class Image classifier microservice written on Flask as a container on AWS EC2 using Docker']","['System', 'Github', 'Numpy', 'Mysql', 'Css', 'Data analytics', 'R', 'Segmentation', 'Logistics', 'Scrapy', 'Content', 'Keras', 'Engineering', 'Security', 'Machine learning', 'Docker', 'Testing', 'Reporting', 'Aws', 'Analytics', 'Anaconda', 'Sql', 'Html', 'Algorithms', 'Operating systems', 'Marketing', 'Flask', 'Kpis', 'Pandas', 'Python', 'Networking']",5.5,['Bachelor of Engineering'],350,44,"['ocr', 'aws', 'python', 'gcp']",42,"['data', 'ocr', 'science']",1,26,50.0,162.0
-resumes/Santhosh_Narayanan.pdf,SANTHOSH NARAYANAN,417-6755,santhosn@usc.edu,None,None,"['on an EC2 server supported by S3 and RDS.', '\uf0a7  Maintained AWS infrastructure for institute’s annual technical festival website, by hosting the website', 'on an EC2 Ubuntu server.', 'K J Somaiya Inst. of Engg. & I.T – Penetration tester', 'December 2016 – January 2016', '\uf0a7  Conducted penetration testing for institute’s online admission and examination portal.', '\uf0a7  Performed authentication checks, access control checks, per screen checks (XSS, SQL injection.).', '\uf0a7  Delivered error free application, incorporating patches for the respective bugs using ASP.NET']","['Java', 'Computer science', 'System', 'Spyder', 'Numpy', 'Scheduling', 'Mysql', 'Css', 'Scrapy', 'Oracle', 'Certification', 'Schedule', 'Keras', 'Ubuntu', 'Engineering', 'Website', 'Php', 'Security', 'Testing', 'Aws', 'Access', 'Sql', 'Html', 'Wordpress', 'Technical', 'Javascript', 'Jupyter', 'Purchasing', 'Flask', 'Programming', 'Pandas', 'Matplotlib', 'Python', 'Lan']",,None,367,22,"['python', 'aws']",14,['science'],1,5,50.0,91.0
+resumes/Dhaval_Thakkar_Resume.pdf,Dhaval Thakkar,9191729595,thakkar.dhaval.haresh@gmail.com,['UNIFYND TECHNOLOGIES PVT. LTD'],None,"['UNIFYND TECHNOLOGIES PVT. LTD. | Data Scientist', 'Mumbai, MH, India | June 2018 – Present', '• Led the development of a Templatized OCR Engine with GUI to onboard 2000+ retailers from different malls. The', 'microservice deployed is currently operating at an accuracy of 81%', '• Built a Customer Segmentation model to target customers with relevant coupons, rewards, and content resulting', 'in a 3x increase in revenue and 2x increase in coupon utilization', '• Built a Dynamic Coupon Pricing Engine for malls that led to a 5x increase in coupon consumption on the coupon', 'marketplace', '• Built a Pricing Engine and Customer Segmentation Model for a logistics company which saw a 32% reduction in', 'Customer Attrition and a 12% increase in Repeat Purchase Rate', '• Developed an Automated End to End Reporting system to track KPIs performance for 10 malls that saves 60', 'hours of manual labour each month', 'UNIFYND TECHNOLOGIES PVT. LTD. | Intern Data Scientist Mumbai, MH, India | Sept 2017 - June 2018', '• Built a Smart Cryptocurrency trading platform which used social data and historical prices to optimize current', 'portfolio. 
Boosted overall profit from the portfolio by 30%', '• Worked with Product and Marketing teams to identify the power users of an app which resulted in 43% increase in', 'activity and a 65% increase in revenue from these users', 'ZIFF, INC | Deep Learning Intern', 'Provo, UT, USA | May 2017 – Aug 2017', '• Demonstrated competency in Hyperparameter Optimization, Image Augmentation and Learning Rate decay', 'strategies using the Keras Library', '• Deployed a Multi-Class Image classifier microservice written on Flask as a container on AWS EC2 using Docker']","['Pandas', 'Aws', 'Logistics', 'Data analytics', 'Css', 'Machine learning', 'Keras', 'Github', 'Anaconda', 'Python', 'Testing', 'Mysql', 'Security', 'System', 'Kpis', 'Networking', 'Operating systems', 'Scrapy', 'Segmentation', 'Html', 'Algorithms', 'Flask', 'Sql', 'Marketing', 'Content', 'Numpy', 'Reporting', 'Analytics', 'Engineering', 'Docker', 'R']",5.67,['Bachelor of Engineering'],350,44,"['ocr', 'aws', 'python', 'gcp']",42,"['data', 'ocr', 'science']",1,32,50.0,168.0
+resumes/python-developer-resume-2.pdf,Python Developer,456-7890,ggonzalez@email.com,None,None,"['Python Developer Intern', 'Knewton', 'April 2016 - April 2017', '· Worked alongside another developer to implement RESTful APIs', 'Chicago, IL', 'in Django that enabled internal analytics team to increase', 'reporting speed by 24%', '· Using Selenium, built out a unit testing infrastructure for a client', 'web application that reduced the number of bugs reported by', 'the client by 11% month over month']","['Aws', 'Postgresql', 'Requests', 'Css', 'Oracle', 'Github', 'Python', 'Testing', 'Selenium', 'Database', 'System', 'Updates', 'Math', 'Writing', 'Javascript', 'Api', 'Agile', 'Django', 'Rest', 'Apis', 'Sql', 'Process', 'Reporting', 'Analytics', 'Design']",1.0,"['B.S.', 'M.S.']",223,22,"['python', 'aws']",28,"['science', 'data']",1,20,50.0,120.0
+resumes/software-engineer-resume-1.pdf,New York,456-7890,cmcturland@email.com,None,None,"['Software Engineer', 'Embark', 'January 2015 - current / New York, NY', 'Worked with product managers to re-architect a multi-page web', 'app into a single page web-app, boosting yearly revenue by $1.4M', 'Constructed the logic for a streamlined ad-serving platform that', 'scaled to our 35M users, which improved the page speed by 15%', 'after implementation', 'Tested software for bugs and operating speed, ﬁxing bugs and', 'documenting processes to increase efﬁciency by 18%', 'Iterated platform for college admissions, collaborating with a group', 'of 4 engineers to create features across the software', 'Software Engineer', 'MarketSmart', 'April 2012 - January 2015 / Washington, DC', 'Built RESTful APIs that served data to the JavaScript front-end', 'based on dynamically chosen user inputs that handled over 500,000', 'concurrent users', 'Built internal tool using NodeJS and Pupeteer.js to automate QA and', 'monitoring of donor-facing web app, which improved CTR by 3%', 'Reviewed code and conducted testing for 3 additional features on', 'donor-facing web app that increased contributions by 12%', 'Software Engineer Intern', 'Marketing Science Company', 'April 2011 - March 2012 / Pittsburgh, PA', 'Partnered with a developer to implement RESTful APIs in Django,', 'enabling analytics team to increase reporting speed by 24%', 'Using Selenium I built out a unit testing infrastructure for a client', 'application that reduced the number of bugs reported by the client', 'by 11% month over month']","['Aws', 'Postgresql', 'Admissions', 'Css', 'Unix', 'Python', 'Testing', 'Selenium', 'Mysql', 'Nosql', 'Html5', 'Javascript', 'Django', 'Sci', 'Apis', 'Sql', 'C', 'Marketing', 'Reporting', 'Analytics', 'R']",3.67,['B.S.'],233,22,"['python', 'aws']",28,"['science', 'data']",1,10,50.0,110.0
+resumes/Santhosh_Narayanan.pdf,SANTHOSH NARAYANAN,417-6755,santhosn@usc.edu,None,None,"['on an EC2 server supported by S3 and RDS.', '\uf0a7  Maintained AWS infrastructure for institute’s annual technical festival website, by hosting the website', 'on an EC2 Ubuntu server.', 'K J Somaiya Inst. of Engg. & I.T – Penetration tester', 'December 2016 – January 2016', '\uf0a7  Conducted penetration testing for institute’s online admission and examination portal.', '\uf0a7  Performed authentication checks, access control checks, per screen checks (XSS, SQL injection.).', '\uf0a7  Delivered error free application, incorporating patches for the respective bugs using ASP.NET']","['Pandas', 'Aws', 'Matplotlib', 'Technical', 'Php', 'Wordpress', 'Css', 'Oracle', 'Certification', 'Jupyter', 'Keras', 'Python', 'Testing', 'Mysql', 'Security', 'System', 'Javascript', 'Website', 'Java', 'Computer science', 'Scrapy', 'Html', 'Lan', 'Purchasing', 'Flask', 'Scheduling', 'Sql', 'Programming', 'Schedule', 'Ubuntu', 'Access', 'Numpy', 'Engineering', 'Spyder']",,None,367,22,"['python', 'aws']",14,['science'],1,7,50.0,93.0
--- a/Resume_Analyzer/main.py
+++ b/Resume_Analyzer/main.py
@@ -40,7 +40,7 @@ if __name__=='__main__':

    resume_dir = 'resumes/'
    jd_file = 'Job_description.txt'
-    list_of_resumes = os.listdir(resume_dir)
+    list_of_resumes = os.listdir(resume_dir)  # list_of_resumes = ['Dhaval_Thakkar_Resume.pdf', 'asdasdasd.pdf']
    
    df = pd.DataFrame()
    for file in tqdm(list_of_resumes):

--- a/Resume_Analyzer/Dhaval_Thakkar_Resume.pdf
+++ b/Resume_Analyzer/Dhaval_Thakkar_Resume.pdf
--- a/Resume_Analyzer/Santhosh_Narayanan.pdf
+++ b/Resume_Analyzer/Santhosh_Narayanan.pdf
--- a/Resume_Analyzer/resumes/python-developer-resume-2.pdf
+++ b/Resume_Analyzer/resumes/python-developer-resume-2.pdf
--- a/Resume_Analyzer/resumes/software-engineer-resume-1.pdf
+++ b/Resume_Analyzer/resumes/software-engineer-resume-1.pdf
--- a/Resume_Analyzer/scripts/processing.py
+++ b/Resume_Analyzer/scripts/processing.py
@@ -14,9 +14,15 @@ from pdf2image import convert_from_path
 from sklearn.feature_extraction.text import CountVectorizer
 from sklearn.metrics.pairwise import cosine_similarity

+# skills = {
+#  "primary" : ['Python', 'Machine Learning', 'node.js', 'AWS', 'Kubernetese', 'NLP', 'GCP', 'predective', 'OCR'],
+#  "secondary" : ['data', 'science', 'modeling', 'anomaly', 'privacy', 'visualization', 'OCR'],
+# }
+
+
 class document_processing:
    
-    def __init__(self, resume, skills, job_desc):
+    def __init__(self, resume, skills, job_desc): #(resumes/Dhaval_Thakkar_Resume.pdf, skills, Job_description.txt)
        
        with open('Job_description.txt', 'rb') as file:
            job_desc = file.read()
@@ -27,10 +33,10 @@ class document_processing:
        
    def extract_resume(self):
        
-        filepath = self.resume
+        filepath = self.resume #resumes/Dhaval_Thakkar_Resume.pdf
        
-        extension = filepath.split('.')[-1]
-        extension = '.'+extension
+        extension = filepath.split('.')[-1] #pdf
+        extension = '.'+extension #.pdf
        
        resume_ner = ResumeParser(filepath).get_extracted_data()
        resume_txt = extract_text(filepath, extension=extension)
@@ -110,15 +116,18 @@ class document_processing:
        match['text'] = hero.clean(match['text'])
        
        # Find the max val
-        max_val = len(match)
-        
+        max_val = len(match)  #9
+
+
+        # 1. source (main_df): one row per cleaned resume word, repeats included, e.g. Python, Python, Python, Python
+        # 2. match: the skill list to look for, e.g. the primary skills
        # Find the skills that match with the resume
-        df = pd.merge(source, match, on = 'text')
-        df.drop_duplicates(inplace=True)
+        df = pd.merge(source, match, on = 'text') 
+        df.drop_duplicates(inplace=True) # Python
        df.reset_index(drop=True)
        
        # Skills matching
-        match_skills = len(df)
+        match_skills = len(df) #5
        
        if match_skills == 0:
            lst_skills = []
@@ -130,7 +139,7 @@ class document_processing:
        return score, lst_skills
    
    def fill_data(self, source, target, column):
-        
+   
        source.loc[0, column] = str(target[column])
        
        return source   
@@ -141,11 +150,13 @@ class document_processing:
        jd_txt = pd.Series(jd_txt)
        jd_txt = hero.clean(jd_txt)
        jd_txt = jd_txt[0]
-        
+
+        [[1,24],[24,1]]
+
        text_list = [text, jd_txt]
        cv = CountVectorizer()
        count_matrix = cv.fit_transform(text_list)
-        match_percentage = cosine_similarity(count_matrix)[0][1] * 80
+        match_percentage = cosine_similarity(count_matrix)[0][1] * 100
        match_percentage = round(match_percentage, 2)

        return match_percentage
@@ -161,14 +172,15 @@ class document_processing:
        self.text = pyres_text
        
        ocr_ser = pd.Series(pyres_text)
-        cleaned_words = hero.clean(ocr_ser)
+        cleaned_words = hero.clean(ocr_ser) #[ [clean words set], [@,#,$%,_,1234567890,], [] ]
        
        # Main dataframe for manipulation
        main_df = pd.DataFrame(cleaned_words[0].split(), columns = ['text'])
        self.clean_data = main_df
        
-        words = len(main_df)
-        
+        # Add the primary match and score
+        pri_score, pri_match = self.find_match(main_df, pd.DataFrame(skills['primary']))
+        sec_score, sec_match = self.find_match(main_df, pd.DataFrame(skills['secondary']))

        columns = ['filename', 'name', 'mobile_number', 'email', 'company_names',
                   'college_name', 'experience', 'skills', 'experience_age',
@@ -176,18 +188,12 @@ class document_processing:
                   'primary_score', 'primary_match',
                   'secondary_score', 'secondary_match',
                   'no_of_pages', 'document_similarity']
-        details = pd.DataFrame(columns = columns)

-        # print(skills[['Primary']])
+        details = pd.DataFrame(columns = columns)

-        primary = ['Python', 'Machine Learning', 'node.js', 'AWS', 'Kubernetese', 'NLP', 'GCP', 'predective', 'OCR']
-        
-        # Add the primary match and score
-        pri_score, pri_match = self.find_match(main_df, pd.DataFrame(skills['primary']))
-        sec_score, sec_match = self.find_match(main_df, pd.DataFrame(skills['secondary']))
-        
        # Add the document similarity score
        doc_sim = self.resume_cosine_score(cleaned_words[0])
+        words = len(main_df)
        
        # Add details in a dataframe
        details.loc[0, 'filename'] = self.resume