Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in / Register
Toggle navigation
2
2020_21 J-25
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
3
Merge Requests
3
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
2020_21 J-25
2020_21 J-25
Commits
c0684ebe
Commit
c0684ebe
authored
Jul 09, 2021
by
Ramachandran Rajeevaletshanth
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Update 1.Resume_Parser.py
parent
f91e96f7
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
192 additions
and
21 deletions
+192
-21
IT17163682/1.Resume_Parser.py
IT17163682/1.Resume_Parser.py
+192
-21
No files found.
IT17163682/1.Resume_Parser.py
View file @
c0684ebe
from
resume_parser
import
resumeparse
import
docx2txt
from
pyresparser
import
ResumeParser
import
nltk
import
os
from
sklearn.feature_extraction.text
import
CountVectorizer
import
spacy
from
sklearn.metrics.pairwise
import
cosine_similarity
# Warm up the small English spaCy model (return value unused here;
# presumably loaded for side effects elsewhere — TODO confirm).
spacy.load('en_core_web_sm')

# Load Negomi's model
Skills = open("Tweet.txt", "w")

# nltk.download('stopwords')
# nltk.download('punkt')
# nltk.download('averaged_perceptron_tagger')
# nltk.download('maxent_ne_chunker')
# nltk.download('words')

# Open (and truncate) this run's output files. NOTE(review): the "Tweet.txt"
# handle above is immediately shadowed here and its handle leaks — confirm it
# is still needed.
Skills = open("Skills.txt", "w")
Degree = open("Degree.txt", "w")
F_score = open("Final_Score.txt", "w")
Experience = open("Experience.txt", "w")
Skill_point = open("skl_point.txt", "w")
def deleteContent(pfile):
    """Erase all content of the already-open file object *pfile*.

    Rewinds the handle to position 0 and truncates, leaving it open so the
    caller can keep writing to the now-empty file.
    """
    pfile.seek(0)
    pfile.truncate()
# Reset every per-run output file before any section writes fresh results.
for _handle in (Skills, Degree, Experience, Skill_point, F_score):
    deleteContent(_handle)
def extract_text_from_docx(docx_path):
    """Return the plain text of the .docx file at *docx_path*.

    Tabs are flattened to single spaces. Returns None when docx2txt yields
    no text.
    """
    raw = docx2txt.process(docx_path)
    if not raw:
        return None
    return raw.replace('\t', ' ')
# Extract name
def extract_names(txt):
    """Return every PERSON entity NLTK's NE chunker finds in *txt*, in order."""
    person_names = []
    for sentence in nltk.sent_tokenize(txt):
        tagged = nltk.pos_tag(nltk.word_tokenize(sentence))
        for chunk in nltk.ne_chunk(tagged):
            if not hasattr(chunk, 'label') or chunk.label() != 'PERSON':
                continue
            person_names.append(' '.join(leaf[0] for leaf in chunk.leaves()))
    return person_names
if __name__ == '__main__':
    # Pull the candidate's name out of the uploaded CV and record it.
    text = extract_text_from_docx(
        'C:/Users/User/Desktop/New folder (12)/Applicant_Ranker/UploadedCV/Mahela.docx')
    names = extract_names(text)
    if names:  # was: has_items == True — redundant flag and comparison
        name = "\n".join(names)
        with open('Final_Score.txt', 'a') as the_file:
            # First PERSON hit is assumed to be the candidate — TODO confirm.
            Name = names[0]
            the_file.write('Name ' + Name + '\n')
        print('Name : ' + Name)
    else:
        # BUG FIX: this branch previously printed 'Degree not found !!!'
        # even though it handles the *name* extraction failure.
        print('Name not found !!!')
#SKILLS EXTRACTION
# Flat whitelist of skill phrases to search for in the CV text.
# NOTE(review): extract_skills compares token.lower()/ngram.lower() against
# these entries, so the capitalized 'English' entry can never match — confirm
# and lowercase it.
SKILLS_DB = [
    'machine learning',
    'angular',
    'data science',
    'asp',
    'python',
    'ruby',
    'c',
    'java',
    'swift',
    'mysql',
    'php',
    'English',
    'objective c'
]
def extract_skills(input_text):
    """Return the set of SKILLS_DB entries found in *input_text*.

    Single tokens and 2-/3-grams of the stopword-free, alphabetic token
    stream are matched case-insensitively against SKILLS_DB. The returned
    set holds the surface forms as they appeared in the text.
    """
    stop_words = set(nltk.corpus.stopwords.words('english'))
    word_tokens = nltk.tokenize.word_tokenize(input_text)

    # BUG FIX: the second filter previously re-read word_tokens, silently
    # discarding the stopword filter; chain the two filters instead.
    filtered_tokens = [w for w in word_tokens if w not in stop_words]
    filtered_tokens = [w for w in filtered_tokens if w.isalpha()]

    bigrams_trigrams = list(map(' '.join, nltk.everygrams(filtered_tokens, 2, 3)))

    # BUG FIX: lowercase the database side too, so entries such as 'English'
    # can match; also turns the per-token list scan into an O(1) set lookup.
    skills_lookup = {skill.lower() for skill in SKILLS_DB}

    found_skills = set()
    for token in filtered_tokens:
        if token.lower() in skills_lookup:
            found_skills.add(token)
    for ngram in bigrams_trigrams:
        if ngram.lower() in skills_lookup:
            found_skills.add(ngram)
    return found_skills
if __name__ == '__main__':
    # Extract the CV's skills and score them against the job's required
    # programming languages via cosine similarity of term-count vectors.
    text = extract_text_from_docx(
        'C:/Users/User/Desktop/New folder (12)/Applicant_Ranker/UploadedCV/Mahela.docx')
    skills = extract_skills(text)
    if skills:  # was: has_items == True — redundant flag and comparison
        Skills = "\n".join(skills)
        with open('Skills.txt', 'w') as the_file:
            the_file.write(Skills + '\n')
        print('Skills : \n' + Skills + '\n')

        # FIX: close the requirements file instead of leaking its handle
        # (was: skl = open("Programming_Languages.txt").read()).
        with open("Programming_Languages.txt") as req_file:
            skl = req_file.read()

        skl_txt = [Skills, skl]
        sk = CountVectorizer()
        pos_count_matrix = sk.fit_transform(skl_txt)
        # [0][1] = similarity between the CV skills and the requirements,
        # scaled to a 0-100 score rounded to 2 decimals.
        Skl_match = cosine_similarity(pos_count_matrix)[0][1]
        Skl_match = Skl_match * 100
        Skl_match = round(Skl_match, 2)
        with open('Final_Score.txt', 'a') as the_file:
            the_file.write('Skills ' + str(Skl_match) + '\n')
        with open('skl_point.txt', 'a') as the_file:
            the_file.write(str(Skl_match))
    #print(skills)
#EDUCATION / DEGREE EXTRACTION
# Substrings (several languages) whose presence marks an ORGANIZATION entity
# as an educational institution; matched against org.lower() in
# extract_education. NOTE(review): 'Schola', 'BSc', 'Bachelor Degree' and
# 'Degree' contain uppercase letters and so can never match the lowercased
# entity text — confirm and lowercase them.
RESERVED_WORDS = [
    'school',
    'college',
    'univers',
    'academy',
    'faculty',
    'institute',
    'faculdades',
    'Schola',
    'schule',
    'lise',
    'lyceum',
    'lycee',
    'polytechnic',
    'kolej',
    'ünivers',
    'okul',
    'BSc',
    'Bachelor Degree',
    'Degree'
]
def extract_education(input_text):
    """Return the set of ORGANIZATION entities in *input_text* whose name
    contains one of RESERVED_WORDS (the entity text is lowercased before
    matching)."""
    organizations = []
    for sentence in nltk.sent_tokenize(input_text):
        tagged = nltk.pos_tag(nltk.word_tokenize(sentence))
        for chunk in nltk.ne_chunk(tagged):
            if hasattr(chunk, 'label') and chunk.label() == 'ORGANIZATION':
                organizations.append(' '.join(leaf[0] for leaf in chunk.leaves()))

    education = set()
    for org in organizations:
        lowered = org.lower()
        for marker in RESERVED_WORDS:
            if marker in lowered:
                education.add(org)
    return education
if __name__ == '__main__':
    # Extract degree / education organizations from the CV and record them.
    text = extract_text_from_docx(
        'C:/Users/User/Desktop/New folder (12)/Applicant_Ranker/UploadedCV/Mahela.docx')
    education_information = extract_education(text)
    if education_information:  # was: has_items == True — redundant comparison
        Degree = "\n".join(education_information)
        with open('Degree.txt', 'w') as the_file:
            the_file.write("Degree " + Degree + '\n')
        print('Degree / Education : \n' + Degree + '\n')
    else:
        print('Degree not found !!!')
    #print(education_information)
#EXPERIENCE EXTRACTION
"""
EXP_WORDS = [
'experience',
'engineer'
]
def extract_experience(input_text):
experience = []
for sent in nltk.sent_tokenize(input_text):
for chunk in nltk.ne_chunk(nltk.pos_tag(nltk.word_tokenize(sent))):
if hasattr(chunk, 'label') and chunk.label() == 'EXPERIENCE':
experience.append(' '.join(c[0] for c in chunk.leaves()))
education = set()
for org in experience:
for word in EXP_WORDS:
if org.lower().find(word) >= 0:
experience.add(org)
data
=
ResumeParser
(
"Sample_CV.docx"
)
.
get_extracted_data
()
return experience
list
=
[]
list
=
data
[
'skills'
]
if __name__ == '__main__':
print
(
"Skills : "
)
text = extract_text_from_docx('./Sample_CV2.docx')
with
open
(
'Skills.txt'
,
'a'
)
as
the_file
:
experience_information = extract_experience(text)
the_file
.
write
(
str
(
list
)
+
'
\n
'
)
print
(
str
(
list
))
'''
print(experience_information)
list = data['degree']
with open('Degree.txt', 'a') as the_file:
the_file.write(str(list) + '
\n
')
list = data['experience']
with open('Experience.txt', 'a') as the_file:
the_file.write(str(list) + '
\n
')
'''
"""
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment