Commit 276f79d3 authored by M.S.M.Perera

Implemented read hadoop exam data function

parent eea740ad
...@@ -384,7 +384,7 @@ def PdfBreaker(gz_url): ...@@ -384,7 +384,7 @@ def PdfBreaker(gz_url):
return js return js
def hadoopUpload(jsObj, gazetteno, exType, pubDate): def hadoopUpload(jsObj, gazetteno, exType, pubDate):
client = InsecureClient('http://localhost:9870', client = InsecureClient('http://localhost:9870',
user='Shalini') user='Shalini')
...@@ -428,59 +428,51 @@ def readHadoopDataAll(): ...@@ -428,59 +428,51 @@ def readHadoopDataAll():
print("There are no examinations") print("There are no examinations")
return data_ return data_
def readHadoopDataVacancy():
# data_ = readHadoopDataVacancy()
# data_ = TopicCluster.clustervac(data_)
# for item in data_:
# if item["category"] == id:
# data__.append(item)
# new = Vac(department=dep, title=ct, description=completeDescription, summary=summarizedDescription, date=pubDate, url=gz_url, cat=)
# new.save()
def readHadoopDataExam():
data_ = [] data_ = []
client = InsecureClient('http://localhost:9870', user='') client = InsecureClient('http://localhost:9870', user='')
gzList = Gazette.objects.values_list('GazetteNo', flat=True) gzList = Gazette.objects.values_list('GazetteNo', flat=True)
for item in gzList: for item in gzList:
outputFileName = '/home/user_e/egaz/gz-e-' + str(item) + '.json' outputFileName = '/home/user_e/egaz/gz-p-' + str(item) + '.json'
try: try:
with client.read(outputFileName) as reader: with client.read(outputFileName) as reader:
content = reader.read() content = reader.read()
js = json.loads(content) js = json.loads(content)
data_.extend(js) data_.extend(js)
except: except:
print("There are no examinations") print("There are no vacancies")
return data_ return data_
def readHadoopDataVacancy(): # data_ = readHadoopDataVacancy()
# data_ = TopicCluster.clustervac(data_)
# for item in data_:
# if item["category"] == id:
# data__.append(item)
# new = Vac(department=dep, title=ct, description=completeDescription, summary=summarizedDescription, date=pubDate, url=gz_url, cat=)
# new.save()
def readHadoopDataExam():
data_ = [] data_ = []
client = InsecureClient('http://localhost:9870', user='') client = InsecureClient('http://localhost:9870', user='')
gzList = Gazette.objects.values_list('GazetteNo', flat=True) gzList = Gazette.objects.values_list('GazetteNo', flat=True)
for item in gzList: for item in gzList:
outputFileName = '/home/user_e/egaz/gz-p-' + str(item) + '.json' outputFileName = '/home/user_e/egaz/gz-e-' + str(item) + '.json'
try: try:
with client.read(outputFileName) as reader: with client.read(outputFileName) as reader:
content = reader.read() content = reader.read()
js = json.loads(content) js = json.loads(content)
data_.extend(js) data_.extend(js)
except: except:
print("There are no vacancies") print("There are no examinations")
return data_ return data_
def readSingleHadoopData(gazetteno): def readSingleHadoopData(gazetteno):
print() print()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment