Commit 5d771fea authored by Maiushana Sutheshan

Delete questionExxtraction.py

parent 4904bf9b
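"""Extract the text of a scanned exam PDF with the Cloud Vision API.

Uploads input.pdf to a GCS bucket, runs asynchronous DOCUMENT_TEXT_DETECTION
on it, concatenates the recognised text from the per-page JSON results into
out.txt, and finally deletes the temporary input and output blobs.
"""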
import json
import os
import re

from google.cloud import vision
from google.cloud import storage

# Point the Google client libraries at the service-account key file.
os.environ['GOOGLE_APPLICATION_CREDENTIALS'] = r'soloexamimages-973d884bd8dd.json'

client = vision.ImageAnnotatorClient()
storage_client = storage.Client()
def upload_blob(bucket_name, source_file_name, destination_blob_name):
    """Uploads a file to the bucket.

    bucket_name           -- the ID of the GCS bucket
    source_file_name      -- local path of the file to upload
    destination_blob_name -- the ID of the GCS object to create
    """
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(destination_blob_name)
    blob.upload_from_filename(source_file_name)
    print("File {} uploaded to {}.".format(source_file_name, destination_blob_name))
def delete_blob(bucket_name, blob_name):
    """Deletes a blob from the bucket.

    bucket_name -- the ID of the GCS bucket
    blob_name   -- the name of the GCS object to delete
    """
    bucket = storage_client.bucket(bucket_name)
    blob = bucket.blob(blob_name)
    blob.delete()
    print("Blob {} deleted.".format(blob_name))
# Vision API request parameters: treat the input as a PDF and write one
# JSON result file per page.
batch_size = 1
mime_type = 'application/pdf'
feature = vision.Feature(type_=vision.Feature.Type.DOCUMENT_TEXT_DETECTION)

# Input/output locations.
fileName = "input.pdf"
inputbucket = "soloexamocrinputbucket"
outputbucket = "soloexamocroutputbucket"
outputprefix = ''
outputfile = 'out.txt'
# Stage the PDF in the input bucket.
upload_blob(inputbucket, fileName, fileName)

# Build the asynchronous OCR request: read the PDF from GCS and write the
# JSON results to the output bucket.
gcs_source_uri = 'gs://' + inputbucket + '/' + fileName
gcs_source = vision.GcsSource(uri=gcs_source_uri)
input_config = vision.InputConfig(gcs_source=gcs_source, mime_type=mime_type)

gcs_destination_uri = 'gs://' + outputbucket + '/' + outputprefix
gcs_destination = vision.GcsDestination(uri=gcs_destination_uri)
output_config = vision.OutputConfig(gcs_destination=gcs_destination, batch_size=batch_size)

async_request = vision.AsyncAnnotateFileRequest(
    features=[feature], input_config=input_config, output_config=output_config)

# Start the long-running operation and block until it finishes (or the
# timeout, in seconds, expires).
operation = client.async_batch_annotate_files(requests=[async_request])
operation.result(timeout=18000)
# Collect the JSON result files and sort them by their starting page number
# (the Vision API names them like "output-1-to-1.json").
bucket = storage_client.get_bucket(outputbucket)
blob_list = list(bucket.list_blobs(prefix=outputprefix))
namelist = list(map(lambda x: x.name, blob_list))
blob_list.sort(key=lambda x: int(re.match(r'.+-(\d+)-.+', x.name).group(1)))
print('Output files:')
print(namelist)

# The staged input PDF is no longer needed.
delete_blob(inputbucket, fileName)
# Concatenate the recognised text of every page into a single text file.
with open(outputfile, 'w', encoding='utf8') as f:
    for item in blob_list:
        json_string = item.download_as_string()
        response = json.loads(json_string)
        for singlePageResponse in response["responses"]:
            full_text_annotation = singlePageResponse["fullTextAnnotation"]
            text = full_text_annotation["text"]
            f.write(text)

# Clean up the JSON result files from the output bucket.
blob_list = list(bucket.list_blobs(prefix=outputprefix))
for bl in blob_list:
    delete_blob(outputbucket, bl.name)
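# A minimal sketch of a typed alternative to the json.loads parsing above,
# assuming the proto-plus from_json helper shipped with google-cloud-vision 2.x:
#
#   typed = vision.AnnotateFileResponse.from_json(json_string)
#   for page in typed.responses:
#       f.write(page.full_text_annotation.text)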