I’m doing simple operation to of downloading the gzip files from S3 bucket to the local directory. I’m extracting those into another local directory and then uploading them back to S3 bucket again into archive folder path. While doing this operation I want to make sure I am processing same set of files that I initially download from S3 bucket which is (f_name) in below code. Now, below code is not uploading those back to S3 , that’s where I’m stuck. But able to download from S3 and extract it into local directory. Can you please help me understand what is wrong with the _uploadFile function?
from boto.s3.connection import S3Connection
from boto.s3.key import *
import os
import os.path
aws_bucket= "event-logs-dev” ## S3 Bucket name
local_download_directory= "/Users/TargetData/Download/test_queue1/“ ## local directory to download the gzip files from S3.
Target_directory_to_extract = "/Users/TargetData/unzip” ##local directory to gunzip the downloaded files.
Target_s3_path_to_upload= "event-logs-dev/data/clean/xact/logs/archive/“ ## S3 bucket path to upload the files.
def decompressAllFilesFromNetfiler(self,aws_bucket,local_download_directory,Target_d irectory_to_extract,Target_s3_path_to_upload):
zipFiles = [f for f in os.listdir(local_download_directory) if re.match(r'.*\.tar\.gz', f)]
for f_name in zipFiles:
if os.path.exists(Target_directory_to_extract+"/"+f_name[:-len('.tar.gz')]) and os.access(Target_directory_to_extract+"/"+f_name[:-len('.tar.gz')], os.R_OK):
print ('File {} already exists!'.format(f_name))
else:
f_name_with_path = os.path.join(local_download_directory, f_name)
os.system('mkdir -p {} && tar vxzf {} -C {}'.format(Target_directory_to_extract, f_name_with_path, Target_directory_to_extract))
print ('Extracted file {}'.format(f_name))
self._uploadFile(aws_bucket,f_name,Target_s3_path_to_upload,Target_directory_to_extract)
def _uploadFile(self, aws_bucket, f_name,Target_s3_path_to_upload,Target_directory_to_extract):
full_key_name = os.path.expanduser(os.path.join(Target_s3_path_to_upload, f_name))
path = os.path.expanduser(os.path.join(Target_directory_to_extract, f_name))
try:
print "Uploaded extracted file to: %s" % (full_key_name)
key = aws_bucket.new_key(full_key_name)
key.set_contents_from_filename(path)
except:
if full_key_name is None:
print "Error uploading”
Currently, the output prints that "Uploaded extracted file to: move-bi-event-logs-dev/data/clean/xact/logs/archive/1442235602129200000.tar.gz”, but nothing is uploaded to S3 bucket. Your help is greatly appreciated!! Thank you in advance!
Aucun commentaire:
Enregistrer un commentaire