import csv
import os

import boto3
import psycopg2
from tqdm import tqdm

OUTPUT_NAME = "issue_7"

# Database connection details
db_host = os.environ.get('DB_HOST', "")
db_name = os.environ.get('DB_SCHEMA', "")
db_user = os.environ.get('DB_USER', "")
db_password = os.environ.get('DB_PASSWORD', "")

# S3 bucket details
s3_bucket_name = os.environ.get('S3_BUCKET_NAME', "")
s3_folder_prefix = 'sbt_invoice'

# S3 access credentials
access_key = os.environ.get('S3_ACCESS_KEY', "")
secret_key = os.environ.get('S3_SECRET_KEY', "")

# Request IDs for filtering
request_ids = [
    'SAPe39e970592394b27a17d4a64c39f7ed0',
    'SAP477a02a21faf41ecbd1a0bb21636e644',
    'SAP459d58a7dba84e7195f5ad8f46fc1530',
    'SAPa5aaa0e1ce8c4824a7b0ded2e550caec',
    'SAP492c063db44049c6b1e44f59c531f8d8',
    'SAP3d0bdd5cb4ce4291b0cb77d7de0a48e9',
    'SAP7e2c673e49c441a991661d1227342131',
    'SAPc26974bcac2649b28227981459a427aa',
    'SAP25b12dde6b854c70b512ac79059ac1d4',
    'SAP_20240102194138_bf4a3cc4e0304d0385126b6592c2632d',
    'SAP_20240102214550_8389ec1b84a249738eed9d2152bf0922',
]

# Connect to the PostgreSQL database
conn = psycopg2.connect(
    host=db_host,
    database=db_name,
    user=db_user,
    password=db_password
)

# Create a cursor
cursor = conn.cursor()

# Generate the placeholder string for the IN clause
placeholders = ','.join(['%s'] * len(request_ids))

# Execute the SELECT query with the filter; only the placeholder string is
# interpolated here, the request IDs themselves are passed as bound parameters
query = f"SELECT * FROM fwd_api_subscriptionrequest WHERE request_id IN ({placeholders})"
cursor.execute(query, request_ids)

# Fetch the filtered data
data = cursor.fetchall()

# Define the CSV file path
csv_file_path = f'{OUTPUT_NAME}.csv'

# Write the data to the CSV file
with open(csv_file_path, 'w', newline='') as csv_file:
    writer = csv.writer(csv_file)
    writer.writerow([desc[0] for desc in cursor.description])  # Column headers
    writer.writerows(data)  # Filtered data rows

# Close the cursor and database connection
cursor.close()
conn.close()

# Download folders from S3
s3_client = boto3.client(
    's3',
    aws_access_key_id=access_key,
    aws_secret_access_key=secret_key
)

for request_id in tqdm(request_ids):
    # Assuming a folder structure like: s3_bucket_name/s3_folder_prefix/request_id/
    folder_key = f"{s3_folder_prefix}/{request_id}/"
    # Local folder to save the downloaded files; makedirs also creates the
    # parent OUTPUT_NAME directory, so a separate call for it is unnecessary
    local_folder_path = f"{OUTPUT_NAME}/{request_id}/"
    os.makedirs(local_folder_path, exist_ok=True)

    # List objects in the S3 folder
    response = s3_client.list_objects_v2(Bucket=s3_bucket_name, Prefix=folder_key)
    objects = response.get('Contents', [])
    for s3_object in objects:
        object_key = s3_object['Key']
        # Skip zero-byte "folder" placeholder keys, which end with '/'
        if object_key.endswith('/'):
            continue
        # Extract the file name from the object key
        local_file_path = local_folder_path + object_key.split('/')[-1]

        # Download the S3 object to the local file
        s3_client.download_file(s3_bucket_name, object_key, local_file_path)
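
# Note (an assumption, not covered by the original script): list_objects_v2
# returns at most 1000 keys per request, so a very large request_id folder
# could be downloaded only partially. A minimal sketch of a paginated listing
# that drops in for the single call above, using boto3's built-in paginator:
#
#     paginator = s3_client.get_paginator('list_objects_v2')
#     for page in paginator.paginate(Bucket=s3_bucket_name, Prefix=folder_key):
#         for s3_object in page.get('Contents', []):
#             object_key = s3_object['Key']
#             ...  # download as above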