"""Export selected subscription requests to CSV and download their S3 folders.

Configuration is read from ``../.env_prod``. For the request IDs listed in
``request_ids`` the script:

1. selects the matching rows of ``fwd_api_subscriptionrequest`` and writes
   them, with a header row, to ``<OUTPUT_NAME>.csv``;
2. downloads every object stored under
   ``<s3_folder_prefix>/<request_id>/`` in the configured bucket into
   ``<OUTPUT_NAME>/<request_id>/``.
"""
import csv
import os
from contextlib import closing

import boto3
import psycopg2
from dotenv import load_dotenv
from tqdm import tqdm

# Must run before the os.environ lookups below so the .env values are visible.
load_dotenv("../.env_prod")

OUTPUT_NAME = "5Jan"

# Database connection details
db_host = os.environ.get('DB_HOST', "")
db_name = os.environ.get('DB_SCHEMA', "")
db_user = os.environ.get('DB_USER', "")
db_password = os.environ.get('DB_PASSWORD', "")

# S3 bucket details
s3_bucket_name = os.environ.get('S3_BUCKET_NAME', "")
s3_folder_prefix = 'sbt_invoice'

# S3 access credentials
access_key = os.environ.get('S3_ACCESS_KEY', "")
secret_key = os.environ.get('S3_SECRET_KEY', "")

# Request IDs for filtering (may contain repeats; they are de-duplicated
# before use, see _unique_in_order).
request_ids = [
    'SAP_20240104082259_85c7f4dd262946d183dbec826fc6709e',
    'SAP_20240104082709_c05319c56fd3422dbf133aee33fc3e10',
    'SAP_20240104091512_23ae1a81f1314be0a27ebeae0e8fa0d7',
    'SAP_20240104091512_23ae1a81f1314be0a27ebeae0e8fa0d7',
    'SAP_20240104091816_025c90b9789246ed811772003622fa0d',
    'SAP_20240104092541_5c71e535f07c4cc8803b45336ec70f77',
    'SAP_20240104100259_5a667d33cb914e7ba5a4447b9e17d649',
    'SAP_20240104101145_a7010bac159f47bc95d5866e6c5f5bdf',
    'SAP_20240104105702_95252229252b4e238add117919ce882a',
    'SAP_20240104112108_34b2cca84a42473ca77bc316e787fe2e',
    'SAP_20240104114038_dd57ecf7982c4a5eaf1409f5ef050fab',
    'SAP_20240104115942_1b77f411791940a4a85c838c2e9931ad',
    'SAP_20240104120746_d63319f4cde343d894f9b89706756a9d',
    'SAP_20240104123607_48d25c04fec6411dbf013c6a19054e77',
    'SAP_20240104130957_ece21bad331b4f2cad0887693331aa3a',
    'SAP_20240104131228_edebee4000ae4bd382feaea5d6c82031',
    'SAP_20240104132641_97909efd013f45e89d83d36a5ea35c52',
    'SAP_20240104133527_ad55f6ee667643ba8ae65e9ef1c32418',
    'SAP_20240104134014_2d2cdbc1b06a44868ce1b32cdb53864f',
    'SAP_20240104134425_9b37555ef8094153838e6048f7c63c9b',
    'SAP_20240104134457_55a1cf1e371146d995c8849cc0ba7c7b',
    'SAP_20240104134609_3f7d308e467d43dbb59a7bcc02e3a7d2',
    'SAP_20240104134709_c708daf83f7e4aa69ab9696afe1a9081',
    'SAP_20240104135007_44b7a30c5e9c41a0b8065ac4e7000223',
    'SAP_20240104141547_7203ddb915274e99a08ae6e54ec49cbd',
    'SAP_20240104141559_62fd19a6179248ecb4ff15b33338b294',
    'SAP_20240104142352_68699cbe140f4264b858981a3ac67e40',
    'SAP_20240104143937_801931cc1f344a4ca8384dfe13d1accc',
    'SAP_20240104144730_3180a8919e604e26a188ce051465c392',
    'SAP_20240104144933_3380f64019634769befed49e9a671bc6',
    'SAP_20240104151239_76ae2f1d02444f7fabbc104eb77fe45f',
    'SAP_20240104151243_61775c88685d434d98bb9fc7a9889b8e',
    'SAP_20240104151243_61775c88685d434d98bb9fc7a9889b8e',
    'SAP_20240104151243_61775c88685d434d98bb9fc7a9889b8e',
    'SAP_20240104151638_a08a61448a58459a8f2209f64e54c213',
    'SAP_20240104152030_479259e84c5b449499df2cb1023e91ac',
    'SAP_20240104160108_a03634c80583454494b77efcdecbcc71',
    'SAP_20240104160108_a03634c80583454494b77efcdecbcc71',
    'SAP_20240104160311_e7cb02a11bbd4ea1906b3758e97f33ab',
    'SAP_20240104161305_89c5518563224ab89345439dffd504a5',
    'SAP_20240104161305_89c5518563224ab89345439dffd504a5',
    'SAP_20240104164022_0b94af24db9d4ebe9af2086a4bd3cd7e',
    'SAP_20240104170837_58165ec9f88d4e4aa3095ba3dda201d7',
    'SAP_20240104171740_10279cfebbf344f184bbb429cb9a15ad',
    'SAP_20240104175202_247892a4dc7f40f28eafac9c2ad85971',
    'SAP_20240104180517_8ce7a1981dc743e08e09284fd904d536',
    'SAP_20240104182034_406bac0ab0684727b9efb1bb9b422026',
    'SAP_20240104182426_92a48bb4b85a4c3abb48e0d7cf727777',
    'SAP_20240104183506_aa1fa7d6774a4509a142a6f4a7b5af29',
    'SAP_20240104185716_f9d464e42c314370910913b37133e6c3',
    'SAP_20240104190220_573244d03bb8408dbca422ff60eb527a',
    'SAP_20240104191236_deedcc588b7b4928a950f7dc2ce4230c',
    'SAP_20240104191236_deedcc588b7b4928a950f7dc2ce4230c',
    'SAP_20240104192614_990bf10c38e144a7bf489548d356720e',
    'SAP_20240104192614_990bf10c38e144a7bf489548d356720e',
    'SAP_20240104212143_f8c1b4a6e6e443fcb5e882c7a5b917f3',
    'SAP_20240104212924_ee1998a60d6848af9576292ac383037f',
    'SAP_20240104214418_f8e1abf808c8499097ecddf014d401c7',
    'SAP_20240104214619_8d27c05a9ce74b738b20195cb816bfbf',
    'SAP_20240104215037_477863cdc0aa4d5fa1f05bbb0ae673ed',
    'SAP_20240104221543_37605982df624324ad2594e268054361',
    'SAP_20240104225026_acacd06ea6de4a738bc47683dc53f378',
    'SAP_20240104235743_b48aa3e744ed428795171d84066adefe',
]


def _unique_in_order(ids):
    """Return *ids* as a list with repeats removed, keeping first-seen order."""
    return list(dict.fromkeys(ids))


def _export_requests_to_csv(ids, csv_file_path):
    """Write the rows whose ``request_id`` is in *ids* to *csv_file_path*.

    The first CSV row holds the column names reported by the cursor.
    The cursor and the connection are closed even if the query or the
    file write raises.

    Args:
        ids: Non-empty list of request IDs to select.
        csv_file_path: Destination path of the CSV file (overwritten).
    """
    # One %s per ID; the values themselves are passed separately so the
    # driver handles quoting.
    placeholders = ','.join(['%s'] * len(ids))
    query = f"SELECT * FROM fwd_api_subscriptionrequest WHERE request_id IN ({placeholders})"

    connection = psycopg2.connect(
        host=db_host,
        database=db_name,
        user=db_user,
        password=db_password,
    )
    with closing(connection), closing(connection.cursor()) as cursor:
        cursor.execute(query, ids)
        rows = cursor.fetchall()
        headers = [desc[0] for desc in cursor.description]

        with open(csv_file_path, 'w', newline='') as csv_file:
            writer = csv.writer(csv_file)
            writer.writerow(headers)  # Write column headers
            writer.writerows(rows)  # Write the filtered data rows


def _download_request_folder(s3_client, request_id):
    """Download every object under the request's S3 prefix into a local folder.

    Objects are saved under ``<OUTPUT_NAME>/<request_id>/`` using only the
    last path component of their key, so objects in nested "sub-folders"
    that share a file name end up at the same local path.

    Args:
        s3_client: boto3 S3 client used for listing and downloading.
        request_id: Request ID whose folder should be fetched.
    """
    # Assuming folder structure like: s3_bucket_name/s3_folder_prefix/request_id/
    folder_key = f"{s3_folder_prefix}/{request_id}/"
    local_folder_path = f"{OUTPUT_NAME}/{request_id}/"
    # Creates OUTPUT_NAME as well, and the folder exists even when the
    # prefix holds no objects.
    os.makedirs(local_folder_path, exist_ok=True)

    # A single list_objects_v2 call returns a limited page of keys; the
    # paginator follows continuation tokens so nothing is silently skipped.
    paginator = s3_client.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=s3_bucket_name, Prefix=folder_key):
        for s3_object in page.get('Contents', []):
            object_key = s3_object['Key']
            file_name = object_key.rsplit('/', 1)[-1]
            if not file_name:
                # Key ends with '/': a zero-byte "folder" marker, not a file.
                continue
            local_file_path = os.path.join(local_folder_path, file_name)
            s3_client.download_file(s3_bucket_name, object_key, local_file_path)


def main():
    """Run the CSV export followed by the S3 downloads."""
    ids = _unique_in_order(request_ids)
    if not ids:
        # "IN ()" is not valid SQL; stop with a clear message instead.
        raise SystemExit("request_ids is empty; nothing to export")

    _export_requests_to_csv(ids, f'{OUTPUT_NAME}.csv')

    s3_client = boto3.client(
        's3',
        aws_access_key_id=access_key,
        aws_secret_access_key=secret_key,
    )
    for request_id in tqdm(ids):
        _download_request_folder(s3_client, request_id)


if __name__ == "__main__":
    main()