Copy inspection data between Swift buckets

This script assumes that you have S3 credentials for the buckets; on Ceph, the RADOS Gateway exposes the same containers over both the Swift and S3 APIs, which is why boto3 works here. For each JSON object in the source bucket (keys ending in '-UNPROCESSED' are skipped), the script splits out the 'inventory' section and writes it and the remaining plugin data as two separate objects to the destination bucket. The only setup required is filling in the five configuration parameters at the top of the script.
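
For example, a filled-in configuration might look like the following. All values here are placeholders for illustration, not real bucket names, endpoints, or credentials:

SOURCE_BUCKET = 'inspection-data'                # bucket with the combined JSON files
DEST_BUCKET = 'inspection-data-split'            # bucket for the -inventory/-plugin objects
CEPH_ENDPOINT = 'https://rgw.example.com:8080'   # RADOS Gateway S3 endpoint
CEPH_ACCESS_KEY = 'S3USERACCESSKEY'              # S3 access key issued by the gateway
CEPH_SECRET_KEY = 'S3USERSECRETKEY'              # matching secret key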

import boto3
import json
from botocore.exceptions import ClientError

# Configure your S3 buckets and Ceph endpoint
SOURCE_BUCKET = ''
DEST_BUCKET = ''

# Ceph S3 configuration
CEPH_ENDPOINT = ''
CEPH_ACCESS_KEY = ''
CEPH_SECRET_KEY = ''

def get_s3_client():
    """Initialize and return S3 client for Ceph"""
    session = boto3.Session(
        aws_secret_access_key=CEPH_SECRET_KEY,
        aws_access_key_id=CEPH_ACCESS_KEY)
    return session.client(
        's3',
        endpoint_url=CEPH_ENDPOINT)

def list_files_to_process(s3_client, bucket):
    """List all files in bucket that don't end with '-UNPROCESSED'"""
    files = []
    try:
        paginator = s3_client.get_paginator('list_objects_v2')
        for page in paginator.paginate(Bucket=bucket):
            if 'Contents' in page:
                for obj in page['Contents']:
                    key = obj['Key']
                    if not key.endswith('-UNPROCESSED'):
                        files.append(key)
    except ClientError as e:
        print(f"Error listing files: {e}")
        raise
    return files

def load_json_from_s3(s3_client, bucket, key):
    """Load and parse JSON file from S3"""
    try:
        response = s3_client.get_object(Bucket=bucket, Key=key)
        content = response['Body'].read().decode('utf-8')
        return json.loads(content)
    except ClientError as e:
        print(f"Error reading {key}: {e}")
        raise
    except json.JSONDecodeError as e:
        print(f"Error parsing JSON from {key}: {e}")
        raise

def save_json_to_s3(s3_client, bucket, key, data):
    """Save JSON data to S3"""
    try:
        s3_client.put_object(
            Bucket=bucket,
            Key=key,
            Body=json.dumps(data, indent=2),
            ContentType='application/json'
        )
        print(f"Saved: {key}")
    except ClientError as e:
        print(f"Error saving {key}: {e}")
        raise

def process_files():
    """Main processing function"""
    s3_client = get_s3_client()
    print(f"Fetching files from {SOURCE_BUCKET}...")
    files = list_files_to_process(s3_client, SOURCE_BUCKET)
    print(f"Found {len(files)} files to process")

    # Process each file
    for file_key in files:
        print(f"\nProcessing: {file_key}")

        try:
            # Load JSON data
            data = load_json_from_s3(s3_client, SOURCE_BUCKET, file_key)

            # Split the data: pop out the 'inventory' section;
            # everything that remains is the plugin payload
            inventory = data.pop('inventory', None)
            plugin = data

            # Check if inventory key existed
            if inventory is None:
                print(f"Warning: 'inventory' key not found in {file_key}")

            # Generate output filenames
            inventory_key = f"{file_key}-inventory"
            plugin_key = f"{file_key}-plugin"

            # Save split files (plugin is always a dict here, so no None check is needed)
            if inventory is not None:
                save_json_to_s3(s3_client, DEST_BUCKET, inventory_key, inventory)
            save_json_to_s3(s3_client, DEST_BUCKET, plugin_key, plugin)

        except Exception as e:
            print(f"Failed to process {file_key}: {e}")
            continue

    print("\nProcessing complete!")

if __name__ == "__main__":
    process_files()
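
Once the script has run, you can sanity-check the destination bucket by counting the split objects. The sketch below reuses get_s3_client() and DEST_BUCKET from the script above; the verify_split helper is illustrative and not part of the original script:

from collections import Counter

def verify_split(s3_client, bucket):
    """Count the -inventory and -plugin objects in a bucket."""
    counts = Counter()
    paginator = s3_client.get_paginator('list_objects_v2')
    for page in paginator.paginate(Bucket=bucket):
        for obj in page.get('Contents', []):
            if obj['Key'].endswith('-inventory'):
                counts['inventory'] += 1
            elif obj['Key'].endswith('-plugin'):
                counts['plugin'] += 1
    print(f"{counts['inventory']} inventory / {counts['plugin']} plugin objects")

verify_split(get_s3_client(), DEST_BUCKET)

If both counts match the number of processed source files, the split completed cleanly.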