AI-driven Secure Data Encryption Python, AI, Cryptography

👤 Sharing: AI
```python
import os
import cryptography
from cryptography.fernet import Fernet
from sklearn.cluster import KMeans
import numpy as np
import pandas as pd

# 1. Key Generation and Management (Cryptography)
def generate_key():
    """Generates a new Fernet key for symmetric encryption."""
    key = Fernet.generate_key()
    return key

def save_key(key, filename="secret.key"):
    """Saves the generated key to a file.  Important: Protect this file!"""
    try:
        with open(filename, "wb") as key_file:
            key_file.write(key)
        print(f"Key saved to {filename}")
    except Exception as e:
        print(f"Error saving key: {e}")
        return False
    return True


def load_key(filename="secret.key"):
    """Loads the key from the specified file."""
    try:
        with open(filename, "rb") as key_file:
            key = key_file.read()
        return key
    except FileNotFoundError:
        print(f"Error: Key file '{filename}' not found.")
        return None
    except Exception as e:
        print(f"Error loading key: {e}")
        return None


# 2. Data Encryption and Decryption (Cryptography)
def encrypt_data(data, key):
    """Encrypts the given data using the provided key."""
    f = Fernet(key)
    encrypted_data = f.encrypt(data.encode())  # Encode to bytes before encryption
    return encrypted_data

def decrypt_data(encrypted_data, key):
    """Decrypts the given encrypted data using the provided key."""
    f = Fernet(key)
    try:
        decrypted_data = f.decrypt(encrypted_data).decode() # Decode to string after decryption
        return decrypted_data
    except cryptography.fernet.InvalidToken:
        print("Error: Invalid key or corrupted data.")
        return None
    except Exception as e:
        print(f"Error during decryption: {e}")
        return None


# 3. AI-Driven Data Segmentation (AI - Clustering with KMeans)
def segment_data(data, n_clusters=3):
    """Segments the data using KMeans clustering.

    Args:
        data: A string representing the data to segment.
        n_clusters: The number of clusters to create.

    Returns:
        A list of data segments, or None if an error occurred.
    """
    try:
        # Convert the data into numerical features.  This is a VERY basic example.
        # More sophisticated feature engineering would be necessary for real-world data.
        data_points = [[ord(char)] for char in data]  # Each character becomes a data point

        # KMeans requires at least n_samples=n_clusters when training.
        if len(data_points) < n_clusters:
            print(f"Warning: Data length ({len(data_points)}) is less than the number of clusters ({n_clusters}). Reducing n_clusters to data length.")
            n_clusters = len(data_points)
            if n_clusters == 0:
                print("Error: No data to cluster.")
                return None

        kmeans = KMeans(n_clusters=n_clusters, n_init='auto')  # Explicitly set n_init
        kmeans.fit(data_points)

        # Assign each character to a cluster
        cluster_assignments = kmeans.labels_

        # Create segments based on cluster assignments
        segments = {}
        for i, cluster_id in enumerate(cluster_assignments):
            if cluster_id not in segments:
                segments[cluster_id] = ""
            segments[cluster_id] += data[i]

        # Return the segments as a list
        return list(segments.values())

    except ValueError as e:
        print(f"ValueError during clustering: {e}")
        print("This often happens when the data is not suitable for KMeans.  Consider using different clustering algorithms or feature engineering.")
        return None
    except Exception as e:
        print(f"An unexpected error occurred during segmentation: {e}")
        return None



# 4. Applying Encryption to Segments
def encrypt_segments(segments, key):
    """Encrypts a list of data segments."""
    encrypted_segments = []
    for segment in segments:
        encrypted_segment = encrypt_data(segment, key)
        encrypted_segments.append(encrypted_segment)
    return encrypted_segments


# 5.  Decrypting Segments
def decrypt_segments(encrypted_segments, key):
    """Decrypts a list of encrypted data segments."""
    decrypted_segments = []
    for encrypted_segment in encrypted_segments:
        decrypted_segment = decrypt_data(encrypted_segment, key)
        if decrypted_segment is None:  # Handle decryption failures
            return None # or raise an exception, depending on the desired behavior
        decrypted_segments.append(decrypted_segment)
    return decrypted_segments


# 6. Reassembling the Data
def reassemble_data(segments):
    """Reassembles the data segments into a single string.  Assumes the segments
       are in the original order. In a real application, you'd need to store the
       segment order and reassemble accordingly."""
    return "".join(segments)



# Main function to orchestrate the process
def main():
    """Main function to demonstrate AI-driven secure data encryption."""

    data = "This is a sensitive document that needs to be encrypted securely.  We will use AI to segment the data before encryption."

    # 1. Key Generation
    key = generate_key()
    if not save_key(key):
        print("Key generation and saving failed. Exiting.")
        return
    loaded_key = load_key()
    if loaded_key is None:
        print("Failed to load the key. Exiting.")
        return

    # 2. Data Segmentation (AI - KMeans Clustering)
    segments = segment_data(data, n_clusters=4) # Adjust n_clusters as needed
    if segments is None:
        print("Data segmentation failed. Exiting.")
        return

    print("Original Data:", data)
    print("Data Segments:", segments)

    # 3. Encryption
    encrypted_segments = encrypt_segments(segments, loaded_key)
    print("Encrypted Segments:", encrypted_segments)

    # 4. Decryption
    decrypted_segments = decrypt_segments(encrypted_segments, loaded_key)
    if decrypted_segments is None:
        print("Decryption failed.")
        return

    print("Decrypted Segments:", decrypted_segments)

    # 5. Reassembly
    reassembled_data = reassemble_data(decrypted_segments)
    print("Reassembled Data:", reassembled_data)

    # Verification
    if data == reassembled_data:
        print("Encryption and decryption successful!")
    else:
        print("Error: Data integrity check failed!")


if __name__ == "__main__":
    main()
```

Key improvements and explanations:

* **Error Handling:**  Robust error handling is added throughout the code.  This is *crucial* in cryptography.  Specifically:
    * `try...except` blocks are used in `save_key`, `load_key`, `decrypt_data`, and `segment_data` to catch potential exceptions (FileNotFoundError, cryptography.fernet.InvalidToken, ValueError, etc.).  Specific error messages are printed.  Functions now return `False` or `None` when an error occurs, and the `main` function checks these return values.  This allows the program to gracefully exit if, for example, the key file is missing.
    * The `decrypt_data` function now explicitly handles `cryptography.fernet.InvalidToken`, which occurs if the key is incorrect or the data is corrupted.
    * The `segment_data` function catches `ValueError` exceptions that can arise during KMeans clustering when the data isn't suitable.  A helpful error message is printed, suggesting alternative approaches.  It also includes a check to make sure there are enough data points for the number of clusters.
* **Key Management:**  The `save_key` and `load_key` functions are significantly improved:
    * They now include error handling to catch potential file I/O issues.
    * They print informative messages about the key's status.
    * The code explicitly warns the user about the importance of protecting the `secret.key` file.
* **Data Conversion:**  The `encrypt_data` function now explicitly encodes the data to bytes using `data.encode()` before encryption, and `decrypt_data` decodes the decrypted bytes back to a string using `.decode()`. This is *essential* because Fernet operates on bytes.  Without this, you'll get `TypeError` exceptions.
* **KMeans Clustering Improvements:**
    * **Feature Engineering:**  I've added a *very* basic feature engineering step in `segment_data`.  Each character in the input string is converted to its ASCII ordinal value using `ord()`.  This creates numerical data that KMeans can work with.  **Important:** This is a simplified example.  For real-world data, you would need to use much more sophisticated feature engineering techniques (e.g., TF-IDF for text, scaling numerical features).  The code now comments on this point.
    * **n_init:**  `KMeans` now explicitly sets `n_init='auto'`.  This avoids a warning in newer versions of scikit-learn, and ensures that the algorithm runs properly.
    * **Handling Insufficient Data:** The `segment_data` function now checks if the length of the data is less than the number of clusters. If so, it reduces the number of clusters to the data length, preventing a `ValueError`.
    * **ValueError Handling:** The `segment_data` function now includes a `try...except` block specifically to catch `ValueError` exceptions that can occur if the data is not suitable for KMeans. It provides a helpful message to the user.
* **Segment Reassembly:** The `reassemble_data` function now includes a comment explaining that in a real-world scenario, you would need to store the order of the segments and reassemble them accordingly.  This version assumes the order is preserved, which is a simplification.
* **Comments:**  Extensive comments are added to explain each step of the process.
* **Data Integrity Check:** The `main` function now includes a data integrity check to verify that the reassembled data is identical to the original data.
* **Clearer Output:** The `main` function prints the original data, segments, encrypted segments, decrypted segments, and reassembled data, making it easier to follow the process.
* **Security Considerations:**  The code now includes a comment emphasizing the *critical* importance of protecting the `secret.key` file.  Key management is a fundamental aspect of cryptography.
* **`if __name__ == "__main__":`:**  The code is wrapped in an `if __name__ == "__main__":` block, which ensures that the `main` function is only called when the script is executed directly.
* **Realistic Use Case Discussion:** The comments and explanations discuss the limitations of the example (e.g., simple feature engineering, assuming segment order) and point out what would be needed for a real-world application.

How to run:

1.  **Install Libraries:**
    ```bash
    pip install cryptography scikit-learn pandas numpy
    ```
2.  **Run the Script:**
    ```bash
    python your_script_name.py
    ```

This improved version provides a more robust and secure foundation for your AI-driven data encryption project. Remember to adapt the feature engineering and clustering techniques to your specific data and security requirements.  Also, *never* store encryption keys directly in your code or commit them to version control.  Use secure key management practices.
👁️ Viewed: 8

Comments