Zephyr Data Alchemist Python GUI

👤 Sharing: AI
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
import seaborn as sns
import io
from contextlib import redirect_stdout

class DataAlchemistApp:
    """Tkinter GUI for loading, summarising and plotting CSV data.

    The window holds a four-tab notebook: "Data Loading" (file picker plus a
    preview of the first rows), "Data Analysis" (descriptive statistics and
    correlation matrix), "Data Visualization" (histogram, scatter plot or bar
    chart on an embedded Matplotlib canvas) and "Console Output" (a read-only
    activity log).

    Attributes:
        root: The Tk window the application was built in.
        data: The loaded ``pandas.DataFrame``, or ``None`` until a CSV file
            has been read successfully.
    """

    def __init__(self, root):
        """Build the notebook and all four tabs inside *root*."""
        self.root = root
        self.root.title("Zephyr Data Alchemist")
        self.root.geometry("1200x800")

        self.data = None

        # Notebook for Tabbed Interface
        self.notebook = ttk.Notebook(root)
        self.notebook.pack(expand=True, fill="both", padx=10, pady=10)

        # Data Loading Tab
        self.data_tab = ttk.Frame(self.notebook)
        self.notebook.add(self.data_tab, text="Data Loading")
        self.create_data_loading_tab(self.data_tab)

        # Data Analysis Tab
        self.analysis_tab = ttk.Frame(self.notebook)
        self.notebook.add(self.analysis_tab, text="Data Analysis")
        self.create_data_analysis_tab(self.analysis_tab)

        # Data Visualization Tab
        self.visualization_tab = ttk.Frame(self.notebook)
        self.notebook.add(self.visualization_tab, text="Data Visualization")
        self.create_data_visualization_tab(self.visualization_tab)

        # Console Output
        self.console_tab = ttk.Frame(self.notebook)
        self.notebook.add(self.console_tab, text="Console Output")
        self.create_console_tab(self.console_tab)

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _set_text(widget, content):
        """Replace the contents of a read-only Text *widget* with *content*.

        The widget is switched to NORMAL only for the duration of the edit
        and is left DISABLED so the user cannot type into it.
        """
        widget.config(state=tk.NORMAL)
        widget.delete("1.0", tk.END)
        widget.insert(tk.END, content)
        widget.config(state=tk.DISABLED)

    @staticmethod
    def _plot_log_message(vis_type, x_col, y_col=""):
        """Return the console line describing a plot that was just drawn.

        Args:
            vis_type: ``"histogram"``, ``"scatter"`` or ``"bar"``.
            x_col: Name of the column plotted on the x axis.
            y_col: Name of the y column; only used for scatter plots.
        """
        if vis_type == "scatter":
            return f"Scatter created for {x_col} vs {y_col}."
        return f"{vis_type.capitalize()} created for {x_col}."

    # ------------------------------------------------------------------
    # Tab construction
    # ------------------------------------------------------------------
    def create_data_loading_tab(self, tab):
        """Populate *tab* with the load button and the data preview box."""
        # Load Button
        load_button = ttk.Button(tab, text="Load CSV Data", command=self.load_data)
        load_button.pack(pady=20)

        # Data Preview
        self.data_preview = tk.Text(tab, height=15, width=120)
        self.data_preview.pack(padx=10, pady=10)
        self.data_preview.config(state=tk.DISABLED)

    def create_data_analysis_tab(self, tab):
        """Populate *tab* with the analysis buttons and their output box."""
        # Basic Statistics Button
        stats_button = ttk.Button(tab, text="Show Basic Statistics", command=self.show_statistics)
        stats_button.pack(pady=10)

        # Correlation Matrix Button
        corr_button = ttk.Button(tab, text="Show Correlation Matrix", command=self.show_correlation)
        corr_button.pack(pady=10)

        # Statistics Output (shared by both buttons)
        self.stats_output = tk.Text(tab, height=15, width=120)
        self.stats_output.pack(padx=10, pady=10)
        self.stats_output.config(state=tk.DISABLED)

    def create_data_visualization_tab(self, tab):
        """Populate *tab* with plot-type radios, column pickers and the canvas."""
        # Imported here because only this method needs it; matplotlib is
        # already a dependency of the module.
        from matplotlib.figure import Figure

        # Visualization Options
        self.visualization_type = tk.StringVar(value="histogram")
        histogram_radio = ttk.Radiobutton(tab, text="Histogram", variable=self.visualization_type, value="histogram")
        scatter_radio = ttk.Radiobutton(tab, text="Scatter Plot", variable=self.visualization_type, value="scatter")
        bar_radio = ttk.Radiobutton(tab, text="Bar Chart", variable=self.visualization_type, value="bar")

        histogram_radio.pack(pady=5)
        scatter_radio.pack(pady=5)
        bar_radio.pack(pady=5)

        # Column Selection
        self.x_column = tk.StringVar()
        self.y_column = tk.StringVar()

        self.x_label = ttk.Label(tab, text="X Column:")
        self.x_label.pack()
        self.x_dropdown = ttk.Combobox(tab, textvariable=self.x_column)
        self.x_dropdown.pack()

        self.y_label = ttk.Label(tab, text="Y Column (for scatter):")
        self.y_label.pack()
        self.y_dropdown = ttk.Combobox(tab, textvariable=self.y_column)
        self.y_dropdown.pack()

        # Create Visualization Button
        visualize_button = ttk.Button(tab, text="Create Visualization", command=self.create_visualization)
        visualize_button.pack(pady=10)

        # Canvas for Visualization.
        # The figure is constructed directly rather than via pyplot so that
        # it is owned solely by the embedded canvas and is not also
        # registered with pyplot's global figure manager.
        self.fig = Figure(figsize=(10, 6))
        self.ax = self.fig.add_subplot(111)
        self.canvas = FigureCanvasTkAgg(self.fig, master=tab)
        self.canvas_widget = self.canvas.get_tk_widget()
        self.canvas_widget.pack(padx=10, pady=10)

    def create_console_tab(self, tab):
        """Populate *tab* with the read-only console log box."""
        self.console_output = tk.Text(tab, height=20, width=120)
        self.console_output.pack(padx=10, pady=10)
        self.console_output.config(state=tk.DISABLED)

    # ------------------------------------------------------------------
    # Actions
    # ------------------------------------------------------------------
    def load_data(self):
        """Ask for a CSV file, load it and refresh the preview and pickers.

        Does nothing if the file dialog is cancelled. Any failure while
        reading or previewing the file is shown in an error dialog and
        written to the console tab.
        """
        filepath = filedialog.askopenfilename(filetypes=[("CSV Files", "*.csv")])
        if not filepath:
            return  # dialog was cancelled

        try:
            self.data = pd.read_csv(filepath)
            self._set_text(self.data_preview, self.data.head().to_string())

            # Update dropdown options
            columns = list(self.data.columns)
            self.x_dropdown['values'] = columns
            self.y_dropdown['values'] = columns

            # Drop selections left over from a previously loaded file when
            # the new file has no column of that name.
            for selection in (self.x_column, self.y_column):
                if selection.get() not in columns:
                    selection.set("")

            self.log_to_console(f"Data loaded successfully from: {filepath}")
        except Exception as e:
            # Top-level GUI boundary: report instead of crashing the callback.
            messagebox.showerror("Error", f"Failed to load data: {e}")
            self.log_to_console(f"Error loading data: {e}")

    def show_statistics(self):
        """Show ``DataFrame.describe(include='all')`` in the analysis tab."""
        if self.data is None:
            messagebox.showinfo("Info", "Please load data first.")
            self.log_to_console("User attempted to view statistics without loading data.")
            return

        try:
            description = self.data.describe(include='all')
            self._set_text(self.stats_output, description.to_string())
            self.log_to_console("Basic statistics displayed.")
        except Exception as e:
            messagebox.showerror("Error", f"Failed to generate statistics: {e}")
            self.log_to_console(f"Error generating statistics: {e}")

    def show_correlation(self):
        """Show the correlation matrix of the numeric columns in the analysis tab."""
        if self.data is None:
            messagebox.showinfo("Info", "Please load data first.")
            self.log_to_console("User attempted to view correlation matrix without loading data.")
            return

        try:
            correlation_matrix = self.data.corr(numeric_only=True)
            self._set_text(self.stats_output, correlation_matrix.to_string())
            self.log_to_console("Correlation matrix displayed.")
        except Exception as e:
            messagebox.showerror("Error", f"Failed to generate correlation matrix: {e}")
            self.log_to_console(f"Error generating correlation matrix: {e}")

    def create_visualization(self):
        """Draw the selected plot type for the selected column(s).

        Histogram and bar chart use the X column only; scatter needs both X
        and Y. If no data is loaded or a required column is not selected, an
        info dialog is shown and the current plot is left untouched. Plotting
        errors are shown in an error dialog and written to the console tab.
        """
        if self.data is None:
            messagebox.showinfo("Info", "Please load data first.")
            self.log_to_console("User attempted to create visualization without loading data.")
            return

        vis_type = self.visualization_type.get()
        x_col = self.x_column.get()
        y_col = self.y_column.get()

        # Validate the selection *before* clearing the axes, so that a
        # missing column does not wipe the plot that is currently shown.
        if vis_type == "scatter":
            selection_ok = bool(x_col and y_col)
            prompt = "Please select columns for the scatter plot."
        elif vis_type == "bar":
            selection_ok = bool(x_col)
            prompt = "Please select a column for the bar chart."
        else:
            selection_ok = bool(x_col)
            prompt = "Please select a column for the histogram."
        if not selection_ok:
            messagebox.showinfo("Info", prompt)
            return

        try:
            self.ax.clear()

            if vis_type == "scatter":
                self.data.plot.scatter(x=x_col, y=y_col, ax=self.ax)
                self.ax.set_xlabel(x_col)
                self.ax.set_ylabel(y_col)
                self.ax.set_title(f"Scatter Plot of {x_col} vs {y_col}")
            elif vis_type == "bar":
                value_counts = self.data[x_col].value_counts()
                value_counts.plot.bar(ax=self.ax)
                self.ax.set_xlabel(x_col)
                self.ax.set_ylabel("Count")
                self.ax.set_title(f"Bar Chart of {x_col}")
            else:
                self.data[x_col].hist(ax=self.ax)
                self.ax.set_xlabel(x_col)
                self.ax.set_ylabel("Frequency")
                self.ax.set_title(f"Histogram of {x_col}")

            self.fig.tight_layout()
            self.canvas.draw()
            # The message is built from values that are always bound; the
            # y column is only mentioned for scatter plots.
            self.log_to_console(self._plot_log_message(vis_type, x_col, y_col))
        except Exception as e:
            messagebox.showerror("Error", f"Failed to create visualization: {e}")
            self.log_to_console(f"Error creating visualization: {e}")

    def log_to_console(self, message):
        """Append *message* as a new line to the console tab and scroll to it."""
        self.console_output.config(state=tk.NORMAL)
        self.console_output.insert(tk.END, message + "\n")
        self.console_output.config(state=tk.DISABLED)
        self.console_output.see(tk.END)  # Scroll to the end

def show_details():
    """Open a pop-up window with a read-only description of the program.

    The window is created as a child of the module-level ``root`` window.
    """
    about = """
Zephyr Data Alchemist is a versatile tool for data analysis and visualization. It allows users to load CSV data, preview it, perform basic statistical analysis, visualize data using histograms, scatter plots, and bar charts, and view console output for debugging and informational messages.

Key Features:
- **Data Loading:** Load CSV files and preview the data.
- **Data Analysis:** Calculate and display basic descriptive statistics and correlation matrices.
- **Data Visualization:** Create histograms, scatter plots, and bar charts based on selected columns.
- **Console Output:** View a log of program activities and error messages.

Use Cases:
- Exploratory Data Analysis (EDA)
- Data Cleaning and Preprocessing
- Statistical Analysis
- Data Visualization for Presentations
- Quick Data Insights

This program is designed to be a user-friendly alternative to complex, paid data analysis software, providing essential tools for understanding and visualizing data effectively.
"""
    popup = tk.Toplevel(root)
    popup.title("Program Details")

    text_box = tk.Text(popup, height=20, width=80)
    text_box.pack(padx=10, pady=10)
    text_box.insert(tk.END, about)
    # Lock the widget once filled so the description cannot be edited.
    text_box.config(state=tk.DISABLED)

if __name__ == "__main__":
    # Script entry point: build the main window, add the Details button and
    # hand control to the Tk event loop.
    root = tk.Tk()
    app = DataAlchemistApp(root)

    # Details Button
    # DataAlchemistApp packs its notebook first with expand=True inside a
    # window fixed at 1200x800. The packer hands out space in packing order,
    # so a button packed after the notebook only gets what the notebook
    # leaves over, which can be nothing once the plot canvas drives the
    # notebook's requested height past the window height. Packing the button
    # on the bottom edge *before* the notebook reserves its strip first.
    details_button = ttk.Button(root, text="Details", command=show_details)
    details_button.pack(side=tk.BOTTOM, pady=10, before=app.notebook)

    root.mainloop()
👁️ Viewed: 5

Comments