# Zephyr Data Alchemist Python GUI
# 👤 Sharing: AI
import io
import tkinter as tk
from contextlib import redirect_stdout
from tkinter import ttk, filedialog, messagebox

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure
class DataAlchemistApp:
    """Tabbed Tkinter application for loading, summarizing, and plotting CSV data.

    The window holds four notebook tabs: data loading (with a preview of the
    first rows), data analysis (descriptive statistics and correlations),
    data visualization (histogram, scatter plot, bar chart), and a console
    tab that keeps a running log of what the application did.
    """

    # Prompt shown when the columns required by a plot type are not selected.
    _SELECTION_PROMPTS = {
        "histogram": "Please select a column for the histogram.",
        "scatter": "Please select columns for the scatter plot.",
        "bar": "Please select a column for the bar chart.",
    }

    def __init__(self, root):
        """Configure *root* and build the notebook with all four tabs.

        Args:
            root: The Tk window that hosts the user interface.
        """
        self.root = root
        self.root.title("Zephyr Data Alchemist")
        self.root.geometry("1200x800")
        # The loaded DataFrame; stays None until a CSV is read successfully.
        self.data = None

        # Notebook for the tabbed interface
        self.notebook = ttk.Notebook(root)
        self.notebook.pack(expand=True, fill="both", padx=10, pady=10)

        # Data Loading tab
        self.data_tab = ttk.Frame(self.notebook)
        self.notebook.add(self.data_tab, text="Data Loading")
        self.create_data_loading_tab(self.data_tab)

        # Data Analysis tab
        self.analysis_tab = ttk.Frame(self.notebook)
        self.notebook.add(self.analysis_tab, text="Data Analysis")
        self.create_data_analysis_tab(self.analysis_tab)

        # Data Visualization tab
        self.visualization_tab = ttk.Frame(self.notebook)
        self.notebook.add(self.visualization_tab, text="Data Visualization")
        self.create_data_visualization_tab(self.visualization_tab)

        # Console Output tab
        self.console_tab = ttk.Frame(self.notebook)
        self.notebook.add(self.console_tab, text="Console Output")
        self.create_console_tab(self.console_tab)

    # ------------------------------------------------------------------
    # Tab construction
    # ------------------------------------------------------------------
    def create_data_loading_tab(self, tab):
        """Add the "Load CSV Data" button and the read-only preview box to *tab*."""
        load_button = ttk.Button(tab, text="Load CSV Data", command=self.load_data)
        load_button.pack(pady=20)

        self.data_preview = tk.Text(tab, height=15, width=120)
        self.data_preview.pack(padx=10, pady=10)
        # Kept disabled so the preview cannot be edited by the user.
        self.data_preview.config(state=tk.DISABLED)

    def create_data_analysis_tab(self, tab):
        """Add the statistics/correlation buttons and their shared output box to *tab*."""
        stats_button = ttk.Button(
            tab, text="Show Basic Statistics", command=self.show_statistics
        )
        stats_button.pack(pady=10)

        corr_button = ttk.Button(
            tab, text="Show Correlation Matrix", command=self.show_correlation
        )
        corr_button.pack(pady=10)

        # Both buttons write their result into this one read-only box.
        self.stats_output = tk.Text(tab, height=15, width=120)
        self.stats_output.pack(padx=10, pady=10)
        self.stats_output.config(state=tk.DISABLED)

    def create_data_visualization_tab(self, tab):
        """Add plot-type radio buttons, column selectors, and the plot canvas to *tab*."""
        # Plot type selection; "histogram" is the initial choice.
        self.visualization_type = tk.StringVar(value="histogram")
        plot_choices = (
            ("Histogram", "histogram"),
            ("Scatter Plot", "scatter"),
            ("Bar Chart", "bar"),
        )
        for label, value in plot_choices:
            ttk.Radiobutton(
                tab, text=label, variable=self.visualization_type, value=value
            ).pack(pady=5)

        # Column selection; the dropdown values are filled in by load_data().
        self.x_column = tk.StringVar()
        self.y_column = tk.StringVar()

        self.x_label = ttk.Label(tab, text="X Column:")
        self.x_label.pack()
        self.x_dropdown = ttk.Combobox(tab, textvariable=self.x_column)
        self.x_dropdown.pack()

        self.y_label = ttk.Label(tab, text="Y Column (for scatter):")
        self.y_label.pack()
        self.y_dropdown = ttk.Combobox(tab, textvariable=self.y_column)
        self.y_dropdown.pack()

        visualize_button = ttk.Button(
            tab, text="Create Visualization", command=self.create_visualization
        )
        visualize_button.pack(pady=10)

        # Build the figure directly instead of through pyplot: a pyplot
        # figure is also registered with pyplot's global figure manager,
        # which an embedded canvas does not need and which can keep the
        # process from exiting cleanly once the window is closed.
        self.fig = Figure(figsize=(10, 6))
        self.ax = self.fig.add_subplot(111)
        self.canvas = FigureCanvasTkAgg(self.fig, master=tab)
        self.canvas_widget = self.canvas.get_tk_widget()
        self.canvas_widget.pack(padx=10, pady=10)

    def create_console_tab(self, tab):
        """Add the read-only log box used by log_to_console() to *tab*."""
        self.console_output = tk.Text(tab, height=20, width=120)
        self.console_output.pack(padx=10, pady=10)
        self.console_output.config(state=tk.DISABLED)

    # ------------------------------------------------------------------
    # Helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _set_text(widget, text):
        """Replace the whole content of a read-only Text *widget* with *text*."""
        # The widget has to be enabled while it is modified, then locked again.
        widget.config(state=tk.NORMAL)
        widget.delete("1.0", tk.END)
        widget.insert(tk.END, text)
        widget.config(state=tk.DISABLED)

    @classmethod
    def _missing_selection_message(cls, vis_type, x_col, y_col):
        """Return the prompt for an incomplete column selection, or None.

        Every known plot type needs an X column; only "scatter" also needs a
        Y column. An unknown *vis_type* has no requirements and yields None.
        """
        prompt = cls._SELECTION_PROMPTS.get(vis_type)
        if prompt is None:
            return None
        needs_y = vis_type == "scatter"
        if x_col and (y_col or not needs_y):
            return None
        return prompt

    @staticmethod
    def _plot_log_message(vis_type, x_col, y_col=""):
        """Return the console message recorded after a plot was drawn."""
        if vis_type == "scatter":
            return f"Scatter created for {x_col} vs {y_col}."
        return f"{vis_type.capitalize()} created for {x_col}."

    def _run_analysis(self, noun, compute, success_message):
        """Show the result of ``compute(self.data)`` in the statistics box.

        Args:
            noun: What is being generated; inserted into the user-facing
                and logged messages (e.g. "statistics").
            compute: Callable taking the loaded DataFrame and returning an
                object with a ``to_string()`` method.
            success_message: Console message logged when the result is shown.
        """
        if self.data is None:
            messagebox.showinfo("Info", "Please load data first.")
            self.log_to_console(
                f"User attempted to view {noun} without loading data."
            )
            return

        try:
            self._set_text(self.stats_output, compute(self.data).to_string())
        except Exception as e:
            # UI boundary: report any failure instead of letting it escape
            # into the Tk callback machinery.
            messagebox.showerror("Error", f"Failed to generate {noun}: {e}")
            self.log_to_console(f"Error generating {noun}: {e}")
        else:
            self.log_to_console(success_message)

    # ------------------------------------------------------------------
    # Button callbacks
    # ------------------------------------------------------------------
    def load_data(self):
        """Ask for a CSV file, load it, and refresh the preview and dropdowns.

        Does nothing when no file is chosen. Any failure while reading or
        displaying the file is shown in an error dialog and logged.
        """
        filepath = filedialog.askopenfilename(filetypes=[("CSV Files", "*.csv")])
        if not filepath:
            return

        try:
            self.data = pd.read_csv(filepath)
            self._set_text(self.data_preview, self.data.head().to_string())

            # Offer the new file's columns and drop selections that were
            # made for a previously loaded file, since those names may not
            # exist in this one.
            columns = list(self.data.columns)
            self.x_dropdown['values'] = columns
            self.y_dropdown['values'] = columns
            self.x_column.set("")
            self.y_column.set("")
        except Exception as e:
            messagebox.showerror("Error", f"Failed to load data: {e}")
            self.log_to_console(f"Error loading data: {e}")
        else:
            self.log_to_console(f"Data loaded successfully from: {filepath}")

    def show_statistics(self):
        """Display ``describe(include="all")`` of the loaded data."""
        self._run_analysis(
            "statistics",
            lambda frame: frame.describe(include="all"),
            "Basic statistics displayed.",
        )

    def show_correlation(self):
        """Display the correlation matrix of the loaded data's numeric columns."""
        self._run_analysis(
            "correlation matrix",
            lambda frame: frame.corr(numeric_only=True),
            "Correlation matrix displayed.",
        )

    def create_visualization(self):
        """Draw the selected plot type for the selected column(s).

        Prompts the user when no data is loaded or when a required column is
        not selected. Plotting failures are shown in an error dialog and
        logged; a successful plot is logged to the console.
        """
        if self.data is None:
            messagebox.showinfo("Info", "Please load data first.")
            self.log_to_console(
                "User attempted to create visualization without loading data."
            )
            return

        vis_type = self.visualization_type.get()
        x_col = self.x_column.get()
        # Only the scatter plot uses a Y column; binding it for every type
        # keeps the success message below from hitting an unbound name.
        y_col = self.y_column.get() if vis_type == "scatter" else ""

        # Validate before touching the axes so that an incomplete selection
        # leaves the previously drawn plot intact.
        prompt = self._missing_selection_message(vis_type, x_col, y_col)
        if prompt is not None:
            messagebox.showinfo("Info", prompt)
            return

        try:
            self.ax.clear()
            if vis_type == "histogram":
                self.data[x_col].hist(ax=self.ax)
                self.ax.set_xlabel(x_col)
                self.ax.set_ylabel("Frequency")
                self.ax.set_title(f"Histogram of {x_col}")
            elif vis_type == "scatter":
                self.data.plot.scatter(x=x_col, y=y_col, ax=self.ax)
                self.ax.set_xlabel(x_col)
                self.ax.set_ylabel(y_col)
                self.ax.set_title(f"Scatter Plot of {x_col} vs {y_col}")
            elif vis_type == "bar":
                value_counts = self.data[x_col].value_counts()
                value_counts.plot.bar(ax=self.ax)
                self.ax.set_xlabel(x_col)
                self.ax.set_ylabel("Count")
                self.ax.set_title(f"Bar Chart of {x_col}")
            self.fig.tight_layout()
            self.canvas.draw()
        except Exception as e:
            messagebox.showerror("Error", f"Failed to create visualization: {e}")
            self.log_to_console(f"Error creating visualization: {e}")
        else:
            self.log_to_console(self._plot_log_message(vis_type, x_col, y_col))

    def log_to_console(self, message):
        """Append *message* as a new line to the console tab and scroll to it."""
        self.console_output.config(state=tk.NORMAL)
        self.console_output.insert(tk.END, message + "\n")
        self.console_output.config(state=tk.DISABLED)
        self.console_output.see(tk.END)  # Scroll to the end
# Text shown in the "Program Details" window.
_DETAILS_TEXT = """
Zephyr Data Alchemist is a versatile tool for data analysis and visualization. It allows users to load CSV data, preview it, perform basic statistical analysis, visualize data using histograms, scatter plots, and bar charts, and view console output for debugging and informational messages.
Key Features:
- **Data Loading:** Load CSV files and preview the data.
- **Data Analysis:** Calculate and display basic descriptive statistics and correlation matrices.
- **Data Visualization:** Create histograms, scatter plots, and bar charts based on selected columns.
- **Console Output:** View a log of program activities and error messages.
Use Cases:
- Exploratory Data Analysis (EDA)
- Data Cleaning and Preprocessing
- Statistical Analysis
- Data Visualization for Presentations
- Quick Data Insights
This program is designed to be a user-friendly alternative to complex, paid data analysis software, providing essential tools for understanding and visualizing data effectively.
"""


def show_details(parent=None):
    """Open a separate window that describes the program and its features.

    Args:
        parent: Widget that owns the new window. When omitted, the window is
            created without an explicit master, so Tkinter falls back to its
            default root window. This removes the dependence on a
            module-level ``root`` variable, which only exists when the file
            is run as a script.
    """
    details_window = tk.Toplevel(parent)
    details_window.title("Program Details")

    # Wrap on word boundaries so the long description lines are not broken
    # in the middle of a word.
    details_text = tk.Text(details_window, height=20, width=80, wrap=tk.WORD)
    details_text.pack(padx=10, pady=10)
    details_text.insert(tk.END, _DETAILS_TEXT)
    # Lock the widget so the description is read-only.
    details_text.config(state=tk.DISABLED)
if __name__ == "__main__":
    # Create the main window and build the application inside it.
    root = tk.Tk()
    app = DataAlchemistApp(root)

    # "Details" button, packed after the notebook, that opens the
    # program-description window.
    details_button = ttk.Button(master=root, command=show_details, text="Details")
    details_button.pack(pady=10)

    # Start the Tk event loop.
    root.mainloop()
# 👁️ Viewed: 5
# Comments