Xenodochial Data Sculptor Python GUI

👤 Sharing: AI
import tkinter as tk
from tkinter import filedialog, messagebox
from tkinter import ttk

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg
from matplotlib.figure import Figure

class DataSculptorApp:
    """Tkinter application for loading, plotting, and transforming CSV data.

    The window hosts a notebook with four tabs: a CSV loader, a
    visualization tab (histograms plus one scatter plot), a transformation
    tab, and an "About" tab. The loaded ``pandas.DataFrame`` is stored in
    ``self.data``; it is ``None`` until a file has been loaded successfully.
    """

    # Labels shown in the transformation listbox; also used to look up which
    # transformations the user selected.
    NORMALIZE = "Normalize Data"
    FILL_MISSING = "Fill Missing Values"
    ONE_HOT = "One-Hot Encode Categorical Features"

    def __init__(self, master):
        """Build the notebook and all of its tabs inside *master*.

        Args:
            master: The Tk root (or other container widget) that hosts the
                application; its window title is set here.
        """
        self.master = master
        master.title("Xenodochial Data Sculptor")

        # Loaded DataFrame; stays None until load_csv succeeds.
        self.data = None

        self.notebook = ttk.Notebook(master)
        self.notebook.pack(expand=True, fill="both")

        self.create_data_loader_tab()
        self.create_data_visualization_tab()
        self.create_data_transformation_tab()
        self.create_about_tab()

    # ------------------------------------------------------------------
    # Tab construction
    # ------------------------------------------------------------------
    def create_data_loader_tab(self) -> None:
        """Create the "Data Loader" tab with a load button and a status label."""
        self.data_loader_tab = ttk.Frame(self.notebook)
        self.notebook.add(self.data_loader_tab, text="Data Loader")

        self.load_button = ttk.Button(
            self.data_loader_tab, text="Load CSV Data", command=self.load_csv
        )
        self.load_button.pack(pady=20)

        self.loaded_file_label = tk.Label(self.data_loader_tab, text="No file loaded")
        self.loaded_file_label.pack()

    def create_data_visualization_tab(self) -> None:
        """Create the "Data Visualization" tab and the frame that holds plots."""
        self.data_visualization_tab = ttk.Frame(self.notebook)
        self.notebook.add(self.data_visualization_tab, text="Data Visualization")

        self.visualize_button = ttk.Button(
            self.data_visualization_tab,
            text="Generate Visualizations",
            command=self.generate_visualizations,
        )
        self.visualize_button.pack(pady=20)

        # Plot canvases are packed into this frame and destroyed on each
        # regeneration.
        self.visualization_frame = tk.Frame(self.data_visualization_tab)
        self.visualization_frame.pack()

    def create_data_transformation_tab(self) -> None:
        """Create the "Data Transformation" tab with a multi-select listbox."""
        self.data_transformation_tab = ttk.Frame(self.notebook)
        self.notebook.add(self.data_transformation_tab, text="Data Transformation")

        self.transformation_label = tk.Label(
            self.data_transformation_tab, text="Available Transformations:"
        )
        self.transformation_label.pack(pady=10)

        self.transformation_listbox = tk.Listbox(
            self.data_transformation_tab, selectmode=tk.MULTIPLE
        )
        for label in (self.NORMALIZE, self.FILL_MISSING, self.ONE_HOT):
            self.transformation_listbox.insert(tk.END, label)
        self.transformation_listbox.pack(pady=10)

        self.apply_transformations_button = ttk.Button(
            self.data_transformation_tab,
            text="Apply Transformations",
            command=self.apply_transformations,
        )
        self.apply_transformations_button.pack(pady=10)

    def create_about_tab(self) -> None:
        """Create the "About" tab containing a read-only description."""
        self.about_tab = ttk.Frame(self.notebook)
        self.notebook.add(self.about_tab, text="About")

        self.about_text = tk.Text(self.about_tab, wrap=tk.WORD, height=10, width=60)
        self.about_text.insert(
            tk.END,
            "Xenodochial Data Sculptor is a versatile tool for data loading, "
            "visualization, and transformation. It allows users to load CSV data, "
            "generate basic visualizations like histograms and scatter plots, and "
            "apply transformations such as normalization, missing value imputation, "
            "and one-hot encoding. This tool is designed to simplify the initial "
            "steps of data analysis and exploration.",
        )
        # Disable editing only after the text has been inserted; a disabled
        # Text widget ignores insert().
        self.about_text.config(state=tk.DISABLED)
        self.about_text.pack(padx=20, pady=20)

    # ------------------------------------------------------------------
    # Actions
    # ------------------------------------------------------------------
    def load_csv(self) -> None:
        """Ask the user for a CSV file and load it into ``self.data``.

        Cancelling the dialog leaves the current data untouched. Any error
        raised while reading is reported in an error dialog; in that case
        ``self.data`` keeps its previous value.
        """
        filename = filedialog.askopenfilename(filetypes=[("CSV Files", "*.csv")])
        if not filename:
            return

        # Broad catch is deliberate: this is the UI boundary and every read
        # failure (missing file, parse error, encoding error, ...) should end
        # up in a dialog rather than a traceback.
        try:
            self.data = pd.read_csv(filename)
        except Exception as e:
            messagebox.showerror("Error", f"Could not load file: {e}")
        else:
            self.loaded_file_label.config(text=f"Loaded: {filename}")

    def generate_visualizations(self) -> None:
        """Render histograms and a scatter plot of the loaded data.

        Existing plots are removed first. The scatter plot uses the first two
        numeric columns and is skipped (with an info dialog) when fewer than
        two exist. Each plot is built independently, so a failure in one is
        reported without preventing the other.
        """
        if self.data is None:
            messagebox.showinfo("Info", "Please load data first.")
            return

        for widget in self.visualization_frame.winfo_children():
            widget.destroy()

        # Figures are created directly instead of through pyplot: pyplot keeps
        # every figure it creates in a global registry until it is explicitly
        # closed, so each click would otherwise accumulate figures.

        # Histogram of every plottable column.
        try:
            fig_hist = Figure(figsize=(6, 4))
            ax_hist = fig_hist.add_subplot(111)
            # With several columns pandas clears the figure that owns the
            # axes and lays out one subplot per column on it.
            self.data.hist(ax=ax_hist)
            fig_hist.tight_layout()
            self._embed_figure(fig_hist)
        except Exception as e:
            messagebox.showerror("Error", f"Could not generate histogram: {e}")

        # Scatter plot of the first two numeric columns.
        try:
            numerical_cols = self.data.select_dtypes(include=["number"]).columns
            if len(numerical_cols) < 2:
                messagebox.showinfo(
                    "Info", "Not enough numerical columns for scatter plot."
                )
                return

            fig_scatter = Figure(figsize=(6, 4))
            ax_scatter = fig_scatter.add_subplot(111)
            sns.scatterplot(
                x=numerical_cols[0],
                y=numerical_cols[1],
                data=self.data,
                ax=ax_scatter,
            )
            fig_scatter.tight_layout()
            self._embed_figure(fig_scatter)
        except Exception as e:
            messagebox.showerror("Error", f"Could not generate scatter plot: {e}")

    def _embed_figure(self, figure) -> None:
        """Draw *figure* on a Tk canvas packed into the visualization frame."""
        canvas = FigureCanvasTkAgg(figure, master=self.visualization_frame)
        canvas.draw()
        canvas.get_tk_widget().pack(side=tk.LEFT, fill=tk.BOTH, expand=True)

    def apply_transformations(self) -> None:
        """Apply the transformations selected in the listbox to ``self.data``.

        Selected steps always run in a fixed order: normalization, missing
        value filling, then one-hot encoding. Each step is all-or-nothing: if
        it raises, an error dialog is shown and the data continues to the
        next step as it was before the failed one. The result replaces
        ``self.data``.
        """
        if self.data is None:
            messagebox.showinfo("Info", "Please load data first.")
            return

        selected = {
            self.transformation_listbox.get(i)
            for i in self.transformation_listbox.curselection()
        }
        if not selected:
            messagebox.showinfo("Info", "Please select at least one transformation.")
            return

        # (listbox label, transformation, success message, error prefix)
        steps = (
            (
                self.NORMALIZE,
                self._normalize,
                "Data normalized successfully.",
                "Could not normalize data",
            ),
            (
                self.FILL_MISSING,
                self._fill_missing,
                "Missing values filled successfully.",
                "Could not fill missing values",
            ),
            (
                self.ONE_HOT,
                self._one_hot_encode,
                "Categorical features one-hot encoded successfully.",
                "Could not one-hot encode",
            ),
        )

        transformed = self.data.copy()
        for label, transform, success_message, error_prefix in steps:
            if label not in selected:
                continue
            try:
                # Helpers return a new frame, so `transformed` is only
                # rebound when the whole step succeeded.
                transformed = transform(transformed)
            except Exception as e:
                messagebox.showerror("Error", f"{error_prefix}: {e}")
            else:
                messagebox.showinfo("Info", success_message)

        self.data = transformed

    # ------------------------------------------------------------------
    # Pure transformation helpers (no GUI access)
    # ------------------------------------------------------------------
    @staticmethod
    def _normalize(frame: pd.DataFrame) -> pd.DataFrame:
        """Return a copy of *frame* with numeric columns standardized.

        Each numeric column has its mean subtracted and is divided by its
        sample standard deviation. Columns whose standard deviation is zero
        or undefined (constant columns, single-row data) are only centered,
        because dividing would turn every value into NaN.
        """
        result = frame.copy()
        for col in result.select_dtypes(include=["number"]).columns:
            centered = result[col] - result[col].mean()
            std = result[col].std()
            if pd.isna(std) or std == 0:
                result[col] = centered
            else:
                result[col] = centered / std
        return result

    @staticmethod
    def _fill_missing(frame: pd.DataFrame) -> pd.DataFrame:
        """Return *frame* with missing numeric values replaced by column means.

        Non-numeric columns are left as they are, including their missing
        values.
        """
        return frame.fillna(frame.mean(numeric_only=True))

    @staticmethod
    def _one_hot_encode(frame: pd.DataFrame) -> pd.DataFrame:
        """Return *frame* with object/category columns one-hot encoded."""
        categorical_cols = frame.select_dtypes(include=["object", "category"]).columns
        return pd.get_dummies(frame, columns=categorical_cols)


def main():
    """Create the Tk root window, attach the application, and run the event loop."""
    root = tk.Tk()
    # No reference to the app object needs to be kept: the widgets' command
    # callbacks are bound methods, which keep the instance alive.
    DataSculptorApp(root)
    root.mainloop()


# Guarded so that importing this module does not open a window.
if __name__ == "__main__":
    main()
👁️ Viewed: 6

Comments