Vibrant Data Harmonizer Python GUI
👤 Sharing: AI
import tkinter as tk
from tkinter import ttk, filedialog, messagebox
import pandas as pd
import os
class DataHarmonizer:
    """Tkinter front end for loading, cleaning and saving tabular data.

    The window offers a File menu (Open / Save / Exit), a Treeview that shows
    the loaded pandas DataFrame, and buttons that remove duplicate rows, fill
    missing values and lowercase text columns.

    Attributes:
        master: The Tk window that hosts the widgets.
        data: The currently loaded DataFrame, or None when nothing is loaded.
        filepath: Path of the most recently loaded file, or None.
    """

    def __init__(self, master):
        """Build the menu bar, data table, scrollbars and action buttons.

        Args:
            master: The Tk root (or toplevel) window to populate.
        """
        self.master = master
        master.title("Vibrant Data Harmonizer")
        self.data = None
        self.filepath = None

        # --- File Menu --- #
        self.menubar = tk.Menu(master)
        self.filemenu = tk.Menu(self.menubar, tearoff=0)
        self.filemenu.add_command(label="Open", command=self.load_data)
        self.filemenu.add_command(label="Save", command=self.save_data)
        self.filemenu.add_separator()
        self.filemenu.add_command(label="Exit", command=master.quit)
        self.menubar.add_cascade(label="File", menu=self.filemenu)
        master.config(menu=self.menubar)

        # --- Main Frame --- #
        self.main_frame = ttk.Frame(master, padding=(10, 10, 10, 10))
        self.main_frame.grid(column=0, row=0, sticky=(tk.N, tk.W, tk.E, tk.S))

        # --- Data Display --- #
        # The trailing comma matters: ('Column 1') is just a string, which Tk
        # would split on whitespace into two columns, "Column" and "1".
        self.tree = ttk.Treeview(self.main_frame, columns=('Column 1',), show='headings')
        self.tree.grid(column=0, row=0, columnspan=3, sticky=(tk.N, tk.W, tk.E, tk.S))
        self.vsb = ttk.Scrollbar(self.main_frame, orient="vertical", command=self.tree.yview)
        self.vsb.grid(column=3, row=0, sticky=(tk.N, tk.S))
        self.tree.configure(yscrollcommand=self.vsb.set)
        self.hsb = ttk.Scrollbar(self.main_frame, orient="horizontal", command=self.tree.xview)
        self.hsb.grid(column=0, row=1, columnspan=3, sticky=(tk.W, tk.E))
        self.tree.configure(xscrollcommand=self.hsb.set)

        # --- Buttons --- #
        self.remove_duplicates_button = ttk.Button(
            self.main_frame, text="Remove Duplicates", command=self.remove_duplicates)
        self.remove_duplicates_button.grid(column=0, row=2, sticky=(tk.W, tk.E))
        self.fill_missing_button = ttk.Button(
            self.main_frame, text="Fill Missing Values", command=self.fill_missing)
        self.fill_missing_button.grid(column=1, row=2, sticky=(tk.W, tk.E))
        self.convert_types_button = ttk.Button(
            self.main_frame, text="Convert Data Types", command=self.convert_data_types)
        self.convert_types_button.grid(column=2, row=2, sticky=(tk.W, tk.E))

        # --- About Button --- #
        self.about_button = ttk.Button(self.main_frame, text="About", command=self.show_about)
        self.about_button.grid(column=0, row=3, columnspan=3, sticky=(tk.W, tk.E))

        # --- Weighting --- #
        # Let the table row and the three button columns absorb window resizes.
        self.master.columnconfigure(0, weight=1)
        self.master.rowconfigure(0, weight=1)
        for column in range(3):
            self.main_frame.columnconfigure(column, weight=1)
        self.main_frame.rowconfigure(0, weight=1)

    @staticmethod
    def _read_table(filepath):
        """Read *filepath* into a DataFrame, picking the reader by extension.

        Files ending in .xlsx or .xls (compared case-insensitively) go
        through ``pd.read_excel``; everything else through ``pd.read_csv``.
        """
        if filepath.lower().endswith(('.xlsx', '.xls')):
            return pd.read_excel(filepath)
        return pd.read_csv(filepath)

    @staticmethod
    def _fill_missing_values(data):
        """Fill missing values in *data* in place.

        Numeric columns are filled with the column mean, all other columns
        with their first mode. Columns that have nothing to fill, or no usable
        fill value (every entry missing), are left unchanged.
        """
        for col in data.columns:
            series = data[col]
            if not series.isna().any():
                continue
            if pd.api.types.is_numeric_dtype(series):
                fill_value = series.mean()
                if pd.isna(fill_value):
                    continue
            else:
                modes = series.mode()
                if modes.empty:
                    # Every entry is missing, so there is no mode to fill with.
                    continue
                fill_value = modes.iloc[0]
            # Assign the result back instead of calling fillna(inplace=True)
            # on a column selection: that form warns in recent pandas and does
            # not update the frame under Copy-on-Write.
            data[col] = series.fillna(fill_value)

    @staticmethod
    def _lowercase_text_columns(data):
        """Lowercase every string value in the text columns of *data* in place.

        Non-string values (numbers, missing values) inside those columns are
        kept as they are rather than being replaced by NaN.
        """
        for col in data.columns:
            series = data[col]
            is_text = (pd.api.types.is_object_dtype(series)
                       or pd.api.types.is_string_dtype(series))
            if not is_text:
                continue
            data[col] = series.map(
                lambda value: value.lower() if isinstance(value, str) else value)

    def load_data(self):
        """Ask for a CSV or Excel file, load it and show it in the table.

        Cancelling the dialog leaves the current data and path untouched. Any
        failure while reading or displaying is reported in an error dialog.
        """
        filepath = filedialog.askopenfilename(
            title="Select Data File",
            filetypes=[
                ("CSV files", "*.csv"),
                # Tk takes the patterns as a whitespace-separated list; a
                # ';'-joined string is a single pattern and is not portable.
                ("Excel files", "*.xlsx *.xls"),
            ],
        )
        if not filepath:
            return
        try:
            self.data = self._read_table(filepath)
            self.filepath = filepath
            self.populate_tree()
        except Exception as e:
            messagebox.showerror("Error", f"Failed to load data: {e}")

    def save_data(self):
        """Ask for a destination and write the current data as CSV (no index)."""
        if self.data is None:
            messagebox.showinfo("Info", "No data loaded to save.")
            return
        filepath = filedialog.asksaveasfilename(
            defaultextension=".csv", filetypes=[("CSV file", "*.csv")])
        if not filepath:
            return
        try:
            self.data.to_csv(filepath, index=False)
            messagebox.showinfo("Success", "Data saved successfully!")
        except Exception as e:
            messagebox.showerror("Error", f"Failed to save data: {e}")

    def populate_tree(self):
        """Replace the table contents with the rows of ``self.data``."""
        self.tree.delete(*self.tree.get_children())
        if self.data is None:
            return
        column_ids = [str(col) for col in self.data.columns]
        self.tree['columns'] = column_ids
        for column_id in column_ids:
            self.tree.heading(column_id, text=column_id)
            self.tree.column(column_id, width=100)
        # itertuples keeps each column's own dtype; iterrows would upcast a
        # mixed int/float row to float and show integers as "1.0".
        for row in self.data.itertuples(index=False, name=None):
            self.tree.insert('', 'end', values=row)

    def remove_duplicates(self):
        """Drop duplicate rows, renumber the index and report how many went."""
        if self.data is None:
            messagebox.showinfo("Info", "No data loaded.")
            return
        initial_size = len(self.data)
        self.data = self.data.drop_duplicates().reset_index(drop=True)
        removed = initial_size - len(self.data)
        self.populate_tree()
        messagebox.showinfo("Info", f"Removed {removed} duplicate rows.")

    def fill_missing(self):
        """Fill missing values (mean for numeric columns, mode for others)."""
        if self.data is None:
            messagebox.showinfo("Info", "No data loaded.")
            return
        self._fill_missing_values(self.data)
        self.populate_tree()
        messagebox.showinfo("Info", "Missing values filled.")

    def convert_data_types(self):
        """Lowercase the string values of all text columns and refresh the table."""
        if self.data is None:
            messagebox.showinfo("Info", "No data loaded.")
            return
        self._lowercase_text_columns(self.data)
        self.populate_tree()
        messagebox.showinfo("Info", "String columns converted to lowercase.")

    def show_about(self):
        """Show the About dialog describing the application."""
        messagebox.showinfo(
            "About Vibrant Data Harmonizer",
            "Vibrant Data Harmonizer is a versatile tool designed to streamline your data preparation workflow. It allows you to load data from CSV or Excel files, display it in a tabular format, remove duplicate entries, fill missing values using appropriate strategies, and convert data types for consistency. This program is designed to make data cleaning and preprocessing more accessible and efficient, empowering users to gain valuable insights from their datasets. It's a work in progress, and more features will be added in the future.")
# Launch the GUI only when this file is executed as a script, so that
# importing the module (e.g. for reuse or testing) does not open a window
# and block in the Tk event loop.
if __name__ == "__main__":
    root = tk.Tk()
    app = DataHarmonizer(root)
    root.mainloop()
👁️ Viewed: 5
Comments