Web Scraping Tool: Gemini API, Ollama, MySQL, Python GUI
👤 Sharing: AI
----------------------requirements.txt----------------
------------------------------------------------------
mysql-connector-python==8.0.33
requests==2.31.0
google-genai==1.22.0
ollama
feedparser==6.0.10
beautifulsoup4==4.12.2
selenium==4.15.2
newspaper3k==0.2.8
scrapy==2.11.0
aiohttp==3.9.1
lxml==4.9.3
requests-html==0.10.0
mechanicalsoup==1.3.0
cloudscraper==1.2.71
html5lib==1.1
urllib3==2.0.7
pillow==10.2.0
pyparsing==3.1.1
PyYAML==6.0.1
cssselect==1.2.0
parsel==1.8.1
w3lib==2.1.2
certifi==2023.11.17
idna==3.6
chardet==5.2.0
cssutils==2.9.0
pyppeteer==1.0.2
fake-useragent==1.4.0
----------------------------------------------------------
---------------------------ai_manager.py------------------
----------------------------------------------------------
import requests
from google import genai
from ollama import Client
import json
class AIManager:
    """Route chat messages to a local Ollama server or to the Gemini API.

    At most one back-end is active at a time.  ``current_ai`` holds
    ``"ollama"``, ``"gemini"`` or ``None`` when nothing is connected.
    """

    # Gemini model used for the connection test, health checks and messages.
    GEMINI_MODEL = "gemini-2.0-flash"

    def __init__(self):
        self.ollama_url = None
        self.ollama_model = None
        self.ollama_client = None
        self.gemini_api_key = None
        self.current_ai = None  # "ollama" or "gemini"

    def connect_ollama(self, url, model):
        """Connect to an Ollama server and make it the active back-end.

        A short test chat is sent first.  Instance state is only updated
        after that test succeeds, so a failed attempt never overwrites the
        settings of a connection that is already working.

        Args:
            url: Base URL of the Ollama server; trailing slashes are dropped.
            model: Name of the model to chat with.

        Returns:
            True when the test chat succeeded, otherwise False.
        """
        try:
            base_url = url.rstrip('/')
            client = Client(host=base_url)
            test_response = client.chat(
                model=model,
                messages=[{'role': 'user', 'content': 'test'}],
                options={'temperature': 0},
            )
            if not (test_response and 'message' in test_response):
                print("Ollama bağlantı testi başarısız")
                return False
        except Exception as e:
            print(f"Ollama bağlantı hatası: {e}")
            return False

        # Commit the new connection only now that it is known to work.
        self.ollama_url = base_url
        self.ollama_model = model
        self.ollama_client = client
        self.current_ai = "ollama"
        return True

    def connect_gemini(self, api_key):
        """Validate a Gemini API key and make Gemini the active back-end.

        The key is stored only after a test request succeeds.

        Args:
            api_key: Gemini API key.

        Returns:
            True when the test request returned a response, otherwise False.
        """
        try:
            client = genai.Client(api_key=api_key)
            response = client.models.generate_content(
                model=self.GEMINI_MODEL,
                contents=["test"],
            )
            if not response:
                return False
        except Exception as e:
            print(f"Gemini bağlantı hatası: {e}")
            return False

        self.gemini_api_key = api_key
        self.current_ai = "gemini"
        return True

    def send_message(self, message):
        """Send *message* to the active back-end and return its text reply.

        Raises:
            Exception: When no back-end is connected or the back-end call
                fails.  The error is logged and then re-raised.
        """
        try:
            if self.current_ai == "ollama":
                return self._send_to_ollama(message)
            if self.current_ai == "gemini":
                return self._send_to_gemini(message)
            raise Exception("AI bağlantısı kurulmamış!")
        except Exception as e:
            print(f"AI send_message hatası: {e}")
            raise  # bare raise keeps the original traceback intact

    def _send_to_ollama(self, message):
        """Send *message* to Ollama and return the reply content.

        Raises:
            Exception: Wraps any failure, including an empty reply, with an
                Ollama-specific prefix; the original error is chained.
        """
        try:
            print(f"Ollama'ya mesaj gönderiliyor: {message[:100]}...")
            response = self.ollama_client.chat(
                model=self.ollama_model,
                messages=[{'role': 'user', 'content': message}],
                options={'temperature': 0.7},
            )
            print(f"Ollama yanıtı: {response}")
            if not (response and 'message' in response):
                raise Exception("Ollama yanıt vermedi")
            content = response['message']['content']
            print(f"Ollama içerik uzunluğu: {len(content)}")
            return content
        except Exception as e:
            print(f"Ollama mesaj gönderme hatası: {e}")
            raise Exception(f"Ollama mesaj gönderme hatası: {e}") from e

    def _send_to_gemini(self, message):
        """Send *message* to Gemini and return the stripped reply text.

        Raises:
            Exception: Wraps any failure, including a reply without text,
                with a Gemini-specific prefix; the original error is chained.
        """
        try:
            print(f"Gemini'ye mesaj gönderiliyor: {message[:100]}...")
            client = genai.Client(api_key=self.gemini_api_key)
            response = client.models.generate_content(
                model=self.GEMINI_MODEL,
                contents=[message],
            )
            print(f"Gemini yanıtı: {response}")
            # ``response.text`` may be None when the reply carries no text
            # part; report that as "no answer" rather than failing on
            # ``None.strip()``.
            text = response.text if response else None
            if text is None:
                raise Exception("Gemini yanıt vermedi")
            content = text.strip()
            print(f"Gemini içerik uzunluğu: {len(content)}")
            return content
        except Exception as e:
            print(f"Gemini mesaj gönderme hatası: {e}")
            raise Exception(f"Gemini mesaj gönderme hatası: {e}") from e

    def is_connected(self):
        """Check the active back-end by sending it a real test request.

        Returns:
            True when the active back-end returned a response object; False
            when nothing is connected, required settings are missing, or the
            request raised.
        """
        try:
            print(f"AI bağlantı kontrolü: current_ai = {self.current_ai}")
            if self.current_ai == "ollama":
                if not self.ollama_client or not self.ollama_model:
                    print("Ollama client veya model eksik")
                    return False
                test_response = self.ollama_client.chat(
                    model=self.ollama_model,
                    messages=[{'role': 'user', 'content': 'test'}],
                    options={'temperature': 0.1},
                )
                print(f"Ollama test yanıtı: {test_response}")
                return test_response is not None

            if self.current_ai == "gemini":
                if not self.gemini_api_key:
                    print("Gemini API key eksik")
                    return False
                client = genai.Client(api_key=self.gemini_api_key)
                test_response = client.models.generate_content(
                    model=self.GEMINI_MODEL,
                    contents=["test"],
                )
                print(f"Gemini test yanıtı: {test_response}")
                return test_response is not None

            print("Hiçbir AI bağlantısı kurulmamış")
            return False
        except Exception as e:
            print(f"AI bağlantı kontrolü hatası: {e}")
            return False

    def get_current_ai(self):
        """Return the active back-end name ("ollama" / "gemini") or None."""
        return self.current_ai

    def disconnect(self):
        """Forget the active back-end together with all stored settings."""
        self.current_ai = None
        self.ollama_url = None
        self.ollama_model = None
        self.ollama_client = None
        self.gemini_api_key = None
-------------------------------------------------------------------------
--------------------------------database_manager.py----------------------
-------------------------------------------------------------------------
import mysql.connector
from mysql.connector import Error
import json
class DatabaseManager:
    """Convenience wrapper around one mysql-connector connection and cursor.

    Query helpers report failure through their return value (``[]`` or
    ``False``) instead of raising, including when no connection is open.
    """

    def __init__(self):
        self.connection = None
        self.cursor = None

    @staticmethod
    def _quote_identifier(name):
        """Return *name* as a backtick-quoted MySQL identifier.

        Identifiers cannot be passed as query parameters, so they are quoted
        here instead.  Dots still separate qualifiers (``db.table``), parts
        that are already backtick-quoted are accepted, and embedded backticks
        are doubled.
        """
        quoted_parts = []
        for part in str(name).split("."):
            if len(part) >= 2 and part[0] == "`" and part[-1] == "`":
                # Already quoted by the caller: unwrap so it is not quoted twice.
                part = part[1:-1].replace("``", "`")
            quoted_parts.append("`" + part.replace("`", "``") + "`")
        return ".".join(quoted_parts)

    def _has_cursor(self):
        """Return True when a cursor is available; log and return False otherwise."""
        if self.cursor is None:
            print("Veritabanı bağlantısı yok")
            return False
        return True

    def connect(self, host, port, username, password, database):
        """Open a MySQL connection and create a dictionary cursor on it.

        Returns:
            True when the connection is established, otherwise False.
        """
        try:
            self.connection = mysql.connector.connect(
                host=host,
                port=port,
                user=username,
                password=password,
                database=database,
            )
            if self.connection.is_connected():
                self.cursor = self.connection.cursor(dictionary=True)
                return True
            return False
        except Error as e:
            print(f"MySQL bağlantı hatası: {e}")
            return False

    def disconnect(self):
        """Close cursor and connection and drop the references to them."""
        try:
            if self.cursor is not None:
                self.cursor.close()
            if self.connection and self.connection.is_connected():
                self.connection.close()
        finally:
            # Never keep stale handles around, even if closing failed.
            self.cursor = None
            self.connection = None

    def get_tables(self):
        """Return the table names of the current database ([] on failure)."""
        if not self._has_cursor():
            return []
        try:
            self.cursor.execute("SHOW TABLES")
            # Each row is a one-entry dict whose key depends on the schema name.
            return [next(iter(row.values())) for row in self.cursor.fetchall()]
        except Error as e:
            print(f"Tablolar listelenirken hata: {e}")
            return []

    def get_table_columns(self, table_name):
        """Return the column names of *table_name* ([] on failure)."""
        if not self._has_cursor():
            return []
        try:
            self.cursor.execute(f"DESCRIBE {self._quote_identifier(table_name)}")
            return [column['Field'] for column in self.cursor.fetchall()]
        except Error as e:
            print(f"Sütunlar getirilirken hata: {e}")
            return []

    def get_table_data(self, table_name, limit=100):
        """Return up to *limit* rows of *table_name* as dicts ([] on failure)."""
        if not self._has_cursor():
            return []
        try:
            row_limit = int(limit)
        except (TypeError, ValueError):
            print(f"Veri getirilirken hata: geçersiz limit {limit!r}")
            return []
        try:
            self.cursor.execute(
                f"SELECT * FROM {self._quote_identifier(table_name)} LIMIT %s",
                (row_limit,),
            )
            return self.cursor.fetchall()
        except Error as e:
            print(f"Veri getirilirken hata: {e}")
            return []

    def insert_data(self, table_name, data):
        """Insert one row described by the dict *data* and commit.

        Keys are used as column names and values as column values; ``None``
        values are sent as SQL NULL by the connector.

        Returns:
            True on success; False when *data* is not a dict, no connection
            is open, or the insert fails (the transaction is rolled back).
        """
        if not isinstance(data, dict):
            return False
        if not self._has_cursor():
            return False

        columns = list(data.keys())
        values = list(data.values())
        placeholders = ', '.join(['%s'] * len(values))
        column_str = ', '.join(self._quote_identifier(column) for column in columns)
        query = (
            f"INSERT INTO {self._quote_identifier(table_name)} "
            f"({column_str}) VALUES ({placeholders})"
        )
        try:
            self.cursor.execute(query, values)
            self.connection.commit()
            return True
        except Error as e:
            print(f"[insert_data] Hata: {e}\nTablo: {table_name}\nKolonlar: {columns}\nVeriler: {values}")
            if self.connection:
                self.connection.rollback()
            return False

    def execute_query(self, query):
        """Run an arbitrary SQL statement.

        Returns:
            The fetched rows when the statement produced a result set
            (SELECT, SHOW, DESCRIBE, ...), True after committing any other
            statement, and False on error or when no connection is open.
        """
        if not self._has_cursor():
            return False
        try:
            self.cursor.execute(query)
            # Ask the cursor whether rows are pending instead of guessing from
            # the statement text: committing with unread rows fails.
            if self.cursor.with_rows:
                return self.cursor.fetchall()
            self.connection.commit()
            return True
        except Error as e:
            print(f"Sorgu çalıştırılırken hata: {e}")
            if self.connection:
                self.connection.rollback()
            return False

    def is_connected(self):
        """Return True when a connection exists and is alive, else False."""
        return bool(self.connection and self.connection.is_connected())
-----------------------------------------------------------------
---------------------------main.py-------------------------------
-----------------------------------------------------------------
import os
import locale
# Work around locale errors at start-up: try the user's default locale first,
# then the portable "C" locale; if neither can be set, keep whatever locale
# is already active.
for _locale_name in ('', 'C'):
    try:
        locale.setlocale(locale.LC_ALL, _locale_name)
    except locale.Error:
        continue
    break
del _locale_name
import tkinter as tk
from tkinter import ttk, messagebox
import mysql.connector
import requests
from google import genai
import json
from database_manager import DatabaseManager
from ai_manager import AIManager
# Yeni web scraping kütüphaneleri
import feedparser
import re
from html import unescape
from bs4 import BeautifulSoup
import urllib.parse
from urllib.parse import urljoin, urlparse
import time
import random
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import newspaper
from newspaper import Article
import scrapy
from scrapy.crawler import CrawlerProcess
import aiohttp
import asyncio
import threading
from concurrent.futures import ThreadPoolExecutor
import lxml.html
import requests_html
from requests_html import HTMLSession
import mechanicalsoup
import cloudscraper
class MySQLToolApp:
def __init__(self, root):
    """Configure the main window, create the managers and UI state, build the UI.

    Args:
        root: Tk window that hosts the application.
    """
    self.root = root
    root.title("MySQL Veri Gönderme Aracı")
    root.geometry("1200x800")

    # Back-end helpers for the database and the AI providers.
    self.db_manager = DatabaseManager()
    self.ai_manager = AIManager()

    # Texts bound to the status bar labels.
    self.connection_status = tk.StringVar(value="Bağlantı Yok")
    self.ai_status = tk.StringVar(value="AI Bağlantısı Yok")
    # AI provider switch: False = Ollama, True = Gemini.
    self.ai_toggle_var = tk.BooleanVar(value=False)

    # State used by the web-scraping features.
    self.scraping_method_var = tk.StringVar(value="rss")
    self.scraping_status = tk.StringVar(value="Hazır")
    self.scraping_progress = tk.DoubleVar(value=0)
    self.is_scraping = False
    self.scraped_data = []

    # Default web-scraping settings.
    self.scraping_settings = dict(
        delay=1.0,  # seconds
        timeout=30,
        max_retries=3,
        user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
    )

    # Fixed connection profiles; they differ only in their display name.
    self.connection_profiles = [
        {
            "name": profile_name,
            "host": "",
            "port": 3306,
            "username": "",
            "password": "",
            "database": "",
        }
        for profile_name in ("", "", "Other Connection")
    ]

    self.setup_ui()
def setup_ui(self):
    """Build the window skeleton: heading, status bar and the tab notebook."""
    outer = ttk.Frame(self.root, padding="10")
    outer.pack(fill=tk.BOTH, expand=True)

    # Heading
    heading = ttk.Label(
        outer,
        text="MySQL Veri Gönderme Aracı",
        font=("Arial", 16, "bold"),
    )
    heading.pack(pady=(0, 20))

    # Status bar: a caption plus a variable-backed value label per service.
    status_bar = ttk.Frame(outer)
    status_bar.pack(fill=tk.X, pady=(0, 10))
    status_items = (
        ("MySQL Durumu:", self.connection_status, (5, 20)),
        ("AI Durumu:", self.ai_status, (5, 0)),
    )
    for caption, variable, value_padx in status_items:
        ttk.Label(status_bar, text=caption).pack(side=tk.LEFT)
        value_label = ttk.Label(status_bar, textvariable=variable, foreground="red")
        value_label.pack(side=tk.LEFT, padx=value_padx)

    # Tabbed interface
    self.notebook = ttk.Notebook(outer)
    self.notebook.pack(fill=tk.BOTH, expand=True, pady=10)

    # Tabs are added in display order.
    self.create_connection_tab()
    self.create_database_tab()
    self.create_ai_tab()
    self.create_data_tab()
    self.create_news_tab()
def create_connection_tab(self):
    """Build the MySQL connection tab: profile picker, login form and table tree."""
    tab = ttk.Frame(self.notebook, padding="20")
    self.notebook.add(tab, text="Bağlantı")

    # Profile picker
    profile_box = ttk.LabelFrame(tab, text="Bağlantı Profili", padding="10")
    profile_box.pack(fill=tk.X, pady=(0, 10))
    ttk.Label(profile_box, text="Connection Profile:").pack(side=tk.LEFT)
    profile_names = [profile["name"] for profile in self.connection_profiles]
    self.profile_combobox = ttk.Combobox(
        profile_box,
        width=30,
        state="readonly",
        values=profile_names,
    )
    self.profile_combobox.pack(side=tk.LEFT, padx=(10, 0))
    self.profile_combobox.bind("<<ComboboxSelected>>", self.on_profile_selected)
    self.profile_combobox.current(0)

    # Login form: one caption/entry pair per grid row.
    form = ttk.LabelFrame(tab, text="MySQL Bağlantısı", padding="15")
    form.pack(fill=tk.X, pady=(0, 20))
    field_specs = (
        ("Host:", "host_entry", {}),
        ("Port:", "port_entry", {}),
        ("Kullanıcı Adı:", "username_entry", {}),
        ("Şifre:", "password_entry", {"show": "*"}),  # masked input
        ("Veritabanı:", "database_entry", {}),
    )
    for row, (caption, attr_name, entry_options) in enumerate(field_specs):
        ttk.Label(form, text=caption).grid(row=row, column=0, sticky=tk.W, pady=5)
        entry = ttk.Entry(form, width=30, **entry_options)
        entry.grid(row=row, column=1, padx=(10, 0), pady=5)
        setattr(self, attr_name, entry)

    # Connect button sits on the row below the last field.
    connect_button = ttk.Button(form, text="Bağlan", command=self.connect_to_mysql)
    connect_button.grid(row=len(field_specs), column=0, columnspan=2, pady=20)

    # Table overview
    tables_box = ttk.LabelFrame(tab, text="Tablolar", padding="15")
    tables_box.pack(fill=tk.BOTH, expand=True)
    tree = ttk.Treeview(tables_box, columns=("columns",), show="tree headings")
    self.tables_tree = tree
    tree.heading("#0", text="Tablo Adı")
    tree.heading("columns", text="Sütunlar")
    tree.pack(fill=tk.BOTH, expand=True)

    # Pre-fill the form from the first profile.
    self.fill_connection_fields_from_profile(0)
def on_profile_selected(self, event):
    """Combobox callback: load the selected profile into the connection form.

    Args:
        event: Tk event passed by the binding; not used.
    """
    self.fill_connection_fields_from_profile(self.profile_combobox.current())
def fill_connection_fields_from_profile(self, idx):
    """Replace the connection form contents with the profile at *idx*.

    Args:
        idx: Index into ``self.connection_profiles``.
    """
    selected = self.connection_profiles[idx]
    # Entry widget paired with the text it should show; only the port is
    # converted to a string, the other values are inserted as stored.
    assignments = (
        (self.host_entry, selected["host"]),
        (self.port_entry, str(selected["port"])),
        (self.username_entry, selected["username"]),
        (self.password_entry, selected["password"]),
        (self.database_entry, selected["database"]),
    )
    for entry, text in assignments:
        entry.delete(0, tk.END)
        entry.insert(0, text)
def create_database_tab(self):
    """Build the database tab: a table selector above a scrollable data grid."""
    tab = ttk.Frame(self.notebook, padding="20")
    self.notebook.add(tab, text="Veritabanı")

    # Table selector
    selector_box = ttk.LabelFrame(tab, text="Tablo İşlemleri", padding="15")
    selector_box.pack(fill=tk.X, pady=(0, 20))
    ttk.Label(selector_box, text="Tablo Seç:").pack(side=tk.LEFT)
    self.table_combobox = ttk.Combobox(selector_box, width=30)
    self.table_combobox.pack(side=tk.LEFT, padx=(10, 0))
    self.table_combobox.bind("<<ComboboxSelected>>", self.on_table_selected)

    # Data grid
    grid_box = ttk.LabelFrame(tab, text="Tablo Verileri", padding="15")
    grid_box.pack(fill=tk.BOTH, expand=True)

    # Initial columns shown in the grid.
    column_ids = ("id", "name", "description")
    tree = ttk.Treeview(grid_box, columns=column_ids, show="headings")
    self.data_tree = tree
    for column_id in column_ids:
        tree.heading(column_id, text=column_id.title())
        tree.column(column_id, width=100)

    # Vertical scrollbar wired to the grid in both directions.
    y_scroll = ttk.Scrollbar(grid_box, orient=tk.VERTICAL, command=tree.yview)
    tree.configure(yscrollcommand=y_scroll.set)
    tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
    y_scroll.pack(side=tk.RIGHT, fill=tk.Y)
def create_ai_tab(self):
    """Build the AI tab: provider switch, per-provider connection forms, test area."""
    tab = ttk.Frame(self.notebook, padding="20")
    self.notebook.add(tab, text="AI Bağlantısı")

    # Grid options shared by every caption label and entry in the forms.
    caption_grid = {"column": 0, "sticky": tk.W, "pady": 5}
    entry_grid = {"column": 1, "padx": (10, 0), "pady": 5}

    # Provider switch: unchecked = Ollama, checked = Gemini.
    switch_box = ttk.LabelFrame(tab, text="AI Seçimi", padding="15")
    switch_box.pack(fill=tk.X, pady=(0, 20))
    ttk.Label(switch_box, text="AI Türü:").pack(side=tk.LEFT)
    provider_switch = ttk.Checkbutton(
        switch_box,
        text="Gemini API",
        variable=self.ai_toggle_var,
        command=self.on_ai_toggle_change,
    )
    provider_switch.pack(side=tk.LEFT, padx=(10, 0))
    ttk.Label(switch_box, text="(Kapalı: Ollama, Açık: Gemini)").pack(side=tk.LEFT, padx=(5, 0))

    # Single container; on_ai_toggle_change() decides which form is packed in it.
    self.ai_connection_frame = ttk.LabelFrame(tab, text="AI Bağlantısı", padding="15")
    self.ai_connection_frame.pack(fill=tk.X, pady=(0, 20))

    # Ollama form (not packed here)
    ollama_form = ttk.Frame(self.ai_connection_frame)
    self.ollama_frame = ollama_form
    ttk.Label(ollama_form, text="Ollama URL:").grid(row=0, **caption_grid)
    self.ollama_url_entry = ttk.Entry(ollama_form, width=40)
    self.ollama_url_entry.insert(0, "http://localhost:11434")
    self.ollama_url_entry.grid(row=0, **entry_grid)
    ttk.Label(ollama_form, text="Model:").grid(row=1, **caption_grid)
    self.ollama_model_entry = ttk.Entry(ollama_form, width=40)
    self.ollama_model_entry.insert(0, "asd1")
    self.ollama_model_entry.grid(row=1, **entry_grid)
    self.ollama_btn = ttk.Button(ollama_form, text="Ollama Bağlan", command=self.connect_ollama)
    self.ollama_btn.grid(row=2, column=0, columnspan=2, pady=20)

    # Gemini form (not packed here)
    gemini_form = ttk.Frame(self.ai_connection_frame)
    self.gemini_frame = gemini_form
    ttk.Label(gemini_form, text="API Key:").grid(row=0, **caption_grid)
    self.gemini_key_entry = ttk.Entry(gemini_form, width=50, show="*")  # masked input
    self.gemini_key_entry.grid(row=0, **entry_grid)
    self.gemini_btn = ttk.Button(gemini_form, text="Gemini Bağlan", command=self.connect_gemini)
    self.gemini_btn.grid(row=1, column=0, columnspan=2, pady=20)

    # Test area: message entry, send button and response text box.
    test_box = ttk.LabelFrame(tab, text="AI Test", padding="15")
    test_box.pack(fill=tk.BOTH, expand=True)
    ttk.Label(test_box, text="Test Mesajı:").pack(anchor=tk.W)
    self.test_message_entry = ttk.Entry(test_box, width=60)
    self.test_message_entry.insert(0, "Merhaba, nasılsın?")
    self.test_message_entry.pack(fill=tk.X, pady=(5, 10))
    send_button = ttk.Button(test_box, text="AI'ya Gönder", command=self.test_ai)
    send_button.pack(pady=(0, 10))
    ttk.Label(test_box, text="AI Yanıtı:").pack(anchor=tk.W)
    self.ai_response_text = tk.Text(test_box, height=10, wrap=tk.WORD)
    self.ai_response_text.pack(fill=tk.BOTH, expand=True)

    # Show the form that matches the initial switch state.
    self.on_ai_toggle_change()
def on_ai_toggle_change(self):
    """Show the connection form that matches the AI provider switch.

    Every child of the shared container is unpacked first; then the
    container caption is updated and the Gemini form (switch on) or the
    Ollama form (switch off) is packed.
    """
    container = self.ai_connection_frame
    for child in container.winfo_children():
        child.pack_forget()

    if self.ai_toggle_var.get():
        caption, active_form = "Gemini API Bağlantısı", self.gemini_frame
    else:
        caption, active_form = "Ollama Bağlantısı", self.ollama_frame

    container.configure(text=caption)
    active_form.pack(fill=tk.BOTH, expand=True)
def create_data_tab(self):
    """Build the data tab: JSON input box, action buttons and a result box."""
    tab = ttk.Frame(self.notebook, padding="20")
    self.notebook.add(tab, text="Veri Gönder")

    # JSON input
    entry_box = ttk.LabelFrame(tab, text="Veri Girişi", padding="15")
    entry_box.pack(fill=tk.X, pady=(0, 20))
    ttk.Label(entry_box, text="JSON Veri:").pack(anchor=tk.W)
    self.json_data_text = tk.Text(entry_box, height=8, wrap=tk.WORD)
    self.json_data_text.pack(fill=tk.X, pady=(5, 10))

    # Pre-fill the input with a sample record; ensure_ascii=False keeps the
    # non-ASCII characters readable in the text box.
    sample_json = json.dumps(
        {
            "name": "Örnek Kullanıcı",
            "email": "ornek@email.com",
            "age": 25,
        },
        indent=2,
        ensure_ascii=False,
    )
    self.json_data_text.insert("1.0", sample_json)

    # Action buttons
    actions = ttk.Frame(entry_box)
    actions.pack(fill=tk.X, pady=10)
    send_button = ttk.Button(actions, text="Veriyi Gönder", command=self.send_data)
    send_button.pack(side=tk.LEFT, padx=(0, 10))
    generate_button = ttk.Button(
        actions,
        text="AI ile Veri Oluştur",
        command=self.generate_data_with_ai,
    )
    generate_button.pack(side=tk.LEFT)

    # Result output
    output_box = ttk.LabelFrame(tab, text="Sonuç", padding="15")
    output_box.pack(fill=tk.BOTH, expand=True)
    self.result_text = tk.Text(output_box, wrap=tk.WORD)
    self.result_text.pack(fill=tk.BOTH, expand=True)
def create_news_tab(self):
"""Web Haber sekmesi - Gelişmiş tarama metodları ile"""
# Ana container frame
container_frame = ttk.Frame(self.notebook)
self.notebook.add(container_frame, text="Web Haber")
# Canvas ve scrollbar oluştur
canvas = tk.Canvas(container_frame)
scrollbar = ttk.Scrollbar(container_frame, orient="vertical", command=canvas.yview)
scrollable_frame = ttk.Frame(canvas)
scrollable_frame.bind(
"<Configure>",
lambda e: canvas.configure(scrollregion=canvas.bbox("all"))
)
canvas.create_window((0, 0), window=scrollable_frame, anchor="nw")
canvas.configure(yscrollcommand=scrollbar.set)
# Mouse wheel binding
def _on_mousewheel(event):
canvas.yview_scroll(int(-1*(event.delta/120)), "units")
canvas.bind_all("<MouseWheel>", _on_mousewheel)
# Ana içerik frame'i
news_frame = ttk.Frame(scrollable_frame, padding="3")
news_frame.pack(fill=tk.BOTH, expand=True)
# Tarama Metodu Seçimi
method_frame = ttk.LabelFrame(news_frame, text="Tarama Metodu", padding="15")
method_frame.pack(fill=tk.X, pady=(0, 20))
# Metod seçimi için radio butonlar
methods_frame = ttk.Frame(method_frame)
methods_frame.pack(fill=tk.X)
ttk.Radiobutton(methods_frame, text="RSS Feed", variable=self.scraping_method_var,
value="rss", command=self.on_method_change).pack(side=tk.LEFT, padx=(0, 20))
ttk.Radiobutton(methods_frame, text="BeautifulSoup", variable=self.scraping_method_var,
value="beautifulsoup", command=self.on_method_change).pack(side=tk.LEFT, padx=(0, 20))
ttk.Radiobutton(methods_frame, text="Selenium", variable=self.scraping_method_var,
value="selenium", command=self.on_method_change).pack(side=tk.LEFT, padx=(0, 20))
ttk.Radiobutton(methods_frame, text="Newspaper3k", variable=self.scraping_method_var,
value="newspaper", command=self.on_method_change).pack(side=tk.LEFT, padx=(0, 20))
ttk.Radiobutton(methods_frame, text="Requests-HTML", variable=self.scraping_method_var,
value="requests_html", command=self.on_method_change).pack(side=tk.LEFT, padx=(0, 20))
ttk.Radiobutton(methods_frame, text="MechanicalSoup", variable=self.scraping_method_var,
value="mechanicalsoup", command=self.on_method_change).pack(side=tk.LEFT, padx=(0, 20))
ttk.Radiobutton(methods_frame, text="CloudScraper", variable=self.scraping_method_var,
value="cloudscraper", command=self.on_method_change).pack(side=tk.LEFT, padx=(0, 20))
# Kaynak seçimi
source_frame = ttk.LabelFrame(news_frame, text="Haber Kaynağı", padding="15")
source_frame.pack(fill=tk.X, pady=(0, 20))
# RSS kaynakları
self.rss_sources = [
("TechCrunch", "http://feeds.feedburner.com/TechCrunch/"),
("The Verge", "https://www.theverge.com/rss/index.xml"),
("Wired", "https://www.wired.com/feed/rss"),
("Engadget", "https://www.engadget.com/rss.xml"),
("Ars Technica", "http://feeds.arstechnica.com/arstechnica/index"),
("CNET", "https://www.cnet.com/rss/news/"),
("Mashable", "http://feeds.mashable.com/Mashable"),
("Gizmodo", "https://gizmodo.com/rss"),
("ZDNet", "https://www.zdnet.com/news/rss.xml"),
("Digital Trends", "https://www.digitaltrends.com/feed/"),
("VentureBeat", "https://venturebeat.com/feed/"),
("ReadWrite", "https://readwrite.com/feed/"),
("Slashdot", "http://rss.slashdot.org/Slashdot/slashdot"),
("TechRadar", "https://www.techradar.com/rss"),
("PCMag", "https://www.pcmag.com/feeds/rss/all"),
("ExtremeTech", "https://www.extremetech.com/feed"),
("MakeUseOf", "https://www.makeuseof.com/feed/"),
("Tom's Hardware", "https://www.tomshardware.com/feeds/all"),
("AnandTech", "https://www.anandtech.com/rss/"),
("Lifehacker", "https://lifehacker.com/rss"),
("Futurism", "https://futurism.com/feed"),
("ScienceDaily AI", "https://www.sciencedaily.com/rss/computers_math/artificial_intelligence.xml"),
("ScienceDaily Computer Science", "https://www.sciencedaily.com/rss/computers_math/computer_science.xml"),
("AI Trends", "https://www.aitrends.com/feed/"),
("Synced", "https://syncedreview.com/feed/"),
("Next Big Future", "https://www.nextbigfuture.com/feed"),
("InfoWorld", "https://www.infoworld.com/index.rss"),
("Network World", "https://www.networkworld.com/index.rss"),
("The Register", "https://www.theregister.com/headlines.atom"),
("BetaNews", "https://betanews.com/feed/"),
("TechSpot", "https://www.techspot.com/backend.xml"),
("BleepingComputer", "https://www.bleepingcomputer.com/feed/"),
("The Hacker News", "https://feeds.feedburner.com/TheHackersNews"),
("Threatpost", "https://threatpost.com/feed/"),
("Dark Reading", "https://www.darkreading.com/rss.xml"),
("Help Net Security", "https://www.helpnetsecurity.com/feed/"),
("SecurityWeek", "https://feeds.feedburner.com/Securityweek"),
("CoinDesk", "https://www.coindesk.com/arc/outboundfeeds/rss/"),
("Cointelegraph", "https://cointelegraph.com/rss"),
("VentureBeat AI", "https://venturebeat.com/category/ai/feed/"),
("MIT Technology Review", "https://www.technologyreview.com/feed/"),
("IEEE Spectrum", "https://spectrum.ieee.org/rss/fulltext"),
("New Scientist Tech", "https://www.newscientist.com/section/technology/feed/"),
("Popular Science", "https://www.popsci.com/rss.xml"),
("ScienceAlert", "https://www.sciencealert.com/rss"),
("Singularity Hub", "https://singularityhub.com/feed/"),
("Robohub", "https://robohub.org/feed/"),
("Robotics Business Review", "https://www.roboticsbusinessreview.com/feed/"),
("AI News", "https://artificialintelligence-news.com/feed/"),
("Data Science Central", "https://www.datasciencecentral.com/main/feed"),
("KDnuggets", "https://www.kdnuggets.com/feed"),
("Analytics Vidhya", "https://www.analyticsvidhya.com/blog/feed/"),
("Datafloq", "https://datafloq.com/feed/"),
("Towards Data Science", "https://towardsdatascience.com/feed"),
("OpenAI Blog", "https://openai.com/blog/rss/"),
("Google AI Blog", "https://ai.googleblog.com/feeds/posts/default"),
("Microsoft AI Blog", "https://blogs.microsoft.com/ai/feed/"),
("NVIDIA Blog", "https://blogs.nvidia.com/blog/category/ai/feed/"),
("DeepMind Blog", "https://deepmind.com/blog/feed/basic/"),
("Open Robotics", "https://www.openrobotics.org/feed/"),
("Stack Overflow Blog", "https://stackoverflow.blog/feed/"),
("GitHub Blog", "https://github.blog/feed/"),
("Hacker Noon", "https://hackernoon.com/feed"),
("Product Hunt Tech", "https://www.producthunt.com/stories.rss"),
("Reddit r/MachineLearning", "https://www.reddit.com/r/MachineLearning/.rss"),
("Reddit r/artificial", "https://www.reddit.com/r/artificial/.rss"),
("Reddit r/technology", "https://www.reddit.com/r/technology/.rss"),
("Reddit r/Futurology", "https://www.reddit.com/r/Futurology/.rss"),
("Reddit r/computerscience", "https://www.reddit.com/r/computerscience/.rss"),
("Reddit r/robotics", "https://www.reddit.com/r/robotics/.rss"),
("Reddit r/datascience", "https://www.reddit.com/r/datascience/.rss"),
("Reddit r/cybersecurity", "https://www.reddit.com/r/cybersecurity/.rss"),
("Reddit r/programming", "https://www.reddit.com/r/programming/.rss"),
("Reddit r/opensource", "https://www.reddit.com/r/opensource/.rss"),
("Reddit r/gadgets", "https://www.reddit.com/r/gadgets/.rss"),
("Reddit r/hardware", "https://www.reddit.com/r/hardware/.rss"),
("Reddit r/software", "https://www.reddit.com/r/software/.rss"),
("Reddit r/learnprogramming", "https://www.reddit.com/r/learnprogramming/.rss"),
("Reddit r/learnmachinelearning", "https://www.reddit.com/r/learnmachinelearning/.rss"),
("Reddit r/AskComputerScience", "https://www.reddit.com/r/AskComputerScience/.rss"),
]
# Web siteleri (diğer metodlar için)
self.web_sources = [
("TechCrunch", "https://techcrunch.com"),
("The Verge", "https://www.theverge.com"),
("Wired", "https://www.wired.com"),
("Engadget", "https://www.engadget.com"),
("Ars Technica", "https://arstechnica.com"),
("CNET", "https://www.cnet.com"),
("Mashable", "https://mashable.com"),
("Gizmodo", "https://gizmodo.com"),
("ZDNet", "https://www.zdnet.com"),
("Digital Trends", "https://www.digitaltrends.com"),
("VentureBeat", "https://venturebeat.com"),
("ReadWrite", "https://readwrite.com"),
("Slashdot", "https://slashdot.org"),
("TechRadar", "https://www.techradar.com"),
("PCMag", "https://www.pcmag.com"),
("ExtremeTech", "https://www.extremetech.com"),
("MakeUseOf", "https://www.makeuseof.com"),
("Tom's Hardware", "https://www.tomshardware.com"),
("AnandTech", "https://www.anandtech.com"),
("Lifehacker", "https://lifehacker.com"),
("Futurism", "https://futurism.com"),
("ScienceDaily", "https://www.sciencedaily.com"),
("AI Trends", "https://www.aitrends.com"),
("Synced", "https://syncedreview.com"),
("Next Big Future", "https://www.nextbigfuture.com"),
("InfoWorld", "https://www.infoworld.com"),
("Network World", "https://www.networkworld.com"),
("The Register", "https://www.theregister.com"),
("BetaNews", "https://betanews.com"),
("TechSpot", "https://www.techspot.com"),
("BleepingComputer", "https://www.bleepingcomputer.com"),
("The Hacker News", "https://thehackernews.com"),
("Threatpost", "https://threatpost.com"),
("Dark Reading", "https://www.darkreading.com"),
("Help Net Security", "https://www.helpnetsecurity.com"),
("SecurityWeek", "https://www.securityweek.com"),
("CoinDesk", "https://www.coindesk.com"),
("Cointelegraph", "https://cointelegraph.com"),
("MIT Technology Review", "https://www.technologyreview.com"),
("IEEE Spectrum", "https://spectrum.ieee.org"),
("New Scientist", "https://www.newscientist.com"),
("Popular Science", "https://www.popsci.com"),
("ScienceAlert", "https://www.sciencealert.com"),
("Singularity Hub", "https://singularityhub.com"),
("Robohub", "https://robohub.org"),
("Robotics Business Review", "https://www.roboticsbusinessreview.com"),
("AI News", "https://artificialintelligence-news.com"),
("Data Science Central", "https://www.datasciencecentral.com"),
("KDnuggets", "https://www.kdnuggets.com"),
("Analytics Vidhya", "https://www.analyticsvidhya.com"),
("Datafloq", "https://datafloq.com"),
("Towards Data Science", "https://towardsdatascience.com"),
("OpenAI Blog", "https://openai.com/blog"),
("Google AI Blog", "https://ai.googleblog.com"),
("Microsoft AI Blog", "https://blogs.microsoft.com/ai"),
("NVIDIA Blog", "https://blogs.nvidia.com"),
("DeepMind Blog", "https://deepmind.com/blog"),
("Open Robotics", "https://www.openrobotics.org"),
("Stack Overflow Blog", "https://stackoverflow.blog"),
("GitHub Blog", "https://github.blog"),
("Hacker Noon", "https://hackernoon.com"),
("Product Hunt", "https://www.producthunt.com"),
]
# Kaynak seçimi
ttk.Label(source_frame, text="Kaynak:").pack(side=tk.LEFT)
self.source_var = tk.StringVar()
self.source_combobox = ttk.Combobox(source_frame, width=40, state="readonly")
self.source_combobox.pack(side=tk.LEFT, padx=(10, 0))
self.source_combobox.bind("<<ComboboxSelected>>", self.on_source_change)
# URL girişi
ttk.Label(source_frame, text="URL:").pack(side=tk.LEFT, padx=(20, 0))
self.url_entry = ttk.Entry(source_frame, width=50)
self.url_entry.pack(side=tk.LEFT, padx=(5, 0))
# Tablo seçimi
ttk.Label(source_frame, text="Tablo:").pack(side=tk.LEFT, padx=(20, 0))
self.news_table_combobox = ttk.Combobox(source_frame, width=20, state="readonly")
self.news_table_combobox.pack(side=tk.LEFT, padx=(5, 0))
self.news_table_combobox.bind("<<ComboboxSelected>>", self.on_news_table_selected)
# Tarama Ayarları
settings_frame = ttk.LabelFrame(news_frame, text="Tarama Ayarları", padding="15")
settings_frame.pack(fill=tk.X, pady=(0, 20))
# Sol taraf - Temel ayarlar
left_settings = ttk.Frame(settings_frame)
left_settings.pack(side=tk.LEFT, fill=tk.X, expand=True)
ttk.Label(left_settings, text="Gecikme (saniye):").grid(row=0, column=0, sticky=tk.W, pady=2)
self.delay_entry = ttk.Entry(left_settings, width=10)
self.delay_entry.insert(0, "1.0")
self.delay_entry.grid(row=0, column=1, padx=(5, 20), pady=2)
ttk.Label(left_settings, text="Timeout (saniye):").grid(row=0, column=2, sticky=tk.W, pady=2)
self.timeout_entry = ttk.Entry(left_settings, width=10)
self.timeout_entry.insert(0, "30")
self.timeout_entry.grid(row=0, column=3, padx=(5, 20), pady=2)
ttk.Label(left_settings, text="Maksimum Deneme:").grid(row=0, column=4, sticky=tk.W, pady=2)
self.retries_entry = ttk.Entry(left_settings, width=10)
self.retries_entry.insert(0, "3")
self.retries_entry.grid(row=0, column=5, padx=(5, 0), pady=2)
# Sağ taraf - Özel ayarlar
right_settings = ttk.Frame(settings_frame)
right_settings.pack(side=tk.RIGHT, fill=tk.X)
self.headless_var = tk.BooleanVar(value=True)
ttk.Checkbutton(right_settings, text="Headless Browser",
variable=self.headless_var).pack(side=tk.LEFT, padx=(0, 10))
self.proxy_var = tk.BooleanVar(value=False)
ttk.Checkbutton(right_settings, text="Proxy Kullan",
variable=self.proxy_var).pack(side=tk.LEFT, padx=(0, 10))
self.ai_summary_var = tk.BooleanVar(value=False)
ttk.Checkbutton(right_settings, text="AI ile Özetle",
variable=self.ai_summary_var).pack(side=tk.LEFT)
# Sütun eşleştirme alanı
self.mapping_frame = ttk.LabelFrame(news_frame, text="Sütun Eşleştirme", padding="10")
self.mapping_frame.pack(fill=tk.X, pady=(0, 10))
self.mapping_title = ttk.Combobox(self.mapping_frame, width=20, state="readonly")
self.mapping_content = ttk.Combobox(self.mapping_frame, width=20, state="readonly")
self.mapping_image = ttk.Combobox(self.mapping_frame, width=20, state="readonly")
self.mapping_category = ttk.Combobox(self.mapping_frame, width=20, state="readonly")
self.mapping_date = ttk.Combobox(self.mapping_frame, width=20, state="readonly")
ttk.Label(self.mapping_frame, text="Başlık:").grid(row=0, column=0, padx=5, pady=5)
self.mapping_title.grid(row=0, column=1, padx=5, pady=5)
ttk.Label(self.mapping_frame, text="İçerik:").grid(row=0, column=2, padx=5, pady=5)
self.mapping_content.grid(row=0, column=3, padx=5, pady=5)
ttk.Label(self.mapping_frame, text="Resim URL:").grid(row=0, column=4, padx=5, pady=5)
self.mapping_image.grid(row=0, column=5, padx=5, pady=5)
ttk.Label(self.mapping_frame, text="Kategori:").grid(row=0, column=6, padx=5, pady=5)
self.mapping_category.grid(row=0, column=7, padx=5, pady=5)
ttk.Label(self.mapping_frame, text="Tarih:").grid(row=0, column=8, padx=5, pady=5)
self.mapping_date.grid(row=0, column=9, padx=5, pady=5)
# Kontrol butonları
control_frame = ttk.Frame(news_frame)
control_frame.pack(fill=tk.X, pady=(0, 3))
self.scrape_btn = ttk.Button(control_frame, text="Tarama Başlat",
command=self.start_scraping)
self.scrape_btn.pack(side=tk.LEFT, padx=(0, 3))
self.stop_btn = ttk.Button(control_frame, text="Durdur",
command=self.stop_scraping, state="disabled")
self.stop_btn.pack(side=tk.LEFT, padx=(0, 3))
ttk.Label(control_frame, text="Durum:").pack(side=tk.LEFT, padx=(20, 5))
ttk.Label(control_frame, textvariable=self.scraping_status,
foreground="blue").pack(side=tk.LEFT, padx=(0, 10))
# Progress bar
self.progress_bar = ttk.Progressbar(control_frame, variable=self.scraping_progress,
maximum=100)
self.progress_bar.pack(side=tk.LEFT, fill=tk.X, expand=True, padx=(0, 20))
# Haber listesi
news_list_frame = ttk.LabelFrame(news_frame, text="Haberler", padding="15")
news_list_frame.pack(fill=tk.BOTH,padx=(100, 0))
# Treeview
self.news_tree = ttk.Treeview(news_list_frame,
columns=("title", "summary", "image_url", "date", "source"),
show="headings", selectmode="extended")
self.news_tree.heading("title", text="Başlık")
self.news_tree.heading("summary", text="İçerik")
self.news_tree.heading("image_url", text="Resim URL")
self.news_tree.heading("date", text="Tarih")
self.news_tree.heading("source", text="Kaynak")
self.news_tree.column("title", width=250)
self.news_tree.column("summary", width=400)
self.news_tree.column("image_url", width=150)
self.news_tree.column("date", width=100)
self.news_tree.column("source", width=100)
# Scrollbar
scrollbar = ttk.Scrollbar(news_list_frame, orient=tk.VERTICAL, command=self.news_tree.yview)
self.news_tree.configure(yscrollcommand=scrollbar.set)
self.news_tree.pack(side=tk.LEFT, fill=tk.BOTH, expand=True)
scrollbar.pack(side=tk.RIGHT, fill=tk.Y)
# Veritabanına gönder butonları
btn_frame = ttk.Frame(news_frame)
btn_frame.pack(pady=0)
ttk.Button(btn_frame, text="Seçili Haberi Kaydet",
command=self.save_selected_news_to_db).pack(side=tk.BOTTOM, padx=0)
ttk.Button(btn_frame, text="Seçili Tümünü Kaydet",
command=self.save_all_selected_news_to_db).pack(side=tk.LEFT, padx=0)
ttk.Button(btn_frame, text="Tümünü Kaydet",
command=self.save_all_news_to_db).pack(side=tk.LEFT, padx=0)
ttk.Button(btn_frame, text="AI ile Özetle",
command=self.summarize_with_ai).pack(side=tk.LEFT, padx=0)
# İlk metod için kaynakları ayarla
self.on_method_change()
# Canvas ve scrollbar'ı pack et
canvas.pack(side="left", fill="both", expand=True)
scrollbar.pack(side="right", fill="y")
def on_method_change(self):
    """Refresh the source combobox (and URL entry) for the selected scraping method.

    The "rss" method lists ``self.rss_sources``; every other method lists
    ``self.web_sources``. The first source is pre-selected and its URL is
    pushed into the URL entry.
    """
    if self.scraping_method_var.get() == "rss":
        sources = self.rss_sources
    else:
        sources = self.web_sources
    self.source_combobox['values'] = [name for name, _url in sources]
    if not sources:
        # current(0) raises TclError when the value list is empty.
        self.source_combobox.set('')
        return
    self.source_combobox.current(0)
    # A programmatic selection does not fire <<ComboboxSelected>>, so the
    # URL entry has to be synchronised explicitly.
    if hasattr(self, 'url_entry'):
        self.on_source_change(None)
def on_source_change(self, event):
    """Copy the URL of the currently selected source into the URL entry.

    Does nothing when the combobox has no selection. ``event`` is the Tk
    event object and is not used.
    """
    index = self.source_combobox.current()
    if index < 0:
        return
    use_rss = self.scraping_method_var.get() == "rss"
    catalogue = self.rss_sources if use_rss else self.web_sources
    address = catalogue[index][1]
    self.url_entry.delete(0, tk.END)
    self.url_entry.insert(0, address)
def on_news_table_selected(self, event):
    """Fill the column-mapping comboboxes for the chosen news table.

    All five mapping boxes receive the table's column list. Columns whose
    lower-cased name matches a known alias are pre-selected in the matching
    box. With no table selected, or on error, the lists are emptied.
    ``event`` is the Tk event object and is not used.
    """
    boxes = (
        self.mapping_title,
        self.mapping_content,
        self.mapping_image,
        self.mapping_category,
        self.mapping_date,
    )

    def fill(options):
        for box in boxes:
            box['values'] = options

    table = self.news_table_combobox.get()
    if not table:
        fill([])
        return

    # (aliases, target box) pairs used for the automatic mapping suggestion.
    suggestions = (
        (["title", "baslik", "name"], self.mapping_title),
        (["content", "icerik", "summary", "description"], self.mapping_content),
        (["image", "image_url", "resim", "resim_url"], self.mapping_image),
        (["kategori", "category"], self.mapping_category),
        (["date", "tarih"], self.mapping_date),
    )
    try:
        found = self.get_longtext_columns(table)
        fill(found)
        for column in found:
            for aliases, box in suggestions:
                if column.lower() in aliases:
                    box.set(column)
    except Exception as e:
        fill([])
        messagebox.showerror("Hata", f"Tablo sütunları alınamadı: {str(e)}")
def get_longtext_columns(self, table_name):
    """Return the column names of *table_name* offered for news mapping.

    Despite the name, no LONGTEXT filtering happens: every column whose
    reported ``Type`` is non-empty is returned. That behaviour is kept
    because callers use the result as the table's full column list.

    Rows are read as mappings with ``Field`` and ``Type`` keys.
    On any error a message box is shown and an empty list is returned.
    """
    try:
        # Backtick-quote the identifier so reserved words or unusual
        # characters in the table name cannot break the statement.
        quoted = "`" + str(table_name).replace("`", "``") + "`"
        cursor = self.db_manager.cursor
        cursor.execute(f"SHOW FULL COLUMNS FROM {quoted}")
        return [row['Field'] for row in cursor.fetchall() if row['Type'].upper()]
    except Exception as e:
        messagebox.showerror("Hata", f"LONGTEXT sütunları alınamadı: {str(e)}")
        return []
def save_selected_news_to_db(self):
    """Save the rows currently selected in the news list to the database.

    Shows an error dialog when nothing is selected.
    """
    chosen = self.news_tree.selection()
    if chosen:
        self._save_news_items_to_db(chosen)
        return
    messagebox.showerror("Hata", "Lütfen bir veya birden fazla haber seçin!")
def save_all_selected_news_to_db(self):
    """Save the selected news rows, or every row when nothing is selected.

    Shows an error dialog when the news list is empty.
    """
    targets = self.news_tree.selection()
    if not targets:
        # Nothing selected: fall back to every row in the list.
        targets = self.news_tree.get_children()
        if not targets:
            messagebox.showerror("Hata", "Gönderilecek haber yok!")
            return
    self._save_news_items_to_db(targets)
def _save_news_items_to_db(self, items):
    """Insert the given news-tree rows into the selected table.

    Args:
        items: iterable of Treeview item ids whose values are
            ``(title, summary, image_url, date, source)``.

    The title and content mappings are required; image and category are
    optional. Rows whose stripped title already exists in the table are
    skipped. Each insert sends one value per table column, ``None`` for
    unmapped ones. The category column receives the name of the source
    selected in the source combobox. A summary dialog is shown at the end.

    NOTE(review): the date mapping combobox is not read here, so the scraped
    date is never stored — confirm whether that is intended.
    """
    selected_table = self.news_table_combobox.get()
    if not selected_table:
        messagebox.showerror("Hata", "Lütfen bir tablo seçin (üstteki Tablo kutusundan)!")
        return

    def mapped(box):
        # An empty or whitespace-only combobox means "not mapped".
        return box.get().strip() or None

    def quote(identifier):
        # Backtick-quote so reserved words / odd characters stay valid SQL.
        return "`" + str(identifier).replace("`", "``") + "`"

    def as_text(value):
        # DB drivers and Treeview may hand back bytes or numbers.
        if isinstance(value, (bytes, bytearray)):
            return value.decode("utf-8", errors="replace")
        return str(value)

    col_title = mapped(self.mapping_title)
    col_content = mapped(self.mapping_content)
    col_image = mapped(self.mapping_image)
    col_category = mapped(self.mapping_category)

    chosen = [c for c in (col_title, col_content, col_image, col_category) if c]
    if len(chosen) != len(set(chosen)):
        messagebox.showerror("Hata", "Aynı tablo sütunu birden fazla alana eşlenemez!")
        return
    if not col_title or not col_content:
        messagebox.showerror("Hata", "Başlık ve İçerik için eşleştirme yapmalısınız!")
        return

    try:
        columns = self.get_longtext_columns(selected_table)
    except Exception as e:
        messagebox.showerror("Hata", f"Tablo sütunları alınamadı: {str(e)}")
        return
    if not columns:
        messagebox.showerror("Hata", "Seçili tabloda LONGTEXT tipinde sütun yok! Lütfen uygun tablo seçin.")
        return
    for c in chosen:
        if c not in columns:
            messagebox.showerror("Hata", f"Seçilen sütun '{c}' tablonuzda yok veya LONGTEXT değil!")
            return

    # Collect existing titles so duplicates are not inserted again.
    try:
        existing_titles = set()
        self.db_manager.cursor.execute(
            f"SELECT {quote(col_title)} FROM {quote(selected_table)}"
        )
        for row in self.db_manager.cursor.fetchall():
            val = row.get(col_title)
            if val:
                existing_titles.add(as_text(val).strip())
    except Exception as e:
        messagebox.showerror("Hata", f"Mevcut başlıklar alınamadı: {str(e)}")
        return

    # Source name (stored in the category column).
    source_idx = self.source_combobox.current()
    method = self.scraping_method_var.get()
    if method == "rss" and source_idx >= 0:
        source_name = self.rss_sources[source_idx][0]
    elif source_idx >= 0:
        source_name = self.web_sources[source_idx][0]
    else:
        source_name = "Kaynak"

    success_count = 0
    fail_count = 0
    skipped_count = 0
    for item_id in items:
        values = self.news_tree.item(item_id)['values']
        title, summary, image_url, _date, _source = values
        # Treeview turns numeric-looking strings into ints; normalise before
        # calling str methods.
        title_key = as_text(title).strip()
        if title_key in existing_titles:
            skipped_count += 1
            continue

        data = {col: None for col in columns}
        data[col_title] = title
        data[col_content] = summary
        if col_image:
            data[col_image] = image_url
        if col_category:
            data[col_category] = source_name

        try:
            if self.db_manager.insert_data(selected_table, data):
                success_count += 1
                existing_titles.add(title_key)
            else:
                fail_count += 1
        except Exception as e:
            # Keep going, but leave a trace instead of failing silently.
            print(f"Haber kaydetme hatası: {e}")
            fail_count += 1

    msg = []
    if success_count > 0:
        msg.append(f"{success_count} haber başarıyla kaydedildi!")
        self.load_table_data(selected_table)
    if skipped_count > 0:
        msg.append(f"{skipped_count} haber zaten vardı, eklenmedi.")
    if fail_count > 0:
        msg.append(f"{fail_count} haber kaydedilemedi! Lütfen eşleştirmeleri ve tabloyu kontrol edin.")
    if msg:
        messagebox.showinfo("Sonuç", '\n'.join(msg))
def connect_to_mysql(self):
    """Connect to MySQL using the values typed into the connection form.

    On success the status variable is updated, a confirmation is shown and
    the table list is loaded. Failures, including a non-numeric port, are
    reported in an error dialog.
    """
    try:
        params = (
            self.host_entry.get(),
            int(self.port_entry.get()),
            self.username_entry.get(),
            self.password_entry.get(),
            self.database_entry.get(),
        )
        if not self.db_manager.connect(*params):
            messagebox.showerror("Hata", "MySQL bağlantısı kurulamadı!")
            return
        self.connection_status.set("Bağlı")
        messagebox.showinfo("Başarılı", "MySQL bağlantısı kuruldu!")
        self.load_tables()
    except Exception as e:
        messagebox.showerror("Hata", f"Bağlantı hatası: {str(e)}")
def load_tables(self):
    """Reload the table overview tree and the table comboboxes.

    Each table becomes a tree row whose value is its comma-joined column
    list. The news-tab combobox is updated only if it already exists.
    """
    try:
        names = self.db_manager.get_tables()
        tree = self.tables_tree
        tree.delete(*tree.get_children())
        for name in names:
            joined = ", ".join(self.db_manager.get_table_columns(name))
            tree.insert("", "end", text=name, values=(joined,))
        # Keep both table selectors in sync with the database.
        self.table_combobox['values'] = names
        if hasattr(self, 'news_table_combobox'):
            self.news_table_combobox['values'] = names
    except Exception as e:
        messagebox.showerror("Hata", f"Tablolar yüklenirken hata: {str(e)}")
def on_table_selected(self, event):
    """Load the rows of the table picked in the table combobox.

    ``event`` is the Tk event object and is not used.
    """
    name = self.table_combobox.get()
    if not name:
        return
    self.load_table_data(name)
def load_table_data(self, table_name):
    """Show the rows of *table_name* in the data tree.

    Existing rows are always cleared. Columns are taken from the keys of
    the first row; when no rows come back the columns are left unchanged.
    """
    try:
        rows = self.db_manager.get_table_data(table_name)
        grid = self.data_tree
        grid.delete(*grid.get_children())
        if not rows:
            return
        headers = list(rows[0].keys())
        grid['columns'] = headers
        for header in headers:
            grid.heading(header, text=header.title())
            grid.column(header, width=100)
        for record in rows:
            grid.insert("", "end", values=[record.get(h, "") for h in headers])
    except Exception as e:
        messagebox.showerror("Hata", f"Veri yüklenirken hata: {str(e)}")
def connect_ollama(self):
    """Connect the AI manager to Ollama using the URL and model fields.

    Updates the AI status variable on success; the outcome is reported in
    a dialog either way.
    """
    try:
        endpoint = self.ollama_url_entry.get()
        model_name = self.ollama_model_entry.get()
        if self.ai_manager.connect_ollama(endpoint, model_name):
            self.ai_status.set("Ollama Bağlı")
            messagebox.showinfo("Başarılı", "Ollama bağlantısı kuruldu!")
            return
        messagebox.showerror("Hata", "Ollama bağlantısı kurulamadı!")
    except Exception as e:
        messagebox.showerror("Hata", f"Ollama bağlantı hatası: {str(e)}")
def connect_gemini(self):
    """Connect the AI manager to the Gemini API using the key field.

    Updates the AI status variable on success; the outcome is reported in
    a dialog either way.
    """
    try:
        key = self.gemini_key_entry.get()
        if self.ai_manager.connect_gemini(key):
            self.ai_status.set("Gemini Bağlı")
            messagebox.showinfo("Başarılı", "Gemini API bağlantısı kuruldu!")
            return
        messagebox.showerror("Hata", "Gemini API bağlantısı kurulamadı!")
    except Exception as e:
        messagebox.showerror("Hata", f"Gemini bağlantı hatası: {str(e)}")
def test_ai(self):
    """Send the test message to the AI manager and display its reply.

    The reply replaces the contents of the response text box; any error is
    reported in a dialog.
    """
    try:
        reply = self.ai_manager.send_message(self.test_message_entry.get())
        output = self.ai_response_text
        output.delete("1.0", tk.END)
        output.insert("1.0", reply)
    except Exception as e:
        messagebox.showerror("Hata", f"AI test hatası: {str(e)}")
def send_data(self):
    """Insert the JSON typed in the data box into the selected table.

    The result box reports success or failure. After a successful insert
    the table view is reloaded. Invalid JSON and other errors are shown in
    a dialog.
    """
    try:
        payload = json.loads(self.json_data_text.get("1.0", tk.END).strip())
        target = self.table_combobox.get()
        if not target:
            messagebox.showerror("Hata", "Lütfen bir tablo seçin!")
            return
        inserted = self.db_manager.insert_data(target, payload)
        self.result_text.delete("1.0", tk.END)
        if inserted:
            self.result_text.insert("1.0", "Veri başarıyla eklendi!")
            # Refresh the table view so the new row is visible.
            self.load_table_data(target)
        else:
            self.result_text.insert("1.0", "Veri eklenirken hata oluştu!")
    except json.JSONDecodeError:
        messagebox.showerror("Hata", "Geçersiz JSON formatı!")
    except Exception as e:
        messagebox.showerror("Hata", f"Veri gönderme hatası: {str(e)}")
def generate_data_with_ai(self):
    """Ask the AI for sample data for the selected table.

    The prompt lists the table's columns. The span from the first ``{`` to
    the last ``}`` of the reply is placed in the JSON box; if none is found
    the result box says so. Errors are reported in a dialog.
    """
    try:
        target = self.table_combobox.get()
        if not target:
            messagebox.showerror("Hata", "Lütfen bir tablo seçin!")
            return
        column_names = self.db_manager.get_table_columns(target)
        prompt = f"'{target}' tablosu için örnek veri oluştur. Sütunlar: {', '.join(column_names)}. JSON formatında döndür."
        reply = self.ai_manager.send_message(prompt)

        # Pull the JSON object out of the free-form reply.
        import re
        found = re.search(r'\{.*\}', reply, re.DOTALL)
        if found is None:
            self.result_text.delete("1.0", tk.END)
            self.result_text.insert("1.0", "AI yanıtından JSON çıkarılamadı.")
            return
        self.json_data_text.delete("1.0", tk.END)
        self.json_data_text.insert("1.0", found.group())
        self.result_text.delete("1.0", tk.END)
        self.result_text.insert("1.0", "AI ile veri oluşturuldu!")
    except Exception as e:
        messagebox.showerror("Hata", f"AI veri oluşturma hatası: {str(e)}")
# ==================== YENİ TARAMA METODLARI ====================
def start_scraping(self):
    """Start scraping the URL in the URL entry on a daemon thread.

    Refuses to start while a scrape is already running or when the URL is
    empty. Otherwise it flips the start/stop buttons, resets the progress
    value and hands the URL to ``_scrape_worker``.
    """
    if self.is_scraping:
        messagebox.showwarning("Uyarı", "Tarama zaten devam ediyor!")
        return
    target_url = self.url_entry.get().strip()
    if not target_url:
        messagebox.showerror("Hata", "Lütfen bir URL girin!")
        return

    self.is_scraping = True
    self.scraping_status.set("Tarama başlatılıyor...")
    self.scrape_btn.config(state="disabled")
    self.stop_btn.config(state="normal")
    self.scraping_progress.set(0)

    # Run the scrape off the UI thread so the window stays responsive.
    worker = threading.Thread(target=self._scrape_worker, args=(target_url,), daemon=True)
    worker.start()
def stop_scraping(self):
    """Request that the running scrape stops and reset the controls.

    Clears the ``is_scraping`` flag, which the scraper loops check between
    items, and restores the start/stop button states.
    """
    self.is_scraping = False
    self.scraping_status.set("Durduruldu")
    button_states = ((self.scrape_btn, "normal"), (self.stop_btn, "disabled"))
    for button, state in button_states:
        button.config(state=state)
def _scrape_worker(self, url):
"""Tarama işlemini gerçekleştiren worker thread"""
try:
method = self.scraping_method_var.get()
self.scraping_status.set(f"{method.upper()} ile tarama başlatıldı...")
if method == "rss":
self._scrape_rss(url)
elif method == "beautifulsoup":
self._scrape_beautifulsoup(url)
elif method == "selenium":
self._scrape_selenium(url)
elif method == "newspaper":
self._scrape_newspaper(url)
elif method == "requests_html":
self._scrape_requests_html(url)
elif method == "mechanicalsoup":
self._scrape_mechanicalsoup(url)
elif method == "cloudscraper":
self._scrape_cloudscraper(url)
else:
raise ValueError(f"Bilinmeyen tarama metodu: {method}")
except Exception as e:
self.scraping_status.set(f"Hata: {str(e)}")
messagebox.showerror("Tarama Hatası", str(e))
finally:
self.is_scraping = False
self.scrape_btn.config(state="normal")
self.stop_btn.config(state="disabled")
def _scrape_rss(self, url):
    """Read an RSS/Atom feed and collect its entries into ``scraped_data``.

    For each entry the summary is stripped of tags, unescaped and
    whitespace-collapsed. An image URL is taken from the first available
    of media content, media thumbnail or enclosure. Entries that raise are
    logged and skipped. The loop stops early when ``is_scraping`` is
    cleared, and the news tree is refreshed at the end.
    """
    self.scraping_status.set("RSS feed okunuyor...")
    feed = feedparser.parse(url)
    self.scraped_data = []
    entries = feed.entries
    total_entries = len(entries)

    # (entry attribute, key holding the URL) in order of preference.
    image_fields = (
        ("media_content", "url"),
        ("media_thumbnail", "url"),
        ("enclosures", "href"),
    )

    for position, entry in enumerate(entries, start=1):
        if not self.is_scraping:
            break
        try:
            title = entry.title
            raw_summary = getattr(entry, 'summary', '') or getattr(entry, 'description', '')
            # Strip HTML tags, decode entities, collapse whitespace.
            text = re.sub(r'<[^>]+>', '', raw_summary)
            text = unescape(text)
            text = re.sub(r'\s+', ' ', text).strip()

            image_url = ''
            for attribute, key in image_fields:
                candidates = getattr(entry, attribute, None)
                if candidates:
                    image_url = candidates[0].get(key, '')
                    break

            date = ''
            for attribute in ('published', 'updated'):
                if hasattr(entry, attribute):
                    date = getattr(entry, attribute)
                    break

            record = {
                'title': title,
                'summary': text,
                'image_url': image_url,
                'date': date,
                'source': feed.feed.get('title', 'RSS Feed'),
            }
            self.scraped_data.append(record)

            self.scraping_progress.set(position / total_entries * 100)
            self.scraping_status.set(f"RSS: {position}/{total_entries} haber işlendi")
            time.sleep(float(self.delay_entry.get()))
        except Exception as e:
            print(f"RSS entry hatası: {e}")
            continue
    self._update_news_tree()
def _scrape_beautifulsoup(self, url):
    """Fetch *url* with requests and extract news items with BeautifulSoup.

    Candidate elements are gathered selector by selector until more than
    10 are found, then capped at 20. An item is kept only when both a
    title and some content are found. Relative image URLs are resolved
    against *url*. HTTP errors propagate to the caller; per-element errors
    are logged and skipped. The news tree is refreshed at the end.
    """
    self.scraping_status.set("BeautifulSoup ile tarama başlatıldı...")
    headers = {'User-Agent': self.scraping_settings['user_agent']}
    response = requests.get(url, headers=headers, timeout=int(self.timeout_entry.get()))
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    self.scraped_data = []

    # Common news selectors.
    selectors = [
        'article', '.article', '.post', '.news-item', '.story',
        'h1', 'h2', 'h3', '.headline', '.title',
        '.content', '.summary', '.excerpt', '.description'
    ]
    articles = []
    for selector in selectors:
        articles.extend(soup.select(selector))
        if len(articles) > 10:  # enough candidates found
            break

    # Cap first so progress and status are measured against the number of
    # elements actually processed and the bar can reach 100%.
    articles = articles[:20]
    total = len(articles)
    source = urlparse(url).netloc
    title_classes = re.compile(r'title|headline')
    content_classes = re.compile(r'content|summary|excerpt')
    date_classes = re.compile(r'date|time|published')

    for i, article in enumerate(articles):
        if not self.is_scraping:
            break
        try:
            title_elem = article.find(['h1', 'h2', 'h3', 'h4']) or article.find(class_=title_classes)
            title = title_elem.get_text().strip() if title_elem else ''

            content_elem = article.find(['p', 'div']) or article.find(class_=content_classes)
            content = content_elem.get_text().strip() if content_elem else ''

            img_elem = article.find('img')
            image_url = img_elem.get('src', '') if img_elem else ''
            if image_url and not image_url.startswith('http'):
                image_url = urljoin(url, image_url)

            date_elem = article.find(class_=date_classes)
            date = date_elem.get_text().strip() if date_elem else ''

            if title and content:
                self.scraped_data.append({
                    'title': title,
                    'summary': content[:500] + '...' if len(content) > 500 else content,
                    'image_url': image_url,
                    'date': date,
                    'source': source
                })
            self.scraping_progress.set((i + 1) / total * 100)
            self.scraping_status.set(f"BeautifulSoup: {i+1}/{total} haber işlendi")
            time.sleep(float(self.delay_entry.get()))
        except Exception as e:
            print(f"BeautifulSoup article hatası: {e}")
            continue
    self._update_news_tree()
def _scrape_selenium(self, url):
    """Load *url* in Chrome via Selenium and extract news items.

    Runs headless when the checkbox is set. Waits up to 10 seconds for
    ``<body>``, then scrolls to the bottom to trigger lazy loading.
    Candidates are gathered until more than 15 are found and capped at 15.
    An item needs a title; its content falls back to the element's full
    text and its image to an empty string. The driver is always quit, and
    the news tree is refreshed at the end.
    """
    # Local import: only the lookup failure should trigger the fallbacks.
    from selenium.common.exceptions import NoSuchElementException

    self.scraping_status.set("Selenium ile tarama başlatıldı...")
    options = Options()
    if self.headless_var.get():
        options.add_argument('--headless')
    options.add_argument('--no-sandbox')
    options.add_argument('--disable-dev-shm-usage')
    options.add_argument(f'--user-agent={self.scraping_settings["user_agent"]}')

    driver = None
    try:
        driver = webdriver.Chrome(options=options)
        driver.set_page_load_timeout(int(self.timeout_entry.get()))
        driver.get(url)
        # Wait for the page to load.
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.TAG_NAME, "body"))
        )
        # Scroll to the bottom so lazily loaded content appears.
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(2)

        selectors = [
            'article', '.article', '.post', '.news-item', '.story',
            'h1', 'h2', 'h3', '.headline', '.title'
        ]
        articles = []
        for selector in selectors:
            articles.extend(driver.find_elements(By.CSS_SELECTOR, selector))
            if len(articles) > 15:
                break

        # Cap first so progress is measured against what is processed.
        articles = articles[:15]
        total = len(articles)
        source = urlparse(url).netloc
        self.scraped_data = []

        for i, article in enumerate(articles):
            if not self.is_scraping:
                break
            try:
                title_elem = article.find_element(By.CSS_SELECTOR, 'h1, h2, h3, h4, .title, .headline')
                title = title_elem.text.strip()

                try:
                    content_elem = article.find_element(By.CSS_SELECTOR, 'p, .content, .summary, .excerpt')
                    content = content_elem.text.strip()
                except NoSuchElementException:
                    content = article.text.strip()

                try:
                    img_elem = article.find_element(By.TAG_NAME, 'img')
                    # get_attribute returns None when src is missing.
                    image_url = img_elem.get_attribute('src') or ''
                except NoSuchElementException:
                    image_url = ''

                if title and content:
                    self.scraped_data.append({
                        'title': title,
                        'summary': content[:500] + '...' if len(content) > 500 else content,
                        'image_url': image_url,
                        'date': '',
                        'source': source
                    })
                self.scraping_progress.set((i + 1) / total * 100)
                self.scraping_status.set(f"Selenium: {i+1}/{total} haber işlendi")
            except Exception as e:
                print(f"Selenium article hatası: {e}")
                continue
    finally:
        if driver:
            driver.quit()
    self._update_news_tree()
def _scrape_newspaper(self, url):
    """Collect up to ten articles from *url* using Newspaper3k.

    Each article is downloaded and parsed; it is kept only when it has
    both a title and text. Per-article errors are logged and skipped,
    while a failure building the site is re-raised with a Newspaper3k
    prefix. The news tree is refreshed at the end.
    """
    self.scraping_status.set("Newspaper3k ile tarama başlatıldı...")
    try:
        # memoize_articles=False makes Newspaper3k return already-seen articles too.
        paper = newspaper.build(url, memoize_articles=False)
        self.scraped_data = []
        batch = paper.articles[:10]  # first 10 articles
        for position, article in enumerate(batch, start=1):
            if not self.is_scraping:
                break
            try:
                article.download()
                article.parse()
                if article.title and article.text:
                    body = article.text
                    record = {
                        'title': article.title,
                        'summary': body[:500] + '...' if len(body) > 500 else body,
                        'image_url': article.top_image if hasattr(article, 'top_image') else '',
                        'date': str(article.publish_date) if article.publish_date else '',
                        'source': urlparse(url).netloc,
                    }
                    self.scraped_data.append(record)
                self.scraping_progress.set(position / len(batch) * 100)
                self.scraping_status.set(f"Newspaper3k: {position}/{len(batch)} haber işlendi")
                time.sleep(float(self.delay_entry.get()))
            except Exception as e:
                print(f"Newspaper3k article hatası: {e}")
                continue
    except Exception as e:
        raise Exception(f"Newspaper3k hatası: {e}")
    self._update_news_tree()
def _scrape_requests_html(self, url):
    """Fetch and render *url* with Requests-HTML and extract news items.

    The timeout field applies to both the HTTP request and the JavaScript
    render. Candidates are gathered until more than 15 are found and
    capped at 15. Relative image URLs are resolved against *url*. The
    session is always closed, and the news tree is refreshed at the end.
    """
    self.scraping_status.set("Requests-HTML ile tarama başlatıldı...")
    timeout = int(self.timeout_entry.get())
    session = HTMLSession()
    try:
        response = session.get(url, timeout=timeout)
        response.raise_for_status()
        response.html.render(timeout=timeout)
        self.scraped_data = []

        selectors = [
            'article', '.article', '.post', '.news-item', '.story',
            'h1', 'h2', 'h3', '.headline', '.title'
        ]
        articles = []
        for selector in selectors:
            articles.extend(response.html.find(selector))
            if len(articles) > 15:
                break

        # Cap first so progress is measured against what is processed.
        articles = articles[:15]
        total = len(articles)
        source = urlparse(url).netloc

        for i, article in enumerate(articles):
            if not self.is_scraping:
                break
            try:
                title_elem = article.find('h1, h2, h3, h4, .title, .headline', first=True)
                title = title_elem.text.strip() if title_elem else ''

                content_elem = article.find('p, .content, .summary, .excerpt', first=True)
                content = content_elem.text.strip() if content_elem else article.text.strip()

                img_elem = article.find('img', first=True)
                image_url = img_elem.attrs.get('src', '') if img_elem else ''
                if image_url and not image_url.startswith('http'):
                    image_url = urljoin(url, image_url)

                if title and content:
                    self.scraped_data.append({
                        'title': title,
                        'summary': content[:500] + '...' if len(content) > 500 else content,
                        'image_url': image_url,
                        'date': '',
                        'source': source
                    })
                self.scraping_progress.set((i + 1) / total * 100)
                self.scraping_status.set(f"Requests-HTML: {i+1}/{total} haber işlendi")
            except Exception as e:
                print(f"Requests-HTML article hatası: {e}")
                continue
    finally:
        # Release the session's resources even when fetching or rendering fails.
        session.close()
    self._update_news_tree()
def _scrape_mechanicalsoup(self, url):
    """Fetch *url* with MechanicalSoup and extract news items.

    The request uses the timeout field and raises on HTTP error status.
    Candidates are gathered until more than 15 are found and capped at 15.
    Relative image URLs are resolved against *url*. Per-element errors are
    logged and skipped, and the news tree is refreshed at the end.
    """
    self.scraping_status.set("MechanicalSoup ile tarama başlatıldı...")
    browser = mechanicalsoup.Browser()
    # Extra keyword arguments are forwarded to the underlying requests call.
    page = browser.get(url, timeout=int(self.timeout_entry.get()))
    page.raise_for_status()
    self.scraped_data = []

    selectors = [
        'article', '.article', '.post', '.news-item', '.story',
        'h1', 'h2', 'h3', '.headline', '.title'
    ]
    articles = []
    for selector in selectors:
        articles.extend(page.soup.select(selector))
        if len(articles) > 15:
            break

    # Cap first so progress is measured against what is processed.
    articles = articles[:15]
    total = len(articles)
    source = urlparse(url).netloc

    for i, article in enumerate(articles):
        if not self.is_scraping:
            break
        try:
            title_elem = article.select_one('h1, h2, h3, h4, .title, .headline')
            title = title_elem.get_text().strip() if title_elem else ''

            content_elem = article.select_one('p, .content, .summary, .excerpt')
            content = content_elem.get_text().strip() if content_elem else article.get_text().strip()

            img_elem = article.select_one('img')
            image_url = img_elem.get('src', '') if img_elem else ''
            if image_url and not image_url.startswith('http'):
                image_url = urljoin(url, image_url)

            if title and content:
                self.scraped_data.append({
                    'title': title,
                    'summary': content[:500] + '...' if len(content) > 500 else content,
                    'image_url': image_url,
                    'date': '',
                    'source': source
                })
            self.scraping_progress.set((i + 1) / total * 100)
            self.scraping_status.set(f"MechanicalSoup: {i+1}/{total} haber işlendi")
        except Exception as e:
            print(f"MechanicalSoup article hatası: {e}")
            continue
    self._update_news_tree()
def _scrape_cloudscraper(self, url):
    """Fetch *url* with CloudScraper and extract news items.

    The request uses the timeout field and raises on HTTP error status, so
    a blocked request surfaces as an error instead of an empty result.
    Candidates are gathered until more than 15 are found and capped at 15.
    Relative image URLs are resolved against *url*. Per-element errors are
    logged and skipped, and the news tree is refreshed at the end.
    """
    self.scraping_status.set("CloudScraper ile tarama başlatıldı...")
    scraper = cloudscraper.create_scraper()
    response = scraper.get(url, timeout=int(self.timeout_entry.get()))
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')
    self.scraped_data = []

    selectors = [
        'article', '.article', '.post', '.news-item', '.story',
        'h1', 'h2', 'h3', '.headline', '.title'
    ]
    articles = []
    for selector in selectors:
        articles.extend(soup.select(selector))
        if len(articles) > 15:
            break

    # Cap first so progress is measured against what is processed.
    articles = articles[:15]
    total = len(articles)
    source = urlparse(url).netloc

    for i, article in enumerate(articles):
        if not self.is_scraping:
            break
        try:
            title_elem = article.select_one('h1, h2, h3, h4, .title, .headline')
            title = title_elem.get_text().strip() if title_elem else ''

            content_elem = article.select_one('p, .content, .summary, .excerpt')
            content = content_elem.get_text().strip() if content_elem else article.get_text().strip()

            img_elem = article.select_one('img')
            image_url = img_elem.get('src', '') if img_elem else ''
            if image_url and not image_url.startswith('http'):
                image_url = urljoin(url, image_url)

            if title and content:
                self.scraped_data.append({
                    'title': title,
                    'summary': content[:500] + '...' if len(content) > 500 else content,
                    'image_url': image_url,
                    'date': '',
                    'source': source
                })
            self.scraping_progress.set((i + 1) / total * 100)
            self.scraping_status.set(f"CloudScraper: {i+1}/{total} haber işlendi")
        except Exception as e:
            print(f"CloudScraper article hatası: {e}")
            continue
    self._update_news_tree()
def _update_news_tree(self):
"""Haber listesini güncelle"""
self.news_tree.delete(*self.news_tree.get_children())
# AI özetleme seçili mi kontrol et
use_ai_summary = self.ai_summary_var.get() and self.ai_manager.is_connected()
for item in self.scraped_data:
summary = item['summary']
# AI özetleme aktifse ve AI bağlıysa
if use_ai_summary and len(summary) > 100: # Sadece uzun metinleri özetle
try:
prompt = f"""Aşağıdaki haber metnini Türkçe olarak yeniden oluştur makale haline getir ve düzenle.
Başlık: {item['title']}
İçerik: {summary}
Özetleme kuralları:
1. Ana fikirleri koru
3. Akıcı ve anlaşılır Türkçe kullan
4. 5000-10000 kelime arasında tut
5. Paragraflar halinde düzenle
6. Teknik terimleri açıkla
Özetlenmiş metin:"""
ai_summary = self.ai_manager.send_message(prompt)
summary = ai_summary
except Exception as e:
print(f"AI özetleme hatası: {e}")
# Hata durumunda orijinal metni kullan
pass
self.news_tree.insert("", "end", values=(
item['title'],
summary,
item['image_url'],
item['date'],
item['source']
))
status_text = f"Tamamlandı! {len(self.scraped_data)} haber bulundu."
if use_ai_summary:
status_text += " (AI ile özetlendi)"
self.scraping_status.set(status_text)
self.scraping_progress.set(100)
def save_all_news_to_db(self):
    """Save every row of the news list to the database.

    Shows an error dialog when the list is empty.
    """
    everything = self.news_tree.get_children()
    if everything:
        self._save_news_items_to_db(everything)
        return
    messagebox.showerror("Hata", "Gönderilecek haber yok!")
def summarize_with_ai(self):
    """Rewrite the selected news rows with the connected AI.

    A modal progress window tracks the run. For each selected row the
    title and current summary are sent to the AI, and a non-empty reply
    replaces the row's summary column. A failing item is logged and the
    run continues. The progress window is always destroyed, even when
    setup or the loop raises. The final dialog reports how many rows were
    actually rewritten.
    """
    selected = self.news_tree.selection()
    if not selected:
        messagebox.showerror("Hata", "Lütfen özetlenecek haberleri seçin!")
        return
    if not self.ai_manager.is_connected():
        messagebox.showerror("Hata", "AI bağlantısı kurulmamış! Lütfen önce AI sekmesinden bağlantı kurun.")
        return

    progress_window = None
    summarized = 0
    failure = None
    try:
        progress_window = tk.Toplevel(self.root)
        progress_window.title("AI Özetleme")
        progress_window.geometry("500x200")
        progress_window.transient(self.root)
        progress_window.grab_set()
        ttk.Label(progress_window, text="Seçili haberler AI ile özetleniyor...").pack(pady=10)
        progress_var = tk.DoubleVar()
        progress_bar = ttk.Progressbar(progress_window, variable=progress_var, maximum=len(selected))
        progress_bar.pack(fill=tk.X, padx=20, pady=10)
        status_label = ttk.Label(progress_window, text="")
        status_label.pack(pady=5)
        log_label = ttk.Label(progress_window, text="", wraplength=450)
        log_label.pack(pady=5)

        for i, item_id in enumerate(selected):
            try:
                values = self.news_tree.item(item_id)['values']
                title, summary, _image_url, _date, _source = values
                # Treeview may return numeric-looking titles as ints.
                title = str(title)
                status_label.config(text=f"Özetleniyor: {title[:50]}...")
                log_label.config(text=f"İşleniyor: {i+1}/{len(selected)}")
                progress_window.update()

                try:
                    prompt = f"""Aşağıdaki haber metnini Türkçe olarak yeniden oluştur makale haline getir ve düzenle.
Başlık: {title}
İçerik: {summary}
Özetleme kuralları:
1. Ana fikirleri koru
3. Akıcı ve anlaşılır Türkçe kullan
4. 5000-10000 kelime arasında tut
5. Paragraflar halinde düzenle
6. Teknik terimleri açıkla
Özetlenmiş metin:"""
                    log_label.config(text=f"AI'ya gönderiliyor: {title[:30]}...")
                    progress_window.update()
                    ai_summary = self.ai_manager.send_message(prompt)
                    if ai_summary and ai_summary.strip():
                        self.news_tree.set(item_id, "summary", ai_summary)
                        summarized += 1
                        log_label.config(text=f"Başarılı: {title[:30]}...")
                    else:
                        log_label.config(text=f"AI boş yanıt verdi: {title[:30]}...")
                except Exception as e:
                    error_msg = f"AI özetleme hatası: {str(e)}"
                    print(error_msg)
                    log_label.config(text=error_msg)

                # Advance the bar for failed items too so it can complete.
                progress_var.set(i + 1)
                progress_window.update()
                # Short pause between requests.
                time.sleep(1)
            except Exception as e:
                print(f"Genel hata: {e}")
                log_label.config(text=f"Genel hata: {str(e)}")
                continue
    except Exception as e:
        failure = e
    finally:
        # The window holds an input grab; never leave it behind.
        if progress_window is not None:
            progress_window.destroy()

    if failure is not None:
        messagebox.showerror("Hata", f"AI özetleme hatası: {str(failure)}")
        print(f"AI özetleme genel hatası: {failure}")
        return
    messagebox.showinfo("Başarılı", f"{summarized} haber AI ile özetlendi!")
def main():
    """Create the Tk root window, build the application and run the event loop."""
    window = tk.Tk()
    # Keep a reference to the application object while the loop runs.
    application = MySQLToolApp(window)
    window.mainloop()


if __name__ == "__main__":
    main()
----------------------Built with CURSOR----------------
------------------------------------------------------
👁️ Viewed: 94
Comments