上传文件至 /

main
dghc2023 2024-11-26 16:30:00 +00:00
parent 750390f862
commit e6842c6d7f
4 changed files with 1027 additions and 546 deletions

View File

@ -12,25 +12,63 @@ class AccountData:
new_password: str # 新密码 new_password: str # 新密码
new_aux_email: str # 新辅助邮箱 new_aux_email: str # 新辅助邮箱
change_status: str # 是否更改完成 change_status: str # 是否更改完成
proxy: str # 代理
region: Optional[str] = None # 区域(可选)
class AccountManagerSQLite: class AccountManagerSQLite:
def __init__(self, db_path="accounts.db"): def __init__(self, db_path="accounts.db", debug=False):
self.db_path = db_path self.db_path = db_path
self.debug = debug
self._initialize_db() self._initialize_db()
def debug_print(self, *args):
    """Print the given values, but only when debug mode is enabled."""
    if not self.debug:
        return
    print(*args)
def _initialize_db(self): def _initialize_db(self):
"""初始化数据库结构""" """初始化或检查数据库结构"""
with self._get_connection() as conn: with self._get_connection() as conn:
# 启用 WAL 模式
current_mode = conn.execute("PRAGMA journal_mode").fetchone()[0]
if current_mode != "wal":
self.debug_print("切换到 WAL 模式...")
conn.execute("PRAGMA journal_mode=WAL")
# 检查表是否存在
cursor = conn.execute("PRAGMA table_info(accounts)")
columns = [row[1] for row in cursor.fetchall()]
# 定义表结构
required_columns = [
"email", "original_password", "original_aux_email",
"new_password", "new_aux_email", "change_status", "proxy", "region"
]
if not columns: # 表不存在
self.debug_print("表不存在,正在创建表...")
elif columns != required_columns: # 表存在但结构不一致
self.debug_print(f"表结构不一致,当前列: {columns}, 期望列: {required_columns}")
self.debug_print("正在重建表...")
conn.execute("DROP TABLE IF EXISTS accounts")
else: # 表存在且结构一致
self.debug_print("表结构检查通过,无需更改。")
return
# 创建表
conn.execute(""" conn.execute("""
CREATE TABLE IF NOT EXISTS accounts ( CREATE TABLE accounts (
email TEXT PRIMARY KEY, email TEXT PRIMARY KEY,
original_password TEXT, original_password TEXT,
original_aux_email TEXT, original_aux_email TEXT,
new_password TEXT, new_password TEXT,
new_aux_email TEXT, new_aux_email TEXT,
change_status TEXT change_status TEXT,
proxy TEXT,
region TEXT
) )
""") """)
self.debug_print("表已创建。")
@contextmanager @contextmanager
def _get_connection(self): def _get_connection(self):
@ -47,10 +85,10 @@ class AccountManagerSQLite:
try: try:
conn.execute("DELETE FROM accounts") conn.execute("DELETE FROM accounts")
conn.commit() conn.commit()
print("数据库已清空。") self.debug_print("数据库已清空。")
except sqlite3.Error as e: except sqlite3.Error as e:
conn.rollback() conn.rollback()
print(f"清空数据库失败:{e}") self.debug_print(f"清空数据库失败:{e}")
raise raise
def import_data(self, account_list: List[AccountData]): def import_data(self, account_list: List[AccountData]):
@ -60,18 +98,20 @@ class AccountManagerSQLite:
conn.executemany(""" conn.executemany("""
INSERT OR REPLACE INTO accounts ( INSERT OR REPLACE INTO accounts (
email, original_password, original_aux_email, email, original_password, original_aux_email,
new_password, new_aux_email, change_status new_password, new_aux_email, change_status, proxy, region
) VALUES (?, ?, ?, ?, ?, ?) ) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""", [ """, [
( (
account.email, account.original_password, account.original_aux_email, account.email, account.original_password, account.original_aux_email,
account.new_password, account.new_aux_email, account.change_status account.new_password, account.new_aux_email, account.change_status,
account.proxy, account.region
) for account in account_list ) for account in account_list
]) ])
conn.commit() conn.commit()
self.debug_print(f"成功导入 {len(account_list)} 条数据!")
except sqlite3.Error as e: except sqlite3.Error as e:
conn.rollback() conn.rollback()
print(f"Error importing data: {e}") self.debug_print(f"Error importing data: {e}")
raise raise
def export_data(self) -> List[AccountData]: def export_data(self) -> List[AccountData]:
@ -92,11 +132,7 @@ class AccountManagerSQLite:
return [AccountData(*row) for row in rows] return [AccountData(*row) for row in rows]
def update_record(self, email: str, **kwargs): def update_record(self, email: str, **kwargs):
""" """更新记录的指定字段"""
更新记录的指定字段
:param email: 要更新的记录的邮箱
:param kwargs: 要更新的字段和值键为字段名值为更新的值
"""
if not kwargs: if not kwargs:
raise ValueError("没有指定任何更新的字段") raise ValueError("没有指定任何更新的字段")
@ -115,44 +151,50 @@ class AccountManagerSQLite:
try: try:
conn.execute(query, values) conn.execute(query, values)
conn.commit() conn.commit()
print(f"成功更新记录: {email}") self.debug_print(f"成功更新记录: {email}")
except sqlite3.Error as e: except sqlite3.Error as e:
conn.rollback() conn.rollback()
print(f"更新记录失败:{e}") self.debug_print(f"更新记录失败:{e}")
raise raise
def delete_account(self, email: str): def delete_account(self, email: str):
"""删除某个账户""" """删除某个账户"""
with self._get_connection() as conn: with self._get_connection() as conn:
try: try:
conn.execute("DELETE FROM accounts WHERE email = ?", (email,)) conn.execute("DELETE FROM accounts WHERE email = ?", (email,))
conn.commit() conn.commit()
self.debug_print(f"成功删除账户: {email}")
except sqlite3.Error as e: except sqlite3.Error as e:
conn.rollback() conn.rollback()
print(f"Error deleting account: {e}") self.debug_print(f"Error deleting account: {e}")
raise raise
def import_from_excel(self, excel_path: str, clear_old: bool = False): def import_from_excel(self, excel_path: str, clear_old: bool = False):
""" """从 Excel 文件导入数据"""
Excel 文件导入数据
:param excel_path: Excel 文件路径
:param clear_old: 是否清空旧数据
"""
try: try:
# 如果 clear_old 为 True先清空数据库
if clear_old: if clear_old:
self.clear() self.clear()
# 读取 Excel 文件的第一个工作簿 # 读取 Excel 文件
df = pd.read_excel(excel_path, sheet_name=0) df = pd.read_excel(excel_path, sheet_name=0)
# 校验表格格式 # 校验表格格式
required_columns = ["邮箱", "原密码", "原辅助邮箱", "新密码", "新辅助邮箱", "是否更改完成"] required_columns = [
if not all(col in df.columns for col in required_columns): "邮箱", "原密码", "原辅助邮箱", "新密码", "新辅助邮箱", "是否更改完成", "代理"
raise ValueError(f"表格缺少必要的列:{required_columns}") ]
optional_columns = ["区域"]
all_columns = required_columns + optional_columns
# 将数据转换为 AccountData 对象 missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
raise ValueError(f"表格缺少必要的列:{missing_columns}")
# 添加缺失的可选列并填充默认值
for col in optional_columns:
if col not in df.columns:
df[col] = None
# 转换数据为 AccountData 对象
account_list = [ account_list = [
AccountData( AccountData(
email=row["邮箱"], email=row["邮箱"],
@ -160,40 +202,35 @@ class AccountManagerSQLite:
original_aux_email=row["原辅助邮箱"], original_aux_email=row["原辅助邮箱"],
new_password=row["新密码"], new_password=row["新密码"],
new_aux_email=row["新辅助邮箱"], new_aux_email=row["新辅助邮箱"],
change_status=row["是否更改完成"] change_status=row["是否更改完成"],
proxy=row["代理"],
region=row.get("区域", None)
) )
for _, row in df.iterrows() for _, row in df.iterrows()
] ]
# 批量导入数据到数据库
self.import_data(account_list) self.import_data(account_list)
print(f"成功导入 {len(account_list)} 条数据!")
except Exception as e: except Exception as e:
print(f"导入失败:{e}") self.debug_print(f"导入失败:{e}")
raise raise
def export_to_excel(self, excel_path: str): def export_to_excel(self, excel_path: str):
""" """导出数据到 Excel 文件"""
导出数据到 Excel 文件
:param excel_path: Excel 文件路径
"""
try: try:
# 从数据库中获取所有数据
accounts = self.export_data() accounts = self.export_data()
# 转换为 DataFrame
df = pd.DataFrame([{ df = pd.DataFrame([{
"邮箱": account.email, "邮箱": account.email,
"原密码": account.original_password, "原密码": account.original_password,
"原辅助邮箱": account.original_aux_email, "原辅助邮箱": account.original_aux_email,
"新密码": account.new_password, "新密码": account.new_password,
"新辅助邮箱": account.new_aux_email, "新辅助邮箱": account.new_aux_email,
"是否更改完成": account.change_status "是否更改完成": account.change_status,
"代理": account.proxy,
"区域": account.region
} for account in accounts]) } for account in accounts])
# 写入 Excel 文件
df.to_excel(excel_path, index=False, sheet_name="Accounts") df.to_excel(excel_path, index=False, sheet_name="Accounts")
print(f"成功导出数据到 {excel_path}") self.debug_print(f"成功导出数据到 {excel_path}")
except Exception as e: except Exception as e:
print(f"导出失败:{e}") self.debug_print(f"导出失败:{e}")
raise raise

434
mail.py Normal file
View File

@ -0,0 +1,434 @@
import imaplib
import email
import re
from email.header import decode_header
from datetime import datetime, timezone
from email.utils import parsedate_to_datetime
import time
from datetime import datetime, timedelta
def to_utf7_imap(text):
    """
    Encode text as IMAP modified UTF-7 (RFC 3501, section 5.1.3).

    Printable ASCII (0x20-0x7E) is copied through unchanged, except that a
    literal "&" becomes "&-". Every run of other characters is encoded as
    UTF-16BE, converted to modified base64 ("," instead of "/", no "="
    padding) and wrapped in "&...-".

    Args:
        text (str): The text to convert.

    Returns:
        str: The encoded text, or "" when ``text`` is empty or None.
    """
    import base64

    if not text:
        return ""

    def encode_run(match):
        # One shift sequence per run of non-ASCII characters, as the RFC requires.
        raw = match.group(0).encode("utf-16-be")
        b64 = base64.b64encode(raw).decode("ascii")
        return "&" + b64.rstrip("=").replace("/", ",") + "-"

    # Escape the shift character first; "&-" is printable ASCII, so the
    # substitution below leaves it alone.
    escaped = text.replace("&", "&-")
    return re.sub(r"[^\x20-\x7E]+", encode_run, escaped)
def build_search_criteria(from_email=None, subject=None, body_keyword=None, since=None, before=None, unseen=False):
    """
    Build an IMAP SEARCH criteria string from simple filters.

    All supplied conditions are joined with spaces, which IMAP treats as AND.

    Args:
        from_email (str): Sender address to match.
        subject (str): Keyword to look for in the subject.
        body_keyword (str): Keyword to look for in the body.
        since (datetime): Only messages on or after this date.
        before (datetime): Only messages before this date.
        unseen (bool): Restrict the search to unread messages.

    Returns:
        str: The criteria string, or "ALL" when no filter was given.
    """
    # IMAP dates always use English month abbreviations; strftime("%b")
    # follows the process locale and can emit something else.
    months = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
              "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")

    def imap_date(moment):
        return f"{moment.day:02d}-{months[moment.month - 1]}-{moment.year:04d}"

    def quoted(value):
        # Backslash and double quote must be escaped inside an IMAP quoted string.
        encoded = to_utf7_imap(value).replace("\\", "\\\\").replace('"', '\\"')
        return f'"{encoded}"'

    criteria = []
    if unseen:
        criteria.append("UNSEEN")
    if from_email:
        criteria.append(f"FROM {quoted(from_email)}")
    if subject:
        criteria.append(f"SUBJECT {quoted(subject)}")
    if body_keyword:
        criteria.append(f"BODY {quoted(body_keyword)}")
    if since:
        criteria.append(f"SINCE {imap_date(since)}")
    if before:
        criteria.append(f"BEFORE {imap_date(before)}")

    return " ".join(criteria) if criteria else "ALL"
def parse_email_date(date_str, default_tz=timezone.utc):
    """
    Parse an email Date header value without raising.

    Args:
        date_str (str): Raw Date header value.
        default_tz (tzinfo): Time zone attached when the parsed value is naive.

    Returns:
        datetime | None: The parsed datetime, or None when ``date_str`` is
        empty or cannot be parsed (a warning is printed in the latter case).
    """
    if not date_str:
        return None

    try:
        parsed = parsedate_to_datetime(date_str)
        lacks_tz = bool(parsed) and parsed.tzinfo is None
        return parsed.replace(tzinfo=default_tz) if lacks_tz else parsed
    except Exception as e:
        print(f"Warning: Failed to parse date '{date_str}': {e}")
        return None
class EmailClient:
    """Small IMAP helper that searches the inbox and filters fetched messages."""

    def __init__(self, host, username, password):
        self.host = host
        self.username = username
        self.password = password
        self.connection = None

    def connect(self):
        """Open the IMAP connection, log in and select the inbox."""
        # NOTE(review): this is a plain IMAP4 connection (no TLS) — confirm
        # whether the server requires imaplib.IMAP4_SSL instead.
        self.connection = imaplib.IMAP4(self.host)
        self.connection.login(self.username, self.password)
        self.connection.select("inbox")

    def disconnect(self):
        """Log out if connected; calling it again afterwards is a no-op."""
        if self.connection:
            try:
                self.connection.logout()
            finally:
                # Drop the handle so a repeated disconnect does not reuse it.
                self.connection = None

    def search_emails(self, search_criteria):
        """
        Run an IMAP SEARCH with a caller-built criteria string.

        Args:
            search_criteria (str): Criteria string passed to IMAP SEARCH.

        Returns:
            tuple: ``(result, data)``. ``data`` is the raw SEARCH response
            (a list whose first item holds the space-separated IDs), or an
            empty list when nothing matched.

        Raises:
            Exception: If the server does not answer "OK".
        """
        result, data = self.connection.search(None, search_criteria)
        if result != "OK":
            raise Exception(f"Failed to search emails with criteria: {search_criteria}")

        email_ids = data[0].split()
        if not email_ids:
            print("Debug: No matching emails found.")
            return result, []  # empty list keeps callers' truthiness checks simple

        return result, data

    def fetch_recent_emails(self, from_email=None, subject=None, body_keyword=None, since=None, before=None,
                            unseen=False, max_count=100):
        """
        Search with the given filters and return the most recent message IDs.

        Args:
            from_email (str): Sender address.
            subject (str): Subject keyword.
            body_keyword (str): Body keyword.
            since (datetime): Lower date bound.
            before (datetime): Upper date bound.
            unseen (bool): Only unread messages.
            max_count (int): Maximum number of IDs to return.

        Returns:
            list: Up to ``max_count`` IDs taken from the end of the SEARCH
            response, in reversed order.
        """
        search_criteria = build_search_criteria(
            from_email=from_email,
            subject=subject,
            body_keyword=body_keyword,
            since=since,
            before=before,
            unseen=unseen
        )
        result, data = self.search_emails(search_criteria)

        if not data or not data[0]:
            print("Debug: No emails found matching the criteria.")
            return []

        email_ids = data[0].split()[-max_count:]
        return list(reversed(email_ids))

    @staticmethod
    def _decode_subject(raw_subject):
        """Decode every chunk of a (possibly RFC 2047 encoded) Subject header."""
        if not raw_subject:
            return ""
        pieces = []
        for chunk, charset in decode_header(raw_subject):
            if isinstance(chunk, bytes):
                try:
                    chunk = chunk.decode(charset or "utf-8", errors="replace")
                except LookupError:
                    # Unknown charset label: fall back instead of dropping the mail.
                    chunk = chunk.decode("utf-8", errors="replace")
            pieces.append(chunk)
        return "".join(pieces)

    @staticmethod
    def _extract_text(msg):
        """Concatenate text/plain parts; use text/html only while nothing was collected yet."""
        content = ""
        for part in msg.walk():
            content_type = part.get_content_type()
            try:
                if content_type == "text/plain" or (content_type == "text/html" and not content):
                    content += part.get_payload(decode=True).decode(part.get_content_charset() or "utf-8")
            except Exception as e:
                print(f"Warning: Failed to decode {content_type} content: {e}")
        return content

    def fetch_all_matching_emails(self, email_ids, sender_pattern=None, keyword_pattern=None, subject_pattern=None,
                                  start_time=None, max_results=None):
        """
        Fetch the given messages and keep those that pass every filter.

        Args:
            email_ids (iterable): Message IDs to fetch, processed in order.
            sender_pattern (str): Regex searched in the sender address.
            keyword_pattern (str): Regex searched in the text content.
            subject_pattern (str): Regex searched in the decoded subject.
            start_time (datetime): Drop messages dated before this moment.
                A naive value is interpreted as local time.
            max_results (int): Stop after this many matches (None = no limit).

        Returns:
            list[dict]: Dicts with "subject", "from", "content" and "date".

        Raises:
            ValueError: If ``max_results`` is zero or negative.
        """
        if max_results is not None and max_results <= 0:
            raise ValueError("max_results must be a positive integer or None")

        sender_regex = re.compile(sender_pattern) if sender_pattern else None
        keyword_regex = re.compile(keyword_pattern) if keyword_pattern else None
        subject_regex = re.compile(subject_pattern) if subject_pattern else None

        # Parsed mail dates are timezone-aware; comparing them with a naive
        # start_time would raise TypeError, so normalise it once up front.
        if start_time is not None and start_time.tzinfo is None:
            start_time = start_time.astimezone()

        all_matched_emails = []
        for email_id in email_ids:
            try:
                result, data = self.connection.fetch(email_id, "(RFC822)")
                if result != "OK":
                    print(f"Warning: Failed to fetch email with ID {email_id}")
                    continue

                msg = email.message_from_bytes(data[0][1])

                from_email = email.utils.parseaddr(msg["From"])[1] if msg["From"] else ""
                if sender_regex and not sender_regex.search(from_email):
                    continue

                subject = self._decode_subject(msg["Subject"])
                if subject_regex and not subject_regex.search(subject):
                    continue

                content = self._extract_text(msg)
                if keyword_regex and not keyword_regex.search(content):
                    continue

                date_str = msg.get("Date")
                email_date = parse_email_date(date_str) if date_str else None
                if start_time and email_date and email_date < start_time:
                    continue

                all_matched_emails.append({
                    "subject": subject,
                    "from": from_email,
                    "content": content,
                    "date": email_date
                })

                if max_results and len(all_matched_emails) >= max_results:
                    break
            except Exception as e:
                # Best effort: one broken message must not abort the whole scan.
                print(f"Error: Failed to process email ID {email_id}: {e}")

        return all_matched_emails

    def filter_emails_by_sender_and_keyword(self, email_ids, sender_pattern=None, keyword_pattern=None,
                                            subject_pattern=None, start_time=None):
        """Return the first message in ``email_ids`` that passes the filters, or None."""
        matched_emails = self.fetch_all_matching_emails(
            email_ids=email_ids,
            sender_pattern=sender_pattern,
            keyword_pattern=keyword_pattern,
            subject_pattern=subject_pattern,
            start_time=start_time,
            max_results=1
        )
        return matched_emails[0] if matched_emails else None
class GoogleCodeReceiver:
    """Polls a mailbox client until a Google verification-code mail shows up."""

    def __init__(self, email_client):
        self.email_client = email_client
        self.last_timestamp = datetime.now(timezone.utc)

    def _update_last_timestamp(self):
        """Record the current UTC time as the latest polling start."""
        self.last_timestamp = datetime.now(timezone.utc)

    def wait_code(self, username, timeout=60, interval=3, start_time=None):
        """
        Wait for a Google verification-code email and return the code.

        Args:
            username (str): Text searched for in the message body.
            timeout (int): Maximum time to keep polling, in seconds.
            interval (int): Pause between polls, in seconds.
            start_time (datetime): Only mails dated at or after this moment
                count; defaults to now. A naive value is taken as local time.

        Returns:
            str: The six-digit code captured from the subject.

        Raises:
            TimeoutError: If no code was found before the timeout elapsed.
        """
        if start_time is None:
            start_time = datetime.now(timezone.utc)
        else:
            # astimezone() treats a naive value as local time, so naive and
            # aware inputs both end up as an aware UTC datetime.
            start_time = start_time.astimezone(timezone.utc)

        # Search from two days before start_time; the exact cut-off is
        # applied per message through start_time below.
        since = start_time - timedelta(days=2)

        self._update_last_timestamp()
        deadline = datetime.now(timezone.utc) + timedelta(seconds=timeout)
        subject_pattern = r"(?:Email verification code|电子邮件验证码)[:]?\s*(\d{6})"

        while datetime.now(timezone.utc) < deadline:
            try:
                candidate_ids = self.email_client.fetch_recent_emails(
                    from_email="noreply@google.com",
                    body_keyword=username,
                    since=since,
                    max_count=10,
                )
                hit = self.email_client.filter_emails_by_sender_and_keyword(
                    email_ids=candidate_ids,
                    subject_pattern=subject_pattern,
                    start_time=start_time,
                )
                found = re.search(subject_pattern, hit["subject"]) if hit else None
                if found:
                    return found.group(1)
            except Exception as e:
                print(f"Warning: Error occurred while fetching code: {e}")

            time.sleep(interval)

        raise TimeoutError("Timeout waiting for verification code.")
# Legacy generic mailbox test
def mailTest():
    """Manual smoke test: search the mailbox and print the first and all matching mails."""
    # NOTE(review): credentials are hard-coded in source — confirm they are
    # throwaway values and consider loading them from configuration.
    server = "server-10474.cuiqiu.vip"  # replace with your IMAP server address
    username = "gmailvinted@mailezu.com"
    password = "g1l2o0hld84"

    sender_pattern = r".*google.*"  # regex matched against the sender address
    keyword_pattern = r".*"  # regex matched against the mail content
    start_time = datetime(2024, 11, 22)

    client = EmailClient(server, username, password)
    client.connect()

    try:
        email_ids = client.fetch_recent_emails(
            from_email="noreply@google.com",
            body_keyword='RibeAchour875@gmail.com',
            since=start_time,
            max_count=10,
        )

        # First match among the returned IDs
        latest_matched_email = client.filter_emails_by_sender_and_keyword(email_ids, sender_pattern, keyword_pattern)
        if not latest_matched_email:
            print("没有符合条件的时间上最新的匹配邮件")
        else:
            print("\n时间上最新的匹配邮件:")
            print("主题:", latest_matched_email["subject"])
            print("发件人:", latest_matched_email["from"])
            print("内容:", latest_matched_email["content"])
            print("时间:", latest_matched_email["date"])

        # Every match among the returned IDs
        all_matched_emails = client.fetch_all_matching_emails(email_ids, sender_pattern, keyword_pattern)
        if not all_matched_emails:
            print("没有符合条件的所有匹配邮件")
        else:
            print("\n所有匹配的邮件:")
            for position, mail in enumerate(all_matched_emails, start=1):
                print(f"邮件 {position}:")
                print("主题:", mail["subject"])
                print("发件人:", mail["from"])
                print("内容:", mail["content"], "\n")
                print("时间:", mail["date"])
    finally:
        client.disconnect()
def codeTest():
    """Manual smoke test: wait for a Google verification code and print it."""
    # NOTE(review): credentials are hard-coded in source — confirm they are
    # throwaway values and consider loading them from configuration.
    server = "server-10474.cuiqiu.vip"  # replace with your IMAP server address
    username = "gmailvinted@mailezu.com"
    password = "g1l2o0hld84"

    client = EmailClient(server, username, password)
    client.connect()

    # Change `username` below to the target address whose code should be captured.
    wait_options = {
        "username": "RibeAchour875@gmail.com",
        "timeout": 300,
        "interval": 5,
        "start_time": datetime(2024, 11, 10),
    }

    try:
        receiver = GoogleCodeReceiver(client)
        code = receiver.wait_code(**wait_options)
        print(f"收到谷歌验证码: {code}")
    except TimeoutError as e:
        print(e)
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    finally:
        client.disconnect()
def test3():
    """Manual smoke test: run a raw IMAP search and print every fetched mail."""
    # NOTE(review): credentials are hard-coded in source — confirm they are
    # throwaway values and consider loading them from configuration.
    server = "server-10474.cuiqiu.vip"  # replace with your IMAP server address
    username = "gmailvinted@mailezu.com"
    password = "g1l2o0hld84"

    client = EmailClient(server, username, password)
    client.connect()
    try:
        ok, data = client.search_emails('FROM "noreply@google.com" BODY "RibeAchour875@gmail.com" SINCE 11-Nov-2024')
        # search_emails returns the raw SEARCH response: one bytes item that
        # holds all IDs separated by spaces. Split it before fetching.
        email_ids = data[0].split() if data else []

        if email_ids:
            print(email_ids)
            all_matched_emails = client.fetch_all_matching_emails(email_ids)
            if all_matched_emails:
                print("\n所有匹配的邮件:")
                for idx, mail in enumerate(all_matched_emails):
                    print(f"邮件 {idx + 1}:")
                    print("主题:", mail["subject"])
                    print("发件人:", mail["from"])
                    print("内容:", mail["content"], "\n")
                    print("时间:", mail["date"])
            else:
                print("没有符合条件的所有匹配邮件")
        else:
            print("查不到")
    finally:
        # Always release the IMAP session, even when the search fails.
        client.disconnect()
# Usage example: run the verification-code smoke test when executed as a script.
if __name__ == "__main__":
    # test3()
    codeTest()

677
main.py

File diff suppressed because it is too large Load Diff

325
proxy.py
View File

@ -1,107 +1,258 @@
import sqlite3
import random import random
import requests from contextlib import contextmanager
from typing import List, Optional, Dict
class ProxyManager:
def __init__(self):
self.proxies = []
def import_proxies(self, file_path): class ProxyManagerSQLite:
""" def __init__(self, db_path="proxies.db", debug=False):
导入代理列表支持文件路径格式为 host:port:user:password self.db_path = db_path
""" self.debug = debug
try: self._initialize_db()
with open(file_path, 'r') as file:
content = file.read().replace('\r\n', '\n') # 替换 Windows 风格换行符
lines = content.strip().split('\n')
for line in lines:
parts = line.split(':')
if len(parts) == 4: # 确保格式正确
proxy = {
'host': parts[0],
'port': parts[1],
'user': parts[2],
'password': parts[3],
'protocol': 'http'
}
self.proxies.append(proxy)
except FileNotFoundError:
print(f"Error: File not found at {file_path}.")
return False
except Exception as e:
print(f"Error: {str(e)}")
return False
return True def debug_print(self, *args):
"""仅在 debug 模式下输出调试信息"""
if self.debug:
print(*args)
def get_random_proxy(self): def _initialize_db(self):
""" """初始化或检查数据库结构"""
随机获取一个代理 with self._get_connection() as conn:
""" # 启用 WAL 模式
if not self.proxies: current_mode = conn.execute("PRAGMA journal_mode").fetchone()[0]
print("No proxies available.") if current_mode != "wal":
return None self.debug_print("切换到 WAL 模式...")
return random.choice(self.proxies) conn.execute("PRAGMA journal_mode=WAL")
def test_proxy(self, proxy): # 检查表是否存在
""" cursor = conn.execute("PRAGMA table_info(proxies)")
测试代理的对外 IP columns = [row[1] for row in cursor.fetchall()]
:param proxy: 格式为 {'host': '...', 'port': '...', 'user': '...', 'password': '...', 'protocol': '...'}
"""
if not proxy:
print("Invalid proxy provided.")
return None
proxy_url = f"{proxy['protocol']}://{proxy['user']}:{proxy['password']}@{proxy['host']}:{proxy['port']}" required_columns = ["host", "port", "user", "password", "protocol", "region"]
proxies = {'http': proxy_url, 'https': proxy_url}
try: if not columns:
response = requests.get("http://jsonip.com", proxies=proxies, timeout=5) self.debug_print("表不存在,正在创建表...")
if response.status_code == 200: elif columns != required_columns:
return response.json() self.debug_print(f"表结构不一致,当前列: {columns}, 期望列: {required_columns}")
self.debug_print("正在重建表...")
conn.execute("DROP TABLE IF EXISTS proxies")
else: else:
print(f"Failed with status code: {response.status_code}") self.debug_print("表结构检查通过,无需更改。")
return None return
except requests.RequestException as e:
print(f"Request failed: {str(e)}")
return None
def is_empty(self): # 创建表
return not self.proxies conn.execute("""
CREATE TABLE proxies (
id INTEGER PRIMARY KEY AUTOINCREMENT,
host TEXT NOT NULL,
port TEXT NOT NULL,
user TEXT NOT NULL,
password TEXT NOT NULL,
protocol TEXT DEFAULT 'http',
region TEXT NOT NULL
)
""")
self.debug_print("表已创建。")
def get_and_test_random_proxy(self): @contextmanager
def _get_connection(self):
"""获取 SQLite 数据库连接"""
conn = sqlite3.connect(self.db_path)
try:
yield conn
finally:
conn.close()
def import_proxies_with_classifier(self, file_path: str, classifier):
""" """
从代理列表中随机获取一个代理测试联通性并从列表中移除 从文件导入代理列表并分类
:param file_path: 文件路径格式为 host:port:user:password
:param classifier: 分类函数接受代理行字符串返回国家/地区代码
""" """
if not self.proxies: try:
print("No proxies available.") with open(file_path, "r") as file:
return None lines = file.read().replace("\r\n", "\n").strip().split("\n")
proxy = random.choice(self.proxies) proxies = []
test_result = self.test_proxy(proxy) for line in lines:
if test_result: parts = line.split(":")
print(f"Proxy works: {test_result}") if len(parts) == 4:
self.proxies.remove(proxy) # 移除成功的代理 proxy = {
return proxy, test_result "host": parts[0],
else: "port": parts[1],
print("Proxy failed. Removing from the list.") "user": parts[2],
self.proxies.remove(proxy) # 移除失败的代理 "password": parts[3],
return None, None "region": classifier(line),
"protocol": "http",
}
proxies.append(proxy)
with self._get_connection() as conn:
conn.executemany("""
INSERT INTO proxies (host, port, user, password, protocol, region)
VALUES (:host, :port, :user, :password, :protocol, :region)
""", proxies)
conn.commit()
self.debug_print(f"成功导入 {len(proxies)} 条代理数据!")
except Exception as e:
self.debug_print(f"Error importing proxies: {str(e)}")
raise
def get_random_proxy_by_region(self, region: Optional[str] = None, remove_after_fetch: bool = False) -> Optional[Dict]:
    """
    Pick one random proxy, optionally restricted to a region.

    :param region: Region code; None or "ALL" picks from every region
    :param remove_after_fetch: Delete the chosen row from the database when True
    :return: Proxy as a dict (id, host, port, user, password, protocol, region), or None if nothing matched
    """
    field_names = ("id", "host", "port", "user", "password", "protocol", "region")
    restrict = region is not None and region != "ALL"

    with self._get_connection() as conn:
        if restrict:
            cursor = conn.execute("SELECT * FROM proxies WHERE region = ? ORDER BY RANDOM() LIMIT 1", (region,))
        else:
            cursor = conn.execute("SELECT * FROM proxies ORDER BY RANDOM() LIMIT 1")

        row = cursor.fetchone()
        if not row:
            return None

        if remove_after_fetch:
            # The first column is the primary key of the chosen row.
            conn.execute("DELETE FROM proxies WHERE id = ?", (row[0],))
            conn.commit()

        return dict(zip(field_names, row))
def get_proxy_count(self, region: Optional[str] = "ALL") -> int:
    """
    Count the proxies stored for a region.

    :param region: Region code; "ALL" (the default) or None counts every proxy
    :return: Number of matching proxies
    """
    with self._get_connection() as conn:
        # None is treated like "ALL", matching the other region-filtered
        # queries; "region = NULL" would never match a row.
        if region is None or region == "ALL":
            cursor = conn.execute("SELECT COUNT(*) FROM proxies")
        else:
            cursor = conn.execute("SELECT COUNT(*) FROM proxies WHERE region = ?", (region,))
        return cursor.fetchone()[0]
def get_summary(self) -> Dict[str, int]:
    """
    Count the stored proxies per region.

    :return: Dict mapping each region code to its number of proxies
    """
    with self._get_connection() as conn:
        grouped = conn.execute("SELECT region, COUNT(*) FROM proxies GROUP BY region")
        # Each row is a (region, count) pair, which dict() consumes directly.
        return dict(grouped.fetchall())
def export_proxies(self, file_path: str, serializer=None):
    """
    Write every stored proxy to a text file, one line per proxy.

    :param file_path: Destination file path
    :param serializer: Optional function taking a proxy dict and returning the line to write;
                       defaults to serializer_smartproxy
    """
    serializer = serializer_smartproxy if serializer is None else serializer
    field_names = ["host", "port", "user", "password", "protocol", "region"]

    try:
        with self._get_connection() as conn:
            rows = conn.execute("SELECT host, port, user, password, protocol, region FROM proxies").fetchall()

        with open(file_path, "w") as out:
            for row in rows:
                record = dict(zip(field_names, row))
                try:
                    out.write(serializer(record) + "\n")
                except Exception as e:
                    # A row that cannot be serialized is skipped, not fatal.
                    self.debug_print(f"序列化失败: {e}")

        self.debug_print(f"成功导出代理到 {file_path}")
    except Exception as e:
        self.debug_print(f"Error exporting proxies: {str(e)}")
        raise
def get_proxies(self, region: Optional[str] = None) -> List[Dict]:
    """
    List every proxy stored for a region.

    :param region: Region code; None or "ALL" returns the proxies of every region
    :return: List of proxy dicts (id, host, port, user, password, protocol, region)
    """
    field_names = ["id", "host", "port", "user", "password", "protocol", "region"]
    everything = region is None or region == "ALL"

    with self._get_connection() as conn:
        cursor = (
            conn.execute("SELECT * FROM proxies")
            if everything
            else conn.execute("SELECT * FROM proxies WHERE region = ?", (region,))
        )
        return [dict(zip(field_names, row)) for row in cursor.fetchall()]
def clear(self):
    """Delete every row from the proxies table."""
    with self._get_connection() as connection:
        connection.execute("DELETE FROM proxies")
        connection.commit()
        self.debug_print("数据库已清空。")
def classifier_smartproxy(proxy_line):
    """
    Extract the region code from a proxy line.

    The code is the text between the first "_area-" marker and the next "_".

    :param proxy_line: Proxy line string
    :return: Upper-cased region code (e.g. "PL"), or "OTHER" when it cannot be extracted
    """
    try:
        _, marker, tail = proxy_line.partition("_area-")
        if not marker:
            return "OTHER"

        # The code ends at the next underscore; without one it is unusable.
        region_code, terminator, _ = tail.partition("_")
        if not terminator:
            return "OTHER"

        return region_code.upper()
    except Exception as e:
        print(f"Error in region classification: {str(e)}")
        return "OTHER"
def serializer_smartproxy(proxy: Dict) -> str:
    """
    Default serializer used when exporting proxies.

    The region is not written out: per the original author's note, the
    smartproxy user field already carries the region information.

    :param proxy: Proxy dict
    :return: Line formatted as host:port:user:password
    :raises ValueError: If one of the four required keys is missing
    """
    try:
        values = [proxy[key] for key in ("host", "port", "user", "password")]
    except KeyError as e:
        raise ValueError(f"代理信息缺少必要字段: {e}")
    # Fields are joined with ":" to match the import file format.
    return ":".join(format(value) for value in values)
if __name__ == "__main__": if __name__ == "__main__":
manager = ProxyManager() manager = ProxyManagerSQLite()
print(f'测试是否是空的:{manager.is_empty()}') # 导入代理
print(manager.import_proxies('IP.txt')) manager.import_proxies_with_classifier("IP.txt", classifier=classifier_smartproxy)
print(f'再测试是否是空的:{manager.is_empty()}')
random_proxy = manager.get_random_proxy() # 获取汇总统计
print(f"获取到的随机代理:{random_proxy}") print("代理统计:", manager.get_summary())
test_result = manager.test_proxy(random_proxy) # 获取随机代理
print(f"随机代理的出口 IP:{test_result}") proxy = manager.get_random_proxy_by_region(region="PL", remove_after_fetch=True)
print(f"取出的代理: {proxy}")
# 获取代理数量
print("所有代理总数:", manager.get_proxy_count("ALL"))
print("PL 区域代理数:", manager.get_proxy_count("PL"))
print("PL 区域当前列表:",manager.get_proxies("PL"))
print("目前所有的可用代理列表:",manager.get_proxies("ALL"))
manager.export_proxies("剩下的可用IP.txt")
proxy, result = manager.get_and_test_random_proxy()
if result:
print(f"测试成功 : {result},取得的代理是:{proxy},这个代理已经从代理管理器里移除。")
else:
print("测试失败.")