Gmail/proxy.py

258 lines
10 KiB
Python
Raw Normal View History

2024-11-26 16:30:00 +00:00
import sqlite3
2024-11-23 14:27:50 +00:00
import random
2024-11-26 16:30:00 +00:00
from contextlib import contextmanager
from typing import List, Optional, Dict
2024-11-23 14:27:50 +00:00
2024-11-26 16:30:00 +00:00
class ProxyManagerSQLite:
    """Store, query and export proxies kept in a single SQLite table.

    Every public method opens its own short-lived connection to
    ``db_path``, so an instance holds no open database handle between calls.
    Each proxy row has the columns listed in ``_COLUMNS``.
    """

    # Column order of the ``proxies`` table exactly as ``PRAGMA table_info``
    # reports it (including the auto-increment primary key). Used both for the
    # schema check and for turning ``SELECT *`` rows into dictionaries.
    _COLUMNS = ("id", "host", "port", "user", "password", "protocol", "region")

    def __init__(self, db_path="proxies.db", debug=False):
        """Remember the settings and make sure the database schema exists.

        :param db_path: path of the SQLite database file
        :param debug: when true, ``debug_print`` writes to stdout
        """
        self.db_path = db_path
        self.debug = debug
        self._initialize_db()

    def debug_print(self, *args):
        """Print ``args`` only when the instance was created with ``debug=True``."""
        if self.debug:
            print(*args)

    @staticmethod
    def _is_all_regions(region) -> bool:
        """Return True when ``region`` means "no region filter"."""
        return region is None or region == "ALL"

    @classmethod
    def _row_to_dict(cls, row) -> Dict:
        """Convert a full ``proxies`` row (``SELECT *``) into a dictionary."""
        return dict(zip(cls._COLUMNS, row))

    def _initialize_db(self):
        """Switch the database to WAL mode and create/verify the ``proxies`` table.

        An existing table is kept when its columns match ``_COLUMNS``; a table
        with any other layout is dropped and recreated empty.
        """
        with self._get_connection() as conn:
            # Enable write-ahead logging unless it is already active.
            current_mode = conn.execute("PRAGMA journal_mode").fetchone()[0]
            if current_mode != "wal":
                self.debug_print("切换到 WAL 模式...")
                conn.execute("PRAGMA journal_mode=WAL")

            # An empty result from table_info means the table does not exist.
            cursor = conn.execute("PRAGMA table_info(proxies)")
            columns = [row[1] for row in cursor.fetchall()]
            # The expected list must include "id": table_info reports it, and
            # leaving it out made this check fail on every start-up, which
            # dropped the table (and all stored proxies) each time.
            required_columns = list(self._COLUMNS)

            if not columns:
                self.debug_print("表不存在,正在创建表...")
            elif columns != required_columns:
                self.debug_print(f"表结构不一致,当前列: {columns}, 期望列: {required_columns}")
                self.debug_print("正在重建表...")
                conn.execute("DROP TABLE IF EXISTS proxies")
            else:
                self.debug_print("表结构检查通过,无需更改。")
                return

            conn.execute("""
                CREATE TABLE proxies (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    host TEXT NOT NULL,
                    port TEXT NOT NULL,
                    user TEXT NOT NULL,
                    password TEXT NOT NULL,
                    protocol TEXT DEFAULT 'http',
                    region TEXT NOT NULL
                )
            """)
            self.debug_print("表已创建。")

    @contextmanager
    def _get_connection(self):
        """Yield a new SQLite connection to ``db_path`` and always close it."""
        conn = sqlite3.connect(self.db_path)
        try:
            yield conn
        finally:
            conn.close()

    def import_proxies_with_classifier(self, file_path: str, classifier):
        """Import proxies from a text file and tag each one with a region.

        Lines that do not split into exactly four ``:``-separated parts
        (including blank lines) are ignored.

        :param file_path: path of a file with one ``host:port:user:password`` per line
        :param classifier: callable taking the proxy line and returning its region code
        :raises Exception: any error is reported via ``debug_print`` and re-raised
        """
        try:
            with open(file_path, "r") as file:
                raw_lines = file.read().splitlines()

            proxies = []
            for raw_line in raw_lines:
                # Strip every line, not just the file as a whole, so trailing
                # whitespace never ends up inside the stored password.
                line = raw_line.strip()
                parts = line.split(":")
                if len(parts) != 4:
                    continue
                host, port, user, password = parts
                proxies.append({
                    "host": host,
                    "port": port,
                    "user": user,
                    "password": password,
                    "region": classifier(line),
                    "protocol": "http",
                })

            with self._get_connection() as conn:
                conn.executemany("""
                    INSERT INTO proxies (host, port, user, password, protocol, region)
                    VALUES (:host, :port, :user, :password, :protocol, :region)
                """, proxies)
                conn.commit()
            self.debug_print(f"成功导入 {len(proxies)} 条代理数据!")
        except Exception as e:
            self.debug_print(f"Error importing proxies: {str(e)}")
            raise

    def get_random_proxy_by_region(self, region: Optional[str] = None, remove_after_fetch: bool = False) -> Optional[Dict]:
        """Pick one random proxy, optionally restricted to a region.

        :param region: region code; ``None`` or ``"ALL"`` selects from every region
        :param remove_after_fetch: delete the returned proxy from the database
        :return: the proxy as a dictionary, or ``None`` when nothing matches
        """
        with self._get_connection() as conn:
            if self._is_all_regions(region):
                cursor = conn.execute("SELECT * FROM proxies ORDER BY RANDOM() LIMIT 1")
            else:
                cursor = conn.execute(
                    "SELECT * FROM proxies WHERE region = ? ORDER BY RANDOM() LIMIT 1",
                    (region,),
                )
            row = cursor.fetchone()
            if row is None:
                return None
            if remove_after_fetch:
                # row[0] is the primary key ("id" is the first column).
                conn.execute("DELETE FROM proxies WHERE id = ?", (row[0],))
                conn.commit()
            return self._row_to_dict(row)

    def get_proxy_count(self, region: Optional[str] = "ALL") -> int:
        """Count the stored proxies.

        :param region: region code; ``"ALL"`` (default) or ``None`` counts every proxy
        :return: number of matching proxies
        """
        with self._get_connection() as conn:
            # None is treated like "ALL", matching the other query methods;
            # comparing "region = NULL" in SQL would always count zero rows.
            if self._is_all_regions(region):
                cursor = conn.execute("SELECT COUNT(*) FROM proxies")
            else:
                cursor = conn.execute("SELECT COUNT(*) FROM proxies WHERE region = ?", (region,))
            return cursor.fetchone()[0]

    def get_summary(self) -> Dict[str, int]:
        """Return a mapping of region code to the number of proxies in it."""
        with self._get_connection() as conn:
            cursor = conn.execute("SELECT region, COUNT(*) FROM proxies GROUP BY region")
            return {region: count for region, count in cursor.fetchall()}

    def export_proxies(self, file_path: str, serializer=None):
        """Write every stored proxy to a text file, one per line.

        Proxies whose serialization raises are reported via ``debug_print``
        and skipped; database and file errors are reported and re-raised.

        :param file_path: destination file (overwritten)
        :param serializer: callable taking a proxy dictionary (without ``id``)
            and returning the line to write; defaults to the module-level
            ``serializer_smartproxy``
        """
        if serializer is None:
            serializer = serializer_smartproxy  # module-level default, looked up at call time

        try:
            # Read everything first so the connection is not held open while
            # the output file is being written.
            with self._get_connection() as conn:
                rows = conn.execute(
                    "SELECT host, port, user, password, protocol, region FROM proxies"
                ).fetchall()

            keys = self._COLUMNS[1:]  # every column except "id"
            with open(file_path, "w") as file:
                for row in rows:
                    proxy_dict = dict(zip(keys, row))
                    # Only the serializer call is best-effort; a failing
                    # write is a real I/O error and must not be skipped.
                    try:
                        text = serializer(proxy_dict) + "\n"
                    except Exception as e:
                        self.debug_print(f"序列化失败: {e}")
                        continue
                    file.write(text)
            self.debug_print(f"成功导出代理到 {file_path}")
        except Exception as e:
            self.debug_print(f"Error exporting proxies: {str(e)}")
            raise

    def get_proxies(self, region: Optional[str] = None) -> List[Dict]:
        """Return all proxies, optionally restricted to a region.

        :param region: region code; ``None`` or ``"ALL"`` returns every proxy
        :return: list of proxy dictionaries (possibly empty)
        """
        with self._get_connection() as conn:
            if self._is_all_regions(region):
                cursor = conn.execute("SELECT * FROM proxies")
            else:
                cursor = conn.execute("SELECT * FROM proxies WHERE region = ?", (region,))
            return [self._row_to_dict(row) for row in cursor.fetchall()]

    def clear(self):
        """Delete every proxy from the database (the table itself is kept)."""
        with self._get_connection() as conn:
            conn.execute("DELETE FROM proxies")
            conn.commit()
        self.debug_print("数据库已清空。")
def classifier_smartproxy(proxy_line):
    """Extract the region code from a smartproxy-style proxy line.

    The region is encoded as ``_area-XX_``; the ``XX`` part is returned in
    upper case (for example ``"PL"``).

    :param proxy_line: proxy line string, e.g. ``host:port:user:password``
    :return: the upper-cased region code, or ``"OTHER"`` when the marker is
        missing, is not terminated by ``_`` inside the same ``:``-separated
        field, encloses an empty code, or ``proxy_line`` cannot be searched
    """
    marker = "_area-"
    try:
        marker_pos = proxy_line.find(marker)
        if marker_pos == -1:
            return "OTHER"

        code_start = marker_pos + len(marker)
        # Confine the search for the closing "_" to the current field, so an
        # underscore in a later field (such as the password) is never taken
        # for the end of the region code.
        field_end = proxy_line.find(":", code_start)
        if field_end == -1:
            field_end = len(proxy_line)
        code_end = proxy_line.find("_", code_start, field_end)
        if code_end == -1:
            return "OTHER"  # no terminator inside the field

        region_code = proxy_line[code_start:code_end]
        if not region_code:
            return "OTHER"  # "_area-_" carries no usable code
        return region_code.upper()
    except Exception as e:
        # Best effort: a value that cannot be searched is classified as "OTHER".
        print(f"Error in region classification: {str(e)}")
        return "OTHER"
2024-11-23 14:27:50 +00:00
2024-11-26 16:30:00 +00:00
def serializer_smartproxy(proxy: Dict) -> str:
    """Serialize a proxy dictionary as ``host:port:user:password``.

    This is the default serializer used when exporting proxies. The region is
    not written out separately because, in the smartproxy format, the
    ``user`` field already carries the region information.

    :param proxy: proxy dictionary with ``host``, ``port``, ``user`` and ``password``
    :return: the formatted line
    :raises ValueError: when one of the required keys is missing
    """
    required_keys = ("host", "port", "user", "password")
    try:
        # Keys are read in order, so the first missing one is the one reported.
        host, port, user, password = (proxy[key] for key in required_keys)
    except KeyError as e:
        raise ValueError(f"代理信息缺少必要字段: {e}")
    return "{}:{}:{}:{}".format(host, port, user, password)
2024-11-23 14:27:50 +00:00
if __name__ == "__main__":
    # Demo run: import proxies from IP.txt, print some statistics, take one
    # PL proxy out of the pool and export whatever is left.
    pool = ProxyManagerSQLite()

    # Import the proxy list, classifying each line by its region marker.
    pool.import_proxies_with_classifier("IP.txt", classifier=classifier_smartproxy)

    # Per-region totals.
    summary = pool.get_summary()
    print("代理统计:", summary)

    # Take one random PL proxy and remove it from the database.
    picked = pool.get_random_proxy_by_region(region="PL", remove_after_fetch=True)
    print(f"取出的代理: {picked}")

    # Counts after the removal.
    total_count = pool.get_proxy_count("ALL")
    print("所有代理总数:", total_count)
    pl_count = pool.get_proxy_count("PL")
    print("PL 区域代理数:", pl_count)

    # Remaining proxies, for PL only and then for every region.
    pl_proxies = pool.get_proxies("PL")
    print("PL 区域当前列表:", pl_proxies)
    all_proxies = pool.get_proxies("ALL")
    print("目前所有的可用代理列表:", all_proxies)

    # Export the remaining proxies with the default serializer.
    pool.export_proxies("剩下的可用IP.txt")