2024-11-26 16:30:00 +00:00
|
|
|
|
import sqlite3
|
2024-11-23 14:27:50 +00:00
|
|
|
|
import random
|
2024-11-26 16:30:00 +00:00
|
|
|
|
from contextlib import contextmanager
|
|
|
|
|
from typing import List, Optional, Dict
|
2024-11-23 14:27:50 +00:00
|
|
|
|
|
|
|
|
|
|
2024-11-26 16:30:00 +00:00
|
|
|
|
class ProxyManagerSQLite:
    """Store proxy records in a SQLite file and query them by region.

    Every public method opens its own short-lived connection through
    ``_get_connection``; no connection is kept on the instance.

    :param db_path: path of the SQLite database file
    :param debug: when true, ``debug_print`` writes messages to stdout
    """

    # Column names of the ``proxies`` table in declaration order. The schema
    # check compares against this list and rows are zipped with it, so the
    # order matters.
    _COLUMNS = ("id", "host", "port", "user", "password", "protocol", "region")

    # Explicit column list (instead of ``SELECT *``) so the row layout always
    # matches ``_COLUMNS``.
    _SELECT_ROWS = (
        "SELECT id, host, port, user, password, protocol, region FROM proxies"
    )

    def __init__(self, db_path="proxies.db", debug=False):
        self.db_path = db_path
        self.debug = debug
        self._initialize_db()

    def debug_print(self, *args):
        """Print ``args`` only when the instance was created with ``debug=True``."""
        if self.debug:
            print(*args)

    def _initialize_db(self):
        """Switch the database to WAL mode and make sure ``proxies`` exists.

        An existing table whose column names (including ``id``) differ from
        ``_COLUMNS`` is dropped and recreated; a matching table is left
        untouched so previously imported proxies survive a restart.
        """
        with self._get_connection() as conn:
            # Enable WAL journaling if it is not active yet.
            current_mode = conn.execute("PRAGMA journal_mode").fetchone()[0]
            if current_mode != "wal":
                self.debug_print("切换到 WAL 模式...")
                conn.execute("PRAGMA journal_mode=WAL")

            # An empty result from table_info means the table does not exist.
            cursor = conn.execute("PRAGMA table_info(proxies)")
            columns = [row[1] for row in cursor.fetchall()]

            # table_info reports every column, so the expected list must
            # contain "id" as well; without it the comparison never matches
            # and the table would be rebuilt on each start.
            required_columns = list(self._COLUMNS)

            if columns == required_columns:
                self.debug_print("表结构检查通过,无需更改。")
                return

            if columns:
                self.debug_print(f"表结构不一致,当前列: {columns}, 期望列: {required_columns}")
                self.debug_print("正在重建表...")
                conn.execute("DROP TABLE IF EXISTS proxies")
            else:
                self.debug_print("表不存在,正在创建表...")

            conn.execute("""
                CREATE TABLE proxies (
                    id INTEGER PRIMARY KEY AUTOINCREMENT,
                    host TEXT NOT NULL,
                    port TEXT NOT NULL,
                    user TEXT NOT NULL,
                    password TEXT NOT NULL,
                    protocol TEXT DEFAULT 'http',
                    region TEXT NOT NULL
                )
            """)
            conn.commit()
            self.debug_print("表已创建。")

    @contextmanager
    def _get_connection(self):
        """Yield a new connection to ``db_path`` and close it on exit.

        The connection is closed without committing; callers commit
        themselves.
        """
        conn = sqlite3.connect(self.db_path)
        try:
            yield conn
        finally:
            conn.close()

    @staticmethod
    def _region_clause(region):
        """Return the ``WHERE`` fragment and bound parameters for ``region``.

        ``None`` and ``"ALL"`` both mean "no filter".
        """
        if region is None or region == "ALL":
            return "", ()
        return " WHERE region = ?", (region,)

    @classmethod
    def _row_to_dict(cls, row):
        """Map a row selected with ``_SELECT_ROWS`` onto the column names."""
        return dict(zip(cls._COLUMNS, row))

    def import_proxies_with_classifier(self, file_path: str, classifier):
        """Import proxies from a text file and tag each one with a region.

        Each line is expected to look like ``host:port:user:password``.
        Surrounding whitespace is removed first; lines that do not split into
        exactly four ``:``-separated fields are skipped. All imported rows get
        the protocol ``"http"``.

        :param file_path: path of the file to read
        :param classifier: callable taking the proxy line and returning the
            region code to store
        :raises Exception: any error is reported via ``debug_print`` and then
            re-raised unchanged
        """
        try:
            proxies = []
            with open(file_path, "r") as file:
                for raw_line in file:
                    # Strip so a trailing space or stray "\r" does not end up
                    # inside the password field.
                    line = raw_line.strip()
                    parts = line.split(":")
                    if len(parts) != 4:
                        continue
                    host, port, user, password = parts
                    proxies.append({
                        "host": host,
                        "port": port,
                        "user": user,
                        "password": password,
                        "region": classifier(line),
                        "protocol": "http",
                    })

            with self._get_connection() as conn:
                # ``with conn`` commits on success and rolls back on error.
                with conn:
                    conn.executemany("""
                        INSERT INTO proxies (host, port, user, password, protocol, region)
                        VALUES (:host, :port, :user, :password, :protocol, :region)
                    """, proxies)

            self.debug_print(f"成功导入 {len(proxies)} 条代理数据!")
        except Exception as e:
            self.debug_print(f"Error importing proxies: {str(e)}")
            raise

    def get_random_proxy_by_region(self, region: Optional[str] = None, remove_after_fetch: bool = False) -> Optional[Dict]:
        """Return one randomly chosen proxy, optionally limited to a region.

        :param region: region code; ``None`` or ``"ALL"`` picks from all rows
        :param remove_after_fetch: delete the returned row from the database
        :return: dict keyed by the table's column names, or ``None`` when no
            row matches
        """
        where, params = self._region_clause(region)
        with self._get_connection() as conn:
            row = conn.execute(
                f"{self._SELECT_ROWS}{where} ORDER BY RANDOM() LIMIT 1", params
            ).fetchone()
            if row is None:
                return None

            if remove_after_fetch:
                # NOTE(review): SELECT and DELETE are separate statements, so
                # another connection could presumably pick the same row in
                # between - confirm whether callers rely on exclusivity.
                conn.execute("DELETE FROM proxies WHERE id = ?", (row[0],))
                conn.commit()

            return self._row_to_dict(row)

    def get_proxy_count(self, region: Optional[str] = "ALL") -> int:
        """Return the number of stored proxies for ``region``.

        :param region: region code; ``"ALL"`` (default) or ``None`` counts
            every row, consistent with the other query methods
        :return: number of matching rows
        """
        where, params = self._region_clause(region)
        with self._get_connection() as conn:
            cursor = conn.execute(f"SELECT COUNT(*) FROM proxies{where}", params)
            return cursor.fetchone()[0]

    def get_summary(self) -> Dict[str, int]:
        """Return a mapping of region code to the number of proxies in it."""
        with self._get_connection() as conn:
            cursor = conn.execute("SELECT region, COUNT(*) FROM proxies GROUP BY region")
            return {region: count for region, count in cursor.fetchall()}

    def export_proxies(self, file_path: str, serializer=None):
        """Write every stored proxy to ``file_path``, one line per proxy.

        :param file_path: path of the file to (over)write
        :param serializer: optional callable taking a proxy dict (without
            ``id``) and returning the line to write; defaults to the
            module-level ``serializer_smartproxy``
        :raises Exception: errors outside the per-row serialization are
            reported via ``debug_print`` and re-raised
        """
        if serializer is None:
            serializer = serializer_smartproxy  # default serializer

        try:
            with self._get_connection() as conn:
                cursor = conn.execute("SELECT host, port, user, password, protocol, region FROM proxies")
                proxies = cursor.fetchall()

            field_names = self._COLUMNS[1:]  # everything except "id"
            with open(file_path, "w") as file:
                for proxy in proxies:
                    proxy_dict = dict(zip(field_names, proxy))
                    try:
                        line = serializer(proxy_dict)
                        file.write(line + "\n")
                    except Exception as e:
                        # Best effort: a row that cannot be serialized is
                        # reported and skipped, the export continues.
                        self.debug_print(f"序列化失败: {e}")
                        continue

            self.debug_print(f"成功导出代理到 {file_path}!")
        except Exception as e:
            self.debug_print(f"Error exporting proxies: {str(e)}")
            raise

    def get_proxies(self, region: Optional[str] = None) -> List[Dict]:
        """Return all proxies of a region as a list of dicts.

        :param region: region code; ``None`` or ``"ALL"`` returns every row
        :return: list of dicts keyed by the table's column names
        """
        where, params = self._region_clause(region)
        with self._get_connection() as conn:
            rows = conn.execute(f"{self._SELECT_ROWS}{where}", params).fetchall()
        return [self._row_to_dict(row) for row in rows]

    def clear(self):
        """Delete every row from the ``proxies`` table."""
        with self._get_connection() as conn:
            conn.execute("DELETE FROM proxies")
            conn.commit()
            self.debug_print("数据库已清空。")
|
|
|
|
|
|
|
|
|
|
def classifier_smartproxy(proxy_line):
    """Extract the region code embedded in a proxy line.

    The code is the text between the first ``"_area-"`` marker and the next
    ``"_"`` after it, e.g. ``"..._area-pl_..."`` yields ``"PL"``.

    :param proxy_line: proxy line string
    :return: upper-cased region code, or ``"OTHER"`` when the input is not a
        string, the marker or its closing ``"_"`` is missing, or the code
        between them is empty
    """
    fallback = "OTHER"
    marker = "_area-"

    # Non-string input cannot be searched; treat it as "no region found".
    if not isinstance(proxy_line, str):
        return fallback

    marker_pos = proxy_line.find(marker)
    if marker_pos == -1:
        return fallback

    # The code starts right after the marker and ends before the next "_".
    code_start = marker_pos + len(marker)
    code_end = proxy_line.find("_", code_start)
    if code_end == -1:
        return fallback

    region_code = proxy_line[code_start:code_end]
    # "_area-_" carries no code at all; report it like any other failure
    # instead of returning an empty region.
    return region_code.upper() if region_code else fallback
|
2024-11-23 14:27:50 +00:00
|
|
|
|
|
2024-11-26 16:30:00 +00:00
|
|
|
|
def serializer_smartproxy(proxy: Dict) -> str:
    """Serialize a proxy dict to the default export line format.

    The region is intentionally not written: in the smartproxy format the
    ``user`` field already contains the region information.

    :param proxy: proxy dict with at least ``host``, ``port``, ``user`` and
        ``password`` keys
    :return: string in the form ``host:port:user:password``
    :raises ValueError: if one of the required keys is missing
    """
    try:
        return f"{proxy['host']}:{proxy['port']}:{proxy['user']}:{proxy['password']}"
    except KeyError as e:
        # Chain explicitly so the missing key stays visible in the traceback.
        raise ValueError(f"代理信息缺少必要字段: {e}") from e
|
2024-11-23 14:27:50 +00:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Demo run against the default database file: import, inspect, take one
    # proxy out, then export whatever is left.
    manager = ProxyManagerSQLite()

    # Import proxies from the local list, classifying each line by region.
    manager.import_proxies_with_classifier("IP.txt", classifier=classifier_smartproxy)

    # Per-region totals.
    summary = manager.get_summary()
    print("代理统计:", summary)

    # Take one random PL proxy and remove it from the database.
    proxy = manager.get_random_proxy_by_region(region="PL", remove_after_fetch=True)
    print(f"取出的代理: {proxy}")

    # Counts after the removal.
    for label, region_code in (("所有代理总数:", "ALL"), ("PL 区域代理数:", "PL")):
        print(label, manager.get_proxy_count(region_code))

    # Remaining proxy lists, first PL only and then everything.
    for label, region_code in (("PL 区域当前列表:", "PL"), ("目前所有的可用代理列表:", "ALL")):
        print(label, manager.get_proxies(region_code))

    # Write the remaining proxies back to a file.
    manager.export_proxies("剩下的可用IP.txt")
|
|
|
|
|
|