155 lines
4.4 KiB
Python
155 lines
4.4 KiB
Python
from datetime import datetime
|
||
from urllib.parse import urlparse
|
||
|
||
import pytz
|
||
import requests
|
||
from bs4 import BeautifulSoup
|
||
from dateutil.parser import parse
|
||
from loguru import logger
|
||
|
||
import const
|
||
|
||
|
||
def check_website_status(url):
    """
    Check whether a website is reachable, i.e. responds with HTTP 200.

    :param url: URL to probe
    :return: True when reachable, False otherwise (failure is logged)
    """
    try:
        # 30-second timeout so a hung server cannot block the caller forever
        # (the original comment claimed 5 seconds, which was wrong).
        response = requests.get(url, timeout=30)
        if response.status_code == 200:
            return True
        logger.error(f"{url} 网站无法访问,状态码:{response.status_code}")
        return False
    except requests.Timeout as e:
        logger.error(f"{url} 请求超时 30 秒,错误:{e}")
        return False
    except requests.ConnectionError as e:
        logger.error(f"{url} 连接错误,错误:{e}")
        return False
    except requests.RequestException as e:
        # Catch-all for other requests-level failures (invalid URL, too many
        # redirects, ...); Timeout/ConnectionError above give finer messages.
        logger.error(f"{url} 网站无法访问,错误:{e}")
        return False
    except Exception as e:
        logger.error(f"{url} 未知错误,错误:{e}")
        return False
|
||
|
||
|
||
def get_domain(url):
    """
    Extract the registrable domain (second-level + top-level label) from a URL.

    :param url: full URL, e.g. "https://www.example.com/path"
    :return: registrable domain, e.g. "example.com"; a single-label host
             (e.g. "localhost") is returned as-is
    """
    parsed_uri = urlparse(url)
    # Use .hostname instead of .netloc: hostname is lowercased and already
    # stripped of any port / credentials, so "example.com:8080" cannot leak
    # a ":8080" suffix into the result (a bug with the previous netloc split).
    host = parsed_uri.hostname or ""
    labels = host.split('.')
    if len(labels) < 2:
        # Single-label host (e.g. "localhost") — nothing to trim.
        return host
    return f"{labels[-2]}.{labels[-1]}"
|
||
|
||
|
||
def get_domain_life(url):
    """
    Return the age of a domain's registration in days.

    Queries Verisign's public RDAP service for the registrable domain of
    *url* and computes the days elapsed since the "registration" event.

    :param url: any URL on the domain of interest (.com zone — the RDAP
                endpoint used here only serves Verisign-operated TLDs)
    :return: age in days, or 0 when the lookup or parsing fails
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
    }
    domain = get_domain(url)

    try:
        response = requests.get(f"https://rdap.verisign.com/com/v1/domain/{domain}", headers=headers, timeout=30)
        response.raise_for_status()  # Raises stored HTTPError, if one occurred.

        # RDAP events each carry an "eventAction"; select the registration
        # event explicitly instead of trusting events[0] to be it (the
        # previous code silently used whatever event happened to come first).
        events = response.json().get('events') or []
        registration_date = None
        for event in events:
            if event.get('eventAction') == 'registration':
                registration_date = event.get('eventDate')
                break
        if registration_date is None and events:
            # Fall back to the first event to preserve the old behavior when
            # no explicit registration event is present.
            registration_date = events[0].get('eventDate')
        if registration_date is None:
            logger.error("无效响应,未找到 'eventDate'")
            # Return 0 (not None) so all failure paths agree with the
            # documented "days" return type.
            return 0

        # eventDate is an ISO-8601 UTC timestamp ("...Z"); dateutil yields an
        # aware datetime, so compare against an aware UTC "now" — mixing a
        # naive local now with UTC (as before) skews the count by the local
        # UTC offset.
        registered_at = parse(registration_date)
        if registered_at.tzinfo is None:
            registered_at = pytz.utc.localize(registered_at)
        now = datetime.now(pytz.utc)

        return (now - registered_at).days

    except requests.exceptions.HTTPError as err:
        logger.error(f"HTTP 错误: {err}")
    except requests.exceptions.RequestException as err:
        logger.error(f"请求错误: {err}")
    except ValueError as err:
        logger.error(f"日期解析错误: {err}")
    except Exception as err:
        logger.error(f"未预期的错误: {err}")

    return 0
|
||
|
||
|
||
def remove_html_tags(text):
    """
    Strip HTML markup from *text*, keeping only the visible text content.

    :param text: source string, possibly containing HTML tags
    :return: plain text with all tags removed
    """
    soup = BeautifulSoup(text, "html.parser")
    return soup.get_text()
|
||
|
||
|
||
def get_yiyan():
    """
    Fetch a short "hitokoto" literary sentence (category d, 16-20 characters).

    :return: the sentence text on success, False on any failure (logged)
    """
    try:
        # 30-second timeout guards against a hung remote service
        # (the original comment claimed 5 seconds, which was wrong).
        response = requests.get(
            "https://v1.hitokoto.cn/?c=d&min_length=16&max_length=20&encode=text",
            timeout=30)
        if response.status_code == 200:
            return response.text
        logger.error(f"一言网站无法访问,状态码:{response.status_code}")
        return False
    except requests.Timeout as e:
        logger.error(f"一言请求超时 30 秒,错误:{e}")
        return False
    except requests.ConnectionError as e:
        logger.error(f"一言连接错误,错误:{e}")
        return False
    except requests.RequestException as e:
        logger.error(f"一言网站无法访问,错误:{e}")
        return False
    except Exception as e:
        logger.error(f"一言未知错误,错误:{e}")
        return False
|
||
|
||
|
||
def get_multiple_of_100(string):
    """
    Suggest a keyword count for an article: its length divided by 100
    (floor division), clamped to a minimum of 1.

    :param string: article text
    :return: suggested keyword count (always >= 1)
    """
    return max(1, len(string) // 100)
|
||
|
||
|
||
def format_datetime(dt_str):
    """
    Convert a datetime string into the project's configured format/timezone.

    :param dt_str: datetime string in any form dateutil can parse
    :return: string rendered in const.TIME_ZONE using const.FORMAT_TIME
    """
    target_tz = pytz.timezone(const.TIME_ZONE)
    localized = parse(dt_str).astimezone(target_tz)
    return localized.strftime(const.FORMAT_TIME)
|