# niumalll/website/check_resources.py

#!/usr/bin/env python3
"""
检查网站资源文件完整性的脚本
"""

import os
import re
from pathlib import Path

# ``src`` / ``href`` attribute values, in either double or single quotes.
_LINK_ATTR_RE = re.compile(
    r'''(?:src|href)\s*=\s*(?:"([^"]*)"|'([^']*)')''',
    re.IGNORECASE,
)
_IMAGE_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.gif', '.svg', '.ico')
_ASSET_EXTENSIONS = ('.css', '.js')
_EXTERNAL_PREFIXES = ('http://', 'https://', '//')


def extract_resource_links(file_path):
    """Extract local resource links from an HTML file.

    The file is read as UTF-8 and scanned with a regular expression (not an
    HTML parser) for ``src``/``href`` attribute values that point at images
    (jpg, jpeg, png, gif, svg, ico) or at CSS/JS files.

    Args:
        file_path: Path of the HTML file to scan.

    Returns:
        A list of link strings: all image links in document order, followed
        by all CSS/JS links in document order. Duplicates are kept. Links
        starting with ``http://``, ``https://`` or ``//`` are excluded, and
        any ``?query`` / ``#fragment`` suffix is removed so the result is a
        plain path.
    """
    with open(file_path, 'r', encoding='utf-8') as f:
        content = f.read()

    image_links = []
    asset_links = []
    for double_quoted, single_quoted in _LINK_ATTR_RE.findall(content):
        raw_value = (double_quoted or single_quoted).strip()
        # A cache-busting query string or a fragment is not part of the file
        # name on disk, so drop it before looking at the extension.
        link = re.split(r'[?#]', raw_value, maxsplit=1)[0]
        lowered = link.lower()

        # Skip external links (scheme comparison is case-insensitive).
        if lowered.startswith(_EXTERNAL_PREFIXES):
            continue

        if lowered.endswith(_IMAGE_EXTENSIONS):
            image_links.append(link)
        elif lowered.endswith(_ASSET_EXTENSIONS):
            asset_links.append(link)

    return image_links + asset_links

def check_resources():
    """Check that every local resource referenced by the HTML files exists.

    Every ``*.html`` file in the directory containing this script is scanned
    with ``extract_resource_links``; each returned link is resolved against
    that directory and tested for existence. Progress and a summary are
    printed to stdout.

    Returns:
        A list of ``(html_file_name, resource, "缺失")`` tuples, one for each
        reference whose target does not exist. Empty when nothing is missing.
    """
    # Imported locally so this function does not depend on a file-level import.
    from urllib.parse import unquote

    website_dir = Path(__file__).parent
    # glob() yields entries in arbitrary order; sort for reproducible output.
    html_files = sorted(website_dir.glob('*.html'))

    missing_resources = []
    existing_count = 0

    for html_file in html_files:
        print(f"检查文件: {html_file.name}")

        for resource in extract_resource_links(html_file):
            # Links are URLs: decode percent-escapes (e.g. %20) to get the
            # on-disk name.
            local_path = unquote(resource)
            # A root-relative link ("/css/a.css") refers to the site root.
            # Joining an absolute path would discard website_dir entirely, so
            # strip the leading slashes first.
            resource_path = website_dir / local_path.lstrip('/')

            if resource_path.exists():
                existing_count += 1
            else:
                missing_resources.append((html_file.name, resource, "缺失"))

    # Print the summary.
    print("\n=== 资源检查结果 ===")

    if missing_resources:
        print("\n❌ 缺失的资源文件:")
        for file, resource, status in missing_resources:
            print(f"  {file} -> {resource} ({status})")
    else:
        print("\n✅ 所有资源文件都存在")

    if existing_count:
        print(f"\n✅ 存在的资源文件: {existing_count} 个")

    return missing_resources

if __name__ == "__main__":
    # Run the check when executed as a script. The returned list of missing
    # resources is not used here, so the process exit status does not reflect
    # whether anything was missing.
    check_resources()