# qinglong_auto_tools/scripts_check_nets.py

# -*- coding:utf-8 -*-
# 作者仓库:https://jihulab.com/spiritlhl/qinglong_auto_tools.git
# 觉得不错麻烦点个star谢谢
# 频道：https://t.me/qinglong_auto_tools

'''
cron: 1
new Env('单容器 二叉树查脚本网络链接');
'''

import time
import json
import re
import os

# requests is a third-party package. When the import fails, the handler prints
# the exception followed by an install hint (the Chinese text reads: "requests
# module missing, install it with: pip3 install requests").
try:
    import requests
except Exception as e:
    print(e, "\n缺少requests 模块，请执行命令安装：pip3 install requests")

# NOTE(review): the handler above does not stop the script, so after a failed
# import this line raises NameError right after the hint is printed.
# Switch off the warnings emitted by the urllib3 copy bundled with requests.
requests.packages.urllib3.disable_warnings()


def traversalDir_FirstDir(path):
    """Return the names of the immediate sub-directories of *path*.

    Only the first level is inspected; regular files are ignored and nothing
    is traversed recursively.

    Args:
        path: Directory to inspect.

    Returns:
        A list of sub-directory names (not full paths) in ``os.listdir``
        order. An empty list is returned when *path* does not exist or is
        not a directory, so callers can always run ``in`` / ``set()`` on the
        result without a ``None`` check.
    """
    if not os.path.isdir(path):
        return []
    return [
        name
        for name in os.listdir(path)
        if os.path.isdir(os.path.join(path, name))
    ]


# Remote JSON listings whose entry names are excluded from scanning, each paired
# with the pause (in seconds) observed after downloading it and before the next
# request is sent.
DEPENDENCE_LISTINGS = (
    ("https://jihulab.com/spiritlhl/dependence_scripts/-/raw/master/contents.json", 5),
    ("https://jihulab.com/spiritlhl/dependence_scripts/-/raw/master/utils.json", 4),
    ("https://jihulab.com/spiritlhl/dependence_scripts/-/raw/master/function.json", 0),
)

# Upper bound (seconds) for a single download so a stalled connection cannot
# hang the whole run.
DEPENDENCE_TIMEOUT = 30


def fetch_dependence_listings():
    """Download and decode every listing in ``DEPENDENCE_LISTINGS``.

    Returns:
        The decoded JSON payloads, in the order of ``DEPENDENCE_LISTINGS``.

    Raises:
        requests.RequestException: On connection problems or timeouts.
        ValueError: When a response body is not valid JSON.
    """
    payloads = []
    for url, pause_after in DEPENDENCE_LISTINGS:
        payloads.append(requests.get(url, timeout=DEPENDENCE_TIMEOUT).json())
        if pause_after:
            time.sleep(pause_after)
    return payloads


def read_ex(or_list):
    """Extend the exclusion list with the file names published remotely.

    The three remote listings are downloaded (with one retry after a short
    pause) and the ``name`` field of every entry is added to the names
    already present in *or_list*.

    Args:
        or_list: Names that are already excluded. The list itself is left
            untouched; a new list is returned.

    Returns:
        A new de-duplicated list holding the entries of *or_list* followed by
        the remote names, or ``None`` when the listings could not be
        downloaded or a payload is not a list of entries (for example an
        error object carrying ``documentation_url``). Callers are expected to
        fall back to their local list on ``None``.
    """
    payloads = None
    for attempt in range(2):
        try:
            payloads = fetch_dependence_listings()
            break
        except (requests.RequestException, ValueError):
            if attempt == 0:
                print("网络波动，稍后尝试")
                time.sleep(5)
            else:
                print("网络问题无法获取仓库文件列表，停止加载远程文件剔除依赖文件，直接本地检索")
    if payloads is None:
        return None

    names = list(or_list)
    for payload in payloads:
        # A healthy listing is a JSON array; anything else is an error reply.
        if not isinstance(payload, list):
            return None
        names.extend(
            entry["name"]
            for entry in payload
            if isinstance(entry, dict) and "name" in entry
        )
    # dict.fromkeys removes duplicates while keeping a stable order.
    return list(dict.fromkeys(names))


# Names in the script root that are never scanned; anything listed here is skipped.
DEFAULT_SKIP_NAMES = ['node_modules', '__pycache__', 'utils', '.pnpm-store', 'function', 'tools', 'backUp', '.git',
                      '.idea', 'fake_keys.txt', 'ec_config.txt']

# Stand-in entry used for the white list / black list when none is configured.
UNCONFIGURED_ENTRY = "http://xxxx.xxxx.xxx/"

# Files and folders whose name starts with this prefix are not scanned.
# NOTE(review): presumably these are this tool set's own checkouts - confirm.
OWN_PREFIX = "spiritLHL"

# One pattern per quote style and scheme. They are applied independently (not
# merged into one alternation) so a match of one pattern never hides a match
# of another.
LINK_PATTERNS = (
    re.compile(r"\"(https://.*?)\""),
    re.compile(r"\"(http://.*?)\""),
    re.compile(r"\'(https://.*?)\'"),
    re.compile(r"\'(http://.*?)\'"),
)


def split_env_list(raw):
    """Split an ``@``-separated setting into non-empty, de-duplicated entries.

    Empty entries are dropped because an empty blocked word is a substring of
    every link and would mark all of them as blocked.
    """
    return list(dict.fromkeys(item for item in raw.split("@") if item))


def extract_links(text):
    """Return the quoted http(s) URLs in *text*, de-duplicated, first seen first."""
    found = []
    for pattern in LINK_PATTERNS:
        found.extend(pattern.findall(text))
    return list(dict.fromkeys(found))


def filter_links(links, white_list):
    """Return *links* without ``.jd.com`` URLs, dot-less URLs and white-listed URLs.

    A new list is built instead of removing items from the list being
    iterated, which skips the element following each removal.
    NOTE(review): the white list is compared by exact equality although the
    configuration hint calls its entries keywords - confirm the intent.
    """
    return [
        link
        for link in links
        if ".jd.com" not in link and "." in link and link not in white_list
    ]


def count_blocked_links(links, keys):
    """Return how many of *links* contain at least one blocked word from *keys*."""
    return sum(1 for link in links if any(key in link for key in keys))


def read_text(path):
    """Return the UTF-8 text of *path*, or ``None`` when it cannot be read.

    Directories, missing files, permission problems and non-UTF-8 content are
    all reported as ``None`` so that callers can simply skip the entry.
    """
    try:
        with open(path, "r", encoding="utf-8") as handle:
            return handle.read()
    except (OSError, UnicodeError):
        return None


def load_skip_names():
    """Return the names excluded from scanning.

    When the environment variable ``ec_read_dep`` equals ``"true"`` the local
    defaults are extended through ``read_ex``; otherwise (unset or any other
    value) a configuration hint is printed and the defaults are used.
    """
    defaults = list(DEFAULT_SKIP_NAMES)
    if os.environ.get("ec_read_dep") != "true":
        print("#未配置远程加载依赖文件名不查询，有需要可添加配置")
        print("export ec_read_dep=\"true\"\n")
        return defaults

    print("已配置远程加载依赖文件名不查询\n")
    try:
        # A copy is passed so the defaults stay intact whatever read_ex does.
        merged = read_ex(list(defaults))
    except Exception as exc:
        print("远程加载依赖文件名失败，改用默认剔除列表：{}\n".format(exc))
        merged = None
    return defaults if merged is None else merged


def load_white_list():
    """Return the white list from ``ec_white_list`` or the placeholder default."""
    raw = os.environ.get("ec_white_list")
    if raw is None:
        print("#未配置白名单，默认查询所有链接，有需要可添加配置")
        print("export ec_white_list=\"各种白名单关键词，用@分隔\"\n")
        return [UNCONFIGURED_ENTRY]
    print("已配置白名单\n")
    return split_env_list(raw)


def load_black_keys():
    """Return the blocked words from ``ec_black_keys`` or the placeholder default."""
    raw = os.environ.get("ec_black_keys")
    if raw is None:
        print("#未配置黑名单，默认查询所有链接，有需要可添加配置")
        print("export ec_black_keys=\"各种黑名单屏蔽词，用@分隔\"\n")
        return [UNCONFIGURED_ENTRY]
    print("已配置黑名单(屏蔽词)\n")
    return split_env_list(raw)


def collect_root_links(skip_names, white_list):
    """Scan the files of the script root and return ``{file name: [links]}``.

    Candidate names come from both ``../`` and ``./``. They are opened below
    ``../`` unless ``../`` contains an entry named ``db``, in which case they
    are opened relative to the current directory. Entries that cannot be read
    as UTF-8 text (directories included) are left out.
    """
    parent_entries = os.listdir("../")
    candidates = set(parent_entries + os.listdir("./")) - set(skip_names)
    base = "" if "db" in parent_entries else "../"

    links_by_file = {}
    for name in sorted(candidates):  # sorted only to make the report stable
        if name.startswith(OWN_PREFIX):
            continue
        text = read_text(base + name)
        if text is None:
            continue
        links_by_file[name] = filter_links(extract_links(text), white_list)
    return links_by_file


def collect_repo_links(skip_names, white_list):
    """Scan every repository folder and return ``{folder: {file name: [links]}}``.

    Folders are looked up in ``../`` unless ``../`` has a sub-directory named
    ``config`` or ``db``, in which case ``./`` is used. Folders starting with
    ``OWN_PREFIX`` keep an (empty) entry so they still show up in the report.
    Unreadable files and folders are skipped instead of aborting the run.
    """
    parent_dirs = traversalDir_FirstDir("../") or []
    if "config" not in parent_dirs and "db" not in parent_dirs:
        base, folders = "../", parent_dirs
    else:
        base, folders = "./", traversalDir_FirstDir("./") or []

    skip = set(skip_names)
    selected = set(folders) - skip
    selected.discard("spiritLHL_qinglong_auto_tools")

    links_by_folder = {}
    for folder in sorted(selected):
        per_file = links_by_folder[folder] = {}
        if folder.startswith(OWN_PREFIX):
            continue
        try:
            entries = set(os.listdir(base + folder)) - skip
        except OSError:
            continue
        for name in sorted(entries):
            text = read_text(base + folder + "/" + name)
            if text is None:
                continue
            per_file[name] = filter_links(extract_links(text), white_list)
    return links_by_folder


def report_links(links_by_file, keys):
    """Print the links of every file and return ``(link count, blocked count)``."""
    total = 0
    blocked = 0
    for name, links in links_by_file.items():
        print(name)
        if not links:
            print("无链接\n")
            continue
        for link in links:
            print(link)
        print()
        total += len(links)
        blocked += count_blocked_links(links, keys)
    return total, blocked


def main():
    """Report the external links found in the root scripts and in each repository folder."""
    skip_names = load_skip_names()
    white_list = load_white_list()
    keys = load_black_keys()

    print("============ 获取根目录脚本文件内容 ============\n")
    root_links = collect_root_links(skip_names, white_list)

    print()
    print("查询脚本，筛选网址中")
    print()

    print("根目录文件\n")
    count_root, count_root_key = report_links(root_links, keys)

    print()
    print("查到链接个数： {}".format(count_root))

    print()
    print("包含屏蔽词链接个数： {}".format(count_root_key))

    print("============ 根目录查询完毕 ============\n\n\n")

    repo_links = collect_repo_links(skip_names, white_list)
    for folder, per_file in repo_links.items():
        print("====== 查询 {} 对应文件夹 =========\n".format(folder))
        print("{}文件夹文件\n".format(folder))
        # Counters are per folder, matching the wording of the two lines below.
        count_dict, count_dict_key = report_links(per_file, keys)

        print("{}文件夹内查到链接个数： {}\n".format(folder, count_dict))

        print("{}文件夹内包含屏蔽词链接个数： {}\n".format(folder, count_dict_key))

        print("====== {}对应文件夹查询完毕 =========\n".format(folder))

    print()
    print("查询结束")


if __name__ == '__main__':
    main()