mirror of https://github.com/MetaCubeX/meta-rules-dat.git synced 2025-11-03 17:46:54 +08:00
github-actions[bot]
2023-11-15 18:03:45 +08:00
parent 8026386706
commit 5273c5e5a2
9 changed files with 180 additions and 13 deletions


@@ -0,0 +1,35 @@
103.com
123cha.com
95081.com
airasia.com
baid.us
baidu.jp
bussou.com
busytrade.com
cnbeta.com
cnbetacdn.com
cnpolitics.org
dm530.net
duanzhihu.com
dysfz.cc
emacs-china.org
galaxymacau.com
galstars.net
haitum.com
hostloc.com
jiaoyou8.com
kh.google.com
laonanren.com
mysinablog.com
ntrqq.com
nytlog.com
shuangtv.net
suppig.net
top
xclient.info
xjp.cc
yanghengjun.com
ydy.com
yslang.com
yysub.net
hamreus.com

resouces/direct.txt Normal file


@@ -0,0 +1,67 @@
#!/usr/bin/env python3
import sys

# Echo the input list and the output file for removed domains
print(sys.argv[1], sys.argv[2])

''' Find redundant items in domain lists.
    e.g. 'bar.foo.com' is redundant for 'foo.com'.
'''


def load(list):
    ''' Parse conf file & Prepare data structure
        Returns: [ ['abc', 'com'],
                   ['bar', 'foo', 'com'],
                   ... ]
    '''
    results = []
    with open(list, 'r') as f:
        for line in f.readlines():
            line = line.strip()
            if line == '' or line.startswith('#'):
                continue
            # A domain name is case-insensitive and
            # consists of several labels, separated by a full stop
            domain_labels = line.lower().split('.')
            results.append(domain_labels)

    # Sort results by the number of domain labels,
    # so parent domains are processed before their subdomains
    results.sort(key=len)
    return results


def find(labelses, removedDomainFile):
    ''' Find redundant items by a tree of top-level domain label to sub-level.
        `tree` is like { 'com': { 'foo': { 'bar': LEAF },
                                  'abc': LEAF },
                         'org': ... }
    '''
    tree = {}
    LEAF = 1
    for labels in labelses:
        domain = '.'.join(labels)
        # Init root node as current node
        node = tree
        while len(labels) > 0:
            label = labels.pop()
            if label in node:
                # If the child node is a LEAF node, the current domain is
                # either an existing domain or a subdomain of an existing one.
                if node[label] == LEAF:
                    print(f"Redundant found: {domain} at {'.'.join(labels)}")
                    with open(removedDomainFile, "a") as f:
                        f.write(domain)
                        f.write("\n")
                    break
            else:
                # Create a leaf node if the current label is the last one
                if len(labels) == 0:
                    node[label] = LEAF
                # Create a branch node
                else:
                    node[label] = {}
            # Iterate to the child node
            node = node[label]


if __name__ == '__main__':
    find(load(sys.argv[1]), sys.argv[2])
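
For reference, a minimal usage sketch of the script above, assuming it is saved as resouces/findRedundant.py; sample.txt and removed.txt are placeholder files created for illustration, not files added by this commit:

# Sketch: run the redundancy check on a small sample list.
# Assumption: sample.txt contains foo.com, abc.com and bar.foo.com (one per line).
import subprocess

subprocess.run(
    ["python3", "resouces/findRedundant.py", "sample.txt", "removed.txt"],
    check=True,
)
# Expected console output under these assumptions:
#   sample.txt removed.txt
#   Redundant found: bar.foo.com at bar
# and bar.foo.com is appended to removed.txt.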


@@ -0,0 +1,3 @@
ifanr.com
weibo.com
www.baidu.com

resouces/proxy.txt Normal file

@@ -0,0 +1,2 @@
supertop.co
hk.chinamobile.com


@@ -0,0 +1,33 @@
4paradigm.com
addthis.com
addthisedge.com
alimama.alicdn.com
alimama.com
analytics.google.com
app.chat.xiaomi.net
bdtj.tagtic.cn
cdn.onesignal.com
click.discord.com
click.redditmail.com
ctrip.com
d.ifengimg.com
icons.mydrivers.com
img.alibaba.com
jav321.com
knet.cn
mail.tsinghua.edu.cn
mtalk.google.com
mx.technolutions.net
newrelic.com
offer.alibaba.com
pingjs.qq.com
qlogo.cn
resolver.msg.xiaomi.net
s.youtube.com
sf3-ttcdn-tos.pstatp.com
t.co
tagtic.cn
telegra.ph
tongji.baidu.com
tv.sohu.com
ue.yeyoucdn.com

resouces/reject.txt Normal file

resouces/removeFrom.py Normal file

@@ -0,0 +1,29 @@
import argparse


def remove_domains(file_to_remove, file_to_remove_from, output_file):
    with open(file_to_remove, "r") as f_remove, open(
        file_to_remove_from, "r"
    ) as f_from:
        domains_to_remove = set(line.strip() for line in f_remove)
        all_domains = set(line.strip() for line in f_from)

    remaining_domains = all_domains - domains_to_remove

    with open(output_file, "w") as output:
        output.write("\n".join(remaining_domains))


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Remove domains from a file.")
    parser.add_argument(
        "-remove", required=True, help="File containing domains to be removed"
    )
    parser.add_argument(
        "-from", required=True, dest="from_file", help="File to remove domains from"
    )
    parser.add_argument("-out", required=True, help="Output file")

    args = parser.parse_args()
    remove_domains(args.remove, args.from_file, args.out)
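
For reference, a minimal usage sketch of removeFrom.py with its -remove/-from/-out flags; remove.txt, full.txt and filtered.txt are placeholder file names for illustration, not files from this commit:

# Sketch: subtract one domain list from another.
import subprocess

subprocess.run(
    [
        "python3", "resouces/removeFrom.py",
        "-remove", "remove.txt",
        "-from", "full.txt",
        "-out", "filtered.txt",
    ],
    check=True,
)
# Every line of full.txt that exactly matches a line in remove.txt is dropped;
# the remaining domains are written to filtered.txt in arbitrary (set) order.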