mirror of
				https://github.com/MetaCubeX/meta-rules-dat.git
				synced 2025-11-03 17:46:54 +08:00 
			
		
		
		
	test
This commit is contained in:
		
							
								
								
									
										67
									
								
								resouces/findRedundantDomain.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										67
									
								
								resouces/findRedundantDomain.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,67 @@
 | 
			
		||||
#!/usr/bin/env python3
 | 
			
		||||
 | 
			
		||||
import sys
 | 
			
		||||
print(sys.argv[1], sys.argv[2])
 | 
			
		||||
 | 
			
		||||
''' Find redundant items in domain lists.
 | 
			
		||||
    e.g. 'bar.foo.com' is redundant for 'foo.com'.
 | 
			
		||||
'''
 | 
			
		||||
 | 
			
		||||
def load(list):
 | 
			
		||||
    ''' Parse conf file & Prepare data structure
 | 
			
		||||
        Returns: [ ['abc', 'com'],
 | 
			
		||||
                   ['bar', 'foo', 'com'],
 | 
			
		||||
                   ... ]
 | 
			
		||||
    '''
 | 
			
		||||
 | 
			
		||||
    results = []
 | 
			
		||||
    with open(list, 'r') as f:
 | 
			
		||||
        for line in f.readlines():
 | 
			
		||||
            line = line.strip()
 | 
			
		||||
            if line == '' or line.startswith('#'):
 | 
			
		||||
                continue
 | 
			
		||||
            # A domain name is case-insensitive and
 | 
			
		||||
            # consists of several labels, separated by a full stop
 | 
			
		||||
            domain_labels = line.lower().split('.')
 | 
			
		||||
            results.append(domain_labels)
 | 
			
		||||
 | 
			
		||||
    # Sort results by domain labels' length
 | 
			
		||||
    results.sort(key=len)
 | 
			
		||||
    return results
 | 
			
		||||
 | 
			
		||||
def find(labelses, removedDomainFile):
 | 
			
		||||
    ''' Find redundant items by a tree of top-level domain label to sub-level.
 | 
			
		||||
        `tree` is like { 'com': { 'foo: { 'bar': LEAF },
 | 
			
		||||
                                  'abc': LEAF },
 | 
			
		||||
                         'org': ... }
 | 
			
		||||
    '''
 | 
			
		||||
 | 
			
		||||
    tree = {}
 | 
			
		||||
    LEAF = 1
 | 
			
		||||
    for labels in labelses:
 | 
			
		||||
        domain = '.'.join(labels)
 | 
			
		||||
        # Init root node as current node
 | 
			
		||||
        node = tree
 | 
			
		||||
        while len(labels) > 0:
 | 
			
		||||
            label = labels.pop()
 | 
			
		||||
            if label in node:
 | 
			
		||||
                # If child node is a LEAF node,
 | 
			
		||||
                # current domain must be an existed domain or a subdomain of an existed.
 | 
			
		||||
                if node[label] == LEAF:
 | 
			
		||||
                    print(f"Redundant found: {domain} at {'.'.join(labels)}")
 | 
			
		||||
                    with open(removedDomainFile, "a") as f:
 | 
			
		||||
                        f.write(domain)
 | 
			
		||||
                        f.write("\n")
 | 
			
		||||
                    break
 | 
			
		||||
            else:
 | 
			
		||||
                # Create a leaf node if current label is last one
 | 
			
		||||
                if len(labels) == 0:
 | 
			
		||||
                    node[label] = LEAF
 | 
			
		||||
                # Create a branch node
 | 
			
		||||
                else:
 | 
			
		||||
                    node[label] = {}
 | 
			
		||||
            # Iterate to child node
 | 
			
		||||
            node = node[label]
 | 
			
		||||
 | 
			
		||||
if __name__ == '__main__':
 | 
			
		||||
    find(load(sys.argv[1]), sys.argv[2])
 | 
			
		||||
		Reference in New Issue
	
	Block a user