host_lists_as_forward_zones.py 8.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277
  1. #!/usr/bin/env python
  2. # encoding: utf-8
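'''
Build a DNS blackhole zone file from public host block lists.

Downloads the hosts-format and Adblock-style lists configured below, drops
the whitelisted domains, adds the local blacklist and writes the result as a
PowerDNS recursor forward zone file (default) or, with -u, as a list of
unbound "local-zone" statements. Pass -d to also include the Disconnect list.
'''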
  5. import requests
  6. import sys
  7. if sys.version_info[0] == 2:
  8. reload(sys)
  9. sys.setdefaultencoding('utf-8')
  10. # Name of the forward zone file you'll use in your recursor.conf
  11. PDNS_ZONE_FILE = '/etc/pdns/null.forward.zone'
  12. UNBOUND_ZONE_FILE = '/etc/unbound/null.zone'
  13. FWD_TO = '127.0.0.2:6666'
  14. # File that contains one domain per line to whitelist
  15. # Can accept comments after domain:
  16. # xxxx.yyy # Domain required for zzzz.com
  17. DOMAIN_WHITELIST_FILE = '/etc/pdns/bh_whitelist'
  18. # Same as above but for hosts you need blacklisted that are not part of the lists
  19. DOMAIN_BLACKLIST_FILE = '/etc/pdns/bh_blacklist'
  20. #PDNS_ZONE_FILE = '/tmp/null.forward.zone'
  21. #UNBOUND_ZONE_FILE = '/tmp/null.zone'
  22. #DOMAIN_WHITELIST_FILE = '/tmp/bh_whitelist'
  23. # Well known hosts lists
  24. HOST_FILE_URL = ['http://someonewhocares.org/hosts/hosts',
  25. 'https://hosts-file.net/download/hosts.txt',
  26. 'http://winhelp2002.mvps.org/hosts.txt',
  27. 'http://www.malwaredomainlist.com/hostslist/hosts.txt',
  28. 'https://pgl.yoyo.org/adservers/serverlist.php?hostformat=hosts;showintro=0'
  29. ]
  30. # Easylist, feel free to add easyprivacy/fanboy to the list
  31. EASY_LIST_URL = ['https://easylist.to/easylist/easylist.txt',
  32. 'https://raw.githubusercontent.com/paulgb/BarbBlock/master/BarbBlock.txt'
  33. ]
  34. # Disconnect list. Set categories you want to remove
  35. DISCONNECT_LIST_URL = 'https://services.disconnect.me/disconnect-plaintext.json'
  36. #['Advertising', 'Analytics', 'Disconnect', 'Social']
  37. DISCONNECT_CATEGORIES = ['Advertising', 'Analytics']
  38. def process_host_file_url(white_list, srv_mode):
  39. host_list = []
  40. for url in HOST_FILE_URL:
  41. try:
  42. r = requests.get(url)
  43. except:
  44. sys.exit()
  45. if r.status_code != 200:
  46. # Continue to next url
  47. continue
  48. else:
  49. for line in r.iter_lines():
  50. try:
  51. # If utf8 decode fails jumps next item
  52. line = line.decode('utf-8')
  53. except:
  54. continue
  55. if line.startswith('127.0.0.1') or line.startswith('0.0.0.0'):
  56. # Remove ip
  57. try:
  58. n_host = line.split('127.0.0.1')[1]
  59. except IndexError:
  60. n_host = line.split('0.0.0.0')[1]
  61. except:
  62. continue
  63. # Fix some host lists having \t instead of space
  64. if n_host.startswith('\t'):
  65. n_host = n_host.lstrip('\t')
  66. # Ensure we only keep host as some list add comments
  67. n_host = n_host.split('#')[0].rstrip()
  68. # Some leave ports
  69. n_host = n_host.split(':')[0]
  70. # Some leave spaces prefixed
  71. n_host = n_host.replace(' ', '')
  72. # Remove local domains
  73. if n_host == 'localhost.localdomain' or n_host == 'localhost':
  74. continue
  75. # Now add the hosts to the list
  76. if n_host not in white_list:
  77. if srv_mode == 'unbound':
  78. host_list.append('local-zone: "{0}" static'.format(n_host))
  79. else:
  80. host_list.append('{0}={1}'.format(n_host, FWD_TO))
  81. return sorted(host_list)
  82. def process_easylist_url(white_list, host_list, srv_mode):
  83. for url in EASY_LIST_URL:
  84. try:
  85. r = requests.get(url)
  86. except:
  87. sys.exit()
  88. if r.status_code != 200:
  89. # Continue to next url
  90. continue
  91. else:
  92. for line in r.iter_lines():
  93. try:
  94. # If utf8 decode fails jumps next item
  95. line = line.decode('utf-8')
  96. except:
  97. continue
  98. if line.startswith('||'):
  99. # I don't want to bother with wildcards
  100. if '*' in line:
  101. continue
  102. # Keep domain
  103. try:
  104. n_host = line.split('^')[0]
  105. except:
  106. continue
  107. # and get rid of those '$'
  108. try:
  109. n_host = n_host.split('$')[0]
  110. except IndexError:
  111. pass
  112. # Remove leading '||'
  113. n_host = n_host.lstrip('||')
  114. # Some entries are urls
  115. if '/' in n_host:
  116. n_host = n_host.split('/')[0]
  117. # Some entries are no domains...
  118. if '.' not in n_host:
  119. continue
  120. # Now add the hosts to the list
  121. if n_host not in white_list:
  122. if srv_mode == 'unbound':
  123. host_list.append('local-zone: "{0}" static'.format(n_host))
  124. else:
  125. host_list.append('{0}={1}'.format(n_host, FWD_TO))
  126. return host_list
  127. def process_disconnect_url(white_list, host_list, srv_mode):
  128. try:
  129. r = requests.get(DISCONNECT_LIST_URL)
  130. except:
  131. sys.exit()
  132. if r.status_code == 200:
  133. try:
  134. j = r.json()
  135. except:
  136. print('Seems like we did not fetch a json dict')
  137. sys.exit()
  138. else:
  139. print('Incorrect return code from {0}: {1}'.format(DISCONNECT_LIST_URL, r.status_code))
  140. sys.exit()
  141. if 'categories' in j:
  142. for category in j['categories']:
  143. if category in DISCONNECT_CATEGORIES:
  144. for sub_dict in j['categories'][category]:
  145. for entity in sub_dict:
  146. for main_url in sub_dict[entity]:
  147. h_list = sub_dict[entity][main_url]
  148. if isinstance(h_list, list):
  149. for host in h_list:
  150. if host not in white_list:
  151. if srv_mode == 'unbound':
  152. host_list.append('local-zone: "{0}" static'.format(host))
  153. else:
  154. host_list.append('{0}={1}'.format(host, FWD_TO))
  155. else:
  156. print('"categories" key not found in dict, nothing to process')
  157. sys.exit()
  158. # Return the list sorted
  159. return host_list
  160. def process_black_list(black_list, host_list, srv_mode):
  161. for bl_host in black_list:
  162. if srv_mode == 'unbound':
  163. host_list.append('local-zone: "{0}" static'.format(bl_host))
  164. else:
  165. host_list.append('{0}={1}'.format(bl_host, FWD_TO))
  166. # Return the list sorted
  167. return sorted(list(set(host_list)))
  168. def build_whitelist():
  169. white_list = []
  170. try:
  171. f = open(DOMAIN_WHITELIST_FILE, 'r')
  172. except:
  173. return white_list
  174. # Loop over the line and append them to the the list
  175. for line in f.readlines():
  176. # If there's a comment
  177. if '#' in line:
  178. white_list.append(line.split('#')[0].strip())
  179. else:
  180. white_list.append(line.strip())
  181. return white_list
  182. def build_blacklist():
  183. black_list = []
  184. try:
  185. f = open(DOMAIN_BLACKLIST_FILE, 'r')
  186. except:
  187. return black_list
  188. # Loop over the line and append them to the the list
  189. for line in f.readlines():
  190. # If there's a comment
  191. if '#' in line:
  192. black_list.append(line.split('#')[0].strip())
  193. else:
  194. black_list.append(line.strip())
  195. return black_list
  196. def make_zone_file(host_list, zone_file):
  197. f = open(zone_file, 'w')
  198. f.write('\n'.join(host_list))
  199. def main():
  200. # Build whitelist/blacklist
  201. white_list = build_whitelist()
  202. black_list = build_blacklist()
  203. # Check if we want to output an unbound void zone
  204. if len(sys.argv) >= 2:
  205. if '-u' in sys.argv:
  206. zone_file = UNBOUND_ZONE_FILE
  207. srv_mode = 'unbound'
  208. else:
  209. zone_file = PDNS_ZONE_FILE
  210. srv_mode = 'pdns'
  211. # Build a bh domain list from our URLs
  212. host_list = process_host_file_url(white_list, srv_mode)
  213. host_list = process_easylist_url(white_list, host_list, srv_mode)
  214. # Use disconnect list only if explicitely asked
  215. if len(sys.argv) >= 2:
  216. if '-d' in sys.argv:
  217. host_list = process_disconnect_url(white_list, host_list, srv_mode)
  218. # Add hosts from blacklist and return sorted host_list
  219. host_list = process_black_list(black_list, host_list, srv_mode)
  220. # Create pdns file
  221. make_zone_file(host_list, zone_file)
  222. if __name__ == "__main__":
  223. main()