Python测试代理速度源代码,Python代理源代码
文章由Byrx.net分享于2019-03-23 09:03:29
Python测试代理速度源代码,Python代理源代码
#!/usr/bin/env python
"""Get proxies from urls, and test their speed.

The script collects ``ip:port`` proxy addresses from the pages listed in
``urls`` (plus any addresses saved by a previous run in ``output_file``),
fetches ``test_url`` through each proxy in its own thread, ranks the proxies
by response time and writes the fastest ones back to ``output_file``.

The code runs unchanged on Python 2.6+ and Python 3.
"""
import re
import threading
import time

try:  # Python 3
    from urllib.request import ProxyHandler, build_opener
except ImportError:  # Python 2
    from urllib2 import ProxyHandler, build_opener

try:  # Python 2
    read_line = raw_input
except NameError:  # Python 3
    read_line = input

urls = [
    "http://proxy.ipcn.org/proxylist.html",
    "http://info.hustonline.net/index/proxyshow.aspx",
]  # where to get proxies
urls_proxy = {}  # proxy used to connect urls (empty dict: connect directly)
proxy_pattern = re.compile(r"\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}:\d{1,}")
test_url = "http://www.python.org/"
# NOTE(review): a proxy only counts as working when this marker appears in the
# body of test_url -- confirm the page still contains it.
test_pattern = re.compile(r"xs4all")
time_out = 30.0  # max waiting time to test proxies
output_file = "Proxies.txt"


def fetch(url, proxies, timeout=None):
    """Return the body of *url* as text, connecting through *proxies*.

    proxies -- mapping of scheme to proxy URL; an empty mapping disables
               proxying altogether.
    timeout -- socket timeout in seconds; defaults to ``time_out``.

    Raises whatever the underlying opener raises (URL, HTTP, socket errors).
    """
    if timeout is None:
        timeout = time_out
    opener = build_opener(ProxyHandler(proxies))
    response = opener.open(url, timeout=timeout)
    try:
        raw = response.read()
    finally:
        response.close()
    # Only ASCII content is ever matched, so undecodable bytes may be replaced.
    return raw.decode("utf-8", "replace")


def read_saved_proxies(path):
    """Return the set of proxies stored one per line in *path*.

    A missing or unreadable file yields an empty set. Lines are stripped, so
    a final line without a trailing newline is not truncated.
    """
    try:
        with open(path) as saved:
            return set(line.strip() for line in saved if line.strip())
    except IOError:
        return set()


def save_proxies(path, proxies):
    """Write *proxies* to *path*, one per line. Raises IOError on failure."""
    with open(path, "w") as out:
        out.writelines(proxy + "\n" for proxy in proxies)


class TestTime(threading.Thread):
    """Test a proxy's speed in a new thread by recording its connect time.

    After the thread finishes, ``time`` holds the elapsed seconds (``None``
    if the proxy failed or the page did not match) and ``stat`` holds a
    human-readable status line. Until then ``stat`` reads "<proxy> time out!".
    """

    def __init__(self, proxy):
        threading.Thread.__init__(self)
        self.proxy = proxy
        self.time = None
        self.stat = proxy + " time out!"

    def run(self):
        start = time.time()
        try:
            data = fetch(test_url, {"http": "http://" + self.proxy})
        except Exception:
            # Thread boundary: any failure (connect, HTTP, read) just marks
            # the proxy as unusable instead of killing the thread silently.
            self.stat = self.proxy + " fails!"
            return
        elapsed = time.time() - start
        if test_pattern.search(data):  # if data is matched
            self.time = elapsed
            self.stat = self.proxy + " time: " + str(elapsed)
        else:
            self.stat = self.proxy + " not matched!"


def totest(proxy, result):
    """Test a proxy's speed in time_out seconds.

    Appends ``(elapsed_seconds, proxy)`` to *result* when the proxy answered
    with a matching page before the timeout.
    """
    test = TestTime(proxy)
    test.daemon = True
    print("testing " + proxy)
    test.start()
    test.join(time_out)  # wait time_out seconds for testing
    print(test.stat)
    if test.time is not None:
        result.append((test.time, proxy))


def main():
    """Collect proxies, measure them concurrently and save the fastest."""
    # get old proxies in output_file
    allproxies = read_saved_proxies(output_file)

    # get else proxies from urls
    for url in urls:
        print("getting proxy from " + url)
        try:
            data = fetch(url, urls_proxy)
        except Exception:
            print(url + " can not open!\n")
        else:
            allproxies.update(proxy_pattern.findall(data))
            print(url + " finished!")

    # test all proxies' speed, one thread per proxy
    result = []
    workers = []
    for proxy in allproxies:
        worker = threading.Thread(target=totest, args=(proxy, result))
        worker.daemon = True
        worker.start()
        workers.append(worker)

    # Wait for the workers, but never longer than the old fixed sleep did.
    deadline = time.time() + time_out + 5.0
    for worker in workers:
        worker.join(max(0.0, deadline - time.time()))

    # show all proxies' speed (snapshot: late workers may still append)
    ranked = sorted(result)
    for rank, (elapsed, proxy) in enumerate(ranked, 1):
        print(str(rank) + "\t" + proxy + " \t:\t" + str(elapsed))

    # output needed proxies
    try:
        wanted = abs(int(read_line("\nHow many proxies to output: ")))
    except (ValueError, EOFError):
        # Leave the existing output file untouched on unusable input.
        print("Invalid number; nothing written.")
        return
    num = min(wanted, len(ranked))
    try:
        save_proxies(output_file, [proxy for _, proxy in ranked[:num]])
    except IOError:
        print("Can not open output file!")
    else:
        print(str(num) + " proxies are output.")


if __name__ == "__main__":
    main()

# This snippet comes from http://byrx.net
相关内容
- 使用Python脚本格式化压缩后的JS文件,,lines = open
- 字符串去重复值,字符串重复值,以前在实际应用时,需
- 九九乘法表,乘法,print '\\n'.
- urllib2抓取网页时的错误处理,urllib2抓取网页,try: u
- python列出文件夹下的所有文件,python列出文件夹,#方法
- Python队列和堆栈,Python队列堆栈,#For a stack
- python windows平台锁定键盘,python锁定,pywin32中没有Bl
- Python 获取邮件地址,python获取邮件地址,import email
- Python 最长公共子串算法,python串算法,#!/usr/bin/e
- Python 实现enum的功能,python实现enum,class Enumer
评论关闭