python同步重庆时时彩的数据到本地,,依赖mysql,requ
python同步重庆时时彩的数据到本地,,依赖mysql,requ
依赖mysql,requests,torndb,从cp.360.com同步指定日期开始的时时彩数据回本地数据库中,支持增量更新,实时同步。1.[代码] 时时彩数据爬虫
#!/usr/bin/env python#-*-coding:utf-8-*-""" 从 cp.360.cn 同步时时彩的数据到表 haoma"""import datetimefrom datetime import timedeltaimport timeimport torndbimport sysimport requestsimport refrom mylogger import get_loggerreload(sys)sys.setdefaultencoding('utf-8')DBHOST = "localhost:3306"SCHEMA = "CAIPIAO"DBUSER = "user"DBPASSWD = "passwd"db = torndb.Connection(host=DBHOST, database=SCHEMA, user=DBUSER, password=DBPASSWD)cplog = get_logger("caipiao")class Data_Sync(object): ssc_re = re.compile(r'<td class=\'gray\'>(.*?)</td>(<td class=\'red big\'>|<td style=\'width:65px\'>)(.*?)</td>.*?<tr>') def __init__(self, start_date="20150101", sleep_secs = 10, run_ever=True): self.start_date = start_date if start_date > "20130101" else "20150101" self.run_ever = run_ever self.base_url = "http://chart.cp.360.cn/kaijiang/kaijiang?lotId=255401&spanType=2&span=" self.latest_date = '' self.latest_period = '' self.need_sleep = False self.sleep_secs = sleep_secs def run(self): while True: if self.need_sleep: time.sleep(self.sleep_secs) self.need_sleep = False else: self.sync_data_from_360() def sync_data_from_360(self): """ 根据数据库中最新一条数据,从 cp.360.com 同步数据至最新数据 """ self.get_latest_haoma_from_mysql() if not self.latest_date: cplog.info("db has no data, so start at {0}".format(self.start_date)) self.latest_date = self.start_date self.latest_period = "000" cplog.info("in db, item_date={0}, period={1}".format(self.latest_date, self.latest_period)) if self.latest_date: cur_date = datetime.datetime.utcnow() + timedelta(hours=8) latest_date = datetime.datetime.strptime(self.latest_date, "%Y%m%d") """ 更新规则: 1、检查是否同一天,如果不是,就下载数据,执行步骤2,增加天数,直到数据库日期与当前日一致; 2、检查数据库中的期数与下载回来的数据的最新期是否一致,一致,检查日期是否一致,是就跳过,否则插入数据; """ dl_times = 0 while (cur_date - latest_date).days > 0: if int(self.latest_period) < 120: dl_date = latest_date.strftime("%Y-%m-%d") dl_url = self.base_url + dl_date + "_" + dl_date data = self.download_with_requests(dl_url) if not data: if dl_times < 3: dl_times += 1 time.sleep(2) continue else: latest_date += timedelta(1) continue dl_times = 0 self.latest_date = latest_date.strftime('%Y%m%d') lottery_numbers = data[int(self.latest_period):] self.insert_into_mysql(self.latest_date, lottery_numbers) latest_date += timedelta(1) else: latest_date += timedelta(1) self.latest_period = "000" """ 更新当日数据 """ dl_date = latest_date.strftime("%Y-%m-%d") dl_url = self.base_url + dl_date + "_" + dl_date data = self.download_with_requests(dl_url) if data: lottery_numbers = data[int(self.latest_period):] self.latest_date = latest_date.strftime('%Y%m%d') self.insert_into_mysql(self.latest_date, lottery_numbers) def insert_into_mysql(self, item_date, datas): insert_datas = [] for data in datas: period = data[0] date_period = item_date + period lottery_number = data[2] if not re.search('\d+',2000 lottery_number): continue a, b, c, d, e = list(lottery_number) insert_data = (item_date, period, date_period, lottery_number, a, b, c, d, e) insert_datas.append(insert_data) if insert_datas: cplog.info("current insert into haoma:{0}, {1}".format(item_date, datas)) sql = "insert into haoma(item_date, period, date_period, lottery_number, a, b, c, d, e) values(%s, %s, %s, %s, %s, %s, %s, %s, %s)" try: db.executemany(sql, insert_datas) except Exception as e: print e sys.exit(1) else: cplog.info("no more new data to sync, wait for {0} seconds".format(self.sleep_secs)) self.need_sleep = True def get_latest_haoma_from_mysql(self): sql = "select * from haoma order by date_period desc limit 1" ret = db.get(sql) if ret: self.latest_date = ret.item_date self.latest_period = ret.period def download_with_requests(self, url): cplog.info("download: {0}".format(url)) data = [] try: r = requests.get(url, timeout=10) if r.status_code == 200: data = self.ssc_re.findall(r.content) else: cplog.info("download err, http status_code:{0}".format(r.status_code)) except Exception as e: cplog.info("call requests raise Exception: {0}".format(e)) finally: return datadef run(): sync = Data_Sync(start_date="20140101", sleep_secs=30) sync.run()if __name__ == "__main__": run()
编橙之家文章,
相关内容
- Python自动识别现接serial端口名称,pythonserial,Python自动识
- python 图灵机器人测试,python机器人测试,python 图灵机器
- Python实现论坛自动签到,python论坛签到,参考了各位大神
- python class实现模拟购物车代码,python购物车,<python
- Python有道字典查询单词,python有道字典,在Ubuntu下,Py
- python随机生成手机号,python手机号,python随机生成手机
- Python配置JDK编程环境,pythonjdk编程,用Python配置JDK
- python提取京东商品url获取产品价格,python产品价格,京东
- python代码查询市级城市天气,python代码查询,python代码查
- 自动更新高清电影文件中文名python代码,高清python,自动
评论关闭