# --- Date generation ---
def get_nday_list(n):
    """Return the previous ``n`` calendar dates as strings, oldest first.

    Args:
        n: Number of days to look back. Today itself is not included.

    Returns:
        A list of ``n`` ``'YYYY-MM-DD'`` strings whose last entry is
        yesterday. The list is empty when ``n`` is less than 1.
    """
    # Read the clock once so every entry is computed from the same base date,
    # even if the call happens to straddle midnight.
    today = datetime.date.today()
    return [str(today - datetime.timedelta(days=i)) for i in range(n, 0, -1)]


a = get_nday_list(30)
print(a)
def create_assist_date(datestart=None, dateend=None):
    """Build a list of consecutive dates (a date helper table).

    Args:
        datestart: First date as a ``'YYYY-MM-DD'`` string. Defaults to
            ``'2016-01-01'``.
        dateend: Last date as a ``'YYYY-MM-DD'`` string. Defaults to today.

    Returns:
        A list of ``'YYYY-MM-DD'`` strings running from ``datestart`` to
        ``dateend`` inclusive. The start date is always included, even when
        it is later than ``dateend``.

    Raises:
        ValueError: If either argument does not match ``'%Y-%m-%d'``.
    """
    date_format = '%Y-%m-%d'
    if datestart is None:
        datestart = '2016-01-01'
    if dateend is None:
        dateend = datetime.datetime.now().strftime(date_format)

    # Convert both bounds to datetime objects so they can be compared/stepped.
    current = datetime.datetime.strptime(datestart, date_format)
    last = datetime.datetime.strptime(dateend, date_format)

    date_list = [current.strftime(date_format)]
    while current < last:
        # Advance one day and store it back as a string.
        current += datetime.timedelta(days=1)
        date_list.append(current.strftime(date_format))
    return date_list


d_list = create_assist_date(datestart='2021-12-27', dateend='2021-12-30')
d_list
# --- Saving data to CSV ---
# Expects `data` (an iterable of dicts with 'title', 'extra', 'time' and 'url'
# keys) and `date` (interpolated into the file name) to already be in scope.
# NOTE(review): this looks like the body of the save_data(items, day) helper
# called by the crawl loop elsewhere in this file - confirm.
csv_path = '2021_data_%s.csv' % date

# Check before opening: mode 'a+' creates the file, so the test must come first.
is_new_file = not os.path.exists(csv_path)

with open(csv_path, 'a+', encoding='utf-8') as f:
    if is_new_file:
        # Header row is written only once, when the file is first created.
        f.write('標題,熱度,時間,url\n')
    for item in data:
        # NOTE: values are joined with plain commas and are not quoted, so a
        # field that itself contains a comma will shift the columns.
        row = '{},{},{},{}'.format(
            item['title'], item['extra'], item['time'], item['url'])
        f.write(row)
        f.write('\n')

# --- Pyecharts with a background color ---
# --- Calling the requests library ---
# `url` must already be defined; 'some_cookie' is a placeholder value.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
    'cookie': 'some_cookie',
}
# An explicit timeout keeps the call from blocking forever on a stalled server.
response = requests.request('GET', url, headers=headers, timeout=10)
# Crawl every result page for every date, starting at `mydate`.
# `mydate`, `create_assist_date` and `save_data` must already be defined.
date_list = create_assist_date(mydate)
url = 'https://test.test'
files = []
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36',
    'cookie': '',
}

for d in date_list:
    payload = {
        'p': '10',  # overwritten with the real page number below
        'day': d,
        'nodeid': '1',
        't': 'itemsbydate',
        'c': 'node',
    }
    for i in range(1, 100):
        payload['p'] = str(i)
        print('獲取頁面 %s 中 %s 的數據' % (d, i))
        # An explicit timeout keeps one stalled request from hanging the crawl.
        response = requests.request(
            'POST', url, headers=headers, data=payload, files=files,
            timeout=10)
        items = response.json()['data']['items']
        if items:
            save_data(items, d)
        else:
            # An empty page means this date has no more results.
            break

# --- Working with various databases from Python ---
# --- Working with Redis ---
# `redis_conn_pool` is defined outside this snippet; presumably it returns a
# Redis client backed by a connection pool - confirm against its definition.
rd = redis_conn_pool()
# Store a single string value under the key 'test_data'.
rd.set('test_data', 'mytest')

# --- Working with MongoDB ---
# `res` and `mongo_collection` are defined outside this snippet.
commentList = res['data']['commentList']
# NOTE(review): assuming mongo_collection is a pymongo Collection, whose
# insert_many() refuses an empty document list - hence the guard.
if commentList:
    mongo_collection.insert_many(commentList)

# --- Working with MySQL ---
# `cursor` and `db` are created outside this snippet.
cursor.execute('SELECT VERSION()')
# Fetch a single row with fetchone().
data = cursor.fetchone()
print('Database version : %s ' % data)
# Close the database connection.
db.close()
# --- Organizing local files ---
import os

# Merge every CSV in the current directory into one file, tagging each row
# with the day taken from its file name. `pd` (pandas) must already be imported.
df_list = []
for i in os.listdir():
    # Match on the extension; a bare substring test would also pick up
    # unrelated files whose names merely contain "csv".
    if i.endswith('.csv'):
        # The day is whatever follows the last underscore, before the first dot.
        day = i.split('.')[0].split('_')[-1]
        df = pd.read_csv(i)
        df['day'] = day
        df_list.append(df)

# pd.concat raises on an empty list, so only merge when something was read.
if df_list:
    df = pd.concat(df_list, axis=0)
    # Written with a .txt name, so a re-run does not read the merged file back.
    df.to_csv('total.txt', index=False)

# --- Multithreading code ---
# --- Asynchronous programming code ---
import aiohttp
import aiofiles


async def get_html(session, url, retries=3):
    """Fetch ``url`` and return the response body decoded as UTF-8.

    Args:
        session: Object whose ``get()`` returns an async context manager
            yielding the response (presumably an ``aiohttp.ClientSession`` -
            confirm against the caller).
        url: Address to request.
        retries: How many more attempts to make after a failed one.

    Returns:
        The page text, or ``None`` when the server answers with a non-2xx
        status or every attempt raised.
    """
    try:
        async with session.get(url=url, timeout=8) as resp:
            if resp.status // 100 != 2:
                print(resp.status)
                print('爬取', url, '出現錯誤')
                return None
            # The decoding is chosen through text(); assigning to an
            # attribute on the response does not affect it.
            return await resp.text(encoding='utf-8')
    except Exception as e:
        print('出現錯誤', e)
        if retries <= 0:
            return None
        # Bounded retry; hand the retried result back to the caller.
        return await get_html(session, url, retries - 1)
|
|