# BeautifulSoup练习
# 爬取「打榜动态 - 音悦Tai - 让娱乐更美好」页面的各地区排行榜
import requests
from bs4 import BeautifulSoup
def get_html(url):
    """Download *url* and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body, decoded with the encoding that requests
        detects from the content (``apparent_encoding``).

    Raises:
        requests.HTTPError: If the server replies with a 4xx/5xx status.
    """
    r = requests.get(url, timeout=30)
    # raise_for_status is a method and has to be called; a bare attribute
    # reference is a no-op that would let HTTP error pages through.
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    return r.text
# Area code at the end of the chart URL -> heading written above that chart.
_AREA_LABELS = {
    'ALL': '总榜',
    'ML': '内地榜',
    'US': '欧美榜',
    'JP': '日本榜',
    'KR': '韩国榜',
    'HT': '香港榜',
}


def _area_label(url):
    """Return the heading for the area code *url* ends with, or None."""
    # Match on the full code: comparing only the last two characters can
    # never equal the three-letter 'ALL' code.
    for code, label in _AREA_LABELS.items():
        if url.endswith(code):
            return label
    return None


def _parse_song(li):
    """Collect rank, title, artist, score and time text from one chart <li>."""
    # The score sits in an <h3> whose class is either asc_score or desc_score.
    score = (li.find('h3', attrs={'class': 'asc_score'})
             or li.find('h3', attrs={'class': 'desc_score'}))
    return {
        '分数': score.text,
        '排名': li.find('div', class_='top_num').text,
        '歌名': li.find('a', class_='mvname').text,
        '歌手': li.find('a', class_='special').text,
        '时间': li.find('p', class_='c9').text,
    }


def get_text(url, out_path='abc.txt'):
    """Scrape one chart page and append its entries to a text file.

    If *url* ends with a known area code, the matching heading line is
    written first. Then one line is appended for every
    ``<li name="dmvLi">`` element found in the page.

    Args:
        url: Chart page address; its trailing area code selects the heading.
        out_path: File to append to. Defaults to ``'abc.txt'``.

    Raises:
        AttributeError: If an entry lacks one of the expected elements.
    """
    label = _area_label(url)
    # Open once for the whole page, and as UTF-8 so that titles and artist
    # names outside the platform's default code page can still be written.
    with open(out_path, 'a', encoding='utf-8') as f:
        if label is not None:
            f.write(label + '\n')
        soup = BeautifulSoup(get_html(url), 'lxml')
        for li in soup.find_all('li', attrs={'name': 'dmvLi'}):
            song = _parse_song(li)
            f.write('排名:{} \t 歌名:{} \t 歌手:{} \t 分数:{} \t 时间:{} \n'.format(song['排名'],song['歌名'],song['歌手'],song['分数'],song['时间']))
def main():
    """Scrape every regional chart in turn by calling get_text on its URL."""
    base = 'http://vchart.yinyuetai.com/vchart/trends?area='
    # One request per area code, in this fixed order.
    for code in ('ALL', 'ML', 'HT', 'US', 'JP', 'KR'):
        get_text(base + code)
# Run the scraper only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
# 编辑于 2018-04-23 18:07