BeautifulSoup练习

爬取「打榜动态 - 音悦Tai - 让娱乐更美好」页面中各地区的排行榜

import requests
from bs4 import BeautifulSoup

def get_html(url):
    """Download ``url`` and return the response body as text.

    Args:
        url: Address of the page to fetch.

    Returns:
        The response body as ``str``, decoded with the encoding that
        requests guesses from the content (``apparent_encoding``).

    Raises:
        requests.HTTPError: If the server replies with a 4xx/5xx status.
        requests.RequestException: On connection errors or when the
            request times out (``timeout=30``).
    """
    r = requests.get(url, timeout=30)
    # raise_for_status is a method: it must be *called*, otherwise error
    # responses pass through silently and get parsed as if they were
    # chart pages.
    r.raise_for_status()
    r.encoding = r.apparent_encoding
    return r.text

def get_text(url):
    """Scrape one chart page and append its entries to ``abc.txt``.

    When ``url`` ends with a known area code, a heading line for that
    area is written first. Then one line is written for every
    ``<li name="dmvLi">`` element on the page, containing its rank,
    song name, artist, score and time.

    The page is fetched and fully parsed before the file is touched, so
    a failed request or an unexpected page layout does not leave a
    heading or a partial chart behind.

    Args:
        url: Chart page URL whose suffix is the area code, e.g.
            ``...trends?area=ML``.

    Raises:
        AttributeError: If an entry lacks one of the expected elements
            (``find`` returns ``None`` for it).
        Exception: Whatever ``get_html`` raises is propagated unchanged.
    """
    # Area code at the end of the URL -> heading written above the chart.
    area_titles = {
        'ALL': '总榜',
        'ML': '内地榜',
        'US': '欧美榜',
        'JP': '日本榜',
        'KR': '韩国榜',
        'HT': '香港榜',
    }

    lines = []
    # endswith() works for codes of any length. The previous check compared
    # the last two characters of the URL against 'ALL', which can never
    # match, so the heading of the overall chart was never written.
    for code, title in area_titles.items():
        if url.endswith(code):
            lines.append(title + '\n')
            break

    soup = BeautifulSoup(get_html(url), 'lxml')
    row_format = '排名:{} \t 歌名:{} \t 歌手:{} \t 分数:{} \t 时间:{} \n'
    for li in soup.find_all('li', attrs={'name': 'dmvLi'}):
        # The score sits in an <h3> classed either asc_score or desc_score.
        score_tag = (li.find('h3', attrs={'class': 'asc_score'})
                     or li.find('h3', attrs={'class': 'desc_score'}))
        lines.append(row_format.format(
            li.find('div', class_='top_num').text,
            li.find('a', class_='mvname').text,
            li.find('a', class_='special').text,
            score_tag.text,
            li.find('p', class_='c9').text,
        ))

    if not lines:
        # Nothing to record: leave the output file untouched.
        return

    # Explicit UTF-8 so CJK titles do not depend on the platform's default
    # encoding; a single open/write replaces reopening the file per row.
    with open('abc.txt', 'a', encoding='utf-8') as f:
        f.writelines(lines)



def main():
    """Scrape the chart of every area, in a fixed order, via ``get_text``."""
    base_url = 'http://vchart.yinyuetai.com/vchart/trends?area='
    # The area code is appended to the base URL as the query value.
    for area_code in ('ALL', 'ML', 'HT', 'US', 'JP', 'KR'):
        get_text(base_url + area_code)

# Run the scraper only when executed as a script, not when imported.
if '__main__' == __name__:
    main()

编辑于 2018-04-23 18:07