使用python爬虫下载网易云歌单

爬虫练手之使用python爬虫实现网易云歌单批量、自动下载

from multiprocessing import Pool, cpu_count
from bs4 import BeautifulSoup
import requests
import re

def get_music_sheet():
    """Yield one (song_page_soup, anchor_tag, id_match) triple per playlist track.

    Reads the module-level ``main`` (the playlist's ``<ul class="f-hide">``
    element) and ``headers``. For each ``<a>`` inside it, fetches the song's
    own page and yields its parsed soup together with the anchor and the
    regex match holding the numeric song id.

    NOTE(review): assumes every href contains digits — if not, the yielded
    match object would be None and the consumer would fail on ``.group()``.
    """
    for music in main.find_all('a'):
        # Numeric song id embedded in the href, e.g. "/song?id=123456".
        # (Renamed from `id` to avoid shadowing the builtin.)
        song_id = re.search(r'[0-9]+', music['href'])
        suburl = 'https://music.163.com' + music['href']
        html = requests.get(suburl, headers=headers).text
        soup = BeautifulSoup(html, 'lxml')
        yield soup, music, song_id

def get_music_info(soup, music, id):
    """Download one song to ``./music/<artist> - <title>.mp3``.

    Parameters:
        soup:  BeautifulSoup of the song's own page (artist is the second
               ``a.s-fc7`` link on it).
        music: the playlist ``<a>`` tag; its text is the song title.
        id:    regex match object holding the numeric song id.

    Uses the module-level ``err_strList``/``re_strList`` pair to map
    characters that are illegal in Windows filenames to safe look-alikes.
    """
    import os

    name = soup.find_all('a', {'class': 's-fc7'})[1].text + ' - ' + music.text
    # The "outer" media URL serves the lowest-quality mp3 without auth.
    url = 'http://music.163.com/song/media/outer/url?id=' + id.group() + '.mp3'
    data = requests.get(url)

    # Replace each filename-illegal character with its full-width substitute.
    # (zip pairs the parallel lists directly instead of re-looking up the
    # index for every character of the name.)
    for bad, good in zip(err_strList, re_strList):
        if bad in name:
            name = name.replace(bad, good)

    # Fix: create the target directory up front so open() cannot fail with
    # FileNotFoundError on a fresh checkout.
    os.makedirs('./music', exist_ok=True)
    with open('./music/' + name + '.mp3', 'wb') as f:
        f.write(data.content)

if __name__ == "__main__":
    # Playlist URL to scrape — replace this placeholder, e.g.
    # https://music.163.com/playlist?id=2246544491
    base_url = '歌单URL,如:https://music.163.com/playlist?id=2246544491'

    # Browser-like headers; NetEase rejects requests without Referer/Host.
    headers = {
        'Referer': 'http://music.163.com/',
        'Host': 'music.163.com',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
    }

    count = 1
    # Characters Windows forbids in filenames, and their full-width stand-ins
    # used when building the output filename in get_music_info().
    err_strList = ['/', '\\', '<', '>', '|', ':', '?', '*', '"']
    # Fix: the original list contained a bare '\' which escapes the closing
    # quote and is a SyntaxError; restored as full-width substitutes.
    re_strList = ['／', '＼', '〈', '〉', '｜', '：', '？', '﹡', '“']

    se = requests.session()
    se = BeautifulSoup(se.get(base_url, headers=headers).content, 'lxml')
    # <ul class="f-hide"> holds the playlist's track anchors;
    # get_music_sheet() reads this module-level name.
    main = se.find('ul', {'class': 'f-hide'})
    p = Pool(cpu_count())

    for data in get_music_sheet():
        p.apply_async(get_music_info, args=(data[0], data[1], data[2]))
        # NOTE(review): printed when the task is *queued*, not when the
        # download actually finishes — the pool runs asynchronously.
        print('已下载' + str(count) + '首')
        count += 1

    p.close()
    p.join()
    print('完成!')

1、base_url为你需要爬取的歌单的URL

2、该程序只能下载最低音质

3、无法下载下架或有版权的歌曲

4、页面变更可能导致该程序失效

Github地址

  • 用支付宝打我
  • 用微信打我

Long may the sunshine

评论已关闭。

召唤蕾姆