Python学习之路-网易云音乐评论爬取

2021年6月24日 0條評論 3.92k次閱讀 22人按讚 Maaii

爬取指定歌单内所有歌曲的热评。代码写得比较粗糙, 暂时不打算再改了。

import csv
import json
import time
import urllib.request

code = 'utf-8'


def http_r(url, c):
    """Request ``url`` with a browser-like User-Agent and return the decoded body.

    Args:
        url: Address to request.
        c: Name of the character encoding used to decode the response bytes.

    Returns:
        The response body decoded to ``str`` using encoding ``c``.

    Raises:
        Whatever ``urllib.request.urlopen`` raises on a failed request, and
        ``UnicodeDecodeError`` if the body cannot be decoded with ``c``.
    """
    # Set the User-Agent header.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                      'Chrome/90.0.4430.212 Safari/537.36'
    }
    request = urllib.request.Request(url=url, headers=headers)
    # Use the response as a context manager so the underlying connection is
    # closed deterministically instead of leaking until garbage collection.
    with urllib.request.urlopen(request) as response:
        return response.read().decode(c)


def getUrls(playListID):
    """Return the song IDs contained in the playlist ``playListID``.

    The playlist is fetched from the third-party API; each entry's ``url``
    field has the API's song-URL prefix removed, leaving only the song ID.
    """
    song_url_prefix = 'https://api.fczbl.vip/163/?type=url&id='
    print('开始获取歌曲列表')
    playlist_body = http_r("https://api.fczbl.vip/163/?type=playlist&id=" + playListID, code)
    song_ids = [
        entry['url'].replace(song_url_prefix, '')
        for entry in json.loads(playlist_body)
    ]
    print('已获取 ', len(song_ids), ' 首歌曲', sep='')
    return song_ids


def _fetch_failed(body):
    """Return True when ``body`` is not a JSON object or carries a negative "code"."""
    if body[0:1] != '{':
        return True
    try:
        status = json.loads(body).get("code", 0)
    except ValueError:
        # Starts with '{' but is not valid JSON (e.g. a truncated response).
        return True
    # Responses without a "code" field are treated as successful.
    return isinstance(status, (int, float)) and status < 0


def getContent(url, c, max_retries=5):
    """Fetch ``url`` and retry while the response looks like a failure.

    A response counts as failed when it does not start with ``{`` or when it
    is a JSON object whose ``"code"`` field is negative. The original
    condition joined these two checks with ``and``, so an error object was
    never retried and a non-JSON body raised inside ``json.loads``.

    Args:
        url: Address to request.
        c: Character encoding passed through to ``http_r``.
        max_retries: Maximum number of additional attempts after the first
            request. Bounded so a permanently failing endpoint cannot loop
            forever.

    Returns:
        The last response body as a string. It may still be a failed
        response if every attempt failed; callers must be prepared to
        handle that when parsing it.
    """
    back = http_r(url, c)
    attempts = 0
    while attempts < max_retries and _fetch_failed(back):
        print('获取失败, 重试中')
        time.sleep(1)  # brief pause before asking again
        back = http_r(url, c)
        attempts += 1
    return back


def getComment(urls):
    """Fetch details and hot comments for every song ID and write them to the CSV.

    For each ID the song details are requested from the third-party API and
    the comment list from the NetEase Cloud Music comment API. The hot
    comments are collected into a ``{nickname: content}`` dict and passed to
    ``writeOut`` together with the song title and artist. A song whose
    details or comments cannot be parsed is reported and skipped, so a single
    bad response no longer aborts the whole run.

    Args:
        urls: Sequence of song ID strings, as returned by ``getUrls``.
    """
    n = str(len(urls))
    url_1 = 'https://api.fczbl.vip/163/?type=single&id='  # third-party API
    url_2 = 'https://music.163.com/api/v1/resource/comments/R_SO_4_'  # NetEase Cloud Music comment API
    print("开始获取歌曲详情")
    newCsv()
    for i, mID in enumerate(urls, start=1):
        prefix = mID + ' (' + str(i) + '/' + n + '): '

        print(prefix + '正在获取歌曲详情')
        raw_info = getContent(url_1 + mID, code)
        try:
            info_m = json.loads(raw_info)
            title, author = info_m["name"], info_m["artist"]
        except (ValueError, KeyError, TypeError):
            # Malformed or incomplete song details: skip this song, matching
            # the handling used for unusable comment responses below.
            print(prefix + '获取失败已跳过')
            continue

        print(prefix + '正在获取评论列表')
        raw_comments = getContent(url_2 + mID, code)
        try:
            hotCommentsList = json.loads(raw_comments)["hotComments"]  # hot comment list
        except (ValueError, KeyError, TypeError):
            print(prefix + '获取失败已跳过')
            continue

        print(prefix + '已获取到 ' + str(len(hotCommentsList)) + ' 条热评')
        print(prefix + '正在解析热评列表')
        hotComments = {}
        for val in hotCommentsList:
            hotComments[val["user"]["nickname"]] = val["content"]
        info = {"title": title, "author": author, "comment": hotComments}
        print(prefix + str(info))
        writeOut(info)
        time.sleep(2)  # pause between songs before issuing the next requests


def writeOut(info):
    """Append one CSV row per hot comment of a song to ``infos.csv``.

    Args:
        info: Dict with the keys ``"title"``, ``"author"`` and ``"comment"``,
            where ``"comment"`` maps a commenter's nickname to the comment
            text.

    Each row is ``[title, author, nickname, comment]``.
    """
    name = info['title']
    author = info['author']
    commentList = info['comment']

    # Append to infos.csv in the current working directory. newline='' is
    # required by the csv module: without it the writer's '\r\n' terminator
    # becomes '\r\r\n' on Windows and newlines inside comments are altered.
    with open('infos.csv', 'a', encoding='utf-8', newline='') as file_obj:
        f_csv = csv.writer(file_obj)
        f_csv.writerows(
            [name, author, nickname, content]
            for nickname, content in commentList.items()
        )


def newCsv():
    """Create (or truncate) ``infos.csv`` and write the header row."""
    # newline='' is required by the csv module so the writer controls line
    # endings itself; otherwise Windows output gains blank lines between rows.
    with open('infos.csv', 'w', encoding='utf-8', newline='') as f:
        f_csv = csv.writer(f)
        f_csv.writerow(['歌曲名', '歌曲作者', '评论者', '评论'])


# Script entry: collect the song IDs of the playlist, then fetch and store
# the hot comments of each song.
musicIDList = getUrls("926056136")  # playlist ID
getComment(musicIDList)

MaaiiのBlog

Python学习之路-网易云音乐评论爬取

本作品採用知識共享署名-相同方式共享 4.0 國際許可協議進行許可

發佈留言取消回覆

MaaiiのBlog

本作品採用 知識共享署名-相同方式共享 4.0 國際許可協議 進行許可

發佈留言 取消回覆

本作品採用知識共享署名-相同方式共享 4.0 國際許可協議進行許可

發佈留言取消回覆