Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[漏洞] {获取弹幕时,部分视频获取失败} #799

Open
Munbo123 opened this issue Aug 22, 2024 · 0 comments
Open

[漏洞] {获取弹幕时,部分视频获取失败} #799

Munbo123 opened this issue Aug 22, 2024 · 0 comments
Labels
bug 漏洞

Comments

@Munbo123
Copy link

Python 版本: 3.12.5

模块版本: 16.2.0

运行环境: Windows

模块路径: bilibili_api.video.py

解释器: cpython
报错信息:

PS C:\Users\19722\Desktop\Coding>  c:; cd 'c:\Users\19722\Desktop\Coding'; & 'e:\Programming\Python\python312\python.exe' 'c:\Users\19722\.vscode\extensions\ms-python.debugpy-2024.10.0-win32-x64\bundled\libs\debugpy\adapter/../..\debugpy\launcher' '11421' '--' 'C:\Users\19722\Desktop\Coding\Languages\Python\test1.py'
成功获取
Traceback (most recent call last):
  File "C:\Users\19722\Desktop\Coding\Languages\Python\test1.py", line 21, in <module>
    sync(ass.make_ass_file_danmakus_protobuf(
  File "e:\Programming\Python\python312\Lib\site-packages\bilibili_api\utils\sync.py", line 33, in sync  
    return loop.run_until_complete(coroutine)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "e:\Programming\Python\python312\Lib\asyncio\base_events.py", line 687, in run_until_complete     
    return future.result()
           ^^^^^^^^^^^^^^^
  File "e:\Programming\Python\python312\Lib\site-packages\bilibili_api\ass.py", line 211, in make_ass_file_danmakus_protobuf
    danmakus = await v.get_danmakus(cid=cid, date=date)  # type: ignore
               ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "e:\Programming\Python\python312\Lib\site-packages\bilibili_api\video.py", line 883, in get_danmakus
    to_seg = view["dm_seg"]['total'] - 1
             ~~~~~~~~~~~~~~^^^^^^^^^
KeyError: 'total'

报错代码:

from bilibili_api import video,sync,ass


# BV ids for which the reporter's danmaku export completed successfully.
success_id = ['BV17f421B7gD', 'BV1ui421a7RX', 'BV1mS421X7Ct', 'BV1eE4m1R7XK', 'BV1XZ421T73j', 'BV1Xr421M7C2', 'BV1Kz421i7MU', 'BV1J1421t7FS', 'BV1bYp1etEVq', 'BV1tH4y1c7xx', 'BV1vKWFeGEL6', 'BV1W4WneJE6V', 'BV1zS411w7Wi', 'BV1xhpreVEbX', 'BV1qr421K7QH', 'BV1Wr421K7eS', 'BV1Z8WNecEab', 'BV1QW42197LE', 'BV1Zi421h7SU', 'BV1rZ421N77v', 'BV171421t7V9', 'BV13S42197nK', 'BV1VW42197iR', 'BV1Gb42177aL', 'BV1kzpXeiENi', 'BV1GE4m1d7aW', 'BV1FgWMekE1U', 'BV12S421X7Ev', 'BV1w1421t7Xw', 'BV1ri42167D4', 'BV1P4421f7cV', 'BV1XnW4e2EJb', 'BV1DLWpeeEyP', 'BV1BS421X7MK', 'BV1Br421K7A6', 'BV1Qz421i7JU', 'BV1SW42197b9', 'BV1L1421878K', 'BV1ui421a7ge', 'BV1oZ421T7TR', 'BV1JVebe4EFp', 'BV1kZ421N7TT', 'BV1bT421672e', 'BV1kW42197iE', 'BV1KE421w76n', 'BV18S421X795', 'BV18m42137pf', 'BV1mS421X7d9', 'BV1Qf421q7Pr', 'BV1Jf421B7x4', 'BV1xQeGexEyC', 'BV11w4m1r7Qp', 'BV1sM4m1y7FB', 'BV1LT421r7xQ', 'BV1AZ421K7H7']
# BV ids for which the export failed (the traceback above shows KeyError: 'total' for one of them).
fail_id = ['BV1vx4y147MZ', 'BV1fapyeuETc', 'BV13WpCedEjB', 'BV1ei421a7px', 'BV1XW42197H8', 'BV1Ci421h7K7', 'BV19W421X7Sz', 'BV1Qy411i7v6', 'BV1iH4y1c72z', 'BV11r421K7A5', 'BV1UU411U7q8', 'BV1iE4m1R7Fn', 'BV1QbWheoEgp', 'BV1ky411e71v', 'BV1cm42137hU', 'BV1MZ421K7n1', 'BV15H4y1c7Z1', 'BV1xb421J7Zv', 'BV1Mr421M7yB', 'BV1nU411S7bB', 'BV1wZ421K7cW', 'BV1dw4m1r7Tq', 'BV12S411w7eF', 'BV1Ex4y1s7tM', 'BV1F4421S7SE', 'BV1SVp2eFE5u', 'BV1RM4m127Sa', 'BV15T421r7HH', 'BV1XZ421N7aa', 'BV1dx4y1W7Cx', 'BV1y4421U7S9', 'BV1rb421J7nq', 'BV17r421K7Pb', 'BV12Z421T7XZ', 'BV1E4421S7Zc', 'BV1gw4m1k7LQ', 'BV1LM4m117aT', 'BV1XU411U7yN', 'BV1W142187C8', 'BV1qW42197Gq', 'BV1Xw4m1r7rq', 'BV1WT421r76h', 'BV1fw4m1k7R5', 'BV1Vb421771R', 'BV12i421h72j']




# Export danmakus for one BV id from each list: every id in fail_id fails,
# while every id in success_id succeeds.
success = video.Video(success_id[0])
fail = video.Video(fail_id[0])

# Known-good video: page 0 (pages are 0-based), written to success.ass.
success_export = ass.make_ass_file_danmakus_protobuf(
    obj=success,
    page=0,
    out="success.ass",
)
sync(success_export)
print('成功获取')

# Failing video: same arguments apart from the source object and output path.
fail_export = ass.make_ass_file_danmakus_protobuf(
    obj=fail,
    page=0,
    out="fail.ass",
)
sync(fail_export)


我怀疑有问题的位置:

async def get_danmakus(
        self,
        page_index: int = 0,
        date: Union[datetime.date, None] = None,
        cid: Union[int, None] = None,
        from_seg: Union[int, None] = None,
        to_seg: Union[int, None] = None,
    ) -> List[Danmaku]:
        """
        Fetch the danmakus of one page of the video.

        Args:
            page_index (int, optional): Page index, starting from 0. Only used
                to look up the cid when `cid` is None. Defaults to 0.

            date (datetime.date | None, optional): When given, history danmakus
                of that day (year-month-day precision) are fetched instead of
                the current ones. Defaults to None.

            cid (int | None, optional): ID of the page. Defaults to None.

            from_seg (int | None, optional): First segment to fetch (0-based;
                None means the first segment; one segment covers 6 minutes).
                Defaults to None.

            to_seg (int | None, optional): Last segment to fetch, inclusive
                (0-based; None means the last segment; one segment covers
                6 minutes). Defaults to None.

            Notes:
            - 1. The segment count can be queried with
              `get_danmaku_view()["dm_seg"]["total"]`. When that entry is
              missing, a single segment is assumed.
            - 2. `from_seg` and `to_seg` only take effect when `date` is None;
              with a date both are forced to 0.
            - 3. Example: the first `12` minutes are `from_seg=0, to_seg=1`.

        Returns:
            List[Danmaku]: The parsed danmakus, in response order.

        Raises:
            ArgsException: Neither `page_index` nor `cid` was provided.
            NetworkException: The HTTP request raised an exception.
            ResponseException: The response content type is not
                "application/octet-stream", or an unexpected top-level field
                was found while parsing.
            DanmakuClosedException: The response body is exactly b"\\x10\\x01".
        """
        if date is not None:
            # presumably raises when the credential has no SESSDATA, which the
            # history endpoint needs - verify against Credential.
            self.credential.raise_for_no_sessdata()

        if cid is None:
            if page_index is None:
                raise ArgsException("page_index 和 cid 至少提供一个。")

            cid = await self.__get_cid_by_index(page_index)

        session = get_session()
        aid = self.get_aid()
        params: dict[str, Any] = {"oid": cid, "type": 1, "pid": aid}
        if date is not None:
            # History danmakus: one request for the given day, no segments.
            api = API["danmaku"]["get_history_danmaku"]
            params["date"] = date.strftime("%Y-%m-%d")
            from_seg = to_seg = 0
        else:
            api = API["danmaku"]["get_danmaku"]
            if from_seg is None:
                from_seg = 0
            if to_seg is None:
                view = await self.get_danmaku_view(cid=cid)
                # Some videos return a view whose "dm_seg" is empty (no
                # "total"); they still have danmakus, so fall back to a single
                # segment instead of raising KeyError. A missing "dm_seg" entry
                # is treated the same way.
                dm_seg = view.get("dm_seg") or {}
                to_seg = dm_seg.get("total", 1) - 1
        danmakus = []

        for seg in range(from_seg, to_seg + 1):
            if date is None:
                # Only needed for current danmakus; the request parameter is
                # seg + 1 while segments are numbered from 0 here.
                params["segment_index"] = seg + 1

            config = {
                "url": api["url"],
                "params": params,
                "headers": {
                    "Referer": "https://www.bilibili.com",
                    "User-Agent": "Mozilla/5.0",
                },
                "cookies": self.credential.get_cookies(),
            }

            try:
                req = await session.get(**config)
            except Exception as e:
                # Chain the cause so the original traceback is not lost.
                raise NetworkException(-1, str(e)) from e

            if "content-type" not in req.headers.keys():
                # No content type at all: stop fetching further segments.
                break
            else:
                content_type = req.headers["content-type"]
                if content_type != "application/octet-stream":
                    raise ResponseException("返回数据类型错误:")

            # Parse the binary stream.
            data = req.read()
            if data == b"\x10\x01":
                # Danmakus are closed for this video.
                raise DanmakuClosedException()

            reader = BytesReader(data)
            while not reader.has_end():
                # The tag is shifted right by 3 to obtain the field number.
                type_ = reader.varint() >> 3
                if type_ != 1:
                    # NOTE(review): after a type 4 or type 5 field is skipped,
                    # control falls through and the next length-delimited value
                    # is parsed as a danmaku; type 13 restarts the loop without
                    # consuming a value. Kept exactly as in the original -
                    # confirm this matches the wire format.
                    if type_ == 4:
                        # Unknown payload (the original author reports that
                        # protoc decodes it to garbage); read and discard.
                        reader.bytes_string()
                    elif type_ == 5:
                        # Described by the original author as the VIP-only
                        # color; read and discard.
                        reader.varint()
                        reader.varint()
                        reader.varint()
                        reader.bytes_string()
                    elif type_ == 13:
                        # Purpose unknown.
                        continue
                    else:
                        raise ResponseException("解析响应数据错误")

                dm = Danmaku("")
                dm_pack_data = reader.bytes_string()
                dm_reader = BytesReader(dm_pack_data)

                while not dm_reader.has_end():
                    data_type = dm_reader.varint() >> 3

                    if data_type == 1:
                        dm.id_ = dm_reader.varint()
                    elif data_type == 2:
                        # The raw value is divided by 1000 before being stored.
                        dm.dm_time = dm_reader.varint() / 1000
                    elif data_type == 3:
                        dm.mode = dm_reader.varint()
                    elif data_type == 4:
                        dm.font_size = dm_reader.varint()
                    elif data_type == 5:
                        color = dm_reader.varint()
                        # 60001 is mapped to "special"; any other value is
                        # stored as a hex string without the "0x" prefix.
                        if color != 60001:
                            color = hex(color)[2:]
                        else:
                            color = "special"
                        dm.color = color
                    elif data_type == 6:
                        dm.crc32_id = dm_reader.string()
                    elif data_type == 7:
                        dm.text = dm_reader.string()
                    elif data_type == 8:
                        dm.send_time = dm_reader.varint()
                    elif data_type == 9:
                        dm.weight = dm_reader.varint()
                    elif data_type == 10:
                        dm.action = str(dm_reader.string())
                    elif data_type == 11:
                        dm.pool = dm_reader.varint()
                    elif data_type == 12:
                        dm.id_str = dm_reader.string()
                    elif data_type == 13:
                        dm.attr = dm_reader.varint()
                    elif data_type == 14:
                        dm.uid = dm_reader.varint()
                    # The remaining known fields are consumed and ignored.
                    elif data_type == 15:
                        dm_reader.varint()
                    elif data_type == 20:
                        dm_reader.bytes_string()
                    elif data_type == 21:
                        dm_reader.bytes_string()
                    elif data_type == 22:
                        dm_reader.bytes_string()
                    elif data_type == 25:
                        dm_reader.varint()
                    elif data_type == 26:
                        dm_reader.varint()
                    else:
                        # Unknown field: stop parsing this danmaku and keep
                        # whatever has been read so far.
                        break
                danmakus.append(dm)
        return danmakus

我尝试爬取 100 个视频的弹幕文件时,发现有将近一半的视频(45 个)无法正常获取。查看代码后发现,问题出在更新 to_seg 的地方:这些视频的 view["dm_seg"] 是一个空字典,不包含 'total' 键,而原代码没有做任何判断就直接读取 view["dm_seg"]['total'] 进行计算,于是抛出 KeyError。去 B 站打开这些视频,发现其实还是有弹幕的,估计是因为时长太短或者弹幕太少,这些数据的 view["dm_seg"] 才会为空。也不是什么大问题,望修改,谢谢。

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bug 漏洞
Projects
None yet
Development

No branches or pull requests

1 participant