Python爬虫源码,腾讯课堂后台直播课程视频下载源码!

2023-10-23 18:14:00 浏览数 (1)

腾讯课堂是知名大厂的在线学习平台,其中在线直播课是不少机构引流和交易转化的利器工具,平台需要10个点的服务费用,营业额流水的10%,扣点还是比较高的,不过腾讯系平台可以全推,不担心被封禁!

如果想要在鹅系推广课程,腾讯课堂是必不可少的,比如公众号推广或者广点通,qq,微信朋友圈等推广宣传。

这里本渣渣写了一个工具,可批量下载腾讯课堂后台课程视频资源,包括直播课及录播课课程资源,前提你得是腾讯课堂后台管理员,官方未提供批量下载工具,故有了本篇文章分享!

抓包分析

通过浏览器抓包分析可知:

  1. 访问方式

不管是获取视频列表,还是获取视频真实链接,都是以 GET 方式访问,这里直接用 requests 模拟访问即可。

  2. 访问协议头

模拟访问需要携带协议头,这里的协议头里需要添加常规的几个参数,包括cookies、ua以及referer。

  3. 视频链接获取

视频链接的获取需要先获取到视频的id编号,然后再通过 get 方式访问获取到视频链接地址。

参考源码

由于是管理平台,几乎没有反爬限制,只需要抓包获取到模拟提交的参数即可,故这里提供本渣渣写的仅供参考和学习!

注意:源码中的 cookie、appid、bkn 三个参数需要自行补齐为你自己账号的值(在函数定义之前以模块级变量的形式赋值),否则运行时会报变量未定义的错误!

代码语言:python
#腾讯课堂后台获取视频列表链接及下载视频
# -*- coding: UTF-8 -*-
#@author:huguo00289
import requests
import time
import json
import random
import os
import re
from contextlib import closing
import datetime


def get_veideo(page):
    """Fetch one page of the playback video list and download every sub-video.

    The raw JSON response is saved to ``{page}.json``. For each entry of
    ``result['result']['vlist']`` a folder ``{name}-{id}/`` is created, a
    listing is appended to ``{page}_video.txt``, and every item of the entry's
    ``sub_video`` list is resolved with ``get_down_veideo`` and downloaded with
    ``down_vei``. A failure on a single sub-video is logged and skipped so the
    rest of the page is still processed.

    Relies on the module-level variable ``cookie``, which must be defined by
    the user before this function is called.

    Args:
        page: Page number of the video list to fetch; it is sent as the
            ``page`` query parameter and used in the output file names.

    Raises:
        Exception: Anything raised by the list request, JSON decoding or the
            initial file writes propagates to the caller.
    """
    headers = {
        "cookie": cookie,
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36",
        "referer": "https://ke.qq.com/admin/index.html",
    }

    url = "https://ke.qq.com/cgi-proxy/agency/get_video_list?"
    params = {
        "aid": 85809,
        "count": 10,
        "filter": '{"tid_list":[],"type":"VTPlayBack","need_task_info":true}',
        "page": page,
        "bkn": 1334101334,
        "t": 0.5303,
    }
    response = requests.get(url=url, headers=headers, params=params, timeout=6)
    print(response.status_code)
    # Decode the body once and reuse it for printing, saving and parsing.
    result = response.json()
    print(result)
    with open(f'{page}.json', 'w', encoding='utf-8') as f:
        json.dump(result, f)
    print(f">> 保存第{page}页视频列表数据成功!")
    write_txt(f">> 保存第{page}页视频列表数据成功!")

    # Path separators and other characters that are unsafe in file names;
    # each one is replaced with an underscore. Compiled once for the whole page.
    illegal_chars = re.compile(r'[\\/:*?"<>|]')
    listing_path = f'{page}_video.txt'

    vlists = result['result']['vlist']
    for vlist in vlists:
        print(vlist)
        fid = vlist['id']
        feilo_name = illegal_chars.sub("_", vlist['name'])
        file = f'{feilo_name}-{fid}/'
        with open(listing_path, 'a', encoding='utf-8') as f:
            f.write(f'{feilo_name}\n{file}\n')
        os.makedirs(file, exist_ok=True)
        print(feilo_name)
        sub_videos = vlist['sub_video']
        # Sub-videos are numbered downwards starting from the entry's count.
        i = vlist['count']
        for sub_video in sub_videos:
            video_name = illegal_chars.sub("_", sub_video['name'])
            video_id = sub_video['vid']
            print(i, video_id, video_name)
            video_name = f'{i}_{video_name}'
            with open(listing_path, 'a', encoding='utf-8') as f:
                f.write(f'{file}-{feilo_name}-{i}-{video_id}-{video_name}\n')
            try:
                veideourl = get_down_veideo(video_id)
                time.sleep(2)
                down_vei(file, veideourl, video_name)
                time.sleep(4)
            except Exception as e:
                # Best effort: record the failure so the item can be re-fetched
                # later, then continue with the next sub-video.
                print(f"!! 运行获取下载视频错误:{e}")
                write_txt(f"!! 运行获取下载视频错误:{e}")
                write_txt(f"&& 保存数据:{i}-{video_id}-{video_name}")
            i = i - 1
            time.sleep(10)
        # Separator line between entries in the listing file.
        with open(listing_path, 'a', encoding='utf-8') as f:
            f.write("\n-\n")

#写入文本
def write_txt(content):
    """Append one timestamped line to ``log.txt`` in the working directory.

    Args:
        content: Message text; it is written as ``{timestamp}--{content}``
            followed by a newline.
    """
    # Current local date and time, used as the line prefix.
    now = datetime.datetime.now()
    with open('log.txt', 'a', encoding='utf-8') as f:
        f.write(f'{now}--{content}\n')


#下载视频
def down_vei(file,veideourl,name):
    """Stream a video to disk while printing a single-line progress bar.

    The file extension is taken from the last dot-separated piece of the URL
    path (the part before ``?``), and the video is written to
    ``{file}{name}.{extension}``.

    Args:
        file: Destination folder prefix; it is concatenated directly with the
            file name, so it is expected to end with a path separator.
        veideourl: Direct download URL of the video.
        name: Base file name without extension.

    Raises:
        requests.HTTPError: If the server answers with an error status, so an
            error page is never saved and reported as a successful download.
    """
    ua_list = [
        'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/535.1 (KHTML, like Gecko) Chrome/14.0.835.163 Safari/535.1',
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/73.0.3683.103 Safari/537.36Chrome 17.0',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_0) AppleWebKit/535.11 (KHTML, like Gecko) Chrome/17.0.963.56 Safari/535.11',
        'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:6.0) Gecko/20100101 Firefox/6.0Firefox 4.0.1',
        'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.6; rv:2.0.1) Gecko/20100101 Firefox/4.0.1',
        'Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10_6_8; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-us) AppleWebKit/534.50 (KHTML, like Gecko) Version/5.1 Safari/534.50',
        'Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11',
    ]
    headers = {
        "user-agent": random.choice(ua_list),
    }
    hz = veideourl.split('?')[0].split('.')[-1]
    veideo_name = f'{name}.{hz}'
    with closing(requests.get(veideourl, timeout=10, headers=headers, stream=True)) as response:
        response.raise_for_status()
        chunk_size = 1024  # bytes requested per iteration
        # Total size in bytes; 0 means the server did not report a length.
        content_size = int(response.headers.get('content-length', 0))
        data_count = 0  # bytes received so far
        # A separate name for the handle keeps the `file` parameter intact.
        with open(f'{file}{veideo_name}', "wb") as out_file:
            for data in response.iter_content(chunk_size=chunk_size):
                out_file.write(data)
                previous_count = data_count
                data_count = data_count + len(data)
                if content_size <= 0:
                    # Without a known total there is nothing to scale the bar by.
                    continue
                # Bar width reflects the bytes received before this chunk,
                # the percentage reflects the bytes received including it.
                done_block = int((previous_count / content_size) * 50)
                now_jd = (data_count / content_size) * 100
                # "\r" returns to the start of the line so the bar redraws in place.
                print("\r %s [%s%s] %d%% " % (
                    veideo_name + "---->", done_block * '█', ' ' * (50 - 1 - done_block), now_jd), end=" ")

    write_txt(f">> 下载视频成功:{veideo_name}!")


def get_down_veideo(file_id):
    """Look up the download URL for a video id via the admin backend.

    Reads the module-level ``cookie``, ``appid`` and ``bkn`` variables, which
    the user must define. The status code, decoded response and resulting URL
    are printed, and the URL is also written to the log.

    Args:
        file_id: Video id, sent as the ``file_id`` query parameter.

    Returns:
        The value of ``result['url']`` in the JSON response.
    """
    request_headers = dict([
        ("cookie", cookie),
        ("user-agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/109.0.0.0 Safari/537.36"),
        ("referer", "https://ke.qq.com/admin/index.html"),
    ])
    endpoint = "https://ke.qq.com/cgi-proxy/txcloud/GetVideoDownUrl?"
    query = dict([
        ("appid", appid),
        ("file_id", file_id),
        ("bkn", bkn),
        ("t", 0.6232),
    ])

    reply = requests.get(url=endpoint, headers=request_headers, params=query, timeout=6)
    print(reply.status_code)
    payload = reply.json()
    print(payload)

    download_url = payload['result']['url']
    print(download_url)
    write_txt(f">> 获取视频链接成功:{download_url}!")
    return download_url



if __name__=="__main__":
    # Ask which list page to fetch; the raw input string is passed on unchanged.
    page=input(">> 请输入需要获取的视频数据页面:")
    start_message = f"正在获取第{page}页视频列表数据..."
    print(start_message)
    write_txt(start_message)
    try:
        get_veideo(page)
    except Exception as e:
        # Top-level boundary: report the failure on screen and in the log.
        error_message = f"!! 运行错误:{e}"
        print(error_message)
        write_txt(error_message)

参考源码示例了简单的运行日志文件 log 以及视频下载进度百分比的显示,仅供参考!

如有需求,可参考自行打包使用,当然写得比较渣,可能需要更多优化,就看你自己了!

经测试上T数据下载,开了四个程序的话,会出现视频下载数据中断的情况,因此需要注意数据的补采集,尤其是你想要获取完整数据的情况下!

渣渣能力有限,暂未写入多线程及开启多线程下载视频,可自行测试!

·················END·················

0 人点赞