当前位置:首页 >> 脚本专栏

Python解析m3u8拼接下载mp4视频文件的示例代码

一、关于m3u8:

m3u8是苹果公司推出一种视频播放标准,是m3u的一种,不过编码方式是utf-8,是一种文件检索格式,将视频切割成一小段一小段的ts格式的视频文件,然后存在服务器中(现在为了减少I/o访问次数,一般存在服务器的内存中),通过m3u8解析出来路径,然后去请求。

示例:

#EXTM3U
#EXT-X-TARGETDURATION:10
#EXTINF:9,
http://data.video.iqiyi.com/videos/vts/20210301/69/b8/73ad4ef04fde4586ef2799ecd67241ce.ts"text-align: center">Python解析m3u8拼接下载mp4视频文件的示例代码

我们想要的mp4文件就是一个个ts文件按照顺序拼接成的,废话不多说直接上代码。

# -*- coding:utf-8 -*- 
"""
Author:SPIDERMAN
Time: 2021/3/1 
Software: PyCharm
"""
import logging
import os
from glob import iglob
import requests
import m3u8
from urllib.parse import urljoin
from concurrent.futures import ThreadPoolExecutor
from natsort import natsorted

class M3u8Download:
 def __init__(self,m3u8_url):
  self.m3u8_url = m3u8_url
  self.headers = {
   'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.99 Safari/537.36'
  }
  self.threadpool = ThreadPoolExecutor(max_workers=10)
  self.file_name = 'weibo.mp4'
  logging.basicConfig(format='[%(asctime)s][*%(levelname)s]:%(message)s',
       level=logging.INFO)
 def get_ts_url(self,m3u8_url):
  """
  解析ts_url
  :param m3u8_url:
  :return:
  """
  m3u8_obj = m3u8.load(m3u8_url)
  base_uri = m3u8_obj.base_uri
  logging.info('[*]get_base_uri'+base_uri)
  for seg in m3u8_obj.segments:
   yield urljoin(base_uri, seg.uri)

 def download__ts(self, urlinfo):
  """
  下载ts文件
  :param urlinfo:
  :return:
  """
  url, ts_name = urlinfo
  res = requests.get(url, headers=self.headers)
  with open(ts_name, 'wb') as fp:
   fp.write(res.content)
  logging.info('[*download]'+ts_name)


 def download_all_ts(self):
  """
  下载所有函数
  :return:
  """
  ts_urls = self.get_ts_url(self.m3u8_url)
  logging.info('[*download]download:'+self.m3u8_url)
  for index, ts_url in enumerate(ts_urls):
   print(ts_url)
   self.threadpool.submit(self.download__ts, [ts_url, f'{index}.ts'])
  self.threadpool.shutdown()

 def remove_ts(self,ts_path):
  """
  删除ts文件
  :param ts_path:
  :return:
  """
  for ts in iglob(ts_path):
   os.remove(ts)
  logging.info('[*remove]remove all *.ts')

 def run(self):
  self.download_all_ts()
  ts_path = '*.ts'
  all_ts = iglob(ts_path)
  with open(self.file_name, 'wb') as fn:
   #根据ts排序
   for ts in natsorted(all_ts):
    #读ts写mp4
    with open(ts, 'rb') as ft:
     scline = ft.read()
     fn.write(scline)
  self.remove_ts(ts_path)

if __name__ == '__main__':
 m3u8Download = M3u8Download('https://cache.m.iqiyi.com/mus/1618469868576801/a34fec3fc63db2c1bb4c15f53cd513e1/afbe8fd3d73448c9/0/20210301/69/b8/670962cfd6b9166c87a21728808fe6a2.m3u8"公众号:Java技术迷")
console.log("wx:spiderskill")