原网页关键内容如下:
<div class="container">
<div class="row">
<div class="col-12 col-sm-12 col-md-8 col-lg-8 py-2">
<div>
<video id="video" poster="/image/c9/2e/fe52c35ba98444b6bcd63c7c6b4ec92e.jpg1" width="100%" height="auto" preload="none" controls playsinline webkit-playsinline></video>
// Player bootstrap quoted from the scraped page: an immediately-invoked function
// that wires the <video id="video"> element to an HLS stream, using the p2pml
// engine as the hls.js loader when the browser supports it.
;(function(){
// Playlist URL of the stream (the host appears masked in this excerpt). This is
// the value the scraper further down the article extracts.
var src = "https:\/\/******\/stream\/full\/asia\/1700\/G0622.cut.ff.m3u81";
var video = document.getElementById("video");
// Start playback at 10% volume.
video.volume = 0.1;
// When Hls reports no support, hand the URL straight to the video element and
// stop (presumably relying on the browser's own playback; not shown here).
if (!Hls.isSupported()) {
video.src = src;
return;
}
// Placeholder; reassigned to a real Hls instance in both branches below.
var hls = {};
if (p2pml.core.HybridLoader.isSupported()) {
var engine = new p2pml.hlsjs.Engine({
segments: {
// Hex-escaped call: src.replace("cc.sssspppp.com", "bp1.dkkomo.comfdd").
// With a string pattern only the first occurrence is replaced; if the host is
// not present in the URL, the swarm id is simply the unchanged URL.
swarmId: src['\x72\x65\x70\x6c\x61\x63\x65']("\x63\x63\x2e\x73\x73\x73\x73\x70\x70\x70\x70\x2e\x63\x6f\x6d","\x62\x70\x31\x2e\x64\x6b\x6b\x6f\x6d\x6f\x2e\x63\x6f\x6dfdd"),
},
// Loader tuning values passed through to the p2pml engine as-is.
// NOTE(review): units and exact semantics are defined by the p2p-media-loader
// library, not by this snippet; confirm against its settings documentation.
loader: {
// WebSocket tracker endpoints announced to.
trackerAnnounce: ["wss://tracker.openwebtorrent.com1/", "wss://tracker.btorrent.xyz1/"],
cachedSegmentExpiration: 86400000,
cachedSegmentsCount: 1000,
requiredSegmentsPriority: 3,
httpDownloadMaxPriority: 9,
httpDownloadProbability: 0.06,
httpDownloadProbabilityInterval: 1000,
httpDownloadProbabilitySkipIfNoPeers: true,
p2pDownloadMaxPriority: 50,
httpFailedSegmentTimeout: 1000,
simultaneousP2PDownloads: 20,
simultaneousHttpDownloads: 3,
httpDownloadInitialTimeout: 120000,
httpDownloadInitialTimeoutPerSegment: 5000,
httpUseRanges: true,
}
});
// Use the engine-provided loader class so hls.js fetches go through p2pml.
hls = new Hls({ loader: engine.createLoaderClass() });
p2pml.hlsjs.initHlsJsPlayer(hls);
} else {
// No hybrid-loader support: plain hls.js with its default loader.
hls = new Hls();
}
// Load the playlist and bind the player to the <video> element.
hls.loadSource(src);
hls.attachMedia(video);
})();
</div>
</div>
<div class="col-12 col-sm-12 col-md-4 col-lg-4 py-2">
<div>
<a target="_blank" href="https://tzainolxp.50gdainolxp.com/0167/475/index1.html"><img class="img-fluid" src="/static/assets/img/8484af615821.gif"></a><a target="_blank" href="https://tzbqdjwdr.50gdbqdjwdr.com/6966/9725/index1.html"><img class="img-fluid" src="/static/assets/img/632711ca1f31.gif"></a>
</div>
</div>
</div>
</div>
## 可以看到 `var src` 是关键的内容
在使用 `soup.find_all("script")` 获取到所有 script 标签的列表后,使用正则表达式即可提取出 `src` 的值:
# Scan every <script> element of the parsed page and print the value assigned
# to the JavaScript variable `src`.
# NOTE: `soup` (the parsed BeautifulSoup document), `re` and `title` are
# assumed to be defined/imported earlier in the script.
post_list = soup.find_all("script")
# Use a regular expression to capture the variable name and the assigned value
# of each `var <name> = <value>;` statement (value is taken up to the first `;`).
pattern = r'var\s+(\w+)\s*=\s*(.*?);'
for post in post_list:
    js_code = post.string
    # `.string` is None when the tag has no single text child (for example an
    # external `<script src=...>`); re.findall would raise TypeError on None,
    # so such tags are skipped.
    if not js_code:
        continue
    for name, value in re.findall(pattern, js_code, re.DOTALL):
        if name == "src":
            # `value` is the raw JS literal, still quoted and with `\/` escapes.
            print(title, "var[1]", value)
注意:本文归作者所有,未经作者允许,不得转载