Add Douyin video downloader with auto-cookie feature
This commit is contained in:
261
simple_download.py
Normal file
261
simple_download.py
Normal file
@@ -0,0 +1,261 @@
|
||||
import requests
|
||||
import re
|
||||
import os
|
||||
import json
|
||||
import time
|
||||
from urllib.parse import unquote
|
||||
|
||||
def download_douyin_final(share_url, save_dir=r"C:\Users\Administrator\Desktop\TestDownload"):
    """Resolve a Douyin share link and try several strategies to download the video.

    Strategies, in order:
      1. Follow the short-link redirect and extract the numeric video ID.
      2. Parse the ROUTER_DATA JSON blob embedded in the share page.
      3. Scan the raw HTML for known play-address URL patterns.
      4. Query the public iesdouyin iteminfo API.
      5. Scrape the embed page for a ``<video src=...>`` tag.

    Args:
        share_url: A v.douyin.com (or equivalent) share URL.
        save_dir: Directory the .mp4 is written to; created if missing.

    Returns:
        True when any strategy produced a downloaded file, False otherwise.

    Note:
        All HTTP requests now carry a timeout — the original issued them
        without one, so a stalled server would hang the script indefinitely.
    """
    os.makedirs(save_dir, exist_ok=True)

    session = requests.Session()

    # Step 1: Get video ID
    print("Step 1: Getting video ID...")
    # WeChat-webview UA: the share endpoint serves parseable HTML to mobile clients.
    headers_mobile = {
        'User-Agent': 'Mozilla/5.0 (Linux; Android 10; MI 10 Build/QKQ1.190828.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/89.0.4389.72 MQQBrowser/6.2 TBS/046291 Mobile Safari/537.36 MicroMessenger/8.0.1.1841(0x2800015D) Process/app WeChat/arm64 Weixin NetType/4G Language/zh_CN ABI/arm64',
    }
    session.headers.update(headers_mobile)

    resp = session.get(share_url, allow_redirects=True, timeout=15)
    final_url = resp.url
    print(f"Final URL: {final_url}")

    match = re.search(r'/video/(\d+)', final_url)
    if not match:
        # Fallback: some redirect targets carry the 19-digit ID as a parameter.
        match = re.search(r'video[=/](\d{19})', final_url)

    if not match:
        print("Cannot extract video ID")
        return False

    video_id = match.group(1)
    print(f"Video ID: {video_id}")

    # Step 2: Try to get video info from iesdouyin HTML
    print("\nStep 2: Parsing share page...")

    html = resp.text

    # Method 1: Look for ROUTER_DATA (server-side-rendered page state)
    router_match = re.search(r'ROUTER_DATA\s*=\s*(\{.+?\})\s*;\s*</script>', html, re.DOTALL)
    if router_match:
        try:
            router_data = json.loads(router_match.group(1))
            print("Found ROUTER_DATA")

            # Navigate the data structure: loaderData maps route keys to payloads.
            if 'loaderData' in router_data:
                for key, val in router_data['loaderData'].items():
                    if 'video' in key.lower() or 'aweme' in key.lower():
                        print(f"Checking {key}...")
                        result = extract_and_download(val, video_id, session, save_dir)
                        if result:
                            return True
        except Exception as e:
            print(f"ROUTER_DATA parse error: {e}")

    # Method 2: Try direct video URLs in HTML
    print("\nMethod 2: Looking for video URLs in HTML...")

    # Look for play_addr patterns
    patterns = [
        r'"playAddr"\s*:\s*\[\s*\{\s*"src"\s*:\s*"([^"]+)"',
        r'"play_addr"\s*:\s*\{\s*"url_list"\s*:\s*\[\s*"([^"]+)"',
        r'"url_list"\s*:\s*\[\s*"(https?://[^"]+\.douyinvod\.com[^"]*)"',
        r'"src"\s*:\s*"(https?://[^"]+\.douyinvod\.com[^"]*)"',
        r'(https?://v[0-9]+-[0-9a-z]+\.douyinvod\.com/[a-f0-9/]+\.mp4[^"\'\s]*)',
    ]

    for pattern in patterns:
        matches = re.findall(pattern, html)
        if matches:
            print(f"Found {len(matches)} matches with pattern: {pattern[:40]}...")
            # 'candidate' (not 'match') avoids shadowing the video-ID regex match above.
            for candidate in matches[:3]:
                video_url = candidate.replace('\\u002F', '/').replace('\\/', '/')
                print(f" URL: {video_url[:80]}...")

                if 'douyinvod' in video_url or '.mp4' in video_url:
                    result = try_download(video_url, video_id, session, save_dir)
                    if result:
                        return True

    # Method 3: Try the aweme iteminfo API with proper headers
    print("\nMethod 3: Trying aweme iteminfo API...")

    api_url = f"https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids={video_id}&count=1"

    api_headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
        'Referer': f'https://www.iesdouyin.com/share/video/{video_id}',
        'Accept': 'application/json',
    }

    resp = session.get(api_url, headers=api_headers, timeout=15)
    print(f"API status: {resp.status_code}")

    if resp.status_code == 200 and resp.text:
        try:
            data = resp.json()
            if data.get('status_code') == 0 and data.get('item_list'):
                item = data['item_list'][0]
                return extract_and_download({'item': item}, video_id, session, save_dir)
            else:
                print(f"API response: {json.dumps(data, ensure_ascii=False)[:200]}")
        except Exception as e:
            print(f"API parse error: {e}")
            print(f"Response: {resp.text[:200]}")

    # Method 4: Try embed page
    print("\nMethod 4: Trying embed page...")

    embed_url = f"https://www.douyin.com/embed/video/{video_id}"
    embed_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Accept': 'text/html,application/xhtml+xml,application/xml',
    }

    resp = session.get(embed_url, headers=embed_headers, timeout=15)
    if resp.status_code == 200:
        # Look for video src in embed page
        video_src = re.search(r'<video[^>]*src=["\']([^"\']+)["\']', resp.text)
        if video_src:
            video_url = video_src.group(1)
            print(f"Found video src: {video_url[:80]}...")
            return try_download(video_url, video_id, session, save_dir)

    print("\nAll methods failed.")
    print("\nThe video likely requires authentication.")
    print("Please try the following:")
    print("1. Open Chrome/Edge and go to www.douyin.com")
    print("2. Login to your account")
    print("3. Press F12 -> Network -> Refresh page")
    print("4. Click any request -> Headers -> Copy Cookie value")
    print("5. Use that cookie with the download tool")

    return False
||||
def extract_and_download(data, video_id, session, save_dir):
    """Search an arbitrary Douyin JSON payload for a playable URL and download it.

    Args:
        data: Decoded JSON structure (dict/list) from the share page or API.
        video_id: Numeric video ID, used in the output filename.
        session: requests.Session used for the actual download.
        save_dir: Directory the file is written to.

    Returns:
        True when a URL was found and the download succeeded, False otherwise.
    """

    def find_video_url(obj):
        # Depth-first search for the first plausible play/download address.
        if isinstance(obj, dict):
            # Check common video URL locations
            for key in ['play_addr', 'playAddr', 'video_url', 'download_addr', 'downloadAddr']:
                if key in obj:
                    val = obj[key]
                    if isinstance(val, dict):
                        # Bug fix: the original third fallback was
                        # val.get('url_list', []) — a duplicate of the first
                        # lookup that could only ever contribute []. Plain
                        # `or []` yields the identical value without the dead code.
                        url_list = val.get('url_list') or val.get('urlList') or []
                        if url_list and isinstance(url_list, list):
                            return url_list[0]
                    elif isinstance(val, str):
                        return val
                    elif isinstance(val, list):
                        # e.g. playAddr: [{"src": "..."}]
                        if val and isinstance(val[0], dict) and 'src' in val[0]:
                            return val[0]['src']

            # Check for video object
            if 'video' in obj:
                result = find_video_url(obj['video'])
                if result:
                    return result

            # Recurse into remaining values
            for v in obj.values():
                result = find_video_url(v)
                if result:
                    return result

        elif isinstance(obj, list):
            for item in obj:
                result = find_video_url(item)
                if result:
                    return result

        return None

    def find_desc(obj):
        # First 'desc' string anywhere in the payload — the video caption.
        if isinstance(obj, dict):
            if 'desc' in obj and isinstance(obj['desc'], str):
                return obj['desc']
            for v in obj.values():
                result = find_desc(v)
                if result:
                    return result
        elif isinstance(obj, list):
            for item in obj:
                result = find_desc(item)
                if result:
                    return result
        return None

    video_url = find_video_url(data)

    if video_url:
        desc = find_desc(data) or "douyin_video"
        # Strip filesystem-hostile characters and cap the filename prefix length.
        desc = re.sub(r'[\\/:*?"<>|]', '_', desc)[:50]

        return try_download(video_url, video_id, session, save_dir, desc)

    return False
|
||||
def try_download(video_url, video_id, session, save_dir, desc="douyin_video"):
    """Stream a candidate video URL to disk.

    Args:
        video_url: Direct (possibly JSON-escaped / percent-encoded) media URL.
        video_id: Numeric video ID, used in the output filename.
        session: requests.Session carrying any cookies collected so far.
        save_dir: Target directory (assumed to exist).
        desc: Sanitized caption used as the filename prefix.

    Returns:
        True when a plausible video file was written, False otherwise.
    """

    # Clean URL: undo JSON unicode escapes, then percent-decoding.
    video_url = video_url.replace('\\u002F', '/').replace('\\/', '/')
    video_url = unquote(video_url)

    # Try to get non-watermarked version
    video_url = video_url.replace('playwm', 'play')

    print(f"\nAttempting download from: {video_url[:100]}...")

    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
        'Referer': 'https://www.douyin.com/',
    }

    try:
        # Bug fix: the original never closed the streamed response, leaking the
        # connection whenever the body turned out not to be a video. The context
        # manager guarantees release on every exit path.
        with session.get(video_url, headers=headers, stream=True, timeout=30) as resp:
            print(f"Response status: {resp.status_code}")
            print(f"Content-Type: {resp.headers.get('Content-Type', 'unknown')}")
            print(f"Content-Length: {resp.headers.get('Content-Length', 'unknown')}")

            if resp.status_code == 200:
                # Check if it's actually a video
                content_type = resp.headers.get('Content-Type', '')
                # `or 0` also guards against an empty Content-Length header,
                # which would make int() raise ValueError.
                content_length = int(resp.headers.get('Content-Length') or 0)

                # Heuristic: explicit video content type, or a body large enough
                # that it is unlikely to be an HTML error page.
                if 'video' in content_type or content_length > 50000:
                    filename = f"{desc}_{video_id}.mp4"
                    filepath = os.path.join(save_dir, filename)

                    with open(filepath, 'wb') as f:
                        for chunk in resp.iter_content(chunk_size=8192):
                            if chunk:
                                f.write(chunk)

                    file_size = os.path.getsize(filepath)
                    print(f"\n✓ SUCCESS!")
                    print(f" File: {filepath}")
                    print(f" Size: {file_size / 1024 / 1024:.2f} MB")
                    return True
                else:
                    print(f"Response doesn't appear to be a video file")
                    print(f"First 200 bytes: {resp.content[:200]}")

    except Exception as e:
        # Best-effort downloader: report and fall through so the caller can
        # try the next candidate URL.
        print(f"Download error: {e}")

    return False
||||
if __name__ == "__main__":
    # Demo entry point: resolve and download a single hard-coded share link.
    share_link = "https://v.douyin.com/R5doyi5_cTk/"
    download_douyin_final(share_link)
|
||||
Reference in New Issue
Block a user