import json
import os
import re
import time  # noqa: F401 -- unused in this file's visible code; kept as in the original import list
from urllib.parse import unquote

import requests

# Seconds to wait on any single HTTP request before giving up.
REQUEST_TIMEOUT = 30

# Keys under which a playable/downloadable address has been seen in page data.
_VIDEO_URL_KEYS = ('play_addr', 'playAddr', 'video_url', 'download_addr', 'downloadAddr')

# Characters that are invalid in Windows file names, plus ASCII control
# characters (descriptions frequently contain newlines).
_INVALID_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')

_MOBILE_UA = 'Mozilla/5.0 (Linux; Android 10; MI 10 Build/QKQ1.190828.002; wv) AppleWebKit/537.36 (KHTML, like Gecko) Version/4.0 Chrome/89.0.4389.72 MQQBrowser/6.2 TBS/046291 Mobile Safari/537.36 MicroMessenger/8.0.1.1841(0x2800015D) Process/app WeChat/arm64 Weixin NetType/4G Language/zh_CN ABI/arm64'
_IPHONE_UA = 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'
_DESKTOP_UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36'


def download_douyin_final(share_url, save_dir=r"C:\Users\Administrator\Desktop\TestDownload"):
    """Download the video behind a Douyin share link, trying several strategies.

    The share link is followed to its final URL to obtain the numeric video
    id, then four methods are attempted in order until one succeeds:

    1. the ``ROUTER_DATA`` JSON embedded in the share page,
    2. video URLs found directly in the share page HTML,
    3. the ``iteminfo`` web API,
    4. the ``<video src=...>`` attribute of the embed page.

    Progress is reported with ``print``.

    Args:
        share_url: Share link (for example a ``v.douyin.com`` short link).
        save_dir: Directory the video is written to; created if missing.

    Returns:
        True if a video file was saved, False otherwise.
    """
    os.makedirs(save_dir, exist_ok=True)

    # The context manager releases pooled connections when we are done.
    with requests.Session() as session:
        # Step 1: Get video ID
        print("Step 1: Getting video ID...")
        session.headers.update({'User-Agent': _MOBILE_UA})
        try:
            resp = session.get(share_url, allow_redirects=True, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print(f"Cannot open share URL: {e}")
            return False
        final_url = resp.url
        print(f"Final URL: {final_url}")

        match = (re.search(r'/video/(\d+)', final_url)
                 or re.search(r'video[=/](\d{19})', final_url))
        if not match:
            print("Cannot extract video ID")
            return False
        video_id = match.group(1)
        print(f"Video ID: {video_id}")

        # Step 2: Try to get video info from iesdouyin HTML
        print("\nStep 2: Parsing share page...")
        html = resp.text

        # `or` short-circuits, so later methods only run when earlier ones fail.
        if (_try_router_data(html, video_id, session, save_dir)
                or _try_html_urls(html, video_id, session, save_dir)
                or _try_iteminfo_api(video_id, session, save_dir)
                or _try_embed_page(video_id, session, save_dir)):
            return True

    print("\nAll methods failed.")
    print("\nThe video likely requires authentication.")
    print("Please try the following:")
    print("1. Open Chrome/Edge and go to www.douyin.com")
    print("2. Login to your account")
    print("3. Press F12 -> Network -> Refresh page")
    print("4. Click any request -> Headers -> Copy Cookie value")
    print("5. Use that cookie with the download tool")
    return False


def _try_router_data(html, video_id, session, save_dir):
    """Method 1: download via the ROUTER_DATA JSON object embedded in *html*."""
    marker = re.search(r'ROUTER_DATA\s*=\s*(?=\{)', html)
    if not marker:
        return False
    try:
        # raw_decode parses exactly one JSON document and ignores what follows,
        # so it neither needs a trailing ';' nor trips over '};' inside strings.
        router_data, _ = json.JSONDecoder().raw_decode(html[marker.end():])
    except ValueError as e:
        print(f"ROUTER_DATA parse error: {e}")
        return False
    print("Found ROUTER_DATA")

    loader_data = router_data.get('loaderData')
    if not isinstance(loader_data, dict):
        return False
    for key, val in loader_data.items():
        lowered = key.lower()
        if 'video' in lowered or 'aweme' in lowered:
            print(f"Checking {key}...")
            if extract_and_download(val, video_id, session, save_dir):
                return True
    return False


def _try_html_urls(html, video_id, session, save_dir):
    """Method 2: download via play-address URLs found directly in *html*."""
    print("\nMethod 2: Looking for video URLs in HTML...")
    # Look for play_addr patterns
    patterns = [
        r'"playAddr"\s*:\s*\[\s*\{\s*"src"\s*:\s*"([^"]+)"',
        r'"play_addr"\s*:\s*\{\s*"url_list"\s*:\s*\[\s*"([^"]+)"',
        r'"url_list"\s*:\s*\[\s*"(https?://[^"]+\.douyinvod\.com[^"]*)"',
        r'"src"\s*:\s*"(https?://[^"]+\.douyinvod\.com[^"]*)"',
        r'(https?://v[0-9]+-[0-9a-z]+\.douyinvod\.com/[a-f0-9/]+\.mp4[^"\'\s]*)',
    ]
    for pattern in patterns:
        matches = re.findall(pattern, html)
        if not matches:
            continue
        print(f"Found {len(matches)} matches with pattern: {pattern[:40]}...")
        # Only the first few candidates per pattern are tried.
        for candidate in matches[:3]:
            video_url = candidate.replace('\\u002F', '/').replace('\\/', '/')
            print(f"  URL: {video_url[:80]}...")
            if 'douyinvod' in video_url or '.mp4' in video_url:
                if try_download(video_url, video_id, session, save_dir):
                    return True
    return False


def _try_iteminfo_api(video_id, session, save_dir):
    """Method 3: download via the aweme iteminfo web API."""
    print("\nMethod 3: Trying aweme iteminfo API...")
    api_url = f"https://www.iesdouyin.com/web/api/v2/aweme/iteminfo/?item_ids={video_id}&count=1"
    api_headers = {
        'User-Agent': _IPHONE_UA,
        'Referer': f'https://www.iesdouyin.com/share/video/{video_id}',
        'Accept': 'application/json',
    }
    try:
        resp = session.get(api_url, headers=api_headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as e:
        print(f"API request error: {e}")
        return False
    print(f"API status: {resp.status_code}")
    if resp.status_code != 200 or not resp.text:
        return False

    try:
        data = resp.json()
    except ValueError as e:  # requests' JSON decode errors subclass ValueError
        print(f"API parse error: {e}")
        print(f"Response: {resp.text[:200]}")
        return False

    if isinstance(data, dict) and data.get('status_code') == 0 and data.get('item_list'):
        item = data['item_list'][0]
        # Report the result instead of returning unconditionally from the
        # caller, so a failure here still lets the next method run.
        return extract_and_download({'item': item}, video_id, session, save_dir)
    print(f"API response: {json.dumps(data, ensure_ascii=False)[:200]}")
    return False


def _try_embed_page(video_id, session, save_dir):
    """Method 4: download via the <video src=...> of the embed page."""
    print("\nMethod 4: Trying embed page...")
    embed_url = f"https://www.douyin.com/embed/video/{video_id}"
    embed_headers = {
        'User-Agent': _DESKTOP_UA,
        'Accept': 'text/html,application/xhtml+xml,application/xml',
    }
    try:
        resp = session.get(embed_url, headers=embed_headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as e:
        print(f"Embed page request error: {e}")
        return False
    if resp.status_code != 200:
        return False

    # Look for video src in embed page. Anchoring on the <video> tag avoids
    # picking up the src of an unrelated <script>/<img> element.
    video_src = re.search(r'<video[^>]*\ssrc=["\']([^"\']+)["\']', resp.text)
    if not video_src:
        return False
    video_url = video_src.group(1)
    print(f"Found video src: {video_url[:80]}...")
    return try_download(video_url, video_id, session, save_dir)


def _url_from_value(val):
    """Return the URL carried by a play-address value, or None if it has none."""
    if isinstance(val, dict):
        url_list = val.get('url_list') or val.get('urlList')
        if isinstance(url_list, list):
            for entry in url_list:
                if isinstance(entry, str) and entry:
                    return entry
    elif isinstance(val, str):
        return val or None
    elif isinstance(val, list):
        if val and isinstance(val[0], dict):
            src = val[0].get('src')
            if isinstance(src, str) and src:
                return src
    return None


def _find_video_url(obj):
    """Depth-first search of nested dicts/lists for the first video URL."""
    if isinstance(obj, dict):
        # Check common video URL locations
        for key in _VIDEO_URL_KEYS:
            url = _url_from_value(obj.get(key))
            if url:
                return url
        # Prefer the 'video' sub-object before scanning every other value.
        if 'video' in obj:
            found = _find_video_url(obj['video'])
            if found:
                return found
        children = obj.values()
    elif isinstance(obj, list):
        children = obj
    else:
        return None

    for child in children:
        found = _find_video_url(child)
        if found:
            return found
    return None


def _find_desc(obj):
    """Depth-first search of nested dicts/lists for the first non-empty 'desc' string."""
    if isinstance(obj, dict):
        desc = obj.get('desc')
        if isinstance(desc, str) and desc:
            return desc
        children = obj.values()
    elif isinstance(obj, list):
        children = obj
    else:
        return None

    for child in children:
        found = _find_desc(child)
        if found:
            return found
    return None


def _sanitize_desc(desc):
    """Turn a free-text description into a safe, short file-name stem."""
    cleaned = _INVALID_FILENAME_CHARS.sub('_', desc)[:50]
    # Windows rejects names ending in a space or dot.
    cleaned = cleaned.strip(' .')
    return cleaned or "douyin_video"


def extract_and_download(data, video_id, session, save_dir):
    """Extract a video URL from *data* and download it.

    Args:
        data: Nested dict/list structure (parsed JSON) to search.
        video_id: Video id used in the output file name.
        session: ``requests.Session`` used for the download.
        save_dir: Directory the video is written to.

    Returns:
        True if a URL was found and the download succeeded, False otherwise.
    """
    video_url = _find_video_url(data)
    if not video_url:
        return False
    desc = _sanitize_desc(_find_desc(data) or "douyin_video")
    return try_download(video_url, video_id, session, save_dir, desc)


def try_download(video_url, video_id, session, save_dir, desc="douyin_video"):
    """Attempt to download the video at *video_url* into *save_dir*.

    The body is streamed into ``<desc>_<video_id>.mp4.part`` and renamed to
    ``<desc>_<video_id>.mp4`` only once the transfer completed, so a failed
    download never leaves a truncated ``.mp4`` behind.

    Args:
        video_url: Candidate URL; JSON-escaped slashes are unescaped first.
        video_id: Video id used in the output file name.
        session: ``requests.Session`` used for the request.
        save_dir: Existing directory the file is written to.
        desc: File-name stem placed before the video id.

    Returns:
        True if a non-empty file was saved, False on any failure (errors are
        printed, not raised).
    """
    # Clean URL
    video_url = video_url.replace('\\u002F', '/').replace('\\/', '/')
    # NOTE(review): unquoting the whole URL also decodes escaped characters
    # inside the query string; kept as in the original -- confirm it is wanted.
    video_url = unquote(video_url)
    # Try to get non-watermarked version
    video_url = video_url.replace('playwm', 'play')

    print(f"\nAttempting download from: {video_url[:100]}...")
    headers = {
        'User-Agent': _DESKTOP_UA,
        'Referer': 'https://www.douyin.com/',
    }

    part_path = None
    try:
        # `with` returns the streamed connection to the pool on every path.
        with session.get(video_url, headers=headers, stream=True,
                         timeout=REQUEST_TIMEOUT) as resp:
            print(f"Response status: {resp.status_code}")
            print(f"Content-Type: {resp.headers.get('Content-Type', 'unknown')}")
            print(f"Content-Length: {resp.headers.get('Content-Length', 'unknown')}")
            if resp.status_code != 200:
                return False

            # Check if it's actually a video
            content_type = resp.headers.get('Content-Type', '')
            try:
                content_length = int(resp.headers.get('Content-Length', 0))
            except ValueError:
                content_length = 0  # malformed header: treat the size as unknown

            if 'video' not in content_type and content_length <= 50000:
                print("Response doesn't appear to be a video file")
                # Read only the first chunk rather than the whole body.
                head = next(resp.iter_content(chunk_size=200), b'')
                print(f"First 200 bytes: {head}")
                return False

            filepath = os.path.join(save_dir, f"{desc}_{video_id}.mp4")
            part_path = filepath + ".part"
            downloaded = 0
            with open(part_path, 'wb') as f:
                for chunk in resp.iter_content(chunk_size=8192):
                    if chunk:
                        f.write(chunk)
                        downloaded += len(chunk)

        if downloaded == 0:
            print("Download error: server returned an empty body")
            return False

        os.replace(part_path, filepath)
        part_path = None  # nothing left to clean up

        file_size = os.path.getsize(filepath)
        print(f"\n✓ SUCCESS!")
        print(f"  File: {filepath}")
        print(f"  Size: {file_size / 1024 / 1024:.2f} MB")
        return True
    except Exception as e:
        # Best-effort boundary: callers try the next candidate URL on failure.
        print(f"Download error: {e}")
        return False
    finally:
        # Remove a partial file left by a failed or empty download.
        if part_path and os.path.exists(part_path):
            try:
                os.remove(part_path)
            except OSError:
                pass


if __name__ == "__main__":
    url = "https://v.douyin.com/R5doyi5_cTk/"
    download_douyin_final(url)