"""Resolve a Douyin share link and dump the router data embedded in its page.

Run as a script to exercise both helpers against the sample share URL below.
"""

import json
import os
import re

import requests

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36',
    'Referer': 'https://www.douyin.com/',
}

# Seconds to wait on the server before giving up. Without an explicit
# timeout, ``requests`` waits indefinitely on an unresponsive host.
DEFAULT_TIMEOUT = 10

_MOBILE_USER_AGENT = (
    'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) '
    'AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 '
    'Mobile/15E148 Safari/604.1'
)

_JSON_DECODER = json.JSONDecoder()


def get_video_info(share_url, timeout=DEFAULT_TIMEOUT):
    """Return the numeric video ID that a share URL redirects to.

    The share URL is requested without following redirects and the
    ``Location`` response header is searched for ``/video/<digits>``.

    Args:
        share_url: The share link to resolve.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        The video ID as a string, or ``None`` when the response carries no
        ``Location`` header or the header contains no ``/video/<digits>``
        segment.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    with requests.Session() as session:
        session.headers.update(headers)
        # Redirects are deliberately not followed: only the redirect target
        # URL is needed, not the page behind it.
        resp = session.get(share_url, allow_redirects=False, timeout=timeout)

    location = resp.headers.get('Location', '')
    print(f"Redirect to: {location}")

    match = re.search(r'/video/(\d+)', location)
    if match is None:
        return None

    video_id = match.group(1)
    print(f"Video ID: {video_id}")
    return video_id


def _extract_assigned_json(html, name):
    """Return the JSON object assigned to *name* inside *html*.

    Looks for ``<name> = {`` and decodes one complete JSON value starting at
    the opening brace. A regex cannot find the matching closing brace of a
    nested object, so the end of the value is determined by the JSON decoder
    instead.

    Args:
        html: Page source to search.
        name: Literal text that precedes the ``=`` sign.

    Returns:
        The decoded object, or ``None`` if no ``<name> = {`` occurs in *html*.

    Raises:
        json.JSONDecodeError: If at least one assignment was found but none
            of them is followed by valid JSON (the last error is raised).
    """
    pattern = re.escape(name) + r'\s*=\s*(?=\{)'
    last_error = None
    for match in re.finditer(pattern, html):
        try:
            data, _ = _JSON_DECODER.raw_decode(html, match.end())
        except json.JSONDecodeError as exc:
            # Keep scanning: a later occurrence may hold the real payload.
            last_error = exc
        else:
            return data
    if last_error is not None:
        raise last_error
    return None


def get_douyin_video_direct(share_url, timeout=DEFAULT_TIMEOUT):
    """Fetch the page behind a share URL and print its embedded router data.

    The share URL is requested with a mobile User-Agent, following redirects.
    The page source is then searched for a ``ROUTER_DATA = {...}`` assignment
    (a preview of up to 2000 characters is printed) and for a
    ``_ROUTER_DATA = {...}`` assignment, whose ``loaderData`` entries with
    ``aweme`` in the key are printed (up to 3000 characters each).

    Args:
        share_url: The share link to resolve.
        timeout: Seconds to wait for each HTTP response.

    Returns:
        None. Results are reported on standard output only.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
    """
    with requests.Session() as session:
        session.headers.update({'User-Agent': _MOBILE_USER_AGENT})
        # Follow redirects to reach the actual video page.
        resp = session.get(share_url, allow_redirects=True, timeout=timeout)
        print(f"Final URL: {resp.url}")
        html = resp.text

    # First pass: any "...ROUTER_DATA = {" assignment (this also matches the
    # underscore-prefixed name). Only a preview is printed.
    try:
        data = _extract_assigned_json(html, 'ROUTER_DATA')
    except json.JSONDecodeError as e:
        print(f"Parse error: {e}")
    else:
        if data is not None:
            print("Found router data!")
            print(json.dumps(data, indent=2, ensure_ascii=False)[:2000])

    # Second pass: the underscore-prefixed name, drilling into loaderData.
    try:
        data = _extract_assigned_json(html, '_ROUTER_DATA')
    except json.JSONDecodeError as e:
        print(f"Parse error: {e}")
        return
    if data is None:
        return

    print("Found _ROUTER_DATA!")
    loader_data = data.get('loaderData')
    if not isinstance(loader_data, dict):
        return
    for key, value in loader_data.items():
        if 'aweme' in key.lower():
            print(f"Found aweme data in {key}")
            print(json.dumps(value, indent=2, ensure_ascii=False)[:3000])


# Test
url = "https://v.douyin.com/R5doyi5_cTk/"

if __name__ == "__main__":
    # Guarded so that importing this module does not trigger network calls.
    video_id = get_video_info(url)
    get_douyin_video_direct(url)