Files
tiktok/test_direct.py

72 lines
2.3 KiB
Python

import requests
import re
import json
import os
# Default request headers shared by the helpers in this file: a desktop
# Chrome User-Agent plus a douyin.com Referer.
headers = {
    'User-Agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/120.0.0.0 Safari/537.36'
    ),
    'Referer': 'https://www.douyin.com/',
}
def get_video_info(share_url, timeout=10):
    """Resolve a share link and return the numeric video ID it redirects to.

    Sends a single GET to ``share_url`` without following redirects, reads
    the ``Location`` response header and extracts the digits that follow
    ``/video/`` in it. The redirect target and the ID are printed as
    progress output.

    Args:
        share_url: Share link to resolve.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The video ID as a string, or ``None`` when the response carries no
        ``Location`` header or the header has no ``/video/<digits>`` part.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # The context manager releases the session's pooled connections on exit.
    with requests.Session() as session:
        session.headers.update(headers)
        # Redirects are not followed because the ID is read from the
        # redirect target itself. requests applies no timeout by default,
        # so one is passed explicitly to avoid blocking forever.
        resp = session.get(share_url, allow_redirects=False, timeout=timeout)
        location = resp.headers.get('Location', '')

    print(f"Redirect to: {location}")
    if not location:
        return None

    # Extract the video ID from the redirect URL.
    match = re.search(r'/video/(\d+)', location)
    if match is None:
        return None

    video_id = match.group(1)
    print(f"Video ID: {video_id}")
    return video_id
def get_douyin_video_direct(share_url, timeout=10):
    """Fetch the page behind a share link and print embedded router JSON.

    Follows redirects from ``share_url`` using a mobile Safari User-Agent,
    then searches the returned HTML for a ``ROUTER_DATA = {...}`` assignment
    using two patterns. Whatever parses as JSON is pretty-printed (truncated)
    to stdout; for the second pattern only the ``loaderData`` entries whose
    key contains ``aweme`` are printed.

    Args:
        share_url: Share link to fetch.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None. All results are reported via ``print``.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    # The context manager releases the session's pooled connections on exit.
    with requests.Session() as session:
        session.headers.update({
            'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1'
        })
        # Follow redirects to reach the actual video page. requests applies
        # no timeout by default, so one is passed explicitly.
        resp = session.get(share_url, allow_redirects=True, timeout=timeout)
        print(f"Final URL: {resp.url}")
        html = resp.text

    # Pattern 1: an object assigned to ROUTER_DATA that ends right before
    # the closing </script> tag. The search is unanchored, so this also
    # matches a name such as _ROUTER_DATA.
    match = re.search(r'ROUTER_DATA\s*=\s*(\{.+?\})\s*</script>', html, re.DOTALL)
    if match:
        try:
            data = json.loads(match.group(1))
        except ValueError as e:
            # json.JSONDecodeError is a ValueError. Report the failure
            # instead of hiding it, then fall through to the second pattern.
            print(f"Parse error: {e}")
        else:
            print("Found router data!")
            print(json.dumps(data, indent=2, ensure_ascii=False)[:2000])

    # Pattern 2: an object assigned to _ROUTER_DATA that ends at the first
    # "}" followed by ";". NOTE(review): a payload containing "};" inside
    # a string would be cut short here and fail to parse.
    match = re.search(r'_ROUTER_DATA\s*=\s*({.*?})\s*;', html, re.DOTALL)
    if match:
        try:
            data = json.loads(match.group(1))
        except ValueError as e:
            print(f"Parse error: {e}")
        else:
            print("Found _ROUTER_DATA!")
            # Navigate to the video info. The captured text starts with "{",
            # so a successful parse always yields a dict.
            loader_data = data.get('loaderData')
            if isinstance(loader_data, dict):
                for key, value in loader_data.items():
                    if 'aweme' in key.lower():
                        print(f"Found aweme data in {key}")
                        print(json.dumps(value, indent=2, ensure_ascii=False)[:3000])
# Sample short share link used by the manual smoke test below.
url = "https://v.douyin.com/R5doyi5_cTk/"

# Run the live requests only when this file is executed as a script, so
# that importing the module does not trigger network traffic.
if __name__ == "__main__":
    video_id = get_video_info(url)
    get_douyin_video_direct(url)