The solution to this problem can either be forcing the API to extract metadata for more than 20,000 videos from a channel, or specifying a time period during which the videos were uploaded. That way, the code can be run again and again for multiple time periods to extract metadata for all videos.
I tried this without success.
My solution to failing YouTube API backend is using this Python script:
It consists of faking the requests made when browsing the “Videos” tab of a YouTube channel.
import urllib.request, json, subprocess
from urllib.error import HTTPError
def getURL(url):
    """Fetch *url* and return the response body decoded as UTF-8.

    On an HTTP error status (4xx/5xx) the error response's body is
    returned instead of raising: YouTube error pages can still contain
    parsable content, so callers always get something to work with.
    """
    try:
        res = urllib.request.urlopen(url).read()
    except HTTPError as e:
        # Keep the error page body rather than failing outright.
        res = e.read()
    return res.decode('utf-8')
def exec(cmd):
    """Run *cmd* through the shell and return its raw stdout bytes.

    NOTE(review): shell=True executes the string through the system
    shell, so *cmd* must never contain untrusted input; it is required
    here because the curl command relies on shell quoting.
    NOTE: the name shadows the ``exec`` builtin, but it is kept for
    compatibility with existing callers in this script.
    """
    return subprocess.check_output(cmd, shell=True)
youtuberId = 'CHANNEL_ID'
videosIds = []
errorsCount = 0
def retrieveVideosFromContent(content):
global videosIds
wantedPattern = '"videoId":"'
content = content.replace('"videoId": "', wantedPattern).replace("'videoId': '", wantedPattern)
contentParts = content.split(wantedPattern)
contentPartsLen = len(contentParts)
for contentPartsIndex in range(contentPartsLen):
contentPart = contentParts[contentPartsIndex]
contentPartParts = contentPart.split('"')
videoId = contentPartParts[0]
videoIdLen = len(videoId)
if not videoId in videosIds and videoIdLen == 11:
videosIds += [videoId]
def scrape(token):
    """POST a continuation *token* to YouTube's internal browse endpoint.

    Side effects: harvests video ids into ``videosIds`` (through
    ``retrieveVideosFromContent``), stores the parsed JSON response in
    the global ``data`` and bumps ``errorsCount`` on each failed attempt.

    Returns the next continuation token, or '' when the channel's video
    list is exhausted. Retries the same token recursively (unbounded) on
    a malformed response.
    """
    global errorsCount, data
    # YOUR_KEY can be obtained by browsing a videos channel section (like
    # https://www.youtube.com/c/BenjaminLoison/videos) while checking your
    # "Network" tab using for instance Ctrl+Shift+E
    cmd = 'curl -s \'https://www.youtube.com/youtubei/v1/browse?key=YOUR_KEY\' -H \'Content-Type: application/json\' --data-raw \'{"context":{"client":{"clientName":"WEB","clientVersion":"2.20210903.05.01"}},"continuation":"' + token + '"}\''
    # Re-quote the command: escape double quotes, then turn the single
    # quotes into double quotes — presumably to suit the host shell's
    # quoting rules (TODO confirm on the target platform).
    cmd = cmd.replace('"', '\\"').replace("\'", '"')
    content = exec(cmd).decode('utf-8')
    retrieveVideosFromContent(content)
    data = json.loads(content)
    if 'onResponseReceivedActions' not in data:
        # Transient failure: log it and retry the same token.
        print('no token found let\'s try again')
        errorsCount += 1
        return scrape(token)
    entry = data['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems'][-1]
    if 'continuationItemRenderer' not in entry:
        # No continuation item renderer means we reached the last page.
        return ''
    newToken = entry['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token']
    return newToken
# Fetch the channel's "Videos" page to get the initial data blob and the
# first continuation token.
url = "https://www.youtube.com/channel/" + youtuberId + '/videos'
content = getURL(url)
# The initial JSON payload is embedded in the page inside a script tag
# (pattern assumed to be `var ytInitialData = ...;</script>` — confirm
# against the current page markup if splitting fails).
content = content.split('var ytInitialData = ')[1].split(';</script>')[0]
dataFirst = json.loads(content)
retrieveVideosFromContent(content)
# Dig out the first continuation token from the "Videos" tab grid.
token = dataFirst['contents']['twoColumnBrowseResultsRenderer']['tabs'][1]['tabRenderer']['content']['sectionListRenderer']['contents'][0]['itemSectionRenderer']['contents'][0]['gridRenderer']['items'][-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token']
# Keep requesting continuation pages until no token remains.
while True:
    videosIdsLen = len(videosIds)
    print(videosIdsLen, token)
    if token == '':
        break
    newToken = scrape(token)
    token = newToken

print(videosIdsLen, videosIds)
Make sure to replace the CHANNEL_ID
and YOUR_KEY
values. Also make sure the curl command is available from your shell.