Initial commit with all my changes

This commit is contained in:
Eir Sunny 2024-12-18 17:54:43 -06:00
parent fde9f3272a
commit cbd278e9cf
40 changed files with 9757 additions and 203 deletions

View File

@ -1,13 +0,0 @@
# Editor crud files
*~
\#*#
.vscode
.idea
.git
# Artifacts
__pycache__
# Personal
/generic-updater
/configuration.py

3
.gitignore vendored
View File

@ -19,3 +19,6 @@ venv
env.bak
venv.bak
newleaf-venv
# protodec
tools/protodec

View File

@ -1,13 +0,0 @@
FROM python:3.12-alpine
WORKDIR /workdir
COPY ./requirements.txt ./requirements.txt
RUN pip install -r requirements.txt
COPY . .
EXPOSE 3000
CMD python index.py

View File

@ -1,11 +1,30 @@
# NewLeaf
## Navigation
My fork of [NewLeaf][newleaf] by [Cadence Ember](https://cadence.moe/).
## Changes
- Can extract playlist data, including continuations
- Better channel data extraction with tabs, playlists, and shorts
- Better comment data extraction, including continuations
- Short view count matches Invidious
## Installation
Build [protodec](https://github.com/iv-org/protodec) to a binary and place it in `tools/protodec`.
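NewLeaf calls that binary through a small wrapper in `tools/invidious_ported` (not shown in this diff). As a rough sketch of the assumed interface (the extractors pass `-e` to encode JSON into a base64 protobuf and `-db` to decode one back), it amounts to piping data through the executable:
```python
import subprocess

def protodec(data, flag):
    # Hypothetical wrapper: pipe the payload through tools/protodec.
    # "-e" encodes JSON -> base64 protobuf; "-db" decodes base64 -> JSON,
    # matching the calls in extractors/channel.py and extractors/playlist.py.
    result = subprocess.run(
        ["tools/protodec", flag],
        input=data.encode("utf8"),
        capture_output=True,
        check=True,
    )
    return result.stdout.decode("utf8").strip()
```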
 
Yes I know `tools/fetch-with-node` is extremely cursed. At some point I want to rewrite all of this...
***
## Navigation (NewLeaf)
- [Project hub][hub]
- [Announcements][announce]
- [CloudTube repo][cloudtube]
- NewLeaf repo
- [NewLeaf repo][newleaf]
- [Documentation repo][docs]
- [Mailing list][list] for development and discussion
- [Todo tracker][todo] for listing problems and feature requests

View File

@ -1,9 +1,12 @@
import cherrypy
import dateutil.parser
import requests
import re, datetime, time
import xml.etree.ElementTree as ET
from tools.converters import *
from tools.extractors import extract_yt_initial_data, eu_consent_cookie
from tools.invidious_ported import browse, protodec
from extractors.playlist import *
from threading import Lock
from cachetools import TTLCache
@ -12,17 +15,20 @@ channel_cache_lock = Lock()
channel_latest_cache = TTLCache(maxsize=500, ttl=300)
channel_latest_cache_lock = Lock()
def extract_channel(ucid, second__path="user"):
def extract_channel_new(ucid, second__path="user"):
cache_key = (ucid, second__path)
with channel_cache_lock:
if cache_key in channel_cache:
return channel_cache[cache_key]
channel_type = "channel" if len(ucid) == 24 and ucid[:2] == "UC" else second__path
r = requests.get("https://www.youtube.com/{}/{}/videos?hl=en".format(channel_type, ucid), cookies=eu_consent_cookie())
r.raise_for_status()
yt_initial_data = extract_yt_initial_data(r.content.decode("utf8"))
yt_initial_data = browse(browseId=ucid)
if yt_initial_data is None:
return {
"error": alert_text,
"identifier": "NOT_FOUND"
}
for alert in yt_initial_data.get("alerts", []):
alert_text = combine_runs(alert["alertRenderer"]["text"])
@ -42,88 +48,352 @@ def extract_channel(ucid, second__path="user"):
"identifier": "UNKNOWN"
}
header = yt_initial_data["header"]["c4TabbedHeaderRenderer"] if "c4TabbedHeaderRenderer" in yt_initial_data["header"] else {}
channel_metadata = yt_initial_data["metadata"]["channelMetadataRenderer"]
# Redirect
browse_redirect = try_dig(yt_initial_data, "onResponseReceivedActions", 0, "navigateAction", "endpoint", "browseEndpoint")
# I don't know what to do with this...
if header:
author = header["title"]
author_id = header["channelId"]
author_url = header["navigationEndpoint"]["commandMetadata"]["webCommandMetadata"]["url"]
auto_generated = False
if not "metadata" in yt_initial_data:
auto_generated = True
elif try_dig(yt_initial_data, "metadata", "channelMetadataRenderer", "musicArtistName") is not None:
auto_generated = True
tags = []
tab_names = []
total_views = 0
banner = None
joined = None
video_count = None
video_count_text = None
age_gate_renderer = try_dig(yt_initial_data, "contents", "twoColumnBrowseResultsRenderer", "tabs", 0, "tabRenderer", "content", "sectionListRenderer", "contents", 0, "channelAgeGateRenderer")
if age_gate_renderer is not None:
author = combine_runs(age_gate_renderer["channelTitle"])
newUcid = try_dig(yt_initial_data, "responseContext", "serviceTrackingParams", 0, "params", 0, "value")
if newUcid is not None:
ucid = newUcid
author_url = "https://www.youtube.com/channel/#{}".format(ucid)
author_thumbnail = try_dig(age_gate_renderer, "avatar", "thumbnails", 0, "url")
banners = []
banner = None
descriptionNode = None
is_family_friendly = None
is_age_gated = True
tab_names = ["videos", "shorts", "streams"]
auto_generated = False
else:
author = channel_metadata["title"]
author_id = channel_metadata["externalId"]
author_url = channel_metadata["channelUrl"]
banners = try_dig(yt_initial_data, "header", "pageHeaderRenderer", "content", "pageHeaderViewModel", "banner", "imageBannerViewModel", "image", "sources")
if banners is not None:
banner = try_dig(banners, len(banners) - 1, "url")
author = try_dig(yt_initial_data, "metadata", "channelMetadataRenderer", "title")
author_url = try_dig(yt_initial_data, "metadata", "channelMetadataRenderer", "channelUrl")
ucid = try_dig(yt_initial_data, "metadata", "channelMetadataRenderer", "externalId")
descriptionNode = try_dig(yt_initial_data, "metadata", "channelMetadataRenderer", "description")
tags = try_dig(yt_initial_data, "microformat", "microformatDataRenderer", "tags")
subscriber_count = combine_runs(header["subscriberCountText"]) if "subscriberCountText" in header else "Unknown subscribers"
description = channel_metadata["description"]
allowed_regions = channel_metadata["availableCountryCodes"]
is_family_friendly = try_dig(yt_initial_data, "microformat", "microformatDataRenderer", "familySafe")
tabs_json = try_dig(yt_initial_data, "contents", "twoColumnBrowseResultsRenderer", "tabs")
if tabs_json is not None:
tab_names = []
for tab in tabs_json:
name = try_dig(tab, "tabRenderer", "title")
if name is not None:
name = name.lower()
if name == "live":
name = "streams"
elif name == "posts":
name = "community"
tab_names.append(name)
# Get selected tab
selected_tab = None
for tab in tabs_json:
is_selected = try_dig(tab, "tabRenderer", "selected") == True
if is_selected:
selected_tab = try_dig(tab, "tabRenderer")
break
about_tab = selected_tab
author_thumbnail = try_dig(yt_initial_data, "metadata", "channelMetadataRenderer", "avatar")
if author_thumbnail is None:
author_thumbnail = try_dig(yt_initial_data, "header", "c4TabbedHeaderRenderer", "avatar")
if author_thumbnail is not None:
author_thumbnails = generate_full_author_thumbnails(author_thumbnail["thumbnails"])
author_thumbnail = try_dig(author_thumbnail, "thumbnails", 0, "url")
allowed_regions = yt_initial_data["microformat"]["microformatDataRenderer"]["availableCountries"]
description = descriptionNode #todo?
sub_count = 0
sub_count_text = "0"
if auto_generated:
sub_count_text = None
else:
metadata_rows = try_dig(yt_initial_data, "header", "pageHeaderRenderer", "content", "pageHeaderViewModel", "metadata", "contentMetadataViewModel", "metadataRows")
if metadata_rows is not None:
for row in metadata_rows:
metadata_parts = try_dig(row, "metadataParts")
for part in metadata_parts:
if "subscribers" in part["text"]["content"]:
count = part["text"]["content"].split(" ")[0]
sub_count = uncompress_counter(count)
sub_count_text = count + " subscribers"
break
# Get some extra data using the continuation token
continuation = try_dig(yt_initial_data, "header", "pageHeaderRenderer", "content", "pageHeaderViewModel", "description", "descriptionPreviewViewModel", "rendererContext", "commandContext", "onTap", "innertubeCommand", "showEngagementPanelEndpoint", "engagementPanel", "engagementPanelSectionListRenderer", "content", "sectionListRenderer", "contents", 0, "itemSectionRenderer", "contents", 0, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token")
if continuation is not None:
yt_extra_data = browse(continuation=continuation)
extra_data = try_dig(yt_extra_data, "onResponseReceivedEndpoints", 0, "appendContinuationItemsAction", "continuationItems", 0, "aboutChannelRenderer", "metadata", "aboutChannelViewModel")
if extra_data is not None:
if not auto_generated:
sub_count_text = sub_count_text or extra_data["subscriberCountText"]
sub_count = sub_count or uncompress_counter(sub_count_text.split(" ")[0])
total_views = total_views or int(extra_data["viewCountText"].replace(",", "").split(" ")[0])
joined = joined or time.mktime(datetime.datetime.strptime(extra_data["joinedDateText"]["content"], "Joined %b %d, %Y").timetuple())
video_count_text = extra_data["videoCountText"]
video_count = video_count or uncompress_counter(video_count_text.split("videos")[0])
author_banners = []
if "banner" in header:
author_banners = header["banner"]["thumbnails"]
for t in author_banners:
t["url"] = normalise_url_protocol(t["url"])
if banner is not None:
for q in [{"width": 2560, "height": 424}, {"width": 2120, "height": 351}, {"width": 1060, "height": 175}]:
author_banners.append({
"url": banner.replace("=w1060-", "=w{}-".format(q["width"]), 1),
"width": q["width"],
"height": q["height"]
})
author_banners.append({
"url": banner.split("=w1060-")[0],
"width": 512,
"height": 288
})
author_thumbnails = []
avatar = header.get("avatar") or channel_metadata.get("avatar")
if avatar:
author_thumbnails = generate_full_author_thumbnails(avatar["thumbnails"])
channel = {
"author": author,
"authorId": ucid,
"authorUrl": author_url,
"authorBanners": author_banners,
"banner": banner,
"authorThumbnails": author_thumbnails,
"thumbnail": author_thumbnail,
"subCount": sub_count,
"second__subCountText": sub_count_text,
"totalViews": total_views,
"joined": joined,
"paid": None,
"autoGenerated": auto_generated,
"ageGated": age_gate_renderer is not None,
"isFamilyFriendly": is_family_friendly,
"description": description,
"descriptionHtml": add_html_links(escape_html_textcontent(description)) if description is not None else None,
"allowedRegions": allowed_regions,
"tabs": tab_names,
"tags": tags,
#"latestVideos": videos,
"videoCount": video_count,
"videoCountText": video_count_text,
"relatedChannels": []
}
latest_videos = []
tabs = yt_initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]
try:
videos_tab = next(tab["tabRenderer"] for tab in tabs if tab.get("tabRenderer", {}).get("title") == "Videos")
tab_parts = videos_tab["content"]
except StopIteration:
tab_parts = {}
channel["latestVideos"] = extract_channel_latest(ucid, second__path, channel=channel)#["videos"]
# check that the channel actually has videos - this may be replaced
# with messageRenderer.text.simpleText == "This channel has no videos."
if "richGridRenderer" in tab_parts:
videos = (
v["richItemRenderer"]["content"]["videoRenderer"] for v in tab_parts["richGridRenderer"]["contents"] if "richItemRenderer" in v
)
for v in videos:
with channel_cache_lock:
channel_cache[cache_key] = channel
return channel
def produce_channel_content_continuation(ucid, content_type, page=1, sort_by="newest", targetId=None):
# object_inner_2 = {
# "2:0:embedded": {
# "1:0:varint": 0
# },
# "5:varint": 50,
# "6:varint": 1,
# "7:varint": page * 30,
# "9:varint": 1,
# "10:varint": 0
# }
#object_inner_2_encoded = protodec(json.dumps(object_inner_2, separators=(',', ':')), "-e")
#object_inner_2_encoded = object_inner_2_encoded.split("=")[0] + "%3D"
content_type_numerical = 15
if content_type == "streams":
content_type_numerical = 14
elif content_type == "shorts":
content_type_numerical = 10
sort_by_numerical = 1
if sort_by == "popular":
sort_by_numerical = 2
elif sort_by == "oldest":
sort_by_numerical = 4
object = {
"80226972:embedded": {
"2:string": ucid,
"3:base64": {
"110:embedded": {
"3:embedded": {
"{}:embedded".format(content_type_numerical): {
"2:string": "\n${}".format(targetId),
"4:varint": sort_by_numerical == 2 and 2 or 5
}
}
}
}
}
}
continuation = protodec(json.dumps(object, separators=(',', ':')), "-e")
#continuation = continuation.split("=")[0]
return continuation
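# Example (hypothetical IDs), mirroring the call in extract_channel_videos below:
# token = produce_channel_content_continuation("UC...", "videos", sort_by="popular", targetId=target_id)
# yt_initial_data = browse(continuation=token)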
def extract_videos_from_initial_data(yt_initial_data, channel, content_type):
content = try_dig(yt_initial_data, "contents")
videoItems = None
# "content"
if content is not None:
tabs = try_dig(content, "twoColumnBrowseResultsRenderer", "tabs")
active_tab = tabs[0]
for tab in tabs:
if "selected" in tab["tabRenderer"] and tab["tabRenderer"]["selected"]:
active_tab = tab
break
if content_type == "playlists":
videoItems = try_dig(active_tab, "tabRenderer", "content", "sectionListRenderer", "contents", 0, "itemSectionRenderer", "contents", 0, "gridRenderer", "items")
else:
videoItems = try_dig(active_tab, "tabRenderer", "content", "richGridRenderer", "contents")
# "response"
if content is None:
content = try_dig(yt_initial_data, "response")
# I DONT KNOW HOW TO GET THE CONTINUATION TOKEN HERE WAHHHH
if content is not None:
with open("PLEASE LOOK.txt", "w") as f:
f.write(json.dumps(content))
# "onResponseReceivedActions"
if content is None:
content = try_dig(yt_initial_data, "onResponseReceivedActions")
if content is not None:
content = content[-1]
videoItems = try_dig(content, "reloadContinuationItemsCommand", "continuationItems")
if videoItems is None:
videoItems = try_dig(content, "appendContinuationItemsAction", "continuationItems")
with open("meow.txt", "w") as f:
f.write(json.dumps(videoItems))
continuation = try_dig(videoItems[-1], "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token")
# Parse videos
videosToParse = videoItems
videos = []
for v in videosToParse:
if "continuationItemRenderer" in v:
continue
base = try_dig(v, "videoRenderer")
if content_type == "playlists":
base = try_dig(v, "lockupViewModel")
if base is None:
base = try_dig(v, "gridPlaylistRenderer")
elif base is None:
base = try_dig(v, "richItemRenderer", "content", content_type == "shorts" and "shortsLockupViewModel" or "videoRenderer")
if content_type != "playlists" or not "lockupViewModel" in v:
is_upcoming = False
live = False
length_text = None
length_seconds = None
if "thumbnailOverlays" in base:
for o in base["thumbnailOverlays"]:
if "thumbnailOverlayTimeStatusRenderer" in o:
length_text = combine_runs(o["thumbnailOverlayTimeStatusRenderer"]["text"])
length_text_style = o["thumbnailOverlayTimeStatusRenderer"]["style"]
if length_text_style == "DEFAULT":
length_seconds = length_text_to_seconds(length_text)
elif length_text_style == "LIVE":
live = True
elif length_text_style == "UPCOMING":
is_upcoming = True
if length_text is None and "lengthText" in base:
length_text = combine_runs(base["lengthText"])
length_seconds = length_text_to_seconds(length_text)
# Shorts
if content_type == "shorts":
title = try_dig(base, "overlayMetadata", "primaryText", "content")
video_id = try_dig(base, "onTap", "innertubeCommand", "reelWatchEndpoint", "videoId")
description = None
description_html = None
view_text = try_dig(base, "overlayMetadata", "secondaryText", "content")
view_count = uncompress_counter(view_text.split(" views")[0])
view_text_short = view_text
published = None
published_text = None
live = False
is_upcoming = False
length_text = "UNKNOWN"
length_seconds = -1
for o in v["thumbnailOverlays"]:
if "thumbnailOverlayTimeStatusRenderer" in o:
length_text = combine_runs(o["thumbnailOverlayTimeStatusRenderer"]["text"])
length_text_style = o["thumbnailOverlayTimeStatusRenderer"]["style"]
if length_text_style == "DEFAULT":
length_seconds = length_text_to_seconds(length_text)
elif length_text_style == "LIVE":
live = True
elif length_text_style == "UPCOMING":
is_upcoming = True
published = 0
published_text = "Live now"
premiere_timestamp = None
if "publishedTimeText" in v:
published_text = v["publishedTimeText"]["simpleText"]
published = past_text_to_time(published_text)
if "upcomingEventData" in v:
premiere_timestamp = v["upcomingEventData"]["startTime"]
published_text = time_to_past_text(int(premiere_timestamp))
# Playlists
elif content_type == "playlists":
if "lockupViewModel" in v:
metadata = try_dig(base, "metadata", "lockupMetadataViewModel")
title = try_dig(metadata, "title", "content")
playlist_id = try_dig(base, "contentId")
playlist_thumbnail = try_dig(base, "contentImage", "collectionThumbnailViewModel", "primaryThumbnail", "thumbnailViewModel", "image", "sources", 0, "url")
video_count_text = try_dig(base, "contentImage", "collectionThumbnailViewModel", "primaryThumbnail", "thumbnailViewModel", "overlays", 0, "thumbnailOverlayBadgeViewModel", "thumbnailBadges", 0, "thumbnailBadgeViewModel", "text")
if video_count_text is not None:
video_count = int(video_count_text.split(" ")[0])
else:
video_count = None
updated_text = try_dig(metadata, "metadata", "contentMetadataViewModel", "metadataRows", 0, "metadataParts", 0, "text", "content")
if updated_text is not None and updated_text.find("dated ") > -1:
updated = past_text_to_time(updated_text.split("dated ")[1])
else:
updated = None
updated_text = None
else:
title = try_dig(base, "title", combine=True)
playlist_id = try_dig(base, "playlistId")
playlist_thumbnail = try_dig(base, "thumbnail", "thumbnails", -1, "url")
video_count_text = try_dig(base, "videoCountText", combine=True)
if video_count_text is not None:
video_count = int(video_count_text.split(" ")[0])
else:
video_count = None
updated_text = None
updated = None
# Normal
else:
title = combine_runs(base["title"])
video_id = base["videoId"]
description = combine_runs(base["descriptionSnippet"])
description_html = add_html_links(escape_html_textcontent(combine_runs(base["descriptionSnippet"])))
view_text = combine_runs(base["viewCountText"])
view_count = uncompress_counter(view_text.split(" ")[0])
view_text_short = combine_runs(base["shortViewCountText"]) if "shortViewCountText" in base else view_text
published_text = combine_runs(base["publishedTimeText"])
published = past_text_to_time(published_text)
view_count_text = combine_runs(v["viewCountText"]) if "viewCountText" in v else None
view_count_text_short = combine_runs(v["shortViewCountText"]) if "shortViewCountText" in v else None
latest_videos.append({
if content_type != "playlists":
videos.append({
"type": "video",
"title": combine_runs(v["title"]),
"videoId": v["videoId"],
"author": author,
"authorId": author_id,
"authorUrl": author_url,
"videoThumbnails": generate_video_thumbnails(v["videoId"]),
"description": "",
"descriptionHtml": "",
"viewCount": view_count_text_to_number(view_count_text),
"second__viewCountText": view_count_text,
"second__viewCountTextShort": view_count_text_short,
"title": title,
"videoId": video_id,
"author": channel["author"],
"authorId": channel["authorId"],
"authorUrl": channel["authorUrl"],
"videoThumbnails": generate_video_thumbnails(video_id),
"description": description,
"descriptionHtml": description_html,
"viewCount": view_count,
"second__viewCountText": view_text,
"second__viewCountTextShort": view_text_short,
"published": published,
"publishedText": published_text,
"lengthSeconds": length_seconds,
@ -131,43 +401,99 @@ def extract_channel(ucid, second__path="user"):
"liveNow": live,
"paid": None,
"premium": None,
"isUpcoming": is_upcoming,
"premiereTimestamp": premiere_timestamp
"isUpcoming": is_upcoming
})
else:
videos.append({
"type": "playlist",
"title": title,
"playlistId": playlist_id,
"playlistThumbnail": playlist_thumbnail,
"author": channel["author"],
"authorId": channel["authorId"],
"authorUrl": channel["authorUrl"],
"videoCount": video_count,
"videoCountText": video_count_text,
"second__videoCountText": video_count_text,
"videos": [],
"updatedText": updated_text,
"second__updatedText": updated_text,
"updated": updated
})
channel = {
"author": author,
"authorId": author_id,
"authorUrl": author_url,
"authorBanners": author_banners,
"authorThumbnails": author_thumbnails,
"subCount": uncompress_counter(subscriber_count.split(" ")[0]),
"second__subCountText": subscriber_count,
"totalViews": None,
"joined": None,
"paid": None,
"autoGenerated": None,
"isFamilyFriendly": None,
"description": description,
"descriptionHtml": add_html_links(escape_html_textcontent(description)),
"allowedRegions": allowed_regions,
"latestVideos": latest_videos,
"relatedChannels": []
return {
(content_type == "playlists" and "playlists" or "videos"): videos,
"continuation": continuation
}
with channel_cache_lock:
channel_cache[cache_key] = channel
return channel
def extract_channel_videos(ucid, second__path="channel"):
channel = extract_channel(ucid, second__path)
# UULF - videos
# UUSH - shorts
# UULV - streams
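# e.g. for channel "UC" + x, playlist "UULF" + x lists its uploads (the "videos" tab)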
def extract_channel_videos(ucid, content_type, second__path="channel", **kwargs):
channel = None
if "channel" in kwargs:
channel = kwargs["channel"]
else:
channel = extract_channel_new(ucid, second__path)
if "error" in channel:
return channel
else:
return channel["latestVideos"]
# Reads the channel like a playlist
if channel["ageGated"]:
return extract_channel_videos_as_playlist(ucid, content_type, second__path, **kwargs)
# Uses YouTube's content-sorting continuations, based on how the channel's content is sorted (not fully understood)
else:
continuation = None
params = None
def extract_channel_latest(ucid):
# Videos
if content_type == "videos":
params = "EgZ2aWRlb3PyBgQKAjoA"
# Shorts
elif content_type == "shorts":
params = "8gYFCgOaAQA%3D"
# Streams
elif content_type == "streams":
params = "EgdzdHJlYW1z8gYECgJ6AA%3D%3D"
if "sort_by" in kwargs and kwargs["sort_by"] != "newest":
yt_initial_data = browse(browseId=ucid, params=params)
tabs = try_dig(yt_initial_data, "contents", "twoColumnBrowseResultsRenderer", "tabs")
active_tab = tabs[0]
for tab in tabs:
if "selected" in tab["tabRenderer"]:
active_tab = tab
break
target_id = try_dig(active_tab, "tabRenderer", "content", "richGridRenderer", "targetId")
continuation = produce_channel_content_continuation(channel["authorId"], content_type, 1, kwargs["sort_by"], target_id)
params = None
if params is not None:
yt_initial_data = browse(browseId=ucid, params=params)
else:
yt_initial_data = browse(continuation=continuation)
return extract_videos_from_initial_data(yt_initial_data, channel, content_type)
def extract_channel_videos_as_playlist(ucid, content_type, second__path="channel", **kwargs):
channel = extract_channel_new(ucid, second__path)
if "error" in channel:
return channel
else:
plid = channel["authorId"].replace("UC", {"videos": "UULF", "shorts": "UUSH", "streams": "UULV"}[content_type], 1)
offset = 0
if "continuation" in kwargs:
offset = parse_playlist_continuation(kwargs["continuation"])
videos = get_playlist_videos(plid, offset)
return {
"videos": videos,
"continuation": len(videos) > 0 and produce_playlist_continuation(plid, len(videos) + offset) or None
}
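# Usage sketch (hypothetical ID):
# extract_channel_videos_as_playlist("UCxxxxxxxxxxxxxxxxxxxxxx", "shorts")
# -> {"videos": [...], "continuation": "..." or None}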
#def extract_channel_latest(ucid, second__path="channel", channel=None):
#return extract_channel_videos(ucid, "videos", second__path, channel=channel)
# TODO: replace this with whatever YouTube uses; information like video length is missing
def extract_channel_latest(ucid, second__path, **kwargs):
with channel_latest_cache_lock:
if ucid in channel_latest_cache:
return channel_latest_cache[ucid]
@ -231,3 +557,29 @@ def extract_channel_latest(ucid):
channel_latest_cache[ucid] = results
return results
def extract_channel_playlists(ucid, second__path, **kwargs):
channel = extract_channel_new(ucid, second__path)
if "error" in channel:
return channel
else:
sort_by = "newest"
if "sort" in kwargs:
sort_by = kwargs["sort"]
elif "sort_by" in kwargs:
sort_by = kwargs["sort_by"]
sort_by = sort_by.lower()
yt_initial_data = None
if "continuation" in kwargs:
yt_initial_data = browse(continuation=kwargs["continuation"])
else:
params = "EglwbGF5bGlzdHMYBCABMAE%3D"
if sort_by == "newest" or sort_by == "newest_created":
params = "EglwbGF5bGlzdHMYAyABMAE%3D"
yt_initial_data = browse(browseId=ucid, params=params)
with open("meow.json", "w") as f:
f.write(json.dumps(yt_initial_data))
return extract_videos_from_initial_data(yt_initial_data, channel, "playlists")

View File

@ -3,7 +3,9 @@ import requests
import urllib.parse
from tools.converters import *
from tools.extractors import extract_yt_initial_data, extract_yt_cfg, eu_consent_cookie
from tools.invidious_ported import next
# TODO: support extracting comments from continuation
def extract_comments(id, **kwargs):
s = requests.session()
s.headers.update({"accept-language": "en-US,en;q=0.9"})
@ -12,36 +14,210 @@ def extract_comments(id, **kwargs):
r.raise_for_status()
yt_initial_data = extract_yt_initial_data(r.content.decode("utf8"))
item = yt_initial_data["contents"]["twoColumnWatchNextResults"]["results"]["results"]["contents"][2]["itemSectionRenderer"]
continuation = item["continuations"][0]["nextContinuationData"]["continuation"]
itct = item["continuations"][0]["nextContinuationData"]["clickTrackingParams"]
xsrf_token = extract_yt_cfg(r.content.decode("utf8")).get("XSRF_TOKEN", None)
if not xsrf_token:
cherrypy.response.status = 500
continuation = item["contents"][0]["continuationItemRenderer"]["continuationEndpoint"]["continuationCommand"]["token"]
yt_initial_data = next(continuation=continuation)
contents = None
header = None
body = None
if "onResponseReceivedEndpoints" in yt_initial_data:
for endpoint in yt_initial_data["onResponseReceivedEndpoints"]:
if "reloadContinuationItemsCommand" in endpoint:
slot = endpoint["reloadContinuationItemsCommand"]["slot"]
if slot == "RELOAD_CONTINUATION_SLOT_HEADER":
header = endpoint["reloadContinuationItemsCommand"]["continuationItems"][0]
elif slot == "RELOAD_CONTINUATION_SLOT_BODY":
contents = try_dig(endpoint, "reloadContinuationItemsCommand", "continuationItems")
elif "appendContinuationItemsAction" in endpoint:
contents = endpoint["appendContinuationItemsAction"]["continuationItems"]
elif "continuationContents" in yt_initial_data:
yt_initial_data = yt_initial_data["continuationContents"]
if "commentRepliesContinuation" in yt_initial_data:
body = yt_initial_data["commentRepliesContinuation"]
else:
body = yt_initial_data["itemSectionContinuation"]
contents = try_dig(body, "contents")
header = try_dig(body, "header")
if contents is None:
return {
"error": "NewLeaf was unable to obtain XSRF_TOKEN from ytcfg.",
"identifier": "XSRF_TOKEN_NOT_FOUND"
"commentCount": 0,
"comments": []
}
url = "https://www.youtube.com/comment_service_ajax?action_get_comments=1&pbj=1&ctoken={}&continuation={}&type=next&itct={}".format(continuation, continuation, urllib.parse.quote_plus(itct))
with s.post(url, headers={"x-youtube-client-name": "1", "x-youtube-client-version": "2.20210422.04.00"}, data={"session_token": xsrf_token}) as rr:
data = json.loads(rr.content.decode("utf8"))
return {
"videoId": id,
"comments": [
{
"author": c["commentThreadRenderer"]["comment"]["commentRenderer"]["authorText"]["simpleText"],
"authorThumbnails": [x for x in c["commentThreadRenderer"]["comment"]["commentRenderer"]["authorThumbnail"]["thumbnails"]],
"authorId": c["commentThreadRenderer"]["comment"]["commentRenderer"]["authorEndpoint"]["browseEndpoint"]["browseId"],
"authorUrl": c["commentThreadRenderer"]["comment"]["commentRenderer"]["authorEndpoint"]["browseEndpoint"]["canonicalBaseUrl"],
"isEdited": " (edited)" in "".join([x["text"] for x in c["commentThreadRenderer"]["comment"]["commentRenderer"]["publishedTimeText"]["runs"]]),
"content": "".join([x["text"] for x in c["commentThreadRenderer"]["comment"]["commentRenderer"]["contentText"]["runs"]]),
"contentHtml": escape_html_textcontent("".join([x["text"] for x in c["commentThreadRenderer"]["comment"]["commentRenderer"]["contentText"]["runs"]])),
"publishedText": "".join([x["text"] for x in c["commentThreadRenderer"]["comment"]["commentRenderer"]["publishedTimeText"]["runs"]]),
# "likeCount": int(c["commentThreadRenderer"]["comment"]["commentRenderer"]["voteCount"]["simpleText"].replace(",", ""))
"commentId": c["commentThreadRenderer"]["comment"]["commentRenderer"]["commentId"],
"authorIsChannelOwner": c["commentThreadRenderer"]["comment"]["commentRenderer"]["authorIsChannelOwner"],
# "replies": {
# "replyCount": c["commentThreadRenderer"]["comment"]["commentRenderer"]["replyCount"]
# }
} for c in data["response"]["continuationContents"]["itemSectionContinuation"]["contents"]
]
continuation_item_renderer = None
for content in contents:
if "continuationItemRenderer" in content:
continuation_item_renderer = content["continuationItemRenderer"]
contents.remove(content)
break
mutations = try_dig(yt_initial_data, "frameworkUpdates", "entityBatchUpdate", "mutations") or []
response = {}
if header is not None:
count_text = combine_runs(header["commentsHeaderRenderer"]["countText"])
response["commentCount"] = view_count_text_to_number(count_text)
# TODO
response["videoId"] = id
response["comments"] = []
reply_count = 0
for node in contents:
comment_data = {}
response["comments"].append(comment_data)
if "commentThreadRenderer" in node:
node = node["commentThreadRenderer"]
node_replies = None
if "replies" in node:
node_replies = node["replies"]["commentRepliesRenderer"]
cvm = try_dig(node, "commentViewModel")
if cvm is not None:
if "commentViewModel" in cvm:
cvm = cvm["commentViewModel"]
comment_key = cvm["commentKey"]
toolbar_key = cvm["toolbarStateKey"]
comment_mutation = None
toolbar_mutation = None
for mutation in mutations:
key = try_dig(mutation, "payload", "commentEntityPayload", "key")
if key == comment_key:
comment_mutation = mutation
break
for mutation in mutations:
key = try_dig(mutation, "entityKey")
if key == toolbar_key:
toolbar_mutation = mutation
break
if comment_mutation is not None and toolbar_mutation is not None:
comment_content = try_dig(comment_mutation, "payload", "commentEntityPayload", "properties", "content")
comment_data["content"] = try_dig(comment_content, "content", combine=True)
comment_author = comment_mutation["payload"]["commentEntityPayload"]["author"]
comment_data["author"] = comment_author["displayName"]
comment_data["authorId"] = comment_author["channelId"]
comment_data["authorUrl"] = "/channel/{}".format(comment_author["channelId"])
comment_data["verified"] = comment_author["isVerified"]
comment_data["authorThumbnails"] = try_dig(comment_mutation, "payload", "commentEntityPayload", "avatar", "image", "sources")
comment_data["authorIsChannelOwner"] = comment_author["isCreator"]
comment_data["isSponsor"] = "sponsorBadgeUrl" in comment_author
if comment_data["isSponsor"]:
comment_data["sponsorIconUrl"] = comment_author["sponsorBadgeUrl"]
comment_toolbar = try_dig(comment_mutation, "payload", "commentEntityPayload", "toolbar")
comment_data["second__likeCount"] = comment_toolbar["likeCountA11y"] if "likeCountA11y" in comment_toolbar else None
comment_data["likeCount"] = uncompress_counter(comment_toolbar["likeCountA11y"].split(" ")[0])
comment_data["second__replyText"] = comment_toolbar["replyCountA11y"]
reply_count = uncompress_counter(try_dig(comment_toolbar, "replyCount") or "0")
heart_state = try_dig(toolbar_mutation, "payload", "engagementToolbarStateEntityPayload", "heartState")
if heart_state is not None and heart_state == "TOOLBAR_HEART_STATE_HEARTED":
comment_data["creatorHeart"] = {
"creatorThumbnail": comment_toolbar["creatorThumbnailUrl"],
"creatorName": comment_toolbar["heartActiveTooltip"].replace("❤ by ", "")
}
comment_data["publishedText"] = try_dig(comment_mutation, "payload", "commentEntityPayload", "properties", "publishedTime", combine=True)
comment_data["isPinned"] = "pinnedText" in cvm
comment_data["commentId"] = cvm["commentId"]
else:
if "comment" in node:
node_comment = node["comment"]["commentRenderer"]
else:
node_comment = node["commentRenderer"]
comment_data["commentId"] = node_comment["commentId"]
comment_content = {"content": try_dig(node_comment, "contentText")} if "contentText" in node_comment else {"content": ""}
comment_data["content"] = comment_content["content"]
comment_data["verified"] = "authorCommentBadge" in node_comment
comment_data["author"] = try_dig(node_comment, "authorText", combine=True)
comment_data["authorThumbnails"] = try_dig(node_comment, "authorThumbnails", "thumbnails")
comment_action_buttons_renderer = try_dig(node_comment, "actionButtons", "commentActionButtonsRenderer")
if comment_action_buttons_renderer is not None:
comment_data["likeCount"] = int(try_dig(comment_action_buttons_renderer, "likeButton", "toggleButtonRenderer", "accessibilityData", "accessibilityData", "label").split(" ")[0])
comment_data["second__likeCount"] = "{} like{}".format(comment_data["likeCount"], "s" if comment_data["likeCount"] != 1 else "")
if "creatorHeart" in comment_action_buttons_renderer:
heart_data = try_dig(comment_action_buttons_renderer, "creatorHeart", "creatorHeartRenderer", "creatorThumbnail")
comment_data["creatorHeart"] = {
"creatorThumbnail": try_dig(heart_data, "thumbnails", -1, "url"),
"creatorName": try_dig(heart_data, "accessibility", "accessibilityData", "label")
}
comment_data["authorId"] = try_dig(node_comment, "authorEndpoint", "browseEndpoint", "browseId")
comment_data["authorUrl"] = try_dig(node_comment, "authorEndpoint", "browseEndpoint", "canonicalBaseUrl")
comment_data["authorIsChannelOwner"] = "authorIsChannelOwner" in node_comment
comment_data["isPinned"] = "pinnedCommentBadge" in node_comment
comment_data["publishedText"] = try_dig(node_comment, "publishedTimeText", combine=True)
comment_data["isSponsor"] = "sponsorCommentBadge" in node_comment
if comment_data["isSponsor"]:
comment_data["sponsorIconUrl"] = try_dig(node_comment, "sponsorCommentBadge", "sponsorCommentBadgeRenderer", "customBadge", "thumbnails", 0, "url")
reply_count = node_comment["replyCount"]
comment_data["contentHtml"] = parse_comment_content(comment_content)
if "publishedText" in comment_data and comment_data["publishedText"] is not None:
comment_data["published"] = past_text_to_time(comment_data["publishedText"].split(" (edited)")[0])
comment_data["isEdited"] = comment_data["publishedText"].find(" (edited)") > -1
continuation = None
if node_replies is not None and not "commentRepliesContinuation" in response:
if "continuations" in node_replies:
continuation = try_dig(node_replies, "continuations", 0, "nextContinuationData", "continuation")
if continuation is None:
continuation = try_dig(node_replies, "contents", 0, "continuationItemRenderer", "continuationEndpoint", "continuationCommand", "token")
comment_data["replies"] = {
"replyCount": reply_count,
"second__replyText": comment_data["second__replyText"] if "second__replyText" in comment_data else None,
"continuation": continuation
}
comment_data["second__replyText"] = None
if continuation_item_renderer is not None:
continuation_endpoint = try_dig(continuation_item_renderer, "continuationEndpoint") or try_dig(continuation_item_renderer, "button", "buttonRenderer", "command")
if continuation_endpoint is not None:
response["continuation"] = continuation_endpoint["continuationCommand"]["token"]
if not "commentCount" in response:
response["commentCount"] = 0
#response["commentCount"] = len(response["comments"])
return response
def parse_comment_content(content):
contentOffset = 0
lastEnd = 0
segments = []
if "attachmentRuns" in content:
for attachment in content["attachmentRuns"]:
start = attachment["startIndex"] + contentOffset
stop = start + attachment["length"]
segments.append(escape_html_textcontent(content["content"][lastEnd:start]))
substitution = "<img "
element = attachment["element"]
if "height" in element["properties"]["layoutProperties"]:
substitution += "height={} ".format(element["properties"]["layoutProperties"]["height"]["value"])
if "width" in element["properties"]["layoutProperties"]:
substitution += "width={} ".format(element["properties"]["layoutProperties"]["width"]["value"])
substitution += "src='{}'".format(element["type"]["imageType"]["image"]["sources"][0]["url"])
substitution += ">"
contentOffset += len(substitution) - (stop - start)
lastEnd = stop + len(substitution) - (stop - start)
segments.append(substitution)
content["content"] = content["content"][:start] + substitution + content["content"][stop:]
segments.append(escape_html_textcontent(content["content"][lastEnd:]))
return "".join(segments)
#return escape_html_textcontent(content["content"])

268
extractors/playlist.py Normal file
View File

@ -0,0 +1,268 @@
from tools.converters import *
from tools.invidious_ported import browse, next, protodec
import json, re, datetime, time
from threading import Lock
from cachetools import TTLCache
playlist_cache = TTLCache(maxsize=50, ttl=300)
playlist_cache_lock = Lock()
def produce_playlist_continuation(id, index):
if id.startswith("UC"):
id = "UU" + id.split("UC", 1)[1]
plid = "VL" + id
request_count = index / 100
data = {"1:varint": index}
data = protodec(json.dumps(data, separators=(',', ':')), "-e")
data = data.split("=")[0]
object = {
"80226972:embedded": {
"2:string": plid,
"3:base64": {
"1:varint": int(request_count),
"15:string": "PT:{}".format(data),
"104:embedded": {"1:0:varint": 0},
},
"35:string": id,
}
}
object = json.dumps(object, separators=(',', ':'))
continuation = protodec(object, "-e")
continuation = continuation.split("=")[0]
return continuation
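# Example (hypothetical ID): a token for videos 100-199 of an uploads playlist,
# as used by get_playlist_videos below:
# token = produce_playlist_continuation("UUxxxxxxxxxxxxxxxxxxxxxx", 100)
# yt_initial_data = browse(continuation=token)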
def parse_playlist_continuation(continuation):
object = protodec(continuation, "-db")
object = json.loads(object)
data = object["80226972:0:embedded"]["3:1:base64"]["15:1:string"]
data = data.split("PT:")[1]
data = protodec(data, "-db")
data = json.loads(data)
data = data["1:0:varint"]
return data
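# Round trip: parse_playlist_continuation(produce_playlist_continuation(plid, 100))
# recovers the offset 100.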
def fetch_playlist(plid):
if plid.startswith("UC"):
plid = "UU" + plid.split("UC", 1)[1]
with playlist_cache_lock:
if plid in playlist_cache:
return playlist_cache[plid]
yt_initial_data = browse(browseId="VL" + plid)
playlist_sidebar_renderer = try_dig(yt_initial_data, "sidebar", "playlistSidebarRenderer", "items")
if playlist_sidebar_renderer is None:
raise "Could not extract playlistSidebarRenderer."
playlist_info = try_dig(playlist_sidebar_renderer, 0, "playlistSidebarPrimaryInfoRenderer")
if playlist_info is None:
raise "Could not extract playlist info"
title = try_dig(playlist_info, "title", "runs", 0, "text")
desc_item = playlist_info["description"] if "description" in playlist_info else None
if desc_item is not None:
description_txt = combine_runs(desc_item)
description_html = add_html_links(escape_html_textcontent(description_txt))
else:
description_txt = None
description_html = None
thumbnail = try_dig(playlist_info, "thumbnailRenderer", "playlistVideoThumbnailRenderer", "thumbnail", "thumbnails", 0, "url")
views = 0
updated = 0
video_count = 0
subtitle = try_dig(yt_initial_data, "header", "playlist", "subtitle", combine=True)
stats = playlist_info["stats"]
if stats is not None:
for stat in stats:
text = combine_runs(stat)
if text is None:
continue
if "video" in text or "episode" in text:
video_count = int(re.sub("\D", "", text))
elif "view" in text:
views = re.sub("\D", "", text)
elif "updated" in text.lower():
if "Last updated on" in text:
updated = time.mktime(datetime.datetime.strptime(text, "Last updated on %b %d, %Y").timetuple())
else:
updated = past_text_to_time(text.split("Updated ")[1])
# if "seconds ago" in text:
# updated = datetime.datetime.utcnow() - datetime.timedelta(seconds=int(re.sub("\D", "", text)))
# elif "minutes ago" in text:
# updated = datetime.datetime.utcnow() - datetime.timedelta(minutes=int(re.sub("\D", "", text)))
# elif "hours ago" in text:
# updated = datetime.datetime.utcnow() - datetime.timedelta(hours=int(re.sub("\D", "", text)))
# elif "days ago" in text:
# updated = datetime.datetime.utcnow() - datetime.timedelta(days=int(re.sub("\D", "", text)))
# elif "weeks ago" in text:
# updated = datetime.datetime.utcnow() - datetime.timedelta(days=int(re.sub("\D", "", text)) * 7)
# elif "months ago" in text:
# updated = datetime.datetime.utcnow() - datetime.timedelta(days=int(re.sub("\D", "", text)) * 30)
if len(playlist_sidebar_renderer) < 2:
author = None
author_thumbnail = None
ucid = None
else:
author_info = try_dig(playlist_sidebar_renderer, 1, "playlistSidebarSecondaryInfoRenderer", "videoOwner", "videoOwnerRenderer")
if author_info is None:
raise "Could not extract author info"
author = try_dig(author_info, "title", "runs", 0, "text")
author_thumbnail = try_dig(author_info, "thumbnail", "thumbnails", 0, "url")
ucid = try_dig(author_info, "title", "runs", 0, "navigationEndpoint", "browseEndpoint", "browseId")
playlist = {
"title": title,
"id": plid,
"author": author,
"author_thumbnail": author_thumbnail,
"ucid": ucid,
"description": description_txt,
"description_html": description_html,
"video_count": video_count,
"views": views,
"updated": updated,
"thumbnail": thumbnail,
"subtitle": subtitle
}
with playlist_cache_lock:
playlist_cache[plid] = playlist
return playlist
def get_playlist_videos(plid, offset, videoId=None):
pl = fetch_playlist(plid)
if offset >= pl["video_count"] or offset < 0:
return []
if videoId is not None:
yt_initial_data = next(data={"videoId": videoId, "playlistId": pl["id"]})
new_offset = try_dig(yt_initial_data, "contents", "twoColumnWatchNextResults", "playlist", "playlist", "currentIndex")
if new_offset is not None:
offset = new_offset
videos = []
while True:
ctoken = produce_playlist_continuation(pl["id"], offset)
yt_initial_data = browse(continuation=ctoken)
for video in extract_playlist_videos(yt_initial_data):
videos.append(video)
offset = offset + 100
if len(videos) >= 200 or len(videos) == pl["video_count"] or offset >= pl["video_count"]:
break
break
return videos
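# Usage sketch (hypothetical ID): first batch of a channel's uploads:
# videos = get_playlist_videos("UUxxxxxxxxxxxxxxxxxxxxxx", 0)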
def extract_playlist_videos(yt_initial_data):
videos = []
if "contents" in yt_initial_data:
tabs = yt_initial_data["contents"]["twoColumnBrowseResultsRenderer"]["tabs"]
tabs_renderer = []
for tab in tabs:
if tab["tabRenderer"]["selected"] and ("contents" in tab["tabRenderer"] or "content" in tab["tabRenderer"]):
tabs_renderer = tab["tabRenderer"]
break
if "contents" in tabs_renderer or "content" in tabs_renderer:
tab_contents = tabs_renderer["contents"] if "contents" in tabs_renderer else (tabs_renderer["content"])
list_renderer = tab_contents["sectionListRenderer"]["contents"][0]
item_renderer = list_renderer["itemSectionRenderer"]["contents"][0]
contents = item_renderer["playlistVideoListRenderer"]["contents"]
else:
contents = try_dig(yt_initial_data, "onResponseReceivedActions", 0, "appendContinuationItemsAction", "continuationItems")
else:
contents = try_dig(yt_initial_data, "response", "continuationContents", "playlistVideoListContinuation", "contents")
if contents is not None:
for v in contents:
if not "playlistVideoRenderer" in v:
continue
v = v["playlistVideoRenderer"]
video_id = v["navigationEndpoint"]["watchEndpoint"]["videoId"]
plid = v["navigationEndpoint"]["watchEndpoint"]["playlistId"]
index = v["navigationEndpoint"]["watchEndpoint"]["index"]
title = try_dig(v, "title", combine=True)
author = try_dig(v, "shortBylineText", "runs", 0, "text")
ucid = try_dig(v, "shortBylineText", "runs", 0, "navigationEndpoint", "browseEndpoint", "browseId")
length_seconds = v["lengthSeconds"] if "lengthSeconds" in v else None
live = False
is_upcoming = False
length_text = "UNKNOWN"
if length_seconds is None:
live = True
length_seconds = 0
for o in v["thumbnailOverlays"]:
if "thumbnailOverlayTimeStatusRenderer" in o:
length_text = combine_runs(o["thumbnailOverlayTimeStatusRenderer"]["text"])
length_text_style = o["thumbnailOverlayTimeStatusRenderer"]["style"]
if length_text_style == "DEFAULT":
length_seconds = length_text_to_seconds(length_text)
elif length_text_style == "LIVE":
live = True
elif length_text_style == "UPCOMING":
is_upcoming = True
published = 0
published_text = "Live now"
premiere_timestamp = None
view_count_text = "0 views"
for run in v["videoInfo"]["runs"]:
if run["text"].endswith("views"):
view_count_text = run["text"]
elif len(run["text"].split(" ")) == 3 or run["text"].startswith("Streamed"):
published_text = run["text"]
if published_text != "Live now":
published = past_text_to_time(published_text)
# TODO: I don't know what this looks like...
if "upcomingEventData" in v:
premiere_timestamp = v["upcomingEventData"]["startTime"]
published_text = time_to_past_text(int(premiere_timestamp))
if view_count_text != "0 views":
view_count_text_short = view_count_text
view_count_text = uncompress_counter(view_count_text.split(" ")[0])
videos.append({
"type": "video",
"title": title,
"videoId": video_id,
"id": video_id,
"author": author,
"ucid": ucid,
"length_seconds": length_seconds,
"lengthSeconds": length_seconds,
"second__lengthText": length_text,
"viewCount": view_count_text,
"second__viewCountText": view_count_text_short,
"second__viewCountTextShort": view_count_text_short,
"published": published,
"publishedText": published_text,
"plid": plid,
"live_now": live,
"isUpcoming": is_upcoming,
"premiereTimestamp": premiere_timestamp,
"index": index
})
return videos

View File

@ -35,10 +35,14 @@ def extract_search(q):
if "videoRenderer" in item:
video = item["videoRenderer"]
published = 0
published_text = "Live now"
published_text = None
live = is_live(video)
if "publishedTimeText" in video:
published_text = video["publishedTimeText"]["simpleText"]
published = past_text_to_time(published_text)
else:
if live:
published_text = "Live now"
results.append({
"type": "video",
"title": combine_runs(video["title"]),
@ -55,7 +59,7 @@ def extract_search(q):
"publishedText": published_text,
"lengthSeconds": get_length_or_live_now(video),
"second__lengthText": get_length_text_or_live_now(video),
"liveNow": is_live(video),
"liveNow": video,
"paid": None,
"premium": None,
"isUpcoming": None

View File

@ -74,9 +74,10 @@ def extract_video(id):
"publishedText": None,
"keywords": None,
"viewCount": info["view_count"],
"viewCountText": None,
"second__viewCountText": None,
"second__viewCountTextShort": None,
"likeCount": 0,
"likeCount": info["like_count"],#0,
"dislikeCount": 0,
"paid": None,
"premium": None,
@ -102,7 +103,7 @@ def extract_video(id):
"adaptiveFormats": [],
"formatStreams": [],
"captions": [],
"recommendedVideos": []
"recommendedVideos": info["recommendedVideos"] if "recommendedVideos" in info else []
}
# result = info["formats"]
@ -270,7 +271,7 @@ def get_more_stuff_from_file(id, result):
"viewCountText": get_view_count_text_or_recommended(r),
"viewCount": get_view_count_or_recommended(r),
"second__liveNow": is_live(r)
} for r in [get_useful_recommendation_data(r) for r in recommendations if get_useful_recommendation_data(r)])
} for r in [get_useful_recommendation_data(r) for r in recommendations if get_useful_recommendation_data(r)]) if len(result["recommendedVideos"]) == 0 else result["recommendedVideos"]
# m_yt_player_config = re.search(r_yt_player_config, content)
# if m_yt_player_config:
@ -281,32 +282,33 @@ def get_more_stuff_from_file(id, result):
# result = player_response
# return result
if "dashManifestUrl" in player_response["streamingData"]:
result["second__providedDashUrl"] = player_response["streamingData"]["dashManifestUrl"]
result["liveNow"] = player_response["videoDetails"]["isLiveContent"]
if "streamingData" in player_response:
if "dashManifestUrl" in player_response["streamingData"]:
result["second__providedDashUrl"] = player_response["streamingData"]["dashManifestUrl"]
result["liveNow"] = player_response["videoDetails"]["isLiveContent"]
itagDict = {}
for f in player_response["streamingData"]["adaptiveFormats"]:
if "indexRange" in f:
itagDict[str(f["itag"])] = {
"initRange": f["initRange"],
"indexRange": f["indexRange"],
"audioChannels": f["audioChannels"] if "audioChannels" in f else None
}
for f in result["adaptiveFormats"]:
if f["itag"] in itagDict:
i = itagDict[f["itag"]]
f["init"] = "{}-{}".format(i["initRange"]["start"], i["initRange"]["end"])
f["index"] = "{}-{}".format(i["indexRange"]["start"], i["indexRange"]["end"])
f["second__audioChannels"] = i["audioChannels"]
if f["second__height"]:
resolution = str(f["second__height"]) + "p"
f["resolution"] = resolution
label = resolution
if f["fps"] > 30:
label += str(f["fps"])
f["qualityLabel"] = label
f["second__order"] = format_order(f)
if "captions" in player_response:
for track in player_response["captions"]["playerCaptionsTracklistRenderer"]["captionTracks"]:

View File

@ -4,7 +4,7 @@ import pathlib
import requests
import yt_dlp
from extractors.video import extract_video
from extractors.channel import extract_channel, extract_channel_videos, extract_channel_latest
from extractors.channel import *
from extractors.manifest import extract_manifest
from extractors.search import extract_search
from extractors.suggestions import extract_search_suggestions
@ -61,7 +61,7 @@ class NewLeaf(object):
def channels(self, *suffix, second__path="channel", **kwargs):
ucid = ""
part = ""
possible_parts = ("videos", "latest", "playlists")
possible_parts = ("videos", "latest", "playlists", "shorts")
if len(suffix) == 1:
ucid = suffix[0]
else: # len(suffix) >= 2
@ -82,13 +82,16 @@ class NewLeaf(object):
}
if part == "playlists":
return []
return extract_channel_playlists(ucid, second__path, **kwargs)
elif part == "latest":
return extract_channel_latest(ucid)
elif part == "videos":
return extract_channel_videos(ucid, second__path)
return extract_channel_latest(ucid, second__path)
elif part in ["videos", "shorts", "streams"]:
#if not "sort_by" in kwargs or kwargs["sort_by"] == "newest":
#return extract_channel_videos_as_playlist(ucid, part, second__path, **kwargs)
#else:
return extract_channel_videos(ucid, part, second__path, **kwargs)
else: # part == "", so extract whole channel
return extract_channel(ucid, second__path)
return extract_channel_new(ucid, second__path)
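# Route sketch (hypothetical ID, assuming the usual /api/v1 mount):
# GET /api/v1/channels/UCxxxxxxxxxxxxxxxxxxxxxx/shorts?sort_by=popular
# dispatches to extract_channel_videos(ucid, "shorts", second__path, sort_by="popular")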
@cherrypy.expose
@cherrypy.tools.json_out()

View File

@ -15,6 +15,18 @@ def combine_runs(runs):
runs = runs["runs"]
return "".join([r["text"] for r in runs])
def try_dig(node, *keys, combine=None):
currentNode = node
for key in keys:
if key in currentNode or (isinstance(key, int) and key < len(currentNode)):
currentNode = currentNode[key]
else:
return None
if combine == True and ("simpleText" in currentNode or "runs" in currentNode):
return combine_runs(currentNode)
else:
return currentNode
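# Examples:
# try_dig(data, "a", "b", 0) -> data["a"]["b"][0], or None if any step is missing
# try_dig(node, "title", combine=True) -> combine_runs(node["title"]) when it holds runs/simpleText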
def escape_html_textcontent(text):
return (
text
@ -72,7 +84,12 @@ def get_view_count_or_recommended(view_count_container):
return view_count_text_to_number(text)
def get_view_count_text_or_recommended(view_count_container):
if "viewCountText" in view_count_container:
if "shortViewCountText" in view_count_container:
text = view_count_container["shortViewCountText"]
if not "runs" in text:
if "simpleText" in text:
text["simpleText"] = text["simpleText"].replace(" views", "")
elif "viewCountText" in view_count_container:
text = view_count_container["viewCountText"]
elif "viewCount" in view_count_container:
text = view_count_container["viewCount"]
@ -147,8 +164,8 @@ def uncompress_counter(text):
if text.lower() == "no" or text.lower() == "unknown":
return 0
last = text[-1:].lower()
if last >= "0" and last <= "9":
return int(last)
if not last in ["k", "m", "b"]:
return int(text.replace(",", ""))
else:
multiplier = 1
if last == "k":
@ -227,7 +244,7 @@ def get_subtitle_api_url(id, label, language_code):
subtitle_api_url = "/api/v1/captions/{}?".format(id)
params = {}
if label and "auto-generated" in label:
if label: #and "auto-generated" in label:
params["label"] = label
else:
params["lang"] = language_code

View File

@ -0,0 +1,32 @@
const fetch = require("node-fetch")
;(async () => {
if (process.argv.length < 3) {
console.error("Needs at least two arguments.")
process.exit(1)
}
// node index.js url data
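// e.g. node index.js "https://example.com/api" '{"method":"POST","body":"{}"}'
// (the second argument is passed straight to fetch() as its options object)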
const url = process.argv[2]
const data = process.argv[3]
let dataAsJson
try {
dataAsJson = JSON.parse(data)
} catch (e) {
console.error("Second argument (data) is not valid JSON.")
process.exit(1)
}
// Do fetch
const f = await fetch(url, dataAsJson)
if (!f.ok) {
console.error(JSON.stringify(f))
return
}
// Await parse
const asJson = await f.json()
console.log(JSON.stringify(asJson))
})()

50
tools/fetch-with-node/node_modules/.package-lock.json generated vendored Normal file
View File

@ -0,0 +1,50 @@
{
"name": "fetch-with-node",
"version": "1.0.0",
"lockfileVersion": 3,
"requires": true,
"packages": {
"node_modules/node-fetch": {
"version": "2.7.0",
"resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
"integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
"license": "MIT",
"dependencies": {
"whatwg-url": "^5.0.0"
},
"engines": {
"node": "4.x || >=6.0.0"
},
"peerDependencies": {
"encoding": "^0.1.0"
},
"peerDependenciesMeta": {
"encoding": {
"optional": true
}
}
},
"node_modules/tr46": {
"version": "0.0.3",
"resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
"integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
"license": "MIT"
},
"node_modules/webidl-conversions": {
"version": "3.0.1",
"resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
"integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
"license": "BSD-2-Clause"
},
"node_modules/whatwg-url": {
"version": "5.0.0",
"resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
"integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
"license": "MIT",
"dependencies": {
"tr46": "~0.0.3",
"webidl-conversions": "^3.0.0"
}
}
}
}

View File

@ -0,0 +1,22 @@
The MIT License (MIT)
Copyright (c) 2016 David Frank
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

634
tools/fetch-with-node/node_modules/node-fetch/README.md generated vendored Normal file
View File

@ -0,0 +1,634 @@
node-fetch
==========
[![npm version][npm-image]][npm-url]
[![build status][travis-image]][travis-url]
[![coverage status][codecov-image]][codecov-url]
[![install size][install-size-image]][install-size-url]
[![Discord][discord-image]][discord-url]
A light-weight module that brings `window.fetch` to Node.js
(We are looking for [v2 maintainers and collaborators](https://github.com/bitinn/node-fetch/issues/567))
[![Backers][opencollective-image]][opencollective-url]
<!-- TOC -->
- [Motivation](#motivation)
- [Features](#features)
- [Difference from client-side fetch](#difference-from-client-side-fetch)
- [Installation](#installation)
- [Loading and configuring the module](#loading-and-configuring-the-module)
- [Common Usage](#common-usage)
- [Plain text or HTML](#plain-text-or-html)
- [JSON](#json)
- [Simple Post](#simple-post)
- [Post with JSON](#post-with-json)
- [Post with form parameters](#post-with-form-parameters)
- [Handling exceptions](#handling-exceptions)
- [Handling client and server errors](#handling-client-and-server-errors)
- [Advanced Usage](#advanced-usage)
- [Streams](#streams)
- [Buffer](#buffer)
- [Accessing Headers and other Meta data](#accessing-headers-and-other-meta-data)
- [Extract Set-Cookie Header](#extract-set-cookie-header)
- [Post data using a file stream](#post-data-using-a-file-stream)
- [Post with form-data (detect multipart)](#post-with-form-data-detect-multipart)
- [Request cancellation with AbortSignal](#request-cancellation-with-abortsignal)
- [API](#api)
- [fetch(url[, options])](#fetchurl-options)
- [Options](#options)
- [Class: Request](#class-request)
- [Class: Response](#class-response)
- [Class: Headers](#class-headers)
- [Interface: Body](#interface-body)
- [Class: FetchError](#class-fetcherror)
- [License](#license)
- [Acknowledgement](#acknowledgement)
<!-- /TOC -->
## Motivation
Instead of implementing `XMLHttpRequest` in Node.js to run browser-specific [Fetch polyfill](https://github.com/github/fetch), why not go from native `http` to `fetch` API directly? Hence, `node-fetch`, minimal code for a `window.fetch` compatible API on Node.js runtime.
See Matt Andrews' [isomorphic-fetch](https://github.com/matthew-andrews/isomorphic-fetch) or Leonardo Quixada's [cross-fetch](https://github.com/lquixada/cross-fetch) for isomorphic usage (exports `node-fetch` for server-side, `whatwg-fetch` for client-side).
## Features
- Stay consistent with `window.fetch` API.
- Make conscious trade-offs when following [WHATWG fetch spec][whatwg-fetch] and [stream spec](https://streams.spec.whatwg.org/) implementation details, and document known differences.
- Use native promise but allow substituting it with [insert your favorite promise library].
- Use native Node streams for body on both request and response.
- Decode content encoding (gzip/deflate) properly and convert string output (such as `res.text()` and `res.json()`) to UTF-8 automatically.
- Useful extensions such as timeout, redirect limit, response size limit, [explicit errors](ERROR-HANDLING.md) for troubleshooting.
## Difference from client-side fetch
- See [Known Differences](LIMITS.md) for details.
- If you happen to use a missing feature that `window.fetch` offers, feel free to open an issue.
- Pull requests are welcomed too!
## Installation
Current stable release (`2.x`)
```sh
$ npm install node-fetch
```
## Loading and configuring the module
We suggest you load the module via `require` until the stabilization of ES modules in node:
```js
const fetch = require('node-fetch');
```
If you are using a Promise library other than native, set it through `fetch.Promise`:
```js
const Bluebird = require('bluebird');
fetch.Promise = Bluebird;
```
## Common Usage
NOTE: The documentation below is up-to-date with `2.x` releases; see the [`1.x` readme](https://github.com/bitinn/node-fetch/blob/1.x/README.md), [changelog](https://github.com/bitinn/node-fetch/blob/1.x/CHANGELOG.md) and [2.x upgrade guide](UPGRADE-GUIDE.md) for the differences.
#### Plain text or HTML
```js
fetch('https://github.com/')
.then(res => res.text())
.then(body => console.log(body));
```
#### JSON
```js
fetch('https://api.github.com/users/github')
.then(res => res.json())
.then(json => console.log(json));
```
#### Simple Post
```js
fetch('https://httpbin.org/post', { method: 'POST', body: 'a=1' })
.then(res => res.json()) // expecting a json response
.then(json => console.log(json));
```
#### Post with JSON
```js
const body = { a: 1 };
fetch('https://httpbin.org/post', {
method: 'post',
body: JSON.stringify(body),
headers: { 'Content-Type': 'application/json' },
})
.then(res => res.json())
.then(json => console.log(json));
```
#### Post with form parameters
`URLSearchParams` is available in Node.js as of v7.5.0. See [official documentation](https://nodejs.org/api/url.html#url_class_urlsearchparams) for more usage methods.
NOTE: The `Content-Type` header is only set automatically to `x-www-form-urlencoded` when an instance of `URLSearchParams` is given as such:
```js
const { URLSearchParams } = require('url');
const params = new URLSearchParams();
params.append('a', 1);
fetch('https://httpbin.org/post', { method: 'POST', body: params })
.then(res => res.json())
.then(json => console.log(json));
```
#### Handling exceptions
NOTE: 3xx-5xx responses are *NOT* exceptions and should be handled in `then()`; see the next section for more information.
Adding a catch to the fetch promise chain will catch *all* exceptions, such as errors originating from node core libraries, network errors and operational errors, which are instances of FetchError. See the [error handling document](ERROR-HANDLING.md) for more details.
```js
fetch('https://domain.invalid/')
.catch(err => console.error(err));
```
#### Handling client and server errors
It is common to create a helper function to check that the response contains no client (4xx) or server (5xx) error responses:
```js
function checkStatus(res) {
if (res.ok) { // res.status >= 200 && res.status < 300
return res;
} else {
throw new MyCustomError(res.statusText);
}
}
fetch('https://httpbin.org/status/400')
.then(checkStatus)
.then(res => console.log('will not get here...'))
```
## Advanced Usage
#### Streams
The "Node.js way" is to use streams when possible:
```js
fetch('https://assets-cdn.github.com/images/modules/logos_page/Octocat.png')
.then(res => {
const dest = fs.createWriteStream('./octocat.png');
res.body.pipe(dest);
});
```
In Node.js 14 you can also use async iterators to read `body`; however, be careful to catch
errors -- the longer a response runs, the more likely it is to encounter an error.
```js
const fetch = require('node-fetch');
const response = await fetch('https://httpbin.org/stream/3');
try {
for await (const chunk of response.body) {
console.dir(JSON.parse(chunk.toString()));
}
} catch (err) {
console.error(err.stack);
}
```
In Node.js 12 you can also use async iterators to read `body`; however, async iterators with streams
did not mature until Node.js 14, so you need to do some extra work to ensure you handle errors
directly from the stream and wait on the response to fully close.
```js
const fetch = require('node-fetch');
const read = async body => {
let error;
body.on('error', err => {
error = err;
});
for await (const chunk of body) {
console.dir(JSON.parse(chunk.toString()));
}
return new Promise((resolve, reject) => {
body.on('close', () => {
error ? reject(error) : resolve();
});
});
};
try {
const response = await fetch('https://httpbin.org/stream/3');
await read(response.body);
} catch (err) {
console.error(err.stack);
}
```
#### Buffer
If you prefer to cache binary data in full, use buffer(). (NOTE: `buffer()` is a `node-fetch`-only API)
```js
const fileType = require('file-type');
fetch('https://assets-cdn.github.com/images/modules/logos_page/Octocat.png')
.then(res => res.buffer())
.then(buffer => fileType(buffer))
.then(type => { /* ... */ });
```
#### Accessing Headers and other Meta data
```js
fetch('https://github.com/')
.then(res => {
console.log(res.ok);
console.log(res.status);
console.log(res.statusText);
console.log(res.headers.raw());
console.log(res.headers.get('content-type'));
});
```
#### Extract Set-Cookie Header
Unlike browsers, you can access raw `Set-Cookie` headers manually using `Headers.raw()`. This is a `node-fetch` only API.
```js
fetch(url).then(res => {
// returns an array of values, instead of a string of comma-separated values
console.log(res.headers.raw()['set-cookie']);
});
```
#### Post data using a file stream
```js
const { createReadStream } = require('fs');
const stream = createReadStream('input.txt');
fetch('https://httpbin.org/post', { method: 'POST', body: stream })
.then(res => res.json())
.then(json => console.log(json));
```
#### Post with form-data (detect multipart)
```js
const FormData = require('form-data');
const form = new FormData();
form.append('a', 1);
fetch('https://httpbin.org/post', { method: 'POST', body: form })
.then(res => res.json())
.then(json => console.log(json));
// OR, using custom headers
// NOTE: getHeaders() is non-standard API
const form = new FormData();
form.append('a', 1);
const options = {
method: 'POST',
body: form,
headers: form.getHeaders()
}
fetch('https://httpbin.org/post', options)
.then(res => res.json())
.then(json => console.log(json));
```
#### Request cancellation with AbortSignal
> NOTE: You may cancel streamed requests only on Node >= v8.0.0
You may cancel requests with `AbortController`. A suggested implementation is [`abort-controller`](https://www.npmjs.com/package/abort-controller).
An example of timing out a request after 150ms could be achieved as follows:
```js
import AbortController from 'abort-controller';
const controller = new AbortController();
const timeout = setTimeout(
() => { controller.abort(); },
150,
);
fetch(url, { signal: controller.signal })
.then(res => res.json())
.then(
data => {
useData(data)
},
err => {
if (err.name === 'AbortError') {
// request was aborted
}
},
)
.finally(() => {
clearTimeout(timeout);
});
```
See [test cases](https://github.com/bitinn/node-fetch/blob/master/test/test.js) for more examples.
## API
### fetch(url[, options])
- `url` A string representing the URL for fetching
- `options` [Options](#fetch-options) for the HTTP(S) request
- Returns: <code>Promise&lt;[Response](#class-response)&gt;</code>
Perform an HTTP(S) fetch.
`url` should be an absolute URL, such as `https://example.com/`. A path-relative URL (`/file/under/root`) or protocol-relative URL (`//can-be-http-or-https.com/`) will result in a rejected `Promise`.
<a id="fetch-options"></a>
### Options
The default values are shown after each option key.
```js
{
// These properties are part of the Fetch Standard
method: 'GET',
headers: {}, // request headers. format is identical to that accepted by the Headers constructor (see below)
body: null, // request body. can be null, a string, a Buffer, a Blob, or a Node.js Readable stream
redirect: 'follow', // set to `manual` to extract redirect headers, `error` to reject redirect
signal: null, // pass an instance of AbortSignal to optionally abort requests
// The following properties are node-fetch extensions
follow: 20, // maximum redirect count. 0 to not follow redirect
timeout: 0, // req/res timeout in ms, it resets on redirect. 0 to disable (OS limit applies). Signal is recommended instead.
compress: true, // support gzip/deflate content encoding. false to disable
size: 0, // maximum response body size in bytes. 0 to disable
agent: null // http(s).Agent instance or function that returns an instance (see below)
}
```
##### Default Headers
If no values are set, the following request headers will be sent automatically:
Header | Value
------------------- | --------------------------------------------------------
`Accept-Encoding` | `gzip,deflate` _(when `options.compress === true`)_
`Accept` | `*/*`
`Content-Length` | _(automatically calculated, if possible)_
`Transfer-Encoding` | `chunked` _(when `req.body` is a stream)_
`User-Agent` | `node-fetch/1.0 (+https://github.com/bitinn/node-fetch)`
Note: when `body` is a `Stream`, `Content-Length` is not set automatically.
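Any header you set explicitly wins over these defaults. As a rough sketch (using httpbin's header echo, as in the earlier examples):
```js
fetch('https://httpbin.org/headers', {
  headers: { 'User-Agent': 'my-app/1.0' } // replaces the node-fetch default UA
})
  .then(res => res.json())
  // httpbin echoes the request headers back; Accept and Accept-Encoding
  // still carry the default values listed in the table above
  .then(json => console.log(json.headers));
```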
##### Custom Agent
The `agent` option allows you to specify networking related options which are out of the scope of Fetch, including but not limited to the following:
- Support self-signed certificate
- Use only IPv4 or IPv6
- Custom DNS Lookup
See [`http.Agent`](https://nodejs.org/api/http.html#http_new_agent_options) for more information.
If no agent is specified, the default agent provided by Node.js is used. Note that [this changed in Node.js 19](https://github.com/nodejs/node/blob/4267b92604ad78584244488e7f7508a690cb80d0/lib/_http_agent.js#L564) to have `keepalive` true by default. If you wish to enable `keepalive` in an earlier version of Node.js, you can override the agent as per the following code sample.
In addition, the `agent` option accepts a function that returns an `http(s).Agent` instance given the current [URL](https://nodejs.org/api/url.html); this is useful during a redirection chain across HTTP and HTTPS protocols.
```js
const httpAgent = new http.Agent({
keepAlive: true
});
const httpsAgent = new https.Agent({
keepAlive: true
});
const options = {
agent: function (_parsedURL) {
if (_parsedURL.protocol == 'http:') {
return httpAgent;
} else {
return httpsAgent;
}
}
}
```
<a id="class-request"></a>
### Class: Request
An HTTP(S) request containing information about URL, method, headers, and the body. This class implements the [Body](#iface-body) interface.
Due to the nature of Node.js, the following properties are not implemented at this moment:
- `type`
- `destination`
- `referrer`
- `referrerPolicy`
- `mode`
- `credentials`
- `cache`
- `integrity`
- `keepalive`
The following node-fetch extension properties are provided:
- `follow`
- `compress`
- `counter`
- `agent`
See [options](#fetch-options) for exact meaning of these extensions.
#### new Request(input[, options])
<small>*(spec-compliant)*</small>
- `input` A string representing a URL, or another `Request` (which will be cloned)
- `options` [Options](#fetch-options) for the HTTP(S) request
Constructs a new `Request` object. The constructor is identical to that in the [browser](https://developer.mozilla.org/en-US/docs/Web/API/Request/Request).
In most cases, calling `fetch(url, options)` directly is simpler than creating a `Request` object.
<a id="class-response"></a>
### Class: Response
An HTTP(S) response. This class implements the [Body](#iface-body) interface.
The following properties are not implemented in node-fetch at this moment:
- `Response.error()`
- `Response.redirect()`
- `type`
- `trailer`
#### new Response([body[, options]])
<small>*(spec-compliant)*</small>
- `body` A `String` or [`Readable` stream][node-readable]
- `options` A [`ResponseInit`][response-init] options dictionary
Constructs a new `Response` object. The constructor is identical to that in the [browser](https://developer.mozilla.org/en-US/docs/Web/API/Response/Response).
Because Node.js does not implement service workers (for which this class was designed), one rarely has to construct a `Response` directly.
#### response.ok
<small>*(spec-compliant)*</small>
Convenience property representing whether the request ended normally. Will evaluate to true if the response status was greater than or equal to 200 but smaller than 300.
#### response.redirected
<small>*(spec-compliant)*</small>
Convenience property representing whether the request has been redirected at least once. Will evaluate to true if the internal redirect counter is greater than 0.
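For example, both properties can be checked on a response that was redirected from HTTP to HTTPS (this sketch assumes `http://github.com/` still redirects to HTTPS, as it does at the time of writing):
```js
fetch('http://github.com/')
  .then(res => {
    console.log(res.ok);         // true for any 2xx final status
    console.log(res.redirected); // true once at least one redirect was followed
  });
```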
<a id="class-headers"></a>
### Class: Headers
This class allows manipulating and iterating over a set of HTTP headers. All methods specified in the [Fetch Standard][whatwg-fetch] are implemented.
#### new Headers([init])
<small>*(spec-compliant)*</small>
- `init` Optional argument to pre-fill the `Headers` object
Construct a new `Headers` object. `init` can be either `null`, a `Headers` object, a key-value map object, or any iterable object.
```js
// Example adapted from https://fetch.spec.whatwg.org/#example-headers-class
const meta = {
'Content-Type': 'text/xml',
'Breaking-Bad': '<3'
};
const headers = new Headers(meta);
// The above is equivalent to
const meta = [
[ 'Content-Type', 'text/xml' ],
[ 'Breaking-Bad', '<3' ]
];
const headers = new Headers(meta);
// You can in fact use any iterable objects, like a Map or even another Headers
const meta = new Map();
meta.set('Content-Type', 'text/xml');
meta.set('Breaking-Bad', '<3');
const headers = new Headers(meta);
const copyOfHeaders = new Headers(headers);
```
<a id="iface-body"></a>
### Interface: Body
`Body` is an abstract interface with methods that are applicable to both `Request` and `Response` classes.
The following methods are not yet implemented in node-fetch at this moment:
- `formData()`
#### body.body
<small>*(deviation from spec)*</small>
* Node.js [`Readable` stream][node-readable]
Data are encapsulated in the `Body` object. Note that while the [Fetch Standard][whatwg-fetch] requires the property to always be a WHATWG `ReadableStream`, in node-fetch it is a Node.js [`Readable` stream][node-readable].
#### body.bodyUsed
<small>*(spec-compliant)*</small>
* `Boolean`
A boolean property indicating whether this body has been consumed. Per the specs, a consumed body cannot be used again.
#### body.arrayBuffer()
#### body.blob()
#### body.json()
#### body.text()
<small>*(spec-compliant)*</small>
* Returns: <code>Promise</code>
Consume the body and return a promise that will resolve to one of these formats.
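Note that these methods consume the body: after the first call, `bodyUsed` becomes true and a second read will reject. A quick sketch:
```js
fetch('https://api.github.com/users/github')
  .then(async res => {
    const json = await res.json(); // consumes the stream
    console.log(res.bodyUsed);     // true; a second res.json() would reject
    console.log(json.login);
  });
```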
#### body.buffer()
<small>*(node-fetch extension)*</small>
* Returns: <code>Promise&lt;Buffer&gt;</code>
Consume the body and return a promise that will resolve to a Buffer.
#### body.textConverted()
<small>*(node-fetch extension)*</small>
* Returns: <code>Promise&lt;String&gt;</code>
Identical to `body.text()`, except instead of always converting to UTF-8, encoding sniffing will be performed and text converted to UTF-8 if possible.
(This API requires an optional dependency of the npm package [encoding](https://www.npmjs.com/package/encoding), which you need to install manually. `webpack` users may see [a warning message](https://github.com/bitinn/node-fetch/issues/412#issuecomment-379007792) due to this optional dependency.)
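For instance, a minimal sketch (the URL here is a placeholder for any page served in a non-UTF-8 charset):
```js
fetch('https://example.com/legacy-page') // hypothetical non-UTF-8 page
  .then(res => res.textConverted())      // sniffs the charset, converts to UTF-8
  .then(text => console.log(text));
```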
<a id="class-fetcherror"></a>
### Class: FetchError
<small>*(node-fetch extension)*</small>
An operational error in the fetching process. See [ERROR-HANDLING.md][] for more info.
<a id="class-aborterror"></a>
### Class: AbortError
<small>*(node-fetch extension)*</small>
An Error thrown when the request is aborted in response to an `AbortSignal`'s `abort` event. It has a `name` property of `AbortError`. See [ERROR-HANDLING.md][] for more info.
## Acknowledgement
Thanks to [github/fetch](https://github.com/github/fetch) for providing a solid implementation reference.
`node-fetch` v1 was maintained by [@bitinn](https://github.com/bitinn); v2 was maintained by [@TimothyGu](https://github.com/timothygu), [@bitinn](https://github.com/bitinn) and [@jimmywarting](https://github.com/jimmywarting); v2 readme is written by [@jkantr](https://github.com/jkantr).
## License
MIT
[npm-image]: https://flat.badgen.net/npm/v/node-fetch
[npm-url]: https://www.npmjs.com/package/node-fetch
[travis-image]: https://flat.badgen.net/travis/bitinn/node-fetch
[travis-url]: https://travis-ci.org/bitinn/node-fetch
[codecov-image]: https://flat.badgen.net/codecov/c/github/bitinn/node-fetch/master
[codecov-url]: https://codecov.io/gh/bitinn/node-fetch
[install-size-image]: https://flat.badgen.net/packagephobia/install/node-fetch
[install-size-url]: https://packagephobia.now.sh/result?p=node-fetch
[discord-image]: https://img.shields.io/discord/619915844268326952?color=%237289DA&label=Discord&style=flat-square
[discord-url]: https://discord.gg/Zxbndcm
[opencollective-image]: https://opencollective.com/node-fetch/backers.svg
[opencollective-url]: https://opencollective.com/node-fetch
[whatwg-fetch]: https://fetch.spec.whatwg.org/
[response-init]: https://fetch.spec.whatwg.org/#responseinit
[node-readable]: https://nodejs.org/api/stream.html#stream_readable_streams
[mdn-headers]: https://developer.mozilla.org/en-US/docs/Web/API/Headers
[LIMITS.md]: https://github.com/bitinn/node-fetch/blob/master/LIMITS.md
[ERROR-HANDLING.md]: https://github.com/bitinn/node-fetch/blob/master/ERROR-HANDLING.md
[UPGRADE-GUIDE.md]: https://github.com/bitinn/node-fetch/blob/master/UPGRADE-GUIDE.md


@ -0,0 +1,25 @@
"use strict";
// ref: https://github.com/tc39/proposal-global
var getGlobal = function () {
// the only reliable means to get the global object is
// `Function('return this')()`
// However, this causes CSP violations in Chrome apps.
if (typeof self !== 'undefined') { return self; }
if (typeof window !== 'undefined') { return window; }
if (typeof global !== 'undefined') { return global; }
throw new Error('unable to locate global object');
}
var globalObject = getGlobal();
module.exports = exports = globalObject.fetch;
// Needed for TypeScript and Webpack.
if (globalObject.fetch) {
exports.default = globalObject.fetch.bind(globalObject);
}
exports.Headers = globalObject.Headers;
exports.Request = globalObject.Request;
exports.Response = globalObject.Response;

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large


@ -0,0 +1,89 @@
{
"name": "node-fetch",
"version": "2.7.0",
"description": "A light-weight module that brings window.fetch to node.js",
"main": "lib/index.js",
"browser": "./browser.js",
"module": "lib/index.mjs",
"files": [
"lib/index.js",
"lib/index.mjs",
"lib/index.es.js",
"browser.js"
],
"engines": {
"node": "4.x || >=6.0.0"
},
"scripts": {
"build": "cross-env BABEL_ENV=rollup rollup -c",
"prepare": "npm run build",
"test": "cross-env BABEL_ENV=test mocha --require babel-register --throw-deprecation test/test.js",
"report": "cross-env BABEL_ENV=coverage nyc --reporter lcov --reporter text mocha -R spec test/test.js",
"coverage": "cross-env BABEL_ENV=coverage nyc --reporter json --reporter text mocha -R spec test/test.js && codecov -f coverage/coverage-final.json"
},
"repository": {
"type": "git",
"url": "https://github.com/bitinn/node-fetch.git"
},
"keywords": [
"fetch",
"http",
"promise"
],
"author": "David Frank",
"license": "MIT",
"bugs": {
"url": "https://github.com/bitinn/node-fetch/issues"
},
"homepage": "https://github.com/bitinn/node-fetch",
"dependencies": {
"whatwg-url": "^5.0.0"
},
"peerDependencies": {
"encoding": "^0.1.0"
},
"peerDependenciesMeta": {
"encoding": {
"optional": true
}
},
"devDependencies": {
"@ungap/url-search-params": "^0.1.2",
"abort-controller": "^1.1.0",
"abortcontroller-polyfill": "^1.3.0",
"babel-core": "^6.26.3",
"babel-plugin-istanbul": "^4.1.6",
"babel-plugin-transform-async-generator-functions": "^6.24.1",
"babel-polyfill": "^6.26.0",
"babel-preset-env": "1.4.0",
"babel-register": "^6.16.3",
"chai": "^3.5.0",
"chai-as-promised": "^7.1.1",
"chai-iterator": "^1.1.1",
"chai-string": "~1.3.0",
"codecov": "3.3.0",
"cross-env": "^5.2.0",
"form-data": "^2.3.3",
"is-builtin-module": "^1.0.0",
"mocha": "^5.0.0",
"nyc": "11.9.0",
"parted": "^0.1.1",
"promise": "^8.0.3",
"resumer": "0.0.0",
"rollup": "^0.63.4",
"rollup-plugin-babel": "^3.0.7",
"string-to-arraybuffer": "^1.0.2",
"teeny-request": "3.7.0"
},
"release": {
"branches": [
"+([0-9]).x",
"main",
"next",
{
"name": "beta",
"prerelease": true
}
]
}
}

tools/fetch-with-node/node_modules/tr46/.npmignore generated vendored Normal file

@ -0,0 +1,4 @@
scripts/
test/
!lib/mapping_table.json

tools/fetch-with-node/node_modules/tr46/index.js generated vendored Normal file

@ -0,0 +1,193 @@
"use strict";
var punycode = require("punycode");
var mappingTable = require("./lib/mappingTable.json");
var PROCESSING_OPTIONS = {
TRANSITIONAL: 0,
NONTRANSITIONAL: 1
};
function normalize(str) { // fix bug in v8
return str.split('\u0000').map(function (s) { return s.normalize('NFC'); }).join('\u0000');
}
function findStatus(val) {
var start = 0;
var end = mappingTable.length - 1;
while (start <= end) {
var mid = Math.floor((start + end) / 2);
var target = mappingTable[mid];
if (target[0][0] <= val && target[0][1] >= val) {
return target;
} else if (target[0][0] > val) {
end = mid - 1;
} else {
start = mid + 1;
}
}
return null;
}
var regexAstralSymbols = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g;
function countSymbols(string) {
return string
// replace every surrogate pair with a BMP symbol
.replace(regexAstralSymbols, '_')
// then get the length
.length;
}
function mapChars(domain_name, useSTD3, processing_option) {
var hasError = false;
var processed = "";
var len = countSymbols(domain_name);
for (var i = 0; i < len; ++i) {
var codePoint = domain_name.codePointAt(i);
var status = findStatus(codePoint);
switch (status[1]) {
case "disallowed":
hasError = true;
processed += String.fromCodePoint(codePoint);
break;
case "ignored":
break;
case "mapped":
processed += String.fromCodePoint.apply(String, status[2]);
break;
case "deviation":
if (processing_option === PROCESSING_OPTIONS.TRANSITIONAL) {
processed += String.fromCodePoint.apply(String, status[2]);
} else {
processed += String.fromCodePoint(codePoint);
}
break;
case "valid":
processed += String.fromCodePoint(codePoint);
break;
case "disallowed_STD3_mapped":
if (useSTD3) {
hasError = true;
processed += String.fromCodePoint(codePoint);
} else {
processed += String.fromCodePoint.apply(String, status[2]);
}
break;
case "disallowed_STD3_valid":
if (useSTD3) {
hasError = true;
}
processed += String.fromCodePoint(codePoint);
break;
}
}
return {
string: processed,
error: hasError
};
}
var combiningMarksRegex = /[\u0300-\u036F\u0483-\u0489\u0591-\u05BD\u05BF\u05C1\u05C2\u05C4\u05C5\u05C7\u0610-\u061A\u064B-\u065F\u0670\u06D6-\u06DC\u06DF-\u06E4\u06E7\u06E8\u06EA-\u06ED\u0711\u0730-\u074A\u07A6-\u07B0\u07EB-\u07F3\u0816-\u0819\u081B-\u0823\u0825-\u0827\u0829-\u082D\u0859-\u085B\u08E4-\u0903\u093A-\u093C\u093E-\u094F\u0951-\u0957\u0962\u0963\u0981-\u0983\u09BC\u09BE-\u09C4\u09C7\u09C8\u09CB-\u09CD\u09D7\u09E2\u09E3\u0A01-\u0A03\u0A3C\u0A3E-\u0A42\u0A47\u0A48\u0A4B-\u0A4D\u0A51\u0A70\u0A71\u0A75\u0A81-\u0A83\u0ABC\u0ABE-\u0AC5\u0AC7-\u0AC9\u0ACB-\u0ACD\u0AE2\u0AE3\u0B01-\u0B03\u0B3C\u0B3E-\u0B44\u0B47\u0B48\u0B4B-\u0B4D\u0B56\u0B57\u0B62\u0B63\u0B82\u0BBE-\u0BC2\u0BC6-\u0BC8\u0BCA-\u0BCD\u0BD7\u0C00-\u0C03\u0C3E-\u0C44\u0C46-\u0C48\u0C4A-\u0C4D\u0C55\u0C56\u0C62\u0C63\u0C81-\u0C83\u0CBC\u0CBE-\u0CC4\u0CC6-\u0CC8\u0CCA-\u0CCD\u0CD5\u0CD6\u0CE2\u0CE3\u0D01-\u0D03\u0D3E-\u0D44\u0D46-\u0D48\u0D4A-\u0D4D\u0D57\u0D62\u0D63\u0D82\u0D83\u0DCA\u0DCF-\u0DD4\u0DD6\u0DD8-\u0DDF\u0DF2\u0DF3\u0E31\u0E34-\u0E3A\u0E47-\u0E4E\u0EB1\u0EB4-\u0EB9\u0EBB\u0EBC\u0EC8-\u0ECD\u0F18\u0F19\u0F35\u0F37\u0F39\u0F3E\u0F3F\u0F71-\u0F84\u0F86\u0F87\u0F8D-\u0F97\u0F99-\u0FBC\u0FC6\u102B-\u103E\u1056-\u1059\u105E-\u1060\u1062-\u1064\u1067-\u106D\u1071-\u1074\u1082-\u108D\u108F\u109A-\u109D\u135D-\u135F\u1712-\u1714\u1732-\u1734\u1752\u1753\u1772\u1773\u17B4-\u17D3\u17DD\u180B-\u180D\u18A9\u1920-\u192B\u1930-\u193B\u19B0-\u19C0\u19C8\u19C9\u1A17-\u1A1B\u1A55-\u1A5E\u1A60-\u1A7C\u1A7F\u1AB0-\u1ABE\u1B00-\u1B04\u1B34-\u1B44\u1B6B-\u1B73\u1B80-\u1B82\u1BA1-\u1BAD\u1BE6-\u1BF3\u1C24-\u1C37\u1CD0-\u1CD2\u1CD4-\u1CE8\u1CED\u1CF2-\u1CF4\u1CF8\u1CF9\u1DC0-\u1DF5\u1DFC-\u1DFF\u20D0-\u20F0\u2CEF-\u2CF1\u2D7F\u2DE0-\u2DFF\u302A-\u302F\u3099\u309A\uA66F-\uA672\uA674-\uA67D\uA69F\uA6F0\uA6F1\uA802\uA806\uA80B\uA823-\uA827\uA880\uA881\uA8B4-\uA8C4\uA8E0-\uA8F1\uA926-\uA92D\uA947-\uA953\uA980-\uA983\uA9B3-\uA9C0\uA9E5\uAA29-\uAA36\uAA43\uAA4C\uAA4D\uAA7B-\uAA7D\uAAB0\uAAB2-\uAAB4\uAAB7\uAAB8\uAABE\uAABF\uAAC1\uAAEB-\uAAEF\uAAF5\uAAF6\uABE3-\uABEA\uABEC\uABED\uFB1E\uFE00-\uFE0F\uFE20-\uFE2D]|\uD800[\uDDFD\uDEE0\uDF76-\uDF7A]|\uD802[\uDE01-\uDE03\uDE05\uDE06\uDE0C-\uDE0F\uDE38-\uDE3A\uDE3F\uDEE5\uDEE6]|\uD804[\uDC00-\uDC02\uDC38-\uDC46\uDC7F-\uDC82\uDCB0-\uDCBA\uDD00-\uDD02\uDD27-\uDD34\uDD73\uDD80-\uDD82\uDDB3-\uDDC0\uDE2C-\uDE37\uDEDF-\uDEEA\uDF01-\uDF03\uDF3C\uDF3E-\uDF44\uDF47\uDF48\uDF4B-\uDF4D\uDF57\uDF62\uDF63\uDF66-\uDF6C\uDF70-\uDF74]|\uD805[\uDCB0-\uDCC3\uDDAF-\uDDB5\uDDB8-\uDDC0\uDE30-\uDE40\uDEAB-\uDEB7]|\uD81A[\uDEF0-\uDEF4\uDF30-\uDF36]|\uD81B[\uDF51-\uDF7E\uDF8F-\uDF92]|\uD82F[\uDC9D\uDC9E]|\uD834[\uDD65-\uDD69\uDD6D-\uDD72\uDD7B-\uDD82\uDD85-\uDD8B\uDDAA-\uDDAD\uDE42-\uDE44]|\uD83A[\uDCD0-\uDCD6]|\uDB40[\uDD00-\uDDEF]/;
function validateLabel(label, processing_option) {
if (label.substr(0, 4) === "xn--") {
label = punycode.toUnicode(label);
processing_option = PROCESSING_OPTIONS.NONTRANSITIONAL;
}
var error = false;
if (normalize(label) !== label ||
(label[3] === "-" && label[4] === "-") ||
label[0] === "-" || label[label.length - 1] === "-" ||
label.indexOf(".") !== -1 ||
label.search(combiningMarksRegex) === 0) {
error = true;
}
var len = countSymbols(label);
for (var i = 0; i < len; ++i) {
var status = findStatus(label.codePointAt(i));
if ((processing_option === PROCESSING_OPTIONS.TRANSITIONAL && status[1] !== "valid") ||
(processing_option === PROCESSING_OPTIONS.NONTRANSITIONAL &&
status[1] !== "valid" && status[1] !== "deviation")) {
error = true;
break;
}
}
return {
label: label,
error: error
};
}
function processing(domain_name, useSTD3, processing_option) {
var result = mapChars(domain_name, useSTD3, processing_option);
result.string = normalize(result.string);
var labels = result.string.split(".");
for (var i = 0; i < labels.length; ++i) {
try {
var validation = validateLabel(labels[i], processing_option);
labels[i] = validation.label;
result.error = result.error || validation.error;
} catch(e) {
result.error = true;
}
}
return {
string: labels.join("."),
error: result.error
};
}
module.exports.toASCII = function(domain_name, useSTD3, processing_option, verifyDnsLength) {
var result = processing(domain_name, useSTD3, processing_option);
var labels = result.string.split(".");
labels = labels.map(function(l) {
try {
return punycode.toASCII(l);
} catch(e) {
result.error = true;
return l;
}
});
if (verifyDnsLength) {
var total = labels.slice(0, labels.length - 1).join(".").length;
if (total > 253 || total === 0) {
result.error = true;
}
for (var i=0; i < labels.length; ++i) {
if (labels[i].length > 63 || labels[i].length === 0) {
result.error = true;
break;
}
}
}
if (result.error) return null;
return labels.join(".");
};
module.exports.toUnicode = function(domain_name, useSTD3) {
var result = processing(domain_name, useSTD3, PROCESSING_OPTIONS.NONTRANSITIONAL);
return {
domain: result.string,
error: result.error
};
};
module.exports.PROCESSING_OPTIONS = PROCESSING_OPTIONS;
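For reference, a rough sketch of calling the two exports above (argument order as defined in this file; exact outputs depend on the mapping data in `lib/mappingTable.json`):
```js
const tr46 = require("tr46");

// toASCII(domainName, useSTD3, processingOption, verifyDnsLength)
tr46.toASCII("faß.example", false, tr46.PROCESSING_OPTIONS.TRANSITIONAL, false);
// → "fass.example" (transitional processing maps ß to "ss")

tr46.toUnicode("xn--fa-hia.example", false);
// → { domain: "faß.example", error: false }
```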

tools/fetch-with-node/node_modules/tr46/lib/.gitkeep generated vendored Normal file

File diff suppressed because one or more lines are too long

tools/fetch-with-node/node_modules/tr46/package.json generated vendored Normal file

@ -0,0 +1,31 @@
{
"name": "tr46",
"version": "0.0.3",
"description": "An implementation of the Unicode TR46 spec",
"main": "index.js",
"scripts": {
"test": "mocha",
"pretest": "node scripts/getLatestUnicodeTests.js",
"prepublish": "node scripts/generateMappingTable.js"
},
"repository": {
"type": "git",
"url": "git+https://github.com/Sebmaster/tr46.js.git"
},
"keywords": [
"unicode",
"tr46",
"url",
"whatwg"
],
"author": "Sebastian Mayr <npm@smayr.name>",
"license": "MIT",
"bugs": {
"url": "https://github.com/Sebmaster/tr46.js/issues"
},
"homepage": "https://github.com/Sebmaster/tr46.js#readme",
"devDependencies": {
"mocha": "^2.2.5",
"request": "^2.57.0"
}
}


@ -0,0 +1,12 @@
# The BSD 2-Clause License
Copyright (c) 2014, Domenic Denicola
All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.


@ -0,0 +1,53 @@
# WebIDL Type Conversions on JavaScript Values
This package implements, in JavaScript, the algorithms to convert a given JavaScript value according to a given [WebIDL](http://heycam.github.io/webidl/) [type](http://heycam.github.io/webidl/#idl-types).
The goal is that you should be able to write code like
```js
const conversions = require("webidl-conversions");
function doStuff(x, y) {
x = conversions["boolean"](x);
y = conversions["unsigned long"](y);
// actual algorithm code here
}
```
and your function `doStuff` will behave the same as a WebIDL operation declared as
```webidl
void doStuff(boolean x, unsigned long y);
```
## API
This package's main module's default export is an object with a variety of methods, each corresponding to a different WebIDL type. Each method, when invoked on a JavaScript value, will give back the new JavaScript value that results after passing through the WebIDL conversion rules. (See below for more details on what that means.) Alternately, the method could throw an error, if the WebIDL algorithm is specified to do so: for example `conversions["float"](NaN)` [will throw a `TypeError`](http://heycam.github.io/webidl/#es-float).
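A few concrete cases of those rules, as a sketch (the values follow from the algorithms in this package's `lib/index.js`, included below):
```js
const conversions = require("webidl-conversions");

conversions["unsigned long"](-1);           // 4294967295 (wraps modulo 2^32)
conversions["octet"](300, { clamp: true }); // 255 (clamped to the octet range)
conversions["boolean"]("no");               // true (any non-empty string)
conversions["float"](NaN);                  // throws TypeError
```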
## Status
All of the numeric types are implemented (float being implemented as double) and some others are as well - check the source for all of them. This list will grow over time in service of the [HTML as Custom Elements](https://github.com/dglazkov/html-as-custom-elements) project, but in the meantime, pull requests welcome!
I'm not sure yet what the strategy will be for modifiers, e.g. [`[Clamp]`](http://heycam.github.io/webidl/#Clamp). Maybe something like `conversions["unsigned long"](x, { clamp: true })`? We'll see.
We might also want to extend the API to give better error messages, e.g. "Argument 1 of HTMLMediaElement.fastSeek is not a finite floating-point value" instead of "Argument is not a finite floating-point value." This would require passing in more information to the conversion functions than we currently do.
## Background
What's actually going on here, conceptually, is pretty weird. Let's try to explain.
WebIDL, as part of its madness-inducing design, has its own type system. When people write algorithms in web platform specs, they usually operate on WebIDL values, i.e. instances of WebIDL types. For example, if they were specifying the algorithm for our `doStuff` operation above, they would treat `x` as a WebIDL value of [WebIDL type `boolean`](http://heycam.github.io/webidl/#idl-boolean). Crucially, they would _not_ treat `x` as a JavaScript variable whose value is either the JavaScript `true` or `false`. They're instead working in a different type system altogether, with its own rules.
Separately from its type system, WebIDL defines a ["binding"](http://heycam.github.io/webidl/#ecmascript-binding) of the type system into JavaScript. This contains rules like: when you pass a JavaScript value to the JavaScript method that manifests a given WebIDL operation, how does that get converted into a WebIDL value? For example, a JavaScript `true` passed in the position of a WebIDL `boolean` argument becomes a WebIDL `true`. But, a JavaScript `true` passed in the position of a [WebIDL `unsigned long`](http://heycam.github.io/webidl/#idl-unsigned-long) becomes a WebIDL `1`. And so on.
Finally, we have the actual implementation code. This is usually C++, although these days [some smart people are using Rust](https://github.com/servo/servo). The implementation, of course, has its own type system. So when they implement the WebIDL algorithms, they don't actually use WebIDL values, since those aren't "real" outside of specs. Instead, implementations apply the WebIDL binding rules in such a way as to convert incoming JavaScript values into C++ values. For example, if code in the browser called `doStuff(true, true)`, then the implementation code would eventually receive a C++ `bool` containing `true` and a C++ `uint32_t` containing `1`.
The upside of all this is that implementations can abstract all the conversion logic away, letting WebIDL handle it, and focus on implementing the relevant methods in C++ with values of the correct type already provided. That is the payoff of WebIDL, in a nutshell.
And getting to that payoff is the goal of _this_ project—but for JavaScript implementations, instead of C++ ones. That is, this library is designed to make it easier for JavaScript developers to write functions that behave like a given WebIDL operation. So conceptually, the conversion pipeline, which in its general form is JavaScript values ↦ WebIDL values ↦ implementation-language values, in this case becomes JavaScript values ↦ WebIDL values ↦ JavaScript values. And that intermediate step is where all the logic is performed: a JavaScript `true` becomes a WebIDL `1` in an unsigned long context, which then becomes a JavaScript `1`.
## Don't Use This
Seriously, why would you ever use this? You really shouldn't. WebIDL is … not great, and you shouldn't be emulating its semantics. If you're looking for a generic argument-processing library, you should find one with better rules than those from WebIDL. In general, your JavaScript should not be trying to become more like WebIDL; if anything, we should fix WebIDL to make it more like JavaScript.
The _only_ people who should use this are those trying to create faithful implementations (or polyfills) of web platform interfaces defined in WebIDL.


@ -0,0 +1,189 @@
"use strict";
var conversions = {};
module.exports = conversions;
function sign(x) {
return x < 0 ? -1 : 1;
}
function evenRound(x) {
// Round x to the nearest integer, choosing the even integer if it lies halfway between two.
if ((x % 1) === 0.5 && (x & 1) === 0) { // [even number].5; round down (i.e. floor)
return Math.floor(x);
} else {
return Math.round(x);
}
}
function createNumberConversion(bitLength, typeOpts) {
if (!typeOpts.unsigned) {
--bitLength;
}
const lowerBound = typeOpts.unsigned ? 0 : -Math.pow(2, bitLength);
const upperBound = Math.pow(2, bitLength) - 1;
const moduloVal = typeOpts.moduloBitLength ? Math.pow(2, typeOpts.moduloBitLength) : Math.pow(2, bitLength);
const moduloBound = typeOpts.moduloBitLength ? Math.pow(2, typeOpts.moduloBitLength - 1) : Math.pow(2, bitLength - 1);
return function(V, opts) {
if (!opts) opts = {};
let x = +V;
if (opts.enforceRange) {
if (!Number.isFinite(x)) {
throw new TypeError("Argument is not a finite number");
}
x = sign(x) * Math.floor(Math.abs(x));
if (x < lowerBound || x > upperBound) {
throw new TypeError("Argument is not in byte range");
}
return x;
}
if (!isNaN(x) && opts.clamp) {
x = evenRound(x);
if (x < lowerBound) x = lowerBound;
if (x > upperBound) x = upperBound;
return x;
}
if (!Number.isFinite(x) || x === 0) {
return 0;
}
x = sign(x) * Math.floor(Math.abs(x));
x = x % moduloVal;
if (!typeOpts.unsigned && x >= moduloBound) {
return x - moduloVal;
} else if (typeOpts.unsigned) {
if (x < 0) {
x += moduloVal;
} else if (x === -0) { // don't return negative zero
return 0;
}
}
return x;
}
}
conversions["void"] = function () {
return undefined;
};
conversions["boolean"] = function (val) {
return !!val;
};
conversions["byte"] = createNumberConversion(8, { unsigned: false });
conversions["octet"] = createNumberConversion(8, { unsigned: true });
conversions["short"] = createNumberConversion(16, { unsigned: false });
conversions["unsigned short"] = createNumberConversion(16, { unsigned: true });
conversions["long"] = createNumberConversion(32, { unsigned: false });
conversions["unsigned long"] = createNumberConversion(32, { unsigned: true });
conversions["long long"] = createNumberConversion(32, { unsigned: false, moduloBitLength: 64 });
conversions["unsigned long long"] = createNumberConversion(32, { unsigned: true, moduloBitLength: 64 });
conversions["double"] = function (V) {
const x = +V;
if (!Number.isFinite(x)) {
throw new TypeError("Argument is not a finite floating-point value");
}
return x;
};
conversions["unrestricted double"] = function (V) {
const x = +V;
if (isNaN(x)) {
throw new TypeError("Argument is NaN");
}
return x;
};
// not quite valid, but good enough for JS
conversions["float"] = conversions["double"];
conversions["unrestricted float"] = conversions["unrestricted double"];
conversions["DOMString"] = function (V, opts) {
if (!opts) opts = {};
if (opts.treatNullAsEmptyString && V === null) {
return "";
}
return String(V);
};
conversions["ByteString"] = function (V, opts) {
const x = String(V);
let c = undefined;
for (let i = 0; (c = x.codePointAt(i)) !== undefined; ++i) {
if (c > 255) {
throw new TypeError("Argument is not a valid bytestring");
}
}
return x;
};
conversions["USVString"] = function (V) {
const S = String(V);
const n = S.length;
const U = [];
for (let i = 0; i < n; ++i) {
const c = S.charCodeAt(i);
if (c < 0xD800 || c > 0xDFFF) {
U.push(String.fromCodePoint(c));
} else if (0xDC00 <= c && c <= 0xDFFF) {
U.push(String.fromCodePoint(0xFFFD));
} else {
if (i === n - 1) {
U.push(String.fromCodePoint(0xFFFD));
} else {
const d = S.charCodeAt(i + 1);
if (0xDC00 <= d && d <= 0xDFFF) {
const a = c & 0x3FF;
const b = d & 0x3FF;
U.push(String.fromCodePoint((2 << 15) + (2 << 9) * a + b));
++i;
} else {
U.push(String.fromCodePoint(0xFFFD));
}
}
}
}
return U.join('');
};
conversions["Date"] = function (V, opts) {
if (!(V instanceof Date)) {
throw new TypeError("Argument is not a Date object");
}
if (isNaN(V)) {
return undefined;
}
return V;
};
conversions["RegExp"] = function (V, opts) {
if (!(V instanceof RegExp)) {
V = new RegExp(V);
}
return V;
};


@ -0,0 +1,23 @@
{
"name": "webidl-conversions",
"version": "3.0.1",
"description": "Implements the WebIDL algorithms for converting to and from JavaScript values",
"main": "lib/index.js",
"scripts": {
"test": "mocha test/*.js"
},
"repository": "jsdom/webidl-conversions",
"keywords": [
"webidl",
"web",
"types"
],
"files": [
"lib/"
],
"author": "Domenic Denicola <d@domenic.me> (https://domenic.me/)",
"license": "BSD-2-Clause",
"devDependencies": {
"mocha": "^1.21.4"
}
}


@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2015–2016 Sebastian Mayr
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.


@ -0,0 +1,67 @@
# whatwg-url
whatwg-url is a full implementation of the WHATWG [URL Standard](https://url.spec.whatwg.org/). It can be used standalone, but it also exposes a lot of the internal algorithms that are useful for integrating a URL parser into a project like [jsdom](https://github.com/tmpvar/jsdom).
## Current Status
whatwg-url is currently up to date with the URL spec up to commit [a62223](https://github.com/whatwg/url/commit/a622235308342c9adc7fc2fd1659ff059f7d5e2a).
## API
### The `URL` Constructor
The main API is the [`URL`](https://url.spec.whatwg.org/#url) export, which follows the spec's behavior in all ways (including e.g. `USVString` conversion). Most consumers of this library will want to use this.
### Low-level URL Standard API
The following methods are exported for use by places like jsdom that need to implement things like [`HTMLHyperlinkElementUtils`](https://html.spec.whatwg.org/#htmlhyperlinkelementutils). They operate on or return an "internal URL" or ["URL record"](https://url.spec.whatwg.org/#concept-url) type.
- [URL parser](https://url.spec.whatwg.org/#concept-url-parser): `parseURL(input, { baseURL, encodingOverride })`
- [Basic URL parser](https://url.spec.whatwg.org/#concept-basic-url-parser): `basicURLParse(input, { baseURL, encodingOverride, url, stateOverride })`
- [URL serializer](https://url.spec.whatwg.org/#concept-url-serializer): `serializeURL(urlRecord, excludeFragment)`
- [Host serializer](https://url.spec.whatwg.org/#concept-host-serializer): `serializeHost(hostFromURLRecord)`
- [Serialize an integer](https://url.spec.whatwg.org/#serialize-an-integer): `serializeInteger(number)`
- [Origin](https://url.spec.whatwg.org/#concept-url-origin) [serializer](https://html.spec.whatwg.org/multipage/browsers.html#serialization-of-an-origin): `serializeURLOrigin(urlRecord)`
- [Set the username](https://url.spec.whatwg.org/#set-the-username): `setTheUsername(urlRecord, usernameString)`
- [Set the password](https://url.spec.whatwg.org/#set-the-password): `setThePassword(urlRecord, passwordString)`
- [Cannot have a username/password/port](https://url.spec.whatwg.org/#cannot-have-a-username-password-port): `cannotHaveAUsernamePasswordPort(urlRecord)`
The `stateOverride` parameter is one of the following strings:
- [`"scheme start"`](https://url.spec.whatwg.org/#scheme-start-state)
- [`"scheme"`](https://url.spec.whatwg.org/#scheme-state)
- [`"no scheme"`](https://url.spec.whatwg.org/#no-scheme-state)
- [`"special relative or authority"`](https://url.spec.whatwg.org/#special-relative-or-authority-state)
- [`"path or authority"`](https://url.spec.whatwg.org/#path-or-authority-state)
- [`"relative"`](https://url.spec.whatwg.org/#relative-state)
- [`"relative slash"`](https://url.spec.whatwg.org/#relative-slash-state)
- [`"special authority slashes"`](https://url.spec.whatwg.org/#special-authority-slashes-state)
- [`"special authority ignore slashes"`](https://url.spec.whatwg.org/#special-authority-ignore-slashes-state)
- [`"authority"`](https://url.spec.whatwg.org/#authority-state)
- [`"host"`](https://url.spec.whatwg.org/#host-state)
- [`"hostname"`](https://url.spec.whatwg.org/#hostname-state)
- [`"port"`](https://url.spec.whatwg.org/#port-state)
- [`"file"`](https://url.spec.whatwg.org/#file-state)
- [`"file slash"`](https://url.spec.whatwg.org/#file-slash-state)
- [`"file host"`](https://url.spec.whatwg.org/#file-host-state)
- [`"path start"`](https://url.spec.whatwg.org/#path-start-state)
- [`"path"`](https://url.spec.whatwg.org/#path-state)
- [`"cannot-be-a-base-URL path"`](https://url.spec.whatwg.org/#cannot-be-a-base-url-path-state)
- [`"query"`](https://url.spec.whatwg.org/#query-state)
- [`"fragment"`](https://url.spec.whatwg.org/#fragment-state)
The URL record type has the following API:
- [`scheme`](https://url.spec.whatwg.org/#concept-url-scheme)
- [`username`](https://url.spec.whatwg.org/#concept-url-username)
- [`password`](https://url.spec.whatwg.org/#concept-url-password)
- [`host`](https://url.spec.whatwg.org/#concept-url-host)
- [`port`](https://url.spec.whatwg.org/#concept-url-port)
- [`path`](https://url.spec.whatwg.org/#concept-url-path) (as an array)
- [`query`](https://url.spec.whatwg.org/#concept-url-query)
- [`fragment`](https://url.spec.whatwg.org/#concept-url-fragment)
- [`cannotBeABaseURL`](https://url.spec.whatwg.org/#url-cannot-be-a-base-url-flag) (as a boolean)
These properties should be treated with care, as in general changing them will cause the URL record to be in an inconsistent state until the appropriate invocation of `basicURLParse` is used to fix it up. You can see examples of this in the URL Standard, where there are many step sequences like "4. Set context object's url's fragment to the empty string. 5. Basic URL parse _input_ with context object's url as _url_ and fragment state as _state override_." In between those two steps, a URL record is in an unusable state.
The return value of "failure" in the spec is represented by the string `"failure"`. That is, functions like `parseURL` and `basicURLParse` can return _either_ a URL record _or_ the string `"failure"`.
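Putting the low-level pieces together, a minimal sketch (mirroring the two-step fragment sequence quoted above):
```js
const { parseURL, basicURLParse, serializeURL } = require("whatwg-url");

const record = parseURL("https://example.com:8080/a/b?x=1#frag");
console.log(record.scheme); // "https"
console.log(record.path);   // ["a", "b"]

// Replace just the fragment, as in the quoted spec steps:
record.fragment = "";
basicURLParse("other", { url: record, stateOverride: "fragment" });
console.log(serializeURL(record)); // "https://example.com:8080/a/b?x=1#other"
```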


@ -0,0 +1,200 @@
"use strict";
const usm = require("./url-state-machine");
exports.implementation = class URLImpl {
constructor(constructorArgs) {
const url = constructorArgs[0];
const base = constructorArgs[1];
let parsedBase = null;
if (base !== undefined) {
parsedBase = usm.basicURLParse(base);
if (parsedBase === "failure") {
throw new TypeError("Invalid base URL");
}
}
const parsedURL = usm.basicURLParse(url, { baseURL: parsedBase });
if (parsedURL === "failure") {
throw new TypeError("Invalid URL");
}
this._url = parsedURL;
// TODO: query stuff
}
get href() {
return usm.serializeURL(this._url);
}
set href(v) {
const parsedURL = usm.basicURLParse(v);
if (parsedURL === "failure") {
throw new TypeError("Invalid URL");
}
this._url = parsedURL;
}
get origin() {
return usm.serializeURLOrigin(this._url);
}
get protocol() {
return this._url.scheme + ":";
}
set protocol(v) {
usm.basicURLParse(v + ":", { url: this._url, stateOverride: "scheme start" });
}
get username() {
return this._url.username;
}
set username(v) {
if (usm.cannotHaveAUsernamePasswordPort(this._url)) {
return;
}
usm.setTheUsername(this._url, v);
}
get password() {
return this._url.password;
}
set password(v) {
if (usm.cannotHaveAUsernamePasswordPort(this._url)) {
return;
}
usm.setThePassword(this._url, v);
}
get host() {
const url = this._url;
if (url.host === null) {
return "";
}
if (url.port === null) {
return usm.serializeHost(url.host);
}
return usm.serializeHost(url.host) + ":" + usm.serializeInteger(url.port);
}
set host(v) {
if (this._url.cannotBeABaseURL) {
return;
}
usm.basicURLParse(v, { url: this._url, stateOverride: "host" });
}
get hostname() {
if (this._url.host === null) {
return "";
}
return usm.serializeHost(this._url.host);
}
set hostname(v) {
if (this._url.cannotBeABaseURL) {
return;
}
usm.basicURLParse(v, { url: this._url, stateOverride: "hostname" });
}
get port() {
if (this._url.port === null) {
return "";
}
return usm.serializeInteger(this._url.port);
}
set port(v) {
if (usm.cannotHaveAUsernamePasswordPort(this._url)) {
return;
}
if (v === "") {
this._url.port = null;
} else {
usm.basicURLParse(v, { url: this._url, stateOverride: "port" });
}
}
get pathname() {
if (this._url.cannotBeABaseURL) {
return this._url.path[0];
}
if (this._url.path.length === 0) {
return "";
}
return "/" + this._url.path.join("/");
}
set pathname(v) {
if (this._url.cannotBeABaseURL) {
return;
}
this._url.path = [];
usm.basicURLParse(v, { url: this._url, stateOverride: "path start" });
}
get search() {
if (this._url.query === null || this._url.query === "") {
return "";
}
return "?" + this._url.query;
}
set search(v) {
// TODO: query stuff
const url = this._url;
if (v === "") {
url.query = null;
return;
}
const input = v[0] === "?" ? v.substring(1) : v;
url.query = "";
usm.basicURLParse(input, { url, stateOverride: "query" });
}
get hash() {
if (this._url.fragment === null || this._url.fragment === "") {
return "";
}
return "#" + this._url.fragment;
}
set hash(v) {
if (v === "") {
this._url.fragment = null;
return;
}
const input = v[0] === "#" ? v.substring(1) : v;
this._url.fragment = "";
usm.basicURLParse(input, { url: this._url, stateOverride: "fragment" });
}
toJSON() {
return this.href;
}
};


@ -0,0 +1,196 @@
"use strict";
const conversions = require("webidl-conversions");
const utils = require("./utils.js");
const Impl = require("./URL-impl.js");
const impl = utils.implSymbol;
function URL(url) {
if (!this || this[impl] || !(this instanceof URL)) {
throw new TypeError("Failed to construct 'URL': Please use the 'new' operator, this DOM object constructor cannot be called as a function.");
}
if (arguments.length < 1) {
throw new TypeError("Failed to construct 'URL': 1 argument required, but only " + arguments.length + " present.");
}
const args = [];
for (let i = 0; i < arguments.length && i < 2; ++i) {
args[i] = arguments[i];
}
args[0] = conversions["USVString"](args[0]);
if (args[1] !== undefined) {
args[1] = conversions["USVString"](args[1]);
}
module.exports.setup(this, args);
}
URL.prototype.toJSON = function toJSON() {
if (!this || !module.exports.is(this)) {
throw new TypeError("Illegal invocation");
}
const args = [];
for (let i = 0; i < arguments.length && i < 0; ++i) {
args[i] = arguments[i];
}
return this[impl].toJSON.apply(this[impl], args);
};
Object.defineProperty(URL.prototype, "href", {
get() {
return this[impl].href;
},
set(V) {
V = conversions["USVString"](V);
this[impl].href = V;
},
enumerable: true,
configurable: true
});
URL.prototype.toString = function () {
if (!this || !module.exports.is(this)) {
throw new TypeError("Illegal invocation");
}
return this.href;
};
Object.defineProperty(URL.prototype, "origin", {
get() {
return this[impl].origin;
},
enumerable: true,
configurable: true
});
Object.defineProperty(URL.prototype, "protocol", {
get() {
return this[impl].protocol;
},
set(V) {
V = conversions["USVString"](V);
this[impl].protocol = V;
},
enumerable: true,
configurable: true
});
Object.defineProperty(URL.prototype, "username", {
get() {
return this[impl].username;
},
set(V) {
V = conversions["USVString"](V);
this[impl].username = V;
},
enumerable: true,
configurable: true
});
Object.defineProperty(URL.prototype, "password", {
get() {
return this[impl].password;
},
set(V) {
V = conversions["USVString"](V);
this[impl].password = V;
},
enumerable: true,
configurable: true
});
Object.defineProperty(URL.prototype, "host", {
get() {
return this[impl].host;
},
set(V) {
V = conversions["USVString"](V);
this[impl].host = V;
},
enumerable: true,
configurable: true
});
Object.defineProperty(URL.prototype, "hostname", {
get() {
return this[impl].hostname;
},
set(V) {
V = conversions["USVString"](V);
this[impl].hostname = V;
},
enumerable: true,
configurable: true
});
Object.defineProperty(URL.prototype, "port", {
get() {
return this[impl].port;
},
set(V) {
V = conversions["USVString"](V);
this[impl].port = V;
},
enumerable: true,
configurable: true
});
Object.defineProperty(URL.prototype, "pathname", {
get() {
return this[impl].pathname;
},
set(V) {
V = conversions["USVString"](V);
this[impl].pathname = V;
},
enumerable: true,
configurable: true
});
Object.defineProperty(URL.prototype, "search", {
get() {
return this[impl].search;
},
set(V) {
V = conversions["USVString"](V);
this[impl].search = V;
},
enumerable: true,
configurable: true
});
Object.defineProperty(URL.prototype, "hash", {
get() {
return this[impl].hash;
},
set(V) {
V = conversions["USVString"](V);
this[impl].hash = V;
},
enumerable: true,
configurable: true
});
module.exports = {
is(obj) {
return !!obj && obj[impl] instanceof Impl.implementation;
},
create(constructorArgs, privateData) {
let obj = Object.create(URL.prototype);
this.setup(obj, constructorArgs, privateData);
return obj;
},
setup(obj, constructorArgs, privateData) {
if (!privateData) privateData = {};
privateData.wrapper = obj;
obj[impl] = new Impl.implementation(constructorArgs, privateData);
obj[impl][utils.wrapperSymbol] = obj;
},
interface: URL,
expose: {
Window: { URL: URL },
Worker: { URL: URL }
}
};


@ -0,0 +1,11 @@
"use strict";
exports.URL = require("./URL").interface;
exports.serializeURL = require("./url-state-machine").serializeURL;
exports.serializeURLOrigin = require("./url-state-machine").serializeURLOrigin;
exports.basicURLParse = require("./url-state-machine").basicURLParse;
exports.setTheUsername = require("./url-state-machine").setTheUsername;
exports.setThePassword = require("./url-state-machine").setThePassword;
exports.serializeHost = require("./url-state-machine").serializeHost;
exports.serializeInteger = require("./url-state-machine").serializeInteger;
exports.parseURL = require("./url-state-machine").parseURL;

File diff suppressed because it is too large


@ -0,0 +1,20 @@
"use strict";
module.exports.mixin = function mixin(target, source) {
const keys = Object.getOwnPropertyNames(source);
for (let i = 0; i < keys.length; ++i) {
Object.defineProperty(target, keys[i], Object.getOwnPropertyDescriptor(source, keys[i]));
}
};
module.exports.wrapperSymbol = Symbol("wrapper");
module.exports.implSymbol = Symbol("impl");
module.exports.wrapperForImpl = function (impl) {
return impl[module.exports.wrapperSymbol];
};
module.exports.implForWrapper = function (wrapper) {
return wrapper[module.exports.implSymbol];
};


@ -0,0 +1,32 @@
{
"name": "whatwg-url",
"version": "5.0.0",
"description": "An implementation of the WHATWG URL Standard's URL API and parsing machinery",
"main": "lib/public-api.js",
"files": [
"lib/"
],
"author": "Sebastian Mayr <github@smayr.name>",
"license": "MIT",
"repository": "jsdom/whatwg-url",
"dependencies": {
"tr46": "~0.0.3",
"webidl-conversions": "^3.0.0"
},
"devDependencies": {
"eslint": "^2.6.0",
"istanbul": "~0.4.3",
"mocha": "^2.2.4",
"recast": "~0.10.29",
"request": "^2.55.0",
"webidl2js": "^3.0.2"
},
"scripts": {
"build": "node scripts/transform.js && node scripts/convert-idl.js",
"coverage": "istanbul cover node_modules/mocha/bin/_mocha",
"lint": "eslint .",
"prepublish": "npm run build",
"pretest": "node scripts/get-latest-platform-tests.js && npm run build",
"test": "mocha"
}
}

tools/fetch-with-node/package-lock.json generated Normal file

@ -0,0 +1,58 @@
{
  "name": "fetch-with-node",
  "version": "1.0.0",
  "lockfileVersion": 3,
  "requires": true,
  "packages": {
    "": {
      "name": "fetch-with-node",
      "version": "1.0.0",
      "license": "ISC",
      "dependencies": {
        "node-fetch": "^2.6.6"
      }
    },
    "node_modules/node-fetch": {
      "version": "2.7.0",
      "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.7.0.tgz",
      "integrity": "sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==",
      "license": "MIT",
      "dependencies": {
        "whatwg-url": "^5.0.0"
      },
      "engines": {
        "node": "4.x || >=6.0.0"
      },
      "peerDependencies": {
        "encoding": "^0.1.0"
      },
      "peerDependenciesMeta": {
        "encoding": {
          "optional": true
        }
      }
    },
    "node_modules/tr46": {
      "version": "0.0.3",
      "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
      "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==",
      "license": "MIT"
    },
    "node_modules/webidl-conversions": {
      "version": "3.0.1",
      "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
      "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==",
      "license": "BSD-2-Clause"
    },
    "node_modules/whatwg-url": {
      "version": "5.0.0",
      "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
      "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
      "license": "MIT",
      "dependencies": {
        "tr46": "~0.0.3",
        "webidl-conversions": "^3.0.0"
      }
    }
  }
}

View File

@ -0,0 +1,14 @@
{
  "name": "fetch-with-node",
  "version": "1.0.0",
  "description": "simple tool to run fetch commands with Node.js, since fetch is bugging out in Python",
  "main": "index.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "author": "Eir",
  "license": "ISC",
  "dependencies": {
    "node-fetch": "^2.6.6"
  }
}

123
tools/invidious_ported.py Normal file
View File

@ -0,0 +1,123 @@
import os, subprocess, asyncio, json

async def nodeFetchInternal(url, data):
	# Run the Node helper as a subprocess: it performs the actual HTTP fetch
	# and prints the JSON response body to stdout.
	sp = await asyncio.create_subprocess_exec(
		"node", "--disable-warning", "DEP0040", os.path.join(os.path.abspath(os.path.dirname(__file__)), "fetch-with-node/index.js"), url, data,
		stdout=asyncio.subprocess.PIPE
	)
	out = await sp.stdout.read()
	await sp.wait()  # reap the child so the event loop shuts down cleanly
	out = out.decode("utf-8").rstrip()
	try:
		asJson = json.loads(out)
	except Exception:
		print("ERROR")
		print(out)
		return None
	return asJson

def nodeFetch(url, data):
	return asyncio.run(nodeFetchInternal(url, data))
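
# Hedged usage sketch (not part of the original module): nodeFetch() takes a
# URL plus a JSON-encoded blob of fetch options, mirroring what browse() and
# next() below construct. The endpoint, headers, and body values here are
# illustrative only, not a documented interface.
def _example_node_fetch():
	options = {
		"headers": {"Content-Type": "application/json; charset=UTF-8"},
		"method": "POST",
		"body": json.dumps({"context": {"client": {"hl": "en", "gl": "US"}}}),
	}
	return nodeFetch("https://www.youtube.com/youtubei/v1/browse", json.dumps(options))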

async def protodecInternal(input, flags):
	# Feed the input to protodec over stdin instead of interpolating it into a
	# shell string with echo, so quotes in the input can't break (or inject
	# into) the command line.
	sp = await asyncio.create_subprocess_exec(
		os.path.join(os.path.abspath(os.path.dirname(__file__)), "protodec"), *flags.split(),
		stdin=asyncio.subprocess.PIPE,
		stdout=asyncio.subprocess.PIPE,
		stderr=asyncio.subprocess.STDOUT
	)
	out, _ = await sp.communicate(input.encode("utf-8"))
	return out.decode("utf-8").rstrip()

def protodec(input, flags):
	return asyncio.run(protodecInternal(input, flags))
###

defaultHeaders = {
	"Accept-Encoding": "gzip, deflate, br",
	"Accept-Language": "en-US,en;q=0.9",
	"Content-Type": "application/json; charset=UTF-8",
	"Cookie": "GPS=1; YSC=q6STb5ub1CU; VISITOR_INFO1_LIVE=Hbzrltf2qrk; VISITOR_PRIVACY_METADATA=CgJVUxIEGgAgCg%3D%3D; ",
	"DNT": "1",
	"Sec-Fetch-Dest": "document",
	"Sec-Fetch-Mode": "navigate",
	"Sec-Fetch-Site": "same-origin",
	"User-Agent": "com.google.android.youtube/19.02.39 (Linux; U; Android 14) gzip",
	# Carried over from the Invidious source (Crystal syntax), kept for reference:
	#"x-goog-api-format-version" => "2",
	#"x-youtube-client-name" => client_config.name_proto,
	#"x-youtube-client-version" => client_config.version,
}

cachedInnertubeContent = {
	"client": {
		"hl": "en",
		"gl": "US",
		"remoteHost": "98.200.139.166",
		"deviceMake": "",
		"deviceModel": "",
		"visitorData": "CgtIYnpybHRmMnFyayjf8s63BjIKCgJVUxIEGgAgCg%3D%3D",
		"userAgent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0,gzip(gfe)",
		"clientName": "WEB",
		"clientVersion": "2.20240924.01.00",
		"osName": "Windows",
		"osVersion": "10.0",
		"originalUrl": "https://www.youtube.com/",
		"platform": "DESKTOP",
		"clientFormFactor": "UNKNOWN_FORM_FACTOR",
		"configInfo": {
			"appInstallData": "CN_yzrcGEI3QsQUQ3-2wBRDh7LAFEMnmsAUQooGwBRCPw7EFEImnsQUQ6sOvBRCDvLEFEI2UsQUQws2xBRCI468FEJzRsQUQ782wBRCFp7EFEK7BsQUQ2cmvBRDvx7EFEJKusQUQxtCxBRCdprAFEO_LsQUQ7KixBRCEtrAFEIzNsQUQppKxBRCSwP8SEJ3QsAUQt-r-EhDwu7EFEJSJsQUQms6xBRComrAFENCNsAUQ49GwBRDbvrEFEKrYsAUQsO6wBRC9tq4FENPhrwUQppqwBRCinbEFEPirsQUQt--vBRCWlbAFEPSrsAUQhcOxBRDmz7EFEJmYsQUQ4tSuBRCNzLAFEP7XsAUQ47qxBRC9mbAFEMfUsQUQvYqwBRDW3bAFEOW5sQUQkMyxBRC8vrEFEIiHsAUQx8qxBRCHw7EFEN3o_hIQgsawBRD5zLEFEI_QsQUQieiuBRDRuLEFEOOVsAUQksuxBRDq0LEFENfprwUQr8exBRCKobEFEMX1sAUQydewBRDGpLEFEN6tsQUQ65mxBRDtubEFEMW_sQUQx-awBRDN17AFEMi_sQUQ86KxBRDM364FEJT-sAUQ9quwBRCBw7EFEKaTsQUQyfevBRDr6P4SENuvrwUQ28GxBRDiq7EFEPjGsQUQ_dGxBRD7rbEFEMvCsQUQ2dKxBRDszf8SENrNsQUqJENBTVNGeFVVcGIyd0ROemtCb096OUF2b3NRU3ctUTc1UWgwSA%3D%3D"
		},
		"userInterfaceTheme": "USER_INTERFACE_THEME_LIGHT",
		"browserName": "Firefox",
		"browserVersion": "102.0",
		"acceptHeader": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
		"deviceExperimentId": "ChxOelF4T0RRM05qZ3lPVE0zT0RBek16UTROZz09EN_yzrcGGN_yzrcG"
	}
}
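
# Hedged sketch (an assumption, not something this module currently does): the
# context above is a hardcoded snapshot, so visitorData and appInstallData will
# eventually go stale. One way to refresh it is to scrape INNERTUBE_CONTEXT out
# of ytcfg on any YouTube page; raw_decode parses the nested JSON object in
# place. Raises ValueError if the marker is missing from the page.
def fetchInnertubeContext():
	import requests
	html = requests.get("https://www.youtube.com/", headers={"User-Agent": defaultHeaders["User-Agent"]}).text
	marker = '"INNERTUBE_CONTEXT":'
	start = html.index(marker) + len(marker)
	context, _ = json.JSONDecoder().raw_decode(html[start:])
	return context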

# Keyword-only arguments: the original signature's `*kwargs` silently swallowed
# any positional arguments instead of rejecting them.
def browse(*, browseId=None, continuation=None, params=None):
	data = {
		"headers": defaultHeaders,
		"referrer": "https://www.youtube.com/",
		"referrerPolicy": "strict-origin-when-cross-origin",
		"body": {
			"context": cachedInnertubeContent
		},
		"method": "POST",
		"mode": "cors"
	}
	if browseId is not None:
		data["body"]["browseId"] = browseId
	if continuation is not None:
		data["body"]["continuation"] = continuation
	if params is not None:
		data["body"]["params"] = params
	data["body"] = json.dumps(data["body"])
	return nodeFetch("https://www.youtube.com/youtubei/v1/browse?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", json.dumps(data))
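
# Hedged example: fetch a channel's browse payload by UCID and peek at the
# top-level keys of the Innertube response. The channel id here is purely
# illustrative; any UC... id works.
def _example_browse():
	data = browse(browseId="UCXuqSBlHAE6Xw-yeJA0Tunw")
	if data is not None:
		print(sorted(data.keys()))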

# Note: this mirrors Invidious's "next" endpoint name, which shadows the
# Python builtin next() within this module.
def next(data=None, continuation=None):
	newData = {
		"headers": defaultHeaders,
		"referrer": "https://www.youtube.com/",
		"referrerPolicy": "strict-origin-when-cross-origin",
		"body": {
			"context": cachedInnertubeContent
		},
		"method": "POST",
		"mode": "cors"
	}
	if data is not None:
		for k in data:
			newData["body"][k] = data[k]
	if continuation is not None:
		newData["body"]["continuation"] = continuation
	newData["body"] = json.dumps(newData["body"])
	return nodeFetch("https://www.youtube.com/youtubei/v1/next?key=AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8", json.dumps(newData))
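
# Hedged example (assumptions flagged inline): next() is the Innertube endpoint
# behind watch-page data such as comments. A first call might pass a videoId in
# `data` (an Innertube body field; this module's own callers aren't shown here),
# and later calls pass the continuation token from the previous response.
def _example_next(video_id, continuation_token=None):
	if continuation_token is None:
		return next(data={"videoId": video_id})  # assumed Innertube field
	return next(continuation=continuation_token)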