class-wp-oembed.php 30 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770
  1. <?php
  2. /**
  3. * API for fetching the HTML to embed remote content based on a provided URL
  4. *
  5. * Used internally by the WP_Embed class, but is designed to be generic.
  6. *
  7. * @link https://wordpress.org/support/article/embeds/
  8. * @link http://oembed.com/
  9. *
  10. * @package WordPress
  11. * @subpackage oEmbed
  12. */
  13. /**
  14. * Core class used to implement oEmbed functionality.
  15. *
  16. * @since 2.9.0
  17. */
  18. #[AllowDynamicProperties]
  19. class WP_oEmbed {
  /**
   * A list of oEmbed providers.
   *
   * Keyed by URL match mask. Each value is a two-element array holding the
   * provider endpoint URL and a flag telling whether the key is already a
   * regular expression (see WP_oEmbed::get_provider()).
   *
   * @since 2.9.0
   * @var array
   */
  public $providers = array();
  /**
   * A list of early oEmbed providers.
   *
   * Holds provider additions (under the 'add' key) and removals (under the
   * 'remove' key) queued by the static helpers of this class. The constructor
   * applies them to the default list and then resets this property.
   *
   * @since 4.0.0
   * @var array
   */
  public static $early_providers = array();
  /**
   * A list of private/protected methods, used for backward compatibility.
   *
   * Only the names listed here are forwarded by WP_oEmbed::__call().
   *
   * @since 4.2.0
   * @var array
   */
  private $compat_methods = array( '_fetch_with_format', '_parse_json', '_parse_xml', '_parse_xml_body' );
  /**
   * Constructor.
   *
   * Builds the default provider map, applies any additions and removals that
   * were queued in WP_oEmbed::$early_providers, passes the result through the
   * {@see 'oembed_providers'} filter and registers the newline-stripping
   * callback on {@see 'oembed_dataparse'}.
   *
   * @since 2.9.0
   */
  public function __construct() {
    // URL-encoded site address, appended to the VideoPress endpoint below.
    $encoded_home = urlencode( home_url() );

    // Match mask => array( endpoint URL, whether the mask is a regular expression ).
    $providers = array(
      '#https?://((m|www)\.)?youtube\.com/watch.*#i' => array( 'https://www.youtube.com/oembed', true ),
      '#https?://((m|www)\.)?youtube\.com/playlist.*#i' => array( 'https://www.youtube.com/oembed', true ),
      '#https?://((m|www)\.)?youtube\.com/shorts/*#i' => array( 'https://www.youtube.com/oembed', true ),
      '#https?://youtu\.be/.*#i' => array( 'https://www.youtube.com/oembed', true ),
      '#https?://(.+\.)?vimeo\.com/.*#i' => array( 'https://vimeo.com/api/oembed.{format}', true ),
      '#https?://(www\.)?dailymotion\.com/.*#i' => array( 'https://www.dailymotion.com/services/oembed', true ),
      '#https?://dai\.ly/.*#i' => array( 'https://www.dailymotion.com/services/oembed', true ),
      '#https?://(www\.)?flickr\.com/.*#i' => array( 'https://www.flickr.com/services/oembed/', true ),
      '#https?://flic\.kr/.*#i' => array( 'https://www.flickr.com/services/oembed/', true ),
      '#https?://(.+\.)?smugmug\.com/.*#i' => array( 'https://api.smugmug.com/services/oembed/', true ),
      '#https?://(www\.)?scribd\.com/(doc|document)/.*#i' => array( 'https://www.scribd.com/services/oembed', true ),
      '#https?://wordpress\.tv/.*#i' => array( 'https://wordpress.tv/oembed/', true ),
      '#https?://(.+\.)?polldaddy\.com/.*#i' => array( 'https://api.crowdsignal.com/oembed', true ),
      '#https?://poll\.fm/.*#i' => array( 'https://api.crowdsignal.com/oembed', true ),
      '#https?://(.+\.)?survey\.fm/.*#i' => array( 'https://api.crowdsignal.com/oembed', true ),
      '#https?://(www\.)?twitter\.com/\w{1,15}/status(es)?/.*#i' => array( 'https://publish.twitter.com/oembed', true ),
      '#https?://(www\.)?twitter\.com/\w{1,15}$#i' => array( 'https://publish.twitter.com/oembed', true ),
      '#https?://(www\.)?twitter\.com/\w{1,15}/likes$#i' => array( 'https://publish.twitter.com/oembed', true ),
      '#https?://(www\.)?twitter\.com/\w{1,15}/lists/.*#i' => array( 'https://publish.twitter.com/oembed', true ),
      '#https?://(www\.)?twitter\.com/\w{1,15}/timelines/.*#i' => array( 'https://publish.twitter.com/oembed', true ),
      '#https?://(www\.)?twitter\.com/i/moments/.*#i' => array( 'https://publish.twitter.com/oembed', true ),
      '#https?://(www\.)?soundcloud\.com/.*#i' => array( 'https://soundcloud.com/oembed', true ),
      '#https?://(.+?\.)?slideshare\.net/.*#i' => array( 'https://www.slideshare.net/api/oembed/2', true ),
      '#https?://(open|play)\.spotify\.com/.*#i' => array( 'https://embed.spotify.com/oembed/', true ),
      '#https?://(.+\.)?imgur\.com/.*#i' => array( 'https://api.imgur.com/oembed', true ),
      '#https?://(www\.)?issuu\.com/.+/docs/.+#i' => array( 'https://issuu.com/oembed_wp', true ),
      '#https?://(www\.)?mixcloud\.com/.*#i' => array( 'https://www.mixcloud.com/oembed', true ),
      '#https?://(www\.|embed\.)?ted\.com/talks/.*#i' => array( 'https://www.ted.com/services/v1/oembed.{format}', true ),
      '#https?://(www\.)?(animoto|video214)\.com/play/.*#i' => array( 'https://animoto.com/oembeds/create', true ),
      '#https?://(.+)\.tumblr\.com/.*#i' => array( 'https://www.tumblr.com/oembed/1.0', true ),
      '#https?://(www\.)?kickstarter\.com/projects/.*#i' => array( 'https://www.kickstarter.com/services/oembed', true ),
      '#https?://kck\.st/.*#i' => array( 'https://www.kickstarter.com/services/oembed', true ),
      '#https?://cloudup\.com/.*#i' => array( 'https://cloudup.com/oembed', true ),
      '#https?://(www\.)?reverbnation\.com/.*#i' => array( 'https://www.reverbnation.com/oembed', true ),
      '#https?://videopress\.com/v/.*#' => array( 'https://public-api.wordpress.com/oembed/?for=' . $encoded_home, true ),
      '#https?://(www\.)?reddit\.com/r/[^/]+/comments/.*#i' => array( 'https://www.reddit.com/oembed', true ),
      '#https?://(www\.)?speakerdeck\.com/.*#i' => array( 'https://speakerdeck.com/oembed.{format}', true ),
      '#https?://(www\.)?screencast\.com/.*#i' => array( 'https://api.screencast.com/external/oembed', true ),
      '#https?://([a-z0-9-]+\.)?amazon\.(com|com\.mx|com\.br|ca)/.*#i' => array( 'https://read.amazon.com/kp/api/oembed', true ),
      '#https?://([a-z0-9-]+\.)?amazon\.(co\.uk|de|fr|it|es|in|nl|ru)/.*#i' => array( 'https://read.amazon.co.uk/kp/api/oembed', true ),
      '#https?://([a-z0-9-]+\.)?amazon\.(co\.jp|com\.au)/.*#i' => array( 'https://read.amazon.com.au/kp/api/oembed', true ),
      '#https?://([a-z0-9-]+\.)?amazon\.cn/.*#i' => array( 'https://read.amazon.cn/kp/api/oembed', true ),
      '#https?://(www\.)?a\.co/.*#i' => array( 'https://read.amazon.com/kp/api/oembed', true ),
      '#https?://(www\.)?amzn\.to/.*#i' => array( 'https://read.amazon.com/kp/api/oembed', true ),
      '#https?://(www\.)?amzn\.eu/.*#i' => array( 'https://read.amazon.co.uk/kp/api/oembed', true ),
      '#https?://(www\.)?amzn\.in/.*#i' => array( 'https://read.amazon.in/kp/api/oembed', true ),
      '#https?://(www\.)?amzn\.asia/.*#i' => array( 'https://read.amazon.com.au/kp/api/oembed', true ),
      '#https?://(www\.)?z\.cn/.*#i' => array( 'https://read.amazon.cn/kp/api/oembed', true ),
      '#https?://www\.someecards\.com/.+-cards/.+#i' => array( 'https://www.someecards.com/v2/oembed/', true ),
      '#https?://www\.someecards\.com/usercards/viewcard/.+#i' => array( 'https://www.someecards.com/v2/oembed/', true ),
      '#https?://some\.ly\/.+#i' => array( 'https://www.someecards.com/v2/oembed/', true ),
      '#https?://(www\.)?tiktok\.com/.*/video/.*#i' => array( 'https://www.tiktok.com/oembed', true ),
      '#https?://([a-z]{2}|www)\.pinterest\.com(\.(au|mx))?/.*#i' => array( 'https://www.pinterest.com/oembed.json', true ),
      '#https?://(www\.)?wolframcloud\.com/obj/.+#i' => array( 'https://www.wolframcloud.com/oembed', true ),
      '#https?://pca\.st/.+#i' => array( 'https://pca.st/oembed.json', true ),
    );

    // Apply queued changes: additions first, then removals, then empty the queue.
    $queued = self::$early_providers;

    $queued_additions = empty( $queued['add'] ) ? array() : $queued['add'];
    foreach ( $queued_additions as $mask => $definition ) {
      $providers[ $mask ] = $definition;
    }

    $queued_removals = empty( $queued['remove'] ) ? array() : $queued['remove'];
    foreach ( $queued_removals as $mask ) {
      unset( $providers[ $mask ] );
    }

    self::$early_providers = array();

    /**
     * Filters the list of sanctioned oEmbed providers.
     *
     * Since WordPress 4.4, oEmbed discovery is enabled for all users and allows embedding of sanitized
     * iframes. The providers in this list are sanctioned, meaning they are trusted and allowed to
     * embed any content, such as iframes, videos, JavaScript, and arbitrary HTML.
     *
     * Supported providers:
     *
     * | Provider | Flavor | Since |
     * | ------------ | ----------------------------------------- | ------- |
     * | Dailymotion | dailymotion.com | 2.9.0 |
     * | Flickr | flickr.com | 2.9.0 |
     * | Scribd | scribd.com | 2.9.0 |
     * | Vimeo | vimeo.com | 2.9.0 |
     * | WordPress.tv | wordpress.tv | 2.9.0 |
     * | YouTube | youtube.com/watch | 2.9.0 |
     * | Crowdsignal | polldaddy.com | 3.0.0 |
     * | SmugMug | smugmug.com | 3.0.0 |
     * | YouTube | youtu.be | 3.0.0 |
     * | Twitter | twitter.com | 3.4.0 |
     * | Slideshare | slideshare.net | 3.5.0 |
     * | SoundCloud | soundcloud.com | 3.5.0 |
     * | Dailymotion | dai.ly | 3.6.0 |
     * | Flickr | flic.kr | 3.6.0 |
     * | Spotify | spotify.com | 3.6.0 |
     * | Imgur | imgur.com | 3.9.0 |
     * | Animoto | animoto.com | 4.0.0 |
     * | Animoto | video214.com | 4.0.0 |
     * | Issuu | issuu.com | 4.0.0 |
     * | Mixcloud | mixcloud.com | 4.0.0 |
     * | Crowdsignal | poll.fm | 4.0.0 |
     * | TED | ted.com | 4.0.0 |
     * | YouTube | youtube.com/playlist | 4.0.0 |
     * | Tumblr | tumblr.com | 4.2.0 |
     * | Kickstarter | kickstarter.com | 4.2.0 |
     * | Kickstarter | kck.st | 4.2.0 |
     * | Cloudup | cloudup.com | 4.3.0 |
     * | ReverbNation | reverbnation.com | 4.4.0 |
     * | VideoPress | videopress.com | 4.4.0 |
     * | Reddit | reddit.com | 4.4.0 |
     * | Speaker Deck | speakerdeck.com | 4.4.0 |
     * | Twitter | twitter.com/timelines | 4.5.0 |
     * | Twitter | twitter.com/moments | 4.5.0 |
     * | Twitter | twitter.com/user | 4.7.0 |
     * | Twitter | twitter.com/likes | 4.7.0 |
     * | Twitter | twitter.com/lists | 4.7.0 |
     * | Screencast | screencast.com | 4.8.0 |
     * | Amazon | amazon.com (com.mx, com.br, ca) | 4.9.0 |
     * | Amazon | amazon.de (fr, it, es, in, nl, ru, co.uk) | 4.9.0 |
     * | Amazon | amazon.co.jp (com.au) | 4.9.0 |
     * | Amazon | amazon.cn | 4.9.0 |
     * | Amazon | a.co | 4.9.0 |
     * | Amazon | amzn.to (eu, in, asia) | 4.9.0 |
     * | Amazon | z.cn | 4.9.0 |
     * | Someecards | someecards.com | 4.9.0 |
     * | Someecards | some.ly | 4.9.0 |
     * | Crowdsignal | survey.fm | 5.1.0 |
     * | TikTok | tiktok.com | 5.4.0 |
     * | Pinterest | pinterest.com | 5.9.0 |
     * | WolframCloud | wolframcloud.com | 5.9.0 |
     * | Pocket Casts | pocketcasts.com | 6.1.0 |
     *
     * No longer supported providers:
     *
     * | Provider | Flavor | Since | Removed |
     * | ------------ | -------------------- | --------- | --------- |
     * | Qik | qik.com | 2.9.0 | 3.9.0 |
     * | Viddler | viddler.com | 2.9.0 | 4.0.0 |
     * | Revision3 | revision3.com | 2.9.0 | 4.2.0 |
     * | Blip | blip.tv | 2.9.0 | 4.4.0 |
     * | Rdio | rdio.com | 3.6.0 | 4.4.1 |
     * | Rdio | rd.io | 3.6.0 | 4.4.1 |
     * | Vine | vine.co | 4.1.0 | 4.9.0 |
     * | Photobucket | photobucket.com | 2.9.0 | 5.1.0 |
     * | Funny or Die | funnyordie.com | 3.0.0 | 5.1.0 |
     * | CollegeHumor | collegehumor.com | 4.0.0 | 5.3.1 |
     * | Hulu | hulu.com | 2.9.0 | 5.5.0 |
     * | Instagram | instagram.com | 3.5.0 | 5.5.2 |
     * | Instagram | instagr.am | 3.5.0 | 5.5.2 |
     * | Instagram TV | instagram.com | 5.1.0 | 5.5.2 |
     * | Instagram TV | instagr.am | 5.1.0 | 5.5.2 |
     * | Facebook | facebook.com | 4.7.0 | 5.5.2 |
     * | Meetup.com | meetup.com | 3.9.0 | 6.0.1 |
     * | Meetup.com | meetu.ps | 3.9.0 | 6.0.1 |
     *
     * @see wp_oembed_add_provider()
     *
     * @since 2.9.0
     *
     * @param array[] $providers An array of arrays containing data about popular oEmbed providers.
     */
    $this->providers = apply_filters( 'oembed_providers', $providers );

    // Fix any embeds that contain new lines in the middle of the HTML which breaks wpautop().
    add_filter( 'oembed_dataparse', array( $this, '_strip_newlines' ), 10, 3 );
  }
  /**
   * Exposes private/protected methods for backward compatibility.
   *
   * A call is forwarded only when the requested name appears in
   * WP_oEmbed::$compat_methods; any other name yields false.
   *
   * @since 4.0.0
   *
   * @param string $name      Method to call.
   * @param array  $arguments Arguments to pass when calling.
   * @return mixed|false Return value of the callback, false otherwise.
   */
  public function __call( $name, $arguments ) {
    $is_exposed = in_array( $name, $this->compat_methods, true );

    if ( ! $is_exposed ) {
      return false;
    }

    return $this->$name( ...$arguments );
  }
  /**
   * Takes a URL and returns the corresponding oEmbed provider's URL, if there is one.
   *
   * The registered providers are tried in order and the first mask that matches
   * wins. When nothing matches and discovery is enabled, the lookup falls back
   * to WP_oEmbed::discover().
   *
   * @since 4.0.0
   *
   * @see WP_oEmbed::discover()
   *
   * @param string       $url  The URL to the content.
   * @param string|array $args {
   *     Optional. Additional provider arguments. Default empty.
   *
   *     @type bool $discover Optional. Determines whether to attempt to discover link tags
   *                          at the given URL for an oEmbed provider when the provider URL
   *                          is not found in the built-in providers list. Default true.
   * }
   * @return string|false The oEmbed provider URL on success, false on failure.
   */
  public function get_provider( $url, $args = '' ) {
    $args = wp_parse_args( $args );

    if ( ! isset( $args['discover'] ) ) {
      $args['discover'] = true;
    }

    $provider = false;

    foreach ( $this->providers as $matchmask => $definition ) {
      list( $endpoint, $is_regex ) = $definition;

      // Turn the asterisk-type provider URLs into regex.
      if ( ! $is_regex ) {
        $placeholder = '___wildcard___';
        $quoted      = preg_quote( str_replace( '*', $placeholder, $matchmask ), '#' );
        $matchmask   = '#' . str_replace( $placeholder, '(.+)', $quoted ) . '#i';
        // Let a mask written with http:// match https:// URLs as well.
        $matchmask = preg_replace( '|^#http\\\://|', '#https?\://', $matchmask );
      }

      if ( preg_match( $matchmask, $url ) ) {
        // JSON is easier to deal with than XML.
        $provider = str_replace( '{format}', 'json', $endpoint );
        break;
      }
    }

    if ( ! $provider && $args['discover'] ) {
      $provider = $this->discover( $url );
    }

    return $provider;
  }
  /**
   * Adds an oEmbed provider.
   *
   * The provider is added just-in-time when wp_oembed_add_provider() is called before
   * the {@see 'plugins_loaded'} hook.
   *
   * The just-in-time addition is for the benefit of the {@see 'oembed_providers'} filter.
   *
   * The entry is stored under the 'add' key of WP_oEmbed::$early_providers, indexed
   * by `$format`, so queueing the same format twice keeps only the latest entry.
   *
   * @since 4.0.0
   *
   * @see wp_oembed_add_provider()
   *
   * @param string $format   Format of URL that this provider can handle. You can use
   *                         asterisks as wildcards.
   * @param string $provider The URL to the oEmbed provider.
   * @param bool   $regex    Optional. Whether the $format parameter is in a regex format.
   *                         Default false.
   */
  public static function _add_provider_early( $format, $provider, $regex = false ) {
    $queue = empty( self::$early_providers['add'] ) ? array() : self::$early_providers['add'];

    $queue[ $format ] = array( $provider, $regex );

    self::$early_providers['add'] = $queue;
  }
  /**
   * Removes an oEmbed provider.
   *
   * The provider is removed just-in-time when wp_oembed_remove_provider() is called before
   * the {@see 'plugins_loaded'} hook.
   *
   * The just-in-time removal is for the benefit of the {@see 'oembed_providers'} filter.
   *
   * The format is appended to the 'remove' list of WP_oEmbed::$early_providers.
   *
   * @since 4.0.0
   *
   * @see wp_oembed_remove_provider()
   *
   * @param string $format The format of URL that this provider can handle. You can use
   *                       asterisks as wildcards.
   */
  public static function _remove_provider_early( $format ) {
    $queue = empty( self::$early_providers['remove'] ) ? array() : self::$early_providers['remove'];

    $queue[] = $format;

    self::$early_providers['remove'] = $queue;
  }
  /**
   * Takes a URL and attempts to return the oEmbed data.
   *
   * Resolves the provider for the URL and, when one is found, hands the
   * request over to WP_oEmbed::fetch().
   *
   * @see WP_oEmbed::fetch()
   *
   * @since 4.8.0
   *
   * @param string       $url  The URL to the content that should be attempted to be embedded.
   * @param string|array $args Optional. Additional arguments for retrieving embed HTML.
   *                           See wp_oembed_get() for accepted arguments. Default empty.
   * @return object|false The result in the form of an object on success, false on failure.
   */
  public function get_data( $url, $args = '' ) {
    $args     = wp_parse_args( $args );
    $provider = $this->get_provider( $url, $args );

    // Without a provider there is nothing to fetch; otherwise relay the fetch result as-is.
    return $provider ? $this->fetch( $provider, $url, $args ) : false;
  }
  /**
   * The do-it-all function that takes a URL and attempts to return the HTML.
   *
   * Order of operations: the {@see 'pre_oembed_result'} short-circuit filter,
   * data retrieval, conversion to HTML, then the {@see 'oembed_result'} filter.
   *
   * @see WP_oEmbed::fetch()
   * @see WP_oEmbed::data2html()
   *
   * @since 2.9.0
   *
   * @param string       $url  The URL to the content that should be attempted to be embedded.
   * @param string|array $args Optional. Additional arguments for retrieving embed HTML.
   *                           See wp_oembed_get() for accepted arguments. Default empty.
   * @return string|false The UNSANITIZED (and potentially unsafe) HTML that should be used to embed
   *                      on success, false on failure.
   */
  public function get_html( $url, $args = '' ) {
    /**
     * Filters the oEmbed result before any HTTP requests are made.
     *
     * This allows one to short-circuit the default logic, perhaps by
     * replacing it with a routine that is more optimal for your setup.
     *
     * Returning a non-null value from the filter will effectively short-circuit retrieval
     * and return the passed value instead.
     *
     * @since 4.5.3
     *
     * @param null|string  $result The UNSANITIZED (and potentially unsafe) HTML that should be used to embed.
     *                             Default null to continue retrieving the result.
     * @param string       $url    The URL to the content that should be attempted to be embedded.
     * @param string|array $args   Optional. Additional arguments for retrieving embed HTML.
     *                             See wp_oembed_get() for accepted arguments. Default empty.
     */
    $short_circuit = apply_filters( 'pre_oembed_result', null, $url, $args );

    if ( null !== $short_circuit ) {
      return $short_circuit;
    }

    $data = $this->get_data( $url, $args );

    if ( false === $data ) {
      return false;
    }

    $html = $this->data2html( $data, $url );

    /**
     * Filters the HTML returned by the oEmbed provider.
     *
     * @since 2.9.0
     *
     * @param string|false $data The returned oEmbed HTML (false if unsafe).
     * @param string       $url  URL of the content to be embedded.
     * @param string|array $args Optional. Additional arguments for retrieving embed HTML.
     *                           See wp_oembed_get() for accepted arguments. Default empty.
     */
    return apply_filters( 'oembed_result', $html, $url, $args );
  }
  /**
   * Attempts to discover link tags at the given URL for an oEmbed provider.
   *
   * Fetches the page, keeps only the markup that precedes `</head>`, and looks
   * for `<link>` elements whose `type` is one of the known oEmbed link types.
   * A JSON endpoint is preferred over an XML one.
   *
   * @since 2.9.0
   *
   * @param string $url The URL that should be inspected for discovery `<link>` tags.
   * @return string|false The oEmbed provider URL on success, false on failure.
   */
  public function discover( $url ) {
    $providers = array();
    $args      = array(
      'limit_response_size' => 153600, // 150 KB
    );

    /**
     * Filters oEmbed remote get arguments.
     *
     * @since 4.0.0
     *
     * @see WP_Http::request()
     *
     * @param array  $args oEmbed remote get arguments.
     * @param string $url  URL to be inspected.
     */
    $args = apply_filters( 'oembed_remote_get_args', $args, $url );

    // Fetch URL content.
    $request = wp_safe_remote_get( $url, $args );
    $html    = wp_remote_retrieve_body( $request );

    if ( $html ) {
      /**
       * Filters the link types that contain oEmbed provider URLs.
       *
       * @since 2.9.0
       *
       * @param string[] $format Array of oEmbed link types. Accepts 'application/json+oembed',
       *                         'text/xml+oembed', and 'application/xml+oembed' (incorrect,
       *                         used by at least Vimeo).
       */
      $linktypes = apply_filters(
        'oembed_linktypes',
        array(
          'application/json+oembed' => 'json',
          'text/xml+oembed'         => 'xml',
          'application/xml+oembed'  => 'xml',
        )
      );

      // Strip <body>.
      // stripos() returns 0 for a match at the very start of the string, so the
      // result is compared against false rather than tested for truthiness;
      // otherwise such a document would be scanned including its body.
      $html_head_end = stripos( $html, '</head>' );
      if ( false !== $html_head_end ) {
        $html = substr( $html, 0, $html_head_end );
      }

      // Do a quick check.
      $tagfound = false;
      foreach ( $linktypes as $linktype => $format ) {
        if ( false !== stripos( $html, $linktype ) ) {
          $tagfound = true;
          break;
        }
      }

      if ( $tagfound && preg_match_all( '#<link([^<>]+)/?>#iU', $html, $links ) ) {
        foreach ( $links[1] as $link ) {
          $atts = shortcode_parse_atts( $link );

          if ( ! empty( $atts['type'] ) && ! empty( $linktypes[ $atts['type'] ] ) && ! empty( $atts['href'] ) ) {
            $providers[ $linktypes[ $atts['type'] ] ] = htmlspecialchars_decode( $atts['href'] );

            // Stop here if it's JSON (that's all we need).
            if ( 'json' === $linktypes[ $atts['type'] ] ) {
              break;
            }
          }
        }
      }
    }

    // JSON is preferred to XML.
    if ( ! empty( $providers['json'] ) ) {
      return $providers['json'];
    } elseif ( ! empty( $providers['xml'] ) ) {
      return $providers['xml'];
    } else {
      return false;
    }
  }
  /**
   * Connects to a oEmbed provider and returns the result.
   *
   * Appends the `maxwidth`, `maxheight`, `url` and `dnt` query arguments to the
   * provider URL, then requests JSON first and falls back to XML only when the
   * provider answers "not implemented".
   *
   * @since 2.9.0
   *
   * @param string       $provider The URL to the oEmbed provider.
   * @param string       $url      The URL to the content that is desired to be embedded.
   * @param string|array $args     Optional. Additional arguments for retrieving embed HTML.
   *                               See wp_oembed_get() for accepted arguments. Default empty.
   * @return object|false The result in the form of an object on success, false on failure.
   */
  public function fetch( $provider, $url, $args = '' ) {
    $args = wp_parse_args( $args, wp_embed_defaults( $url ) );

    $query_args = array(
      'maxwidth'  => (int) $args['width'],
      'maxheight' => (int) $args['height'],
      'url'       => urlencode( $url ),
      'dnt'       => 1,
    );

    // Added one at a time, in this order.
    foreach ( $query_args as $key => $value ) {
      $provider = add_query_arg( $key, $value, $provider );
    }

    /**
     * Filters the oEmbed URL to be fetched.
     *
     * @since 2.9.0
     * @since 4.9.0 The `dnt` (Do Not Track) query parameter was added to all oEmbed provider URLs.
     *
     * @param string $provider URL of the oEmbed provider.
     * @param string $url      URL of the content to be embedded.
     * @param array  $args     Optional. Additional arguments for retrieving embed HTML.
     *                         See wp_oembed_get() for accepted arguments. Default empty.
     */
    $provider = apply_filters( 'oembed_fetch_url', $provider, $url, $args );

    foreach ( array( 'json', 'xml' ) as $format ) {
      $result          = $this->_fetch_with_format( $provider, $format );
      $not_implemented = is_wp_error( $result ) && 'not-implemented' === $result->get_error_code();

      // Any outcome other than "not implemented" is final for this request.
      if ( ! $not_implemented ) {
        return ( $result && ! is_wp_error( $result ) ) ? $result : false;
      }
    }

    return false;
  }
  /**
   * Fetches result from an oEmbed provider for a specific format and complete provider URL
   *
   * A 501 response is reported as a `not-implemented` WP_Error; an empty body
   * yields false; otherwise the body is handed to the `_parse_{$format}` method.
   *
   * @since 3.0.0
   *
   * @param string $provider_url_with_args URL to the provider with full arguments list (url, maxheight, etc.)
   * @param string $format                 Format to use.
   * @return object|false|WP_Error The result in the form of an object on success, false on failure.
   */
  private function _fetch_with_format( $provider_url_with_args, $format ) {
    $request_url = add_query_arg( 'format', $format, $provider_url_with_args );

    /** This filter is documented in wp-includes/class-wp-oembed.php */
    $request_args = apply_filters( 'oembed_remote_get_args', array(), $request_url );

    $response = wp_safe_remote_get( $request_url, $request_args );

    if ( 501 == wp_remote_retrieve_response_code( $response ) ) {
      return new WP_Error( 'not-implemented' );
    }

    $body = wp_remote_retrieve_body( $response );

    if ( ! $body ) {
      return false;
    }

    // Dispatch to the parser named after the format.
    $parser = '_parse_' . $format;

    return $this->$parser( $body );
  }
  /**
   * Parses a json response body.
   *
   * Only a JSON document that decodes to an object is accepted; arrays,
   * scalars and undecodable input all yield false.
   *
   * @since 3.0.0
   *
   * @param string $response_body
   * @return object|false
   */
  private function _parse_json( $response_body ) {
    $decoded = json_decode( trim( $response_body ) );

    if ( $decoded && is_object( $decoded ) ) {
      return $decoded;
    }

    return false;
  }
  /**
   * Parses an XML response body.
   *
   * Wraps WP_oEmbed::_parse_xml_body() with libxml housekeeping: internal error
   * handling is switched on for the duration of the parse and, on PHP versions
   * before 8.0, the entity loader is disabled. Both settings are restored
   * afterwards.
   *
   * @since 3.0.0
   *
   * @param string $response_body
   * @return object|false
   */
  private function _parse_xml( $response_body ) {
    if ( ! function_exists( 'libxml_disable_entity_loader' ) ) {
      return false;
    }

    $is_pre_php8 = PHP_VERSION_ID < 80000;

    if ( $is_pre_php8 ) {
      // This function has been deprecated in PHP 8.0 because in libxml 2.9.0, external entity loading
      // is disabled by default, so this function is no longer needed to protect against XXE attacks.
      // phpcs:ignore PHPCompatibility.FunctionUse.RemovedFunctions.libxml_disable_entity_loaderDeprecated
      $previous_loader = libxml_disable_entity_loader( true );
    }

    $previous_errors = libxml_use_internal_errors( true );

    $parsed = $this->_parse_xml_body( $response_body );

    libxml_use_internal_errors( $previous_errors );

    if ( $is_pre_php8 && isset( $previous_loader ) ) {
      // phpcs:ignore PHPCompatibility.FunctionUse.RemovedFunctions.libxml_disable_entity_loaderDeprecated
      libxml_disable_entity_loader( $previous_loader );
    }

    return $parsed;
  }
  /**
   * Serves as a helper function for parsing an XML response body.
   *
   * Documents that fail to load or that carry a document type declaration are
   * rejected. For an accepted document, every child element of the root is
   * copied onto a plain object as a string property.
   *
   * @since 3.6.0
   *
   * @param string $response_body
   * @return stdClass|false
   */
  private function _parse_xml_body( $response_body ) {
    $can_parse = function_exists( 'simplexml_import_dom' ) && class_exists( 'DOMDocument', false );

    if ( ! $can_parse ) {
      return false;
    }

    $document = new DOMDocument();

    if ( ! $document->loadXML( $response_body ) ) {
      return false;
    }

    // Refuse anything with a doctype, whether exposed on the document or found among its children.
    if ( isset( $document->doctype ) ) {
      return false;
    }

    foreach ( $document->childNodes as $node ) {
      if ( XML_DOCUMENT_TYPE_NODE === $node->nodeType ) {
        return false;
      }
    }

    $root = simplexml_import_dom( $document );

    if ( ! $root ) {
      return false;
    }

    $fields = new stdClass();

    foreach ( $root as $name => $element ) {
      $fields->$name = (string) $element;
    }

    return $fields;
  }
  /**
   * Converts a data object from WP_oEmbed::fetch() and returns the HTML.
   *
   * - `photo`: builds a linked `<img>` from the escaped URL, title and dimensions.
   * - `video` / `rich`: returns the provider-supplied HTML string unchanged.
   * - `link`: builds an anchor to `$url` labelled with the escaped title.
   *
   * Whatever was produced (or false) is passed through the
   * {@see 'oembed_dataparse'} filter before being returned.
   *
   * @since 2.9.0
   *
   * @param object $data A data object result from an oEmbed provider.
   * @param string $url  The URL to the content that is desired to be embedded.
   * @return string|false The HTML needed to embed on success, false on failure.
   */
  public function data2html( $data, $url ) {
    if ( ! is_object( $data ) || empty( $data->type ) ) {
      return false;
    }

    // Stays false for unknown types and for known types with unusable fields.
    $return = false;

    switch ( $data->type ) {
      case 'photo':
        $unusable = empty( $data->url )
          || empty( $data->width )
          || empty( $data->height )
          || ! is_string( $data->url )
          || ! is_numeric( $data->width )
          || ! is_numeric( $data->height );

        if ( $unusable ) {
          break;
        }

        $title = '';
        if ( ! empty( $data->title ) && is_string( $data->title ) ) {
          $title = $data->title;
        }

        $link_href  = esc_url( $url );
        $image_src  = esc_url( $data->url );
        $image_alt  = esc_attr( $title );
        $image_wide = esc_attr( $data->width );
        $image_high = esc_attr( $data->height );

        $return = '<a href="' . $link_href . '"><img src="' . $image_src . '" alt="' . $image_alt . '" width="' . $image_wide . '" height="' . $image_high . '" /></a>';
        break;

      case 'video':
      case 'rich':
        if ( ! empty( $data->html ) && is_string( $data->html ) ) {
          $return = $data->html;
        }
        break;

      case 'link':
        if ( ! empty( $data->title ) && is_string( $data->title ) ) {
          $return = '<a href="' . esc_url( $url ) . '">' . esc_html( $data->title ) . '</a>';
        }
        break;
    }

    /**
     * Filters the returned oEmbed HTML.
     *
     * Use this filter to add support for custom data types, or to filter the result.
     *
     * @since 2.9.0
     *
     * @param string $return The returned oEmbed HTML.
     * @param object $data   A data object result from an oEmbed provider.
     * @param string $url    The URL of the content to be embedded.
     */
    return apply_filters( 'oembed_dataparse', $return, $data, $url );
  }
  /**
   * Strips any new lines from the HTML.
   *
   * Line breaks inside `<pre>` elements are preserved: each `<pre>` block is
   * swapped for a placeholder token, the remaining line breaks are removed,
   * and the blocks are then put back.
   *
   * @since 2.9.0 as strip_scribd_newlines()
   * @since 3.0.0
   *
   * @param string $html Existing HTML.
   * @param object $data Data object from WP_oEmbed::data2html()
   * @param string $url  The original URL passed to oEmbed.
   * @return string Possibly modified $html
   */
  public function _strip_newlines( $html, $data, $url ) {
    if ( false === strpos( $html, "\n" ) ) {
      return $html;
    }

    $found   = array();
    $token   = '__PRE__';
    $search  = array( "\t", "\n", "\r", ' ' );
    $replace = array( '__TAB__', '__NL__', '__CR__', '__SPACE__' );

    // Whitespace is tokenized so the pattern below can match <pre> blocks spanning several lines.
    $tokenized = str_replace( $search, $replace, $html );
    preg_match_all( '#(<pre[^>]*>.+?</pre>)#i', $tokenized, $matches, PREG_SET_ORDER );

    foreach ( $matches as $i => $match ) {
      $tag_html = str_replace( $replace, $search, $match[0] );

      // The trailing "|" terminates the token. Without it "__PRE__1" is a prefix
      // of "__PRE__10", and the sequential array replacement at the end would
      // restore the wrong block once there are more than ten <pre> elements.
      $tag_token = $token . $i . '|';

      $found[ $tag_token ] = $tag_html;

      // Every occurrence of an identical block is swapped for the same token.
      // (The fourth argument of str_replace() only reports a count; it is not a limit.)
      $html = str_replace( $tag_html, $tag_token, $html );
    }

    $replaced = str_replace( $replace, $search, $html );
    $stripped = str_replace( array( "\r\n", "\n" ), '', $replaced );

    $pre    = array_values( $found );
    $tokens = array_keys( $found );

    return str_replace( $tokens, $pre, $stripped );
  }
  690. }