Entities.php 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610
  1. <?php
  2. /**
  3. * SimplePie
  4. *
  5. * A PHP-Based RSS and Atom Feed Framework.
  6. * Takes the hard work out of managing a complete RSS/Atom solution.
  7. *
  8. * Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
  9. * All rights reserved.
  10. *
  11. * Redistribution and use in source and binary forms, with or without modification, are
  12. * permitted provided that the following conditions are met:
  13. *
  14. * * Redistributions of source code must retain the above copyright notice, this list of
  15. * conditions and the following disclaimer.
  16. *
  17. * * Redistributions in binary form must reproduce the above copyright notice, this list
  18. * of conditions and the following disclaimer in the documentation and/or other materials
  19. * provided with the distribution.
  20. *
  21. * * Neither the name of the SimplePie Team nor the names of its contributors may be used
  22. * to endorse or promote products derived from this software without specific prior
  23. * written permission.
  24. *
  25. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
  26. * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
  27. * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
  28. * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  29. * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  30. * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  31. * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
  32. * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  33. * POSSIBILITY OF SUCH DAMAGE.
  34. *
  35. * @package SimplePie
  36. * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
  37. * @author Ryan Parman
  38. * @author Sam Sneddon
  39. * @author Ryan McCue
  40. * @link http://simplepie.org/ SimplePie
  41. * @license http://www.opensource.org/licenses/bsd-license.php BSD License
  42. */
  43. /**
  44. * Decode HTML Entities
  45. *
  46. * This implements HTML5 as of revision 967 (2007-06-28)
  47. *
  48. * @deprecated Use DOMDocument instead!
  49. * @package SimplePie
  50. */
  class SimplePie_Decode_HTML_Entities
  {
      /**
       * Data to be parsed
       *
       * Character references are replaced in place while parse() runs.
       *
       * @access private
       * @var string
       */
      public $data = '';

      /**
       * Currently consumed bytes
       *
       * Holds the bytes read for the reference being examined, starting
       * with its "&". parse() clears it after every entity() call.
       *
       * @access private
       * @var string
       */
      public $consumed = '';

      /**
       * Position of the current byte being parsed
       *
       * Byte offset into $data of the next byte consume() will return.
       *
       * @access private
       * @var int
       */
      public $position = 0;

      /**
       * Create an instance of the class with the input data
       *
       * @access public
       * @param string $data Input data
       */
      public function __construct($data)
      {
          $this->data = $data;
      }

      /**
       * Parse the input data
       *
       * Walks $data from the current position, and for every "&" found
       * hands over to entity() to decode the reference that may follow.
       *
       * @access public
       * @return string Output data
       */
      public function parse()
      {
          while (($this->position = strpos($this->data, '&', $this->position)) !== false)
          {
              // Consume the "&" itself so that $consumed always starts with it.
              $this->consume();
              $this->entity();
              $this->consumed = '';
          }
          return $this->data;
      }

      /**
       * Consume the next byte
       *
       * Appends the byte to $consumed and advances $position by one.
       *
       * @access private
       * @return mixed The next byte, or false, if there is no more data
       */
      public function consume()
      {
          if (isset($this->data[$this->position]))
          {
              $this->consumed .= $this->data[$this->position];
              return $this->data[$this->position++];
          }
          return false;
      }

      /**
       * Consume a range of characters
       *
       * Reads the longest run of bytes at the current position that all
       * occur in $chars, appends it to $consumed and advances $position.
       *
       * @access private
       * @param string $chars Characters to consume
       * @return mixed A series of characters that match the range, or false
       */
      public function consume_range($chars)
      {
          if ($len = strspn($this->data, $chars, $this->position))
          {
              $data = substr($this->data, $this->position, $len);
              $this->consumed .= $data;
              $this->position += $len;
              return $data;
          }
          return false;
      }

      /**
       * Unconsume one byte
       *
       * Drops the last byte from $consumed and moves $position back by one.
       * Must only be called after a consume() that actually returned a byte.
       *
       * @access private
       */
      public function unconsume()
      {
          $this->consumed = substr($this->consumed, 0, -1);
          $this->position--;
      }

      /**
       * Decode an entity
       *
       * Expects the "&" to have been consumed already (parse() does this).
       * Looks at the byte after it and either gives up (end of data,
       * whitespace, "<" or "&"), decodes a numeric reference ("#"), or tries
       * to decode a named reference (anything else).
       *
       * @access private
       */
      public function entity()
      {
          $byte = $this->consume();

          // Nothing follows the "&": there is nothing to decode or to give back.
          if ($byte === false)
          {
              return;
          }

          // Strict comparison on purpose: with a loose switch the byte "0"
          // compares equal to false.
          if (in_array($byte, array("\x09", "\x0A", "\x0B", "\x0C", "\x20", "\x3C", "\x26"), true))
          {
              // Not a character reference. Hand the lookahead byte back so
              // parse() sees it again; this matters when it is itself a "&"
              // starting a real reference, as in "&&lt;".
              $this->unconsume();
              return;
          }

          if ($byte === "\x23")
          {
              $this->numeric_entity();
              return;
          }

          $this->named_entity();
      }

      /**
       * Decode a numeric character reference
       *
       * Expects "&#" to have been consumed. Reads an optional "x"/"X" marker,
       * the digits, and an optional ";", then replaces the whole reference in
       * $data and leaves $position just after the replacement. Does nothing
       * when no digits follow.
       *
       * @access private
       */
      private function numeric_entity()
      {
          // Code points that are replaced by a fixed byte sequence instead of
          // being passed to SimplePie_Misc::codepoint_to_utf8().
          static $windows_1252_specials = array(
              0x0D => "\x0A",
              0x80 => "\xE2\x82\xAC",
              0x81 => "\xEF\xBF\xBD",
              0x82 => "\xE2\x80\x9A",
              0x83 => "\xC6\x92",
              0x84 => "\xE2\x80\x9E",
              0x85 => "\xE2\x80\xA6",
              0x86 => "\xE2\x80\xA0",
              0x87 => "\xE2\x80\xA1",
              0x88 => "\xCB\x86",
              0x89 => "\xE2\x80\xB0",
              0x8A => "\xC5\xA0",
              0x8B => "\xE2\x80\xB9",
              0x8C => "\xC5\x92",
              0x8D => "\xEF\xBF\xBD",
              0x8E => "\xC5\xBD",
              0x8F => "\xEF\xBF\xBD",
              0x90 => "\xEF\xBF\xBD",
              0x91 => "\xE2\x80\x98",
              0x92 => "\xE2\x80\x99",
              0x93 => "\xE2\x80\x9C",
              0x94 => "\xE2\x80\x9D",
              0x95 => "\xE2\x80\xA2",
              0x96 => "\xE2\x80\x93",
              0x97 => "\xE2\x80\x94",
              0x98 => "\xCB\x9C",
              0x99 => "\xE2\x84\xA2",
              0x9A => "\xC5\xA1",
              0x9B => "\xE2\x80\xBA",
              0x9C => "\xC5\x93",
              0x9D => "\xEF\xBF\xBD",
              0x9E => "\xC5\xBE",
              0x9F => "\xC5\xB8"
          );

          $marker = $this->consume();
          if ($marker === "\x78" || $marker === "\x58")
          {
              $range = '0123456789ABCDEFabcdef';
              $hex = true;
          }
          else
          {
              $range = '0123456789';
              $hex = false;
              // Give the byte back only if one was actually read; at end of
              // data there is nothing to unconsume.
              if ($marker !== false)
              {
                  $this->unconsume();
              }
          }

          $digits = $this->consume_range($range);

          // NOTE(review): this is a truthiness test, so a lone "0" ("&#0;",
          // "&#x0;") is treated like "no digits" and left undecoded, while
          // "&#00;" is decoded. Kept as-is because the outcome for code point
          // 0 depends on SimplePie_Misc::codepoint_to_utf8() - confirm the
          // intended behaviour before tightening this to "=== false".
          if (!$digits)
          {
              return;
          }

          if ($hex)
          {
              $codepoint = hexdec($digits);
          }
          else
          {
              $codepoint = intval($digits);
          }

          if (isset($windows_1252_specials[$codepoint]))
          {
              $replacement = $windows_1252_specials[$codepoint];
          }
          else
          {
              $replacement = SimplePie_Misc::codepoint_to_utf8($codepoint);
          }

          // The terminating ";" is optional: swallow it when present, and put
          // back whatever other byte was read instead.
          $terminator = $this->consume();
          if ($terminator !== ';' && $terminator !== false)
          {
              $this->unconsume();
          }

          // $consumed now holds exactly the reference text, "&" included.
          $consumed_length = strlen($this->consumed);
          $this->data = substr_replace($this->data, $replacement, $this->position - $consumed_length, $consumed_length);
          $this->position += strlen($replacement) - $consumed_length;
      }

      /**
       * Decode a named character reference
       *
       * Expects "&" plus the first byte of the name to have been consumed.
       * Reads up to nine further bytes, remembers the longest prefix that is
       * a key of the entity table, and replaces "&" + that prefix in $data.
       * $position ends up just after the replacement, or just after the "&"
       * when nothing matched.
       *
       * @access private
       */
      private function named_entity()
      {
          // Keys ending in ";" are the terminated form; keys without it are
          // the names that are also accepted unterminated.
          static $entities = array(
              'Aacute' => "\xC3\x81",
              'aacute' => "\xC3\xA1",
              'Aacute;' => "\xC3\x81",
              'aacute;' => "\xC3\xA1",
              'Acirc' => "\xC3\x82",
              'acirc' => "\xC3\xA2",
              'Acirc;' => "\xC3\x82",
              'acirc;' => "\xC3\xA2",
              'acute' => "\xC2\xB4",
              'acute;' => "\xC2\xB4",
              'AElig' => "\xC3\x86",
              'aelig' => "\xC3\xA6",
              'AElig;' => "\xC3\x86",
              'aelig;' => "\xC3\xA6",
              'Agrave' => "\xC3\x80",
              'agrave' => "\xC3\xA0",
              'Agrave;' => "\xC3\x80",
              'agrave;' => "\xC3\xA0",
              'alefsym;' => "\xE2\x84\xB5",
              'Alpha;' => "\xCE\x91",
              'alpha;' => "\xCE\xB1",
              'AMP' => "\x26",
              'amp' => "\x26",
              'AMP;' => "\x26",
              'amp;' => "\x26",
              'and;' => "\xE2\x88\xA7",
              'ang;' => "\xE2\x88\xA0",
              'apos;' => "\x27",
              'Aring' => "\xC3\x85",
              'aring' => "\xC3\xA5",
              'Aring;' => "\xC3\x85",
              'aring;' => "\xC3\xA5",
              'asymp;' => "\xE2\x89\x88",
              'Atilde' => "\xC3\x83",
              'atilde' => "\xC3\xA3",
              'Atilde;' => "\xC3\x83",
              'atilde;' => "\xC3\xA3",
              'Auml' => "\xC3\x84",
              'auml' => "\xC3\xA4",
              'Auml;' => "\xC3\x84",
              'auml;' => "\xC3\xA4",
              'bdquo;' => "\xE2\x80\x9E",
              'Beta;' => "\xCE\x92",
              'beta;' => "\xCE\xB2",
              'brvbar' => "\xC2\xA6",
              'brvbar;' => "\xC2\xA6",
              'bull;' => "\xE2\x80\xA2",
              'cap;' => "\xE2\x88\xA9",
              'Ccedil' => "\xC3\x87",
              'ccedil' => "\xC3\xA7",
              'Ccedil;' => "\xC3\x87",
              'ccedil;' => "\xC3\xA7",
              'cedil' => "\xC2\xB8",
              'cedil;' => "\xC2\xB8",
              'cent' => "\xC2\xA2",
              'cent;' => "\xC2\xA2",
              'Chi;' => "\xCE\xA7",
              'chi;' => "\xCF\x87",
              'circ;' => "\xCB\x86",
              'clubs;' => "\xE2\x99\xA3",
              'cong;' => "\xE2\x89\x85",
              'COPY' => "\xC2\xA9",
              'copy' => "\xC2\xA9",
              'COPY;' => "\xC2\xA9",
              'copy;' => "\xC2\xA9",
              'crarr;' => "\xE2\x86\xB5",
              'cup;' => "\xE2\x88\xAA",
              'curren' => "\xC2\xA4",
              'curren;' => "\xC2\xA4",
              'Dagger;' => "\xE2\x80\xA1",
              'dagger;' => "\xE2\x80\xA0",
              'dArr;' => "\xE2\x87\x93",
              'darr;' => "\xE2\x86\x93",
              'deg' => "\xC2\xB0",
              'deg;' => "\xC2\xB0",
              'Delta;' => "\xCE\x94",
              'delta;' => "\xCE\xB4",
              'diams;' => "\xE2\x99\xA6",
              'divide' => "\xC3\xB7",
              'divide;' => "\xC3\xB7",
              'Eacute' => "\xC3\x89",
              'eacute' => "\xC3\xA9",
              'Eacute;' => "\xC3\x89",
              'eacute;' => "\xC3\xA9",
              'Ecirc' => "\xC3\x8A",
              'ecirc' => "\xC3\xAA",
              'Ecirc;' => "\xC3\x8A",
              'ecirc;' => "\xC3\xAA",
              'Egrave' => "\xC3\x88",
              'egrave' => "\xC3\xA8",
              'Egrave;' => "\xC3\x88",
              'egrave;' => "\xC3\xA8",
              'empty;' => "\xE2\x88\x85",
              'emsp;' => "\xE2\x80\x83",
              'ensp;' => "\xE2\x80\x82",
              'Epsilon;' => "\xCE\x95",
              'epsilon;' => "\xCE\xB5",
              'equiv;' => "\xE2\x89\xA1",
              'Eta;' => "\xCE\x97",
              'eta;' => "\xCE\xB7",
              'ETH' => "\xC3\x90",
              'eth' => "\xC3\xB0",
              'ETH;' => "\xC3\x90",
              'eth;' => "\xC3\xB0",
              'Euml' => "\xC3\x8B",
              'euml' => "\xC3\xAB",
              'Euml;' => "\xC3\x8B",
              'euml;' => "\xC3\xAB",
              'euro;' => "\xE2\x82\xAC",
              'exist;' => "\xE2\x88\x83",
              'fnof;' => "\xC6\x92",
              'forall;' => "\xE2\x88\x80",
              'frac12' => "\xC2\xBD",
              'frac12;' => "\xC2\xBD",
              'frac14' => "\xC2\xBC",
              'frac14;' => "\xC2\xBC",
              'frac34' => "\xC2\xBE",
              'frac34;' => "\xC2\xBE",
              'frasl;' => "\xE2\x81\x84",
              'Gamma;' => "\xCE\x93",
              'gamma;' => "\xCE\xB3",
              'ge;' => "\xE2\x89\xA5",
              'GT' => "\x3E",
              'gt' => "\x3E",
              'GT;' => "\x3E",
              'gt;' => "\x3E",
              'hArr;' => "\xE2\x87\x94",
              'harr;' => "\xE2\x86\x94",
              'hearts;' => "\xE2\x99\xA5",
              'hellip;' => "\xE2\x80\xA6",
              'Iacute' => "\xC3\x8D",
              'iacute' => "\xC3\xAD",
              'Iacute;' => "\xC3\x8D",
              'iacute;' => "\xC3\xAD",
              'Icirc' => "\xC3\x8E",
              'icirc' => "\xC3\xAE",
              'Icirc;' => "\xC3\x8E",
              'icirc;' => "\xC3\xAE",
              'iexcl' => "\xC2\xA1",
              'iexcl;' => "\xC2\xA1",
              'Igrave' => "\xC3\x8C",
              'igrave' => "\xC3\xAC",
              'Igrave;' => "\xC3\x8C",
              'igrave;' => "\xC3\xAC",
              'image;' => "\xE2\x84\x91",
              'infin;' => "\xE2\x88\x9E",
              'int;' => "\xE2\x88\xAB",
              'Iota;' => "\xCE\x99",
              'iota;' => "\xCE\xB9",
              'iquest' => "\xC2\xBF",
              'iquest;' => "\xC2\xBF",
              'isin;' => "\xE2\x88\x88",
              'Iuml' => "\xC3\x8F",
              'iuml' => "\xC3\xAF",
              'Iuml;' => "\xC3\x8F",
              'iuml;' => "\xC3\xAF",
              'Kappa;' => "\xCE\x9A",
              'kappa;' => "\xCE\xBA",
              'Lambda;' => "\xCE\x9B",
              'lambda;' => "\xCE\xBB",
              'lang;' => "\xE3\x80\x88",
              'laquo' => "\xC2\xAB",
              'laquo;' => "\xC2\xAB",
              'lArr;' => "\xE2\x87\x90",
              'larr;' => "\xE2\x86\x90",
              'lceil;' => "\xE2\x8C\x88",
              'ldquo;' => "\xE2\x80\x9C",
              'le;' => "\xE2\x89\xA4",
              'lfloor;' => "\xE2\x8C\x8A",
              'lowast;' => "\xE2\x88\x97",
              'loz;' => "\xE2\x97\x8A",
              'lrm;' => "\xE2\x80\x8E",
              'lsaquo;' => "\xE2\x80\xB9",
              'lsquo;' => "\xE2\x80\x98",
              'LT' => "\x3C",
              'lt' => "\x3C",
              'LT;' => "\x3C",
              'lt;' => "\x3C",
              'macr' => "\xC2\xAF",
              'macr;' => "\xC2\xAF",
              'mdash;' => "\xE2\x80\x94",
              'micro' => "\xC2\xB5",
              'micro;' => "\xC2\xB5",
              'middot' => "\xC2\xB7",
              'middot;' => "\xC2\xB7",
              'minus;' => "\xE2\x88\x92",
              'Mu;' => "\xCE\x9C",
              'mu;' => "\xCE\xBC",
              'nabla;' => "\xE2\x88\x87",
              'nbsp' => "\xC2\xA0",
              'nbsp;' => "\xC2\xA0",
              'ndash;' => "\xE2\x80\x93",
              'ne;' => "\xE2\x89\xA0",
              'ni;' => "\xE2\x88\x8B",
              'not' => "\xC2\xAC",
              'not;' => "\xC2\xAC",
              'notin;' => "\xE2\x88\x89",
              'nsub;' => "\xE2\x8A\x84",
              'Ntilde' => "\xC3\x91",
              'ntilde' => "\xC3\xB1",
              'Ntilde;' => "\xC3\x91",
              'ntilde;' => "\xC3\xB1",
              'Nu;' => "\xCE\x9D",
              'nu;' => "\xCE\xBD",
              'Oacute' => "\xC3\x93",
              'oacute' => "\xC3\xB3",
              'Oacute;' => "\xC3\x93",
              'oacute;' => "\xC3\xB3",
              'Ocirc' => "\xC3\x94",
              'ocirc' => "\xC3\xB4",
              'Ocirc;' => "\xC3\x94",
              'ocirc;' => "\xC3\xB4",
              'OElig;' => "\xC5\x92",
              'oelig;' => "\xC5\x93",
              'Ograve' => "\xC3\x92",
              'ograve' => "\xC3\xB2",
              'Ograve;' => "\xC3\x92",
              'ograve;' => "\xC3\xB2",
              'oline;' => "\xE2\x80\xBE",
              'Omega;' => "\xCE\xA9",
              'omega;' => "\xCF\x89",
              'Omicron;' => "\xCE\x9F",
              'omicron;' => "\xCE\xBF",
              'oplus;' => "\xE2\x8A\x95",
              'or;' => "\xE2\x88\xA8",
              'ordf' => "\xC2\xAA",
              'ordf;' => "\xC2\xAA",
              'ordm' => "\xC2\xBA",
              'ordm;' => "\xC2\xBA",
              'Oslash' => "\xC3\x98",
              'oslash' => "\xC3\xB8",
              'Oslash;' => "\xC3\x98",
              'oslash;' => "\xC3\xB8",
              'Otilde' => "\xC3\x95",
              'otilde' => "\xC3\xB5",
              'Otilde;' => "\xC3\x95",
              'otilde;' => "\xC3\xB5",
              'otimes;' => "\xE2\x8A\x97",
              'Ouml' => "\xC3\x96",
              'ouml' => "\xC3\xB6",
              'Ouml;' => "\xC3\x96",
              'ouml;' => "\xC3\xB6",
              'para' => "\xC2\xB6",
              'para;' => "\xC2\xB6",
              'part;' => "\xE2\x88\x82",
              'permil;' => "\xE2\x80\xB0",
              'perp;' => "\xE2\x8A\xA5",
              'Phi;' => "\xCE\xA6",
              'phi;' => "\xCF\x86",
              'Pi;' => "\xCE\xA0",
              'pi;' => "\xCF\x80",
              'piv;' => "\xCF\x96",
              'plusmn' => "\xC2\xB1",
              'plusmn;' => "\xC2\xB1",
              'pound' => "\xC2\xA3",
              'pound;' => "\xC2\xA3",
              'Prime;' => "\xE2\x80\xB3",
              'prime;' => "\xE2\x80\xB2",
              'prod;' => "\xE2\x88\x8F",
              'prop;' => "\xE2\x88\x9D",
              'Psi;' => "\xCE\xA8",
              'psi;' => "\xCF\x88",
              'QUOT' => "\x22",
              'quot' => "\x22",
              'QUOT;' => "\x22",
              'quot;' => "\x22",
              'radic;' => "\xE2\x88\x9A",
              'rang;' => "\xE3\x80\x89",
              'raquo' => "\xC2\xBB",
              'raquo;' => "\xC2\xBB",
              'rArr;' => "\xE2\x87\x92",
              'rarr;' => "\xE2\x86\x92",
              'rceil;' => "\xE2\x8C\x89",
              'rdquo;' => "\xE2\x80\x9D",
              'real;' => "\xE2\x84\x9C",
              'REG' => "\xC2\xAE",
              'reg' => "\xC2\xAE",
              'REG;' => "\xC2\xAE",
              'reg;' => "\xC2\xAE",
              'rfloor;' => "\xE2\x8C\x8B",
              'Rho;' => "\xCE\xA1",
              'rho;' => "\xCF\x81",
              'rlm;' => "\xE2\x80\x8F",
              'rsaquo;' => "\xE2\x80\xBA",
              'rsquo;' => "\xE2\x80\x99",
              'sbquo;' => "\xE2\x80\x9A",
              'Scaron;' => "\xC5\xA0",
              'scaron;' => "\xC5\xA1",
              'sdot;' => "\xE2\x8B\x85",
              'sect' => "\xC2\xA7",
              'sect;' => "\xC2\xA7",
              'shy' => "\xC2\xAD",
              'shy;' => "\xC2\xAD",
              'Sigma;' => "\xCE\xA3",
              'sigma;' => "\xCF\x83",
              'sigmaf;' => "\xCF\x82",
              'sim;' => "\xE2\x88\xBC",
              'spades;' => "\xE2\x99\xA0",
              'sub;' => "\xE2\x8A\x82",
              'sube;' => "\xE2\x8A\x86",
              'sum;' => "\xE2\x88\x91",
              'sup;' => "\xE2\x8A\x83",
              'sup1' => "\xC2\xB9",
              'sup1;' => "\xC2\xB9",
              'sup2' => "\xC2\xB2",
              'sup2;' => "\xC2\xB2",
              'sup3' => "\xC2\xB3",
              'sup3;' => "\xC2\xB3",
              'supe;' => "\xE2\x8A\x87",
              'szlig' => "\xC3\x9F",
              'szlig;' => "\xC3\x9F",
              'Tau;' => "\xCE\xA4",
              'tau;' => "\xCF\x84",
              'there4;' => "\xE2\x88\xB4",
              'Theta;' => "\xCE\x98",
              'theta;' => "\xCE\xB8",
              'thetasym;' => "\xCF\x91",
              'thinsp;' => "\xE2\x80\x89",
              'THORN' => "\xC3\x9E",
              'thorn' => "\xC3\xBE",
              'THORN;' => "\xC3\x9E",
              'thorn;' => "\xC3\xBE",
              'tilde;' => "\xCB\x9C",
              'times' => "\xC3\x97",
              'times;' => "\xC3\x97",
              'TRADE;' => "\xE2\x84\xA2",
              'trade;' => "\xE2\x84\xA2",
              'Uacute' => "\xC3\x9A",
              'uacute' => "\xC3\xBA",
              'Uacute;' => "\xC3\x9A",
              'uacute;' => "\xC3\xBA",
              'uArr;' => "\xE2\x87\x91",
              'uarr;' => "\xE2\x86\x91",
              'Ucirc' => "\xC3\x9B",
              'ucirc' => "\xC3\xBB",
              'Ucirc;' => "\xC3\x9B",
              'ucirc;' => "\xC3\xBB",
              'Ugrave' => "\xC3\x99",
              'ugrave' => "\xC3\xB9",
              'Ugrave;' => "\xC3\x99",
              'ugrave;' => "\xC3\xB9",
              'uml' => "\xC2\xA8",
              'uml;' => "\xC2\xA8",
              'upsih;' => "\xCF\x92",
              'Upsilon;' => "\xCE\xA5",
              'upsilon;' => "\xCF\x85",
              'Uuml' => "\xC3\x9C",
              'uuml' => "\xC3\xBC",
              'Uuml;' => "\xC3\x9C",
              'uuml;' => "\xC3\xBC",
              'weierp;' => "\xE2\x84\x98",
              'Xi;' => "\xCE\x9E",
              'xi;' => "\xCE\xBE",
              'Yacute' => "\xC3\x9D",
              'yacute' => "\xC3\xBD",
              'Yacute;' => "\xC3\x9D",
              'yacute;' => "\xC3\xBD",
              'yen' => "\xC2\xA5",
              'yen;' => "\xC2\xA5",
              'yuml' => "\xC3\xBF",
              'Yuml;' => "\xC5\xB8",
              'yuml;' => "\xC3\xBF",
              'Zeta;' => "\xCE\x96",
              'zeta;' => "\xCE\xB6",
              'zwj;' => "\xE2\x80\x8D",
              'zwnj;' => "\xE2\x80\x8C"
          );

          // Read ahead byte by byte and keep the longest name seen so far, so
          // that e.g. "notin;" wins over its prefix "not". The longest key in
          // the table ("thetasym;") is nine bytes; one byte of the name has
          // already been consumed by entity().
          $match = null;
          for ($i = 0; $i < 9 && $this->consume() !== false; $i++)
          {
              $candidate = substr($this->consumed, 1);
              if (isset($entities[$candidate]))
              {
                  $match = $candidate;
              }
          }

          // $consumed is "&" followed by every byte read for this attempt, so
          // the "&" sits that many bytes before the current position.
          $start = $this->position - strlen($this->consumed);

          if ($match === null)
          {
              // No entity here. Resume scanning right after the "&" rather
              // than after the read-ahead window; otherwise a real reference
              // inside that window (as in "AT&T &amp; co") would be skipped.
              $this->position = $start + 1;
              return;
          }

          // Replace "&" + matched name only; bytes read beyond the match stay
          // in $data and are rescanned because $position is put right after
          // the replacement.
          $this->data = substr_replace($this->data, $entities[$match], $start, strlen($match) + 1);
          $this->position = $start + strlen($entities[$match]);
      }
  }