atomlib.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  1. <?php
  2. /**
  3. * Atom Syndication Format PHP Library
  4. *
  5. * @package AtomLib
  6. * @link http://code.google.com/p/phpatomlib/
  7. *
  8. * @author Elias Torres <elias@torrez.us>
  9. * @version 0.4
  10. * @since 2.3.0
  11. */
  12. /**
  13. * Structure that store common Atom Feed Properties
  14. *
  15. * @package AtomLib
  16. */
  17. class AtomFeed {
  18. /**
  19. * Stores Links
  20. * @var array
  21. * @access public
  22. */
  23. var $links = array();
  24. /**
  25. * Stores Categories
  26. * @var array
  27. * @access public
  28. */
  29. var $categories = array();
  30. /**
  31. * Stores Entries
  32. *
  33. * @var array
  34. * @access public
  35. */
  36. var $entries = array();
  37. }
  38. /**
  39. * Structure that store Atom Entry Properties
  40. *
  41. * @package AtomLib
  42. */
  43. class AtomEntry {
  44. /**
  45. * Stores Links
  46. * @var array
  47. * @access public
  48. */
  49. var $links = array();
  50. /**
  51. * Stores Categories
  52. * @var array
  53. * @access public
  54. */
  55. var $categories = array();
  56. }
  57. /**
  58. * AtomLib Atom Parser API
  59. *
  60. * @package AtomLib
  61. */
  62. class AtomParser {
  63. var $NS = 'http://www.w3.org/2005/Atom';
  64. var $ATOM_CONTENT_ELEMENTS = array('content','summary','title','subtitle','rights');
  65. var $ATOM_SIMPLE_ELEMENTS = array('id','updated','published','draft');
  66. var $debug = false;
  67. var $depth = 0;
  68. var $indent = 2;
  69. var $in_content;
  70. var $ns_contexts = array();
  71. var $ns_decls = array();
  72. var $content_ns_decls = array();
  73. var $content_ns_contexts = array();
  74. var $is_xhtml = false;
  75. var $is_html = false;
  76. var $is_text = true;
  77. var $skipped_div = false;
  78. var $FILE = "php://input";
  79. var $feed;
  80. var $current;
  81. /**
  82. * PHP5 constructor.
  83. */
  84. function __construct() {
  85. $this->feed = new AtomFeed();
  86. $this->current = null;
  87. $this->map_attrs_func = array( __CLASS__, 'map_attrs' );
  88. $this->map_xmlns_func = array( __CLASS__, 'map_xmlns' );
  89. }
  90. /**
  91. * PHP4 constructor.
  92. */
  93. public function AtomParser() {
  94. self::__construct();
  95. }
  96. /**
  97. * Map attributes to key="val"
  98. *
  99. * @param string $k Key
  100. * @param string $v Value
  101. * @return string
  102. */
  103. public static function map_attrs($k, $v) {
  104. return "$k=\"$v\"";
  105. }
  106. /**
  107. * Map XML namespace to string.
  108. *
  109. * @param indexish $p XML Namespace element index
  110. * @param array $n Two-element array pair. [ 0 => {namespace}, 1 => {url} ]
  111. * @return string 'xmlns="{url}"' or 'xmlns:{namespace}="{url}"'
  112. */
  113. public static function map_xmlns($p, $n) {
  114. $xd = "xmlns";
  115. if( 0 < strlen($n[0]) ) {
  116. $xd .= ":{$n[0]}";
  117. }
  118. return "{$xd}=\"{$n[1]}\"";
  119. }
  120. function _p($msg) {
  121. if($this->debug) {
  122. print str_repeat(" ", $this->depth * $this->indent) . $msg ."\n";
  123. }
  124. }
  125. function error_handler($log_level, $log_text, $error_file, $error_line) {
  126. $this->error = $log_text;
  127. }
  128. function parse() {
  129. set_error_handler(array(&$this, 'error_handler'));
  130. array_unshift($this->ns_contexts, array());
  131. if ( ! function_exists( 'xml_parser_create_ns' ) ) {
  132. trigger_error( __( "PHP's XML extension is not available. Please contact your hosting provider to enable PHP's XML extension." ) );
  133. return false;
  134. }
  135. $parser = xml_parser_create_ns();
  136. xml_set_object($parser, $this);
  137. xml_set_element_handler($parser, "start_element", "end_element");
  138. xml_parser_set_option($parser,XML_OPTION_CASE_FOLDING,0);
  139. xml_parser_set_option($parser,XML_OPTION_SKIP_WHITE,0);
  140. xml_set_character_data_handler($parser, "cdata");
  141. xml_set_default_handler($parser, "_default");
  142. xml_set_start_namespace_decl_handler($parser, "start_ns");
  143. xml_set_end_namespace_decl_handler($parser, "end_ns");
  144. $this->content = '';
  145. $ret = true;
  146. $fp = fopen($this->FILE, "r");
  147. while ($data = fread($fp, 4096)) {
  148. if($this->debug) $this->content .= $data;
  149. if(!xml_parse($parser, $data, feof($fp))) {
  150. /* translators: 1: Error message, 2: Line number. */
  151. trigger_error(sprintf(__('XML Error: %1$s at line %2$s')."\n",
  152. xml_error_string(xml_get_error_code($parser)),
  153. xml_get_current_line_number($parser)));
  154. $ret = false;
  155. break;
  156. }
  157. }
  158. fclose($fp);
  159. xml_parser_free($parser);
  160. unset($parser);
  161. restore_error_handler();
  162. return $ret;
  163. }
  164. function start_element($parser, $name, $attrs) {
  165. $name_parts = explode(":", $name);
  166. $tag = array_pop($name_parts);
  167. switch($name) {
  168. case $this->NS . ':feed':
  169. $this->current = $this->feed;
  170. break;
  171. case $this->NS . ':entry':
  172. $this->current = new AtomEntry();
  173. break;
  174. };
  175. $this->_p("start_element('$name')");
  176. #$this->_p(print_r($this->ns_contexts,true));
  177. #$this->_p('current(' . $this->current . ')');
  178. array_unshift($this->ns_contexts, $this->ns_decls);
  179. $this->depth++;
  180. if(!empty($this->in_content)) {
  181. $this->content_ns_decls = array();
  182. if($this->is_html || $this->is_text)
  183. trigger_error("Invalid content in element found. Content must not be of type text or html if it contains markup.");
  184. $attrs_prefix = array();
  185. // resolve prefixes for attributes
  186. foreach($attrs as $key => $value) {
  187. $with_prefix = $this->ns_to_prefix($key, true);
  188. $attrs_prefix[$with_prefix[1]] = $this->xml_escape($value);
  189. }
  190. $attrs_str = join(' ', array_map($this->map_attrs_func, array_keys($attrs_prefix), array_values($attrs_prefix)));
  191. if(strlen($attrs_str) > 0) {
  192. $attrs_str = " " . $attrs_str;
  193. }
  194. $with_prefix = $this->ns_to_prefix($name);
  195. if(!$this->is_declared_content_ns($with_prefix[0])) {
  196. array_push($this->content_ns_decls, $with_prefix[0]);
  197. }
  198. $xmlns_str = '';
  199. if(count($this->content_ns_decls) > 0) {
  200. array_unshift($this->content_ns_contexts, $this->content_ns_decls);
  201. $xmlns_str .= join(' ', array_map($this->map_xmlns_func, array_keys($this->content_ns_contexts[0]), array_values($this->content_ns_contexts[0])));
  202. if(strlen($xmlns_str) > 0) {
  203. $xmlns_str = " " . $xmlns_str;
  204. }
  205. }
  206. array_push($this->in_content, array($tag, $this->depth, "<". $with_prefix[1] ."{$xmlns_str}{$attrs_str}" . ">"));
  207. } else if(in_array($tag, $this->ATOM_CONTENT_ELEMENTS) || in_array($tag, $this->ATOM_SIMPLE_ELEMENTS)) {
  208. $this->in_content = array();
  209. $this->is_xhtml = $attrs['type'] == 'xhtml';
  210. $this->is_html = $attrs['type'] == 'html' || $attrs['type'] == 'text/html';
  211. $this->is_text = !in_array('type',array_keys($attrs)) || $attrs['type'] == 'text';
  212. $type = $this->is_xhtml ? 'XHTML' : ($this->is_html ? 'HTML' : ($this->is_text ? 'TEXT' : $attrs['type']));
  213. if(in_array('src',array_keys($attrs))) {
  214. $this->current->$tag = $attrs;
  215. } else {
  216. array_push($this->in_content, array($tag,$this->depth, $type));
  217. }
  218. } else if($tag == 'link') {
  219. array_push($this->current->links, $attrs);
  220. } else if($tag == 'category') {
  221. array_push($this->current->categories, $attrs);
  222. }
  223. $this->ns_decls = array();
  224. }
  225. function end_element($parser, $name) {
  226. $name_parts = explode(":", $name);
  227. $tag = array_pop($name_parts);
  228. $ccount = count($this->in_content);
  229. # if we are *in* content, then let's proceed to serialize it
  230. if(!empty($this->in_content)) {
  231. # if we are ending the original content element
  232. # then let's finalize the content
  233. if($this->in_content[0][0] == $tag &&
  234. $this->in_content[0][1] == $this->depth) {
  235. $origtype = $this->in_content[0][2];
  236. array_shift($this->in_content);
  237. $newcontent = array();
  238. foreach($this->in_content as $c) {
  239. if(count($c) == 3) {
  240. array_push($newcontent, $c[2]);
  241. } else {
  242. if($this->is_xhtml || $this->is_text) {
  243. array_push($newcontent, $this->xml_escape($c));
  244. } else {
  245. array_push($newcontent, $c);
  246. }
  247. }
  248. }
  249. if(in_array($tag, $this->ATOM_CONTENT_ELEMENTS)) {
  250. $this->current->$tag = array($origtype, join('',$newcontent));
  251. } else {
  252. $this->current->$tag = join('',$newcontent);
  253. }
  254. $this->in_content = array();
  255. } else if($this->in_content[$ccount-1][0] == $tag &&
  256. $this->in_content[$ccount-1][1] == $this->depth) {
  257. $this->in_content[$ccount-1][2] = substr($this->in_content[$ccount-1][2],0,-1) . "/>";
  258. } else {
  259. # else, just finalize the current element's content
  260. $endtag = $this->ns_to_prefix($name);
  261. array_push($this->in_content, array($tag, $this->depth, "</$endtag[1]>"));
  262. }
  263. }
  264. array_shift($this->ns_contexts);
  265. $this->depth--;
  266. if($name == ($this->NS . ':entry')) {
  267. array_push($this->feed->entries, $this->current);
  268. $this->current = null;
  269. }
  270. $this->_p("end_element('$name')");
  271. }
  272. function start_ns($parser, $prefix, $uri) {
  273. $this->_p("starting: " . $prefix . ":" . $uri);
  274. array_push($this->ns_decls, array($prefix,$uri));
  275. }
  276. function end_ns($parser, $prefix) {
  277. $this->_p("ending: #" . $prefix . "#");
  278. }
  279. function cdata($parser, $data) {
  280. $this->_p("data: #" . str_replace(array("\n"), array("\\n"), trim($data)) . "#");
  281. if(!empty($this->in_content)) {
  282. array_push($this->in_content, $data);
  283. }
  284. }
  285. function _default($parser, $data) {
  286. # when does this gets called?
  287. }
  288. function ns_to_prefix($qname, $attr=false) {
  289. # split 'http://www.w3.org/1999/xhtml:div' into ('http','//www.w3.org/1999/xhtml','div')
  290. $components = explode(":", $qname);
  291. # grab the last one (e.g 'div')
  292. $name = array_pop($components);
  293. if(!empty($components)) {
  294. # re-join back the namespace component
  295. $ns = join(":",$components);
  296. foreach($this->ns_contexts as $context) {
  297. foreach($context as $mapping) {
  298. if($mapping[1] == $ns && strlen($mapping[0]) > 0) {
  299. return array($mapping, "$mapping[0]:$name");
  300. }
  301. }
  302. }
  303. }
  304. if($attr) {
  305. return array(null, $name);
  306. } else {
  307. foreach($this->ns_contexts as $context) {
  308. foreach($context as $mapping) {
  309. if(strlen($mapping[0]) == 0) {
  310. return array($mapping, $name);
  311. }
  312. }
  313. }
  314. }
  315. }
  316. function is_declared_content_ns($new_mapping) {
  317. foreach($this->content_ns_contexts as $context) {
  318. foreach($context as $mapping) {
  319. if($new_mapping == $mapping) {
  320. return true;
  321. }
  322. }
  323. }
  324. return false;
  325. }
  326. function xml_escape($content)
  327. {
  328. return str_replace(array('&','"',"'",'<','>'),
  329. array('&amp;','&quot;','&apos;','&lt;','&gt;'),
  330. $content );
  331. }
  332. }