rdf_parser.php 44 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
712781279128012811282128312841285128612871288128912901291129212931294129512961297
  1. <?php
  2. // ##################################################################################
  3. // Title : Class Rdf_parser
  4. // Version : 1.5.0
  5. // Author : Jason Diammond -repat RDF parser-
  6. // : Luis Argerich -PHP version of repat- (lrargerich@yahoo.com)
  7. // : Matt A. Tobin -Continued PHP Compat- (email@mattatobin.com)
  8. // Last modification date : See history below..
  9. // Description : A PHP port of Repat, an RDF parser.
  10. // This expat-based parser reads RDF files and produces the events
  11. // specific to RDF documents.
  12. // ##################################################################################
  13. // History:
  14. // 06-13-2002 : First version of this class.
  15. // 07-17-2002 : Minor bugfix (Leandro Mariano Lopez)
  16. // 08-16-2006 : Allowed for user callback function to be in a class
  17. // (Justin Scott)
  18. // 10-05-2017 : Fixed issues with PHP 7 namely the ereg() polyfill
  19. // 12-21-2018 : Fix rdf parser lib for outdated usage of call_user_func
  20. // 02-26-2020 : PHP 7.4 Compat
  21. // ??-??-2021 : PHP 8.x Compat
  22. // 02-17-2022 : Use constants for common chars
  23. // ##################################################################################
  24. // To-Dos:
  25. // Keep it working..
  26. // ##################################################################################
  27. // How to use it:
  28. // Read the documentation in rdf_parser.html
  29. // ##################################################################################
  30. class Rdf_parser {
  31. const EMPTY_STRING = '';
  32. const DOT = '.';
  33. const DASH = '-';
  34. const UNDERSCORE = '_';
  35. const COLON = ':';
  36. const XML_NAMESPACE_URI = 'http://www.w3.org/XML/1998/namespace';
  37. const XML_LANG = 'lang';
  38. const RDF_NAMESPACE_URI = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
  39. const RDF_RDF = 'RDF';
  40. const RDF_DESCRIPTION = 'Description';
  41. const RDF_ID = 'ID';
  42. const RDF_ABOUT = 'about';
  43. const RDF_ABOUT_EACH = 'aboutEach';
  44. const RDF_ABOUT_EACH_PREFIX = 'aboutEachPrefix';
  45. const RDF_BAG_ID = 'bagID';
  46. const RDF_RESOURCE = 'resource';
  47. const RDF_VALUE = 'value';
  48. const RDF_PARSE_TYPE = 'parseType';
  49. const RDF_PARSE_TYPE_LITERAL = 'Literal';
  50. const RDF_PARSE_TYPE_RESOURCE = 'Resource';
  51. const RDF_TYPE = 'type';
  52. const RDF_BAG = 'Bag';
  53. const RDF_SEQ = 'Seq';
  54. const RDF_ALT = 'Alt';
  55. const RDF_LI = 'li';
  56. const RDF_STATEMENT = 'Statement';
  57. const RDF_SUBJECT = 'subject';
  58. const RDF_PREDICATE = 'predicate';
  59. const RDF_OBJECT = 'object';
  60. const NAMESPACE_SEPARATOR_CHAR = '^';
  61. const NAMESPACE_SEPARATOR_STRING = '^';
  62. const IN_TOP_LEVEL = 0;
  63. const IN_RDF = 1;
  64. const IN_DESCRIPTION = 2;
  65. const IN_PROPERTY_UNKNOWN_OBJECT = 3;
  66. const IN_PROPERTY_RESOURCE = 4;
  67. const IN_PROPERTY_EMPTY_RESOURCE = 5;
  68. const IN_PROPERTY_LITERAL = 6;
  69. const IN_PROPERTY_PARSE_TYPE_LITERAL = 7;
  70. const IN_PROPERTY_PARSE_TYPE_RESOURCE = 8;
  71. const IN_XML = 9;
  72. const IN_UNKNOWN = 10;
  73. const RDF_SUBJECT_TYPE_URI = 0;
  74. const RDF_SUBJECT_TYPE_DISTRIBUTED = 1;
  75. const RDF_SUBJECT_TYPE_PREFIX = 2;
  76. const RDF_SUBJECT_TYPE_ANONYMOUS = 3;
  77. const RDF_OBJECT_TYPE_RESOURCE = 0;
  78. const RDF_OBJECT_TYPE_LITERAL = 1;
  79. const RDF_OBJECT_TYPE_XML = 2;
  80. public $rdf_parser;
  81. // --------------------------------------------------------------------------
  // Creates a namespace-aware XML parser, stores it under the "xml_parser"
  // key of $this->rdf_parser and registers this object's element and
  // character-data callbacks on it.
  //
  // $encoding is handed unchanged to xml_parser_create_ns().
  // Returns the $this->rdf_parser state array.
  public function rdf_parser_create($encoding) {
    $xml_parser = xml_parser_create_ns($encoding, self::NAMESPACE_SEPARATOR_CHAR);

    // Case folding off: names reach the handlers exactly as written.
    xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, 0);

    $this->rdf_parser["xml_parser"] = $xml_parser;

    // Handlers are given by method name and resolved against $this.
    xml_set_object($xml_parser, $this);
    xml_set_element_handler($xml_parser, "_start_element_handler", "_end_element_handler");
    xml_set_character_data_handler($xml_parser, "_character_data_handler");

    return $this->rdf_parser;
  }
  // Discards the parser state held in $this->rdf_parser.
  //
  // The base URI is reset, the element clean-up hook is invoked, and the
  // state array is then unset. xml_parser_free() is deliberately not called
  // here (the original call was already disabled).
  public function rdf_parser_free() {
    $this->rdf_parser["base_uri"] = self::EMPTY_STRING;
    // _delete_elements() takes no parameters, so none are passed.
    $this->_delete_elements();
    unset($this->rdf_parser);
  }
  // Stores a reference to the caller's variable under the "user_data" key
  // of the parser state, so later changes on either side are shared.
  public function rdf_set_user_data(&$user_data) {
    $this->rdf_parser['user_data'] = &$user_data;
  }
  // Returns the value stored by rdf_set_user_data(), or null when no user
  // data has been registered.
  //
  // The key is the literal string "user_data"; the previous "$user_data"
  // interpolated an undefined variable and therefore looked up the key "".
  public function rdf_get_user_data() {
    return isset($this->rdf_parser["user_data"]) ? $this->rdf_parser["user_data"] : null;
  }
  // Registers the callable that _report_statement() invokes for each
  // reported statement.
  public function rdf_set_statement_handler($handler) {
    $this->rdf_parser['statement_handler'] = $handler;
  }
  // Registers the pair of callables invoked by
  // _report_start_parse_type_literal() and _report_end_parse_type_literal().
  public function rdf_set_parse_type_literal_handler($start, $end) {
    $this->rdf_parser['start_parse_type_literal_handler'] = $start;
    $this->rdf_parser['end_parse_type_literal_handler'] = $end;
  }
  // Registers the user callables for element start and end events.
  //
  // _report_start_element() and _report_end_element() look the callbacks up
  // under "start_element_handler" / "end_element_handler", so they are
  // stored under those keys. The underscore-prefixed keys that were written
  // previously are kept as well in case other code reads them.
  public function rdf_set_element_handler($start, $end) {
    $this->rdf_parser["start_element_handler"] = $start;
    $this->rdf_parser["end_element_handler"] = $end;
    // Legacy keys, retained for backward compatibility.
    $this->rdf_parser["_start_element_handler"] = $start;
    $this->rdf_parser["_end_element_handler"] = $end;
  }
  // Registers the user callable for character data events.
  //
  // _report_character_data() reads the "character_data_handler" key, so the
  // callback is stored there. The underscore-prefixed key that was written
  // previously is kept as well in case other code reads it.
  public function rdf_set_character_data_handler($handler) {
    $this->rdf_parser["character_data_handler"] = $handler;
    // Legacy key, retained for backward compatibility.
    $this->rdf_parser["_character_data_handler"] = $handler;
  }
  // Registers the callable that _report_warning() invokes with the warning
  // message.
  public function rdf_set_warning_handler($handler) {
    $this->rdf_parser['warning_handler'] = $handler;
  }
  // Feeds a chunk of the document to the underlying XML parser.
  //
  // $s        - the data to parse
  // $len      - accepted for interface compatibility; not used
  // $is_final - forwarded to xml_parse() as its "is final" flag
  // Returns whatever xml_parse() returns.
  public function rdf_parse($s, $len, $is_final) {
    $xml_parser = $this->rdf_parser["xml_parser"];
    return xml_parse($xml_parser, $s, $is_final);
  }
  // Returns the XML parser handle stored by rdf_parser_create().
  public function rdf_get_xml_parser() {
    $xml_parser = $this->rdf_parser['xml_parser'];
    return $xml_parser;
  }
  // Stores the base URI used when resolving relative references.
  // Always returns 0.
  public function rdf_set_base($base) {
    $this->rdf_parser['base_uri'] = $base;

    return 0;
  }
  // Returns the base URI stored under the "base_uri" key of the parser state.
  public function rdf_get_base() {
    $base_uri = $this->rdf_parser['base_uri'];
    return $base_uri;
  }
  // Resolves $uri_reference against the parser's base URI and writes the
  // result into $buffer (passed by reference).
  //
  // The resolver is a method of this class, so it must be invoked through
  // $this; calling it as a plain function fails with an undefined-function
  // error.
  public function rdf_resolve_uri($uri_reference, &$buffer) {
    // The length argument is a leftover of the C interface; the cast keeps
    // strlen() quiet when the caller's buffer is still null.
    $this->_resolve_uri_reference($this->rdf_parser["base_uri"], $uri_reference, $buffer, strlen((string) $buffer));
  }
  138. // --------------------------------------------------------------------------
  // Builds a fresh element record with every field at its default value.
  //
  // The key "has_property_attributes" is spelled the same way as in
  // _clear_element() and _handle_resource_element(); the earlier
  // "has_property_atributes" spelling created a stray key nobody read.
  private function _new_element() {
    return array(
      "parent" => array(), // no parent yet
      "state" => 0,
      "has_property_attributes" => 0,
      "has_member_attributes" => 0,
      "subject_type" => 0,
      "subject" => self::EMPTY_STRING,
      "predicate" => self::EMPTY_STRING,
      "ordinal" => 0,
      "members" => 0,
      "data" => self::EMPTY_STRING,
      "xml_lang" => self::EMPTY_STRING,
      "bag_id" => self::EMPTY_STRING,
      "statements" => 0,
      "statement_id" => self::EMPTY_STRING,
    );
  }
  // Links $destination to $source as its parent and copies the "state" and
  // "xml_lang" fields from it. Does nothing when $source is empty/falsy.
  private function _copy_element($source, &$destination) {
    if (!$source) {
      return;
    }

    $destination["parent"] = $source;
    $destination["state"] = $source["state"];
    $destination["xml_lang"] = $source["xml_lang"];
  }
  // Resets every field of the element record $e (passed by reference) to
  // its default value.
  //
  // An earlier version first cleared a few fields and conditionally kept
  // "xml_lang", then overwrote all of them unconditionally; only the final,
  // unconditional reset had any effect, so only that is kept.
  private function _clear_element(&$e) {
    $defaults = array(
      "parent" => array(),
      "state" => 0,
      "has_property_attributes" => 0,
      "has_member_attributes" => 0,
      "subject_type" => 0,
      "subject" => self::EMPTY_STRING,
      "predicate" => self::EMPTY_STRING,
      "ordinal" => 0,
      "members" => 0,
      "data" => self::EMPTY_STRING,
      "xml_lang" => self::EMPTY_STRING,
      "bag_id" => self::EMPTY_STRING,
      "statements" => 0,
      "statement_id" => self::EMPTY_STRING,
    );

    // Assign key by key so that any other keys already present in $e stay.
    foreach ($defaults as $field => $value) {
      $e[$field] = $value;
    }
  }
  // Makes a new element the current "top" element.
  //
  // A non-empty record stored under "free" is reused when available;
  // otherwise a fresh record is created. The chosen record is linked to the
  // previous top via _copy_element() and then stored as the new top.
  private function _push_element() {
    if (!isset($this->rdf_parser["free"])) {
      $this->rdf_parser["free"] = array();
    }

    $recycled = $this->rdf_parser["free"];
    if (count($recycled) > 0) {
      $element = $recycled;
      // The "free" slot takes over the recycled record's parent, or a new
      // record when it has none.
      $this->rdf_parser["free"] = isset($element["parent"]) ? $element["parent"] : $this->_new_element();
    }
    else {
      $element = $this->_new_element();
    }

    if (!isset($this->rdf_parser["top"])) {
      $this->rdf_parser["top"] = array();
    }

    $this->_copy_element($this->rdf_parser["top"], $element);
    $this->rdf_parser["top"] = $element;
  }
  // Removes the current top element: its parent becomes the new top, and
  // the removed record is cleared and parked under "free".
  private function _pop_element() {
    $popped = $this->rdf_parser["top"];

    $this->rdf_parser["top"] = $popped["parent"];

    $this->_clear_element($popped);
    $this->rdf_parser["free"] = $popped;
  }
  // Clean-up hook called from rdf_parser_free(). Intentionally does nothing.
  private function _delete_elements()
  {
  }
  // True when $local_name equals the RDF "type" local name.
  private function _is_rdf_property_attribute_resource($local_name) {
    return (self::RDF_TYPE == $local_name);
  }
  // True when $local_name equals the RDF "value" local name.
  private function _is_rdf_property_attribute_literal($local_name) {
    return (self::RDF_VALUE == $local_name);
  }
  // Returns the ordinal n encoded in a container-membership name of the
  // form "_n" (n a positive decimal integer), or 0 when $local_name is not
  // such a name.
  //
  // The previous "substr(...) + 1" added one to the number (so "_1" gave 2
  // and "_0" was accepted) and performed arithmetic on a possibly
  // non-numeric string.
  private function _is_rdf_ordinal($local_name) {
    $local_name = (string) $local_name;

    // Guard against the empty string before looking at the first character.
    if ($local_name === self::EMPTY_STRING || $local_name[0] != self::UNDERSCORE) {
      return 0;
    }

    $digits = substr($local_name, 1);
    if ($digits === self::EMPTY_STRING || !ctype_digit($digits)) {
      return 0;
    }

    $ordinal = (int) $digits;
    return ($ordinal > 0) ? $ordinal : 0;
  }
  // True when $local_name is either the resource-valued ("type") or the
  // literal-valued ("value") RDF property attribute.
  private function _is_rdf_property_attribute($local_name) {
    if ($this->_is_rdf_property_attribute_resource($local_name)) {
      return true;
    }

    return $this->_is_rdf_property_attribute_literal($local_name);
  }
  // True when $local_name is one of the RDF property element names
  // (type, subject, predicate, object, value, li) or starts with "_".
  private function _is_rdf_property_element($local_name) {
    $property_names = array(
      self::RDF_TYPE,
      self::RDF_SUBJECT,
      self::RDF_PREDICATE,
      self::RDF_OBJECT,
      self::RDF_VALUE,
      self::RDF_LI,
    );

    // Non-strict in_array() keeps the loose (==) comparison semantics.
    if (in_array($local_name, $property_names)) {
      return true;
    }

    return ($local_name[0] == self::UNDERSCORE);
  }
  // Returns the preg_match() result (1 or 0) telling whether $val contains
  // an ASCII letter or digit. The pattern is not anchored, so for a
  // multi-character $val any matching character anywhere counts.
  private function _istalnum($val) {
    $alnum_pattern = "/[A-Za-z0-9]/";
    return preg_match($alnum_pattern, $val);
  }
  // Returns the preg_match() result (1 or 0) telling whether $val contains
  // an ASCII letter. The pattern is not anchored, so for a multi-character
  // $val any letter anywhere counts.
  private function _istalpha($val) {
    $alpha_pattern = "/[A-Za-z]/";
    return preg_match($alpha_pattern, $val);
  }
  // Tells whether $uri starts with a scheme: an ASCII letter followed by any
  // number of letters, digits, "+", "-" or ".", and then a colon.
  //
  // Returns a boolean. A single anchored pattern is used so that a string
  // consisting only of scheme characters (no colon) is rejected without
  // reading past the end of the string.
  private function _is_absolute_uri($uri) {
    if (!$uri) {
      return false;
    }

    return preg_match('/^[A-Za-z][A-Za-z0-9+.\-]*:/', (string) $uri) === 1;
  }
  269. /*
  270. This function relies on parse_url(), which returns an associative array containing whichever of the following URL components are present:
  271. $arr=parse_url($url)
  272. scheme - e.g. http
  273. host
  274. port
  275. user
  276. pass
  277. path
  278. query - after the question mark ?
  279. fragment - after the hashmark #
  280. */
  // Splits $uri into its components using parse_url() and writes them into
  // the by-reference output parameters. Components that are absent are set
  // to the empty string.
  //
  // $buffer, $len - leftovers of the C interface; not used
  // $scheme       - e.g. "http"
  // $authority    - [user[:pass]@]host[:port]; previously only the host was
  //                 returned, which silently dropped ports and credentials
  // $path, $query, $fragment - the corresponding parse_url() entries
  private function _parse_uri($uri, $buffer, $len, &$scheme, &$authority, &$path, &$query, &$fragment) {
    $parsed = parse_url((string) $uri);
    if (!is_array($parsed)) {
      // parse_url() returns false for seriously malformed input.
      $parsed = array();
    }

    $scheme = isset($parsed["scheme"]) ? $parsed["scheme"] : self::EMPTY_STRING;

    $authority = self::EMPTY_STRING;
    if (isset($parsed["host"])) {
      if (isset($parsed["user"])) {
        $authority .= $parsed["user"];
        if (isset($parsed["pass"])) {
          $authority .= self::COLON . $parsed["pass"];
        }
        $authority .= "@";
      }
      $authority .= $parsed["host"];
      if (isset($parsed["port"])) {
        $authority .= self::COLON . $parsed["port"];
      }
    }

    $path = isset($parsed["path"]) ? $parsed["path"] : self::EMPTY_STRING;
    $query = isset($parsed["query"]) ? $parsed["query"] : self::EMPTY_STRING;
    $fragment = isset($parsed["fragment"]) ? $parsed["fragment"] : self::EMPTY_STRING;
  }
  // Resolves $reference_uri against $base_uri and writes the resulting URI
  // into $buffer (by reference). $length is a leftover of the C interface
  // and is not used.
  //
  // Cases, in order:
  //  1. The reference has no scheme, authority, path or query: the result is
  //     the base URI, with the reference's fragment (if any) replacing any
  //     fragment the base already carries.
  //  2. The reference has a scheme: it is already absolute and is returned
  //     unchanged.
  //  3. Otherwise the scheme comes from the base; authority and path come
  //     from the reference when it supplies them, else from the base, with
  //     relative paths merged onto the base path up to its last separator
  //     and "." / ".." segments removed.
  //
  // Compared with the earlier implementation this keeps the path of
  // absolute-path and authority references (it used to be overwritten with
  // an empty string), merges relative paths at the last separator instead
  // of appending to the whole base path, and removes dot segments with a
  // single pass instead of a regex loop that could spin forever on paths it
  // could not reduce.
  private function _resolve_uri_reference($base_uri, $reference_uri, &$buffer, $length) {
    $base_uri = (string) $base_uri;
    $reference_uri = (string) $reference_uri;
    $buffer = self::EMPTY_STRING;
    $scratch = self::EMPTY_STRING;

    $this->_parse_uri($reference_uri, $scratch, 0, $reference_scheme, $reference_authority, $reference_path, $reference_query, $reference_fragment);

    // Case 1: same-document reference (empty, or fragment only).
    if ($reference_scheme == self::EMPTY_STRING && $reference_authority == self::EMPTY_STRING && $reference_path == self::EMPTY_STRING && $reference_query == self::EMPTY_STRING) {
      $buffer = $base_uri;
      if ($reference_fragment != self::EMPTY_STRING) {
        $hash_at = strpos($buffer, "#");
        if ($hash_at !== false) {
          $buffer = substr($buffer, 0, $hash_at);
        }
        $buffer .= "#" . $reference_fragment;
      }
      return;
    }

    // Case 2: the reference is already absolute.
    if ($reference_scheme != self::EMPTY_STRING) {
      $buffer = $reference_uri;
      return;
    }

    // Case 3: relative reference.
    $this->_parse_uri($base_uri, $scratch, 0, $base_scheme, $base_authority, $base_path, $base_query, $base_fragment);

    $result_scheme = $base_scheme;

    if ($reference_authority != self::EMPTY_STRING) {
      $result_authority = $reference_authority;
      $result_path = $this->_remove_dot_segments($reference_path);
    }
    else {
      $result_authority = $base_authority;

      if ($reference_path == self::EMPTY_STRING) {
        // Query-only reference: keep the base path as it is.
        $result_path = $base_path;
      }
      elseif (($reference_path[0] == '/') || ($reference_path[0] == '\\')) {
        $result_path = $this->_remove_dot_segments($reference_path);
      }
      else {
        // Merge: keep the base path up to and including its last separator.
        $cut = strrpos($base_path, '/');
        if ($cut === false) {
          $cut = strrpos($base_path, '\\');
        }

        if ($cut !== false) {
          $merged = substr($base_path, 0, $cut + 1) . $reference_path;
        }
        elseif ($base_authority != self::EMPTY_STRING) {
          $merged = "/" . $reference_path;
        }
        else {
          $merged = $reference_path;
        }

        $result_path = $this->_remove_dot_segments($merged);
      }
    }

    if ($result_scheme != self::EMPTY_STRING) {
      $buffer = $result_scheme . ":";
    }
    if ($result_authority != self::EMPTY_STRING) {
      $buffer .= "//" . $result_authority;
    }
    if ($result_path != self::EMPTY_STRING) {
      $buffer .= $result_path;
    }
    if ($reference_query != self::EMPTY_STRING) {
      $buffer .= "?" . $reference_query;
    }
    if ($reference_fragment != self::EMPTY_STRING) {
      $buffer .= "#" . $reference_fragment;
    }
  }
  // Removes "." and ".." segments from a "/"-separated path. ".." never
  // climbs above the root of an absolute path, and a trailing "." or ".."
  // leaves a trailing "/".
  private function _remove_dot_segments($path) {
    $path = (string) $path;
    if ($path === self::EMPTY_STRING) {
      return $path;
    }

    $is_absolute = ($path[0] === '/');
    $floor = $is_absolute ? 1 : 0; // the leading "" segment of "/..." stays
    $segments = explode('/', $path);
    $last = count($segments) - 1;
    $kept = array();

    foreach ($segments as $index => $segment) {
      if ($segment === self::DOT || $segment === '..') {
        if ($segment === '..' && count($kept) > $floor) {
          array_pop($kept);
        }
        if ($index === $last) {
          $kept[] = self::EMPTY_STRING;
        }
        continue;
      }
      $kept[] = $segment;
    }

    return implode('/', $kept);
  }
  // Tells whether $id is acceptable as an ID: the first character must be
  // an ASCII letter, "_" or ":", and every following character an ASCII
  // letter, digit, ".", "-", "_" or ":".
  //
  // Returns a boolean. One anchored pattern replaces the earlier
  // character loop, which tested the whole string (not its first character)
  // for a letter and used a numeric comparison against 0 to find the end of
  // the string.
  private function is_valid_id($id) {
    if (!is_string($id) || $id === self::EMPTY_STRING) {
      return false;
    }

    // D modifier: "$" must match at the very end, not before a newline.
    return preg_match('/^[A-Za-z_:][A-Za-z0-9._:\-]*$/D', $id) === 1;
  }
  // Resolves an ID attribute value against the base URI and writes the
  // result into $buffer (by reference).
  //
  // A valid id is resolved as the fragment reference "#id". For an invalid
  // id a warning naming the offending value is reported through
  // _report_warning() and the empty reference is resolved instead, which
  // yields the base URI.
  private function _resolve_id($id, &$buffer, $length) {
    $id_buffer = self::EMPTY_STRING;

    if ($this->is_valid_id($id) == true) {
      $id_buffer = "#$id";
    }
    else {
      // The warning method is _report_warning(); there is no report_warning().
      $this->_report_warning("bad ID attribute: \"" . $id . "\"");
    }

    $this->_resolve_uri_reference($this->rdf_parser["base_uri"], $id_buffer, $buffer, $length);
  }
  // Splits an expanded name into namespace URI and local name, written into
  // the by-reference parameters.
  //
  //  - "uri^local" (namespace separator present): split at the LAST
  //    separator, so a separator inside the URI part cannot truncate it.
  //  - "xml:local": the XML namespace URI and the part after "xml:".
  //  - anything else: empty namespace URI, whole name as local name.
  //
  // $buffer receives a copy of $name; $len is a leftover of the C interface
  // and is not used.
  private function _split_name($name, &$buffer, $len, &$namespace_uri, &$local_name) {
    $buffer = (string) $name;

    $separator_at = strrpos($buffer, self::NAMESPACE_SEPARATOR_CHAR);
    if ($separator_at !== false) {
      $namespace_uri = substr($buffer, 0, $separator_at);
      $local_name = (string) substr($buffer, $separator_at + 1);
      return;
    }

    // strncmp() is safe for names shorter than the prefix.
    if (strncmp($buffer, 'xml:', 4) === 0) {
      $namespace_uri = self::XML_NAMESPACE_URI;
      $local_name = (string) substr($buffer, 4);
      return;
    }

    $namespace_uri = self::EMPTY_STRING;
    $local_name = $buffer;
  }
  // Produces the next generated identifier: increments the "anonymous_id"
  // counter kept in the parser state, resolves "#genid<counter>" against the
  // base URI and writes the result into $buf (by reference).
  private function _generate_anonymous_uri(&$buf, $len) {
    $counter = isset($this->rdf_parser["anonymous_id"]) ? $this->rdf_parser["anonymous_id"] : 0;
    $counter++;
    $this->rdf_parser["anonymous_id"] = $counter;

    $reference = "#genid" . $counter;
    $this->_resolve_uri_reference($this->rdf_parser["base_uri"], $reference, $buf, $len);
  }
  // Reports one statement to the registered statement handler and, when
  // asked to, the extra statements describing its bag membership and its
  // reification.
  //
  // Nothing happens when no statement handler is registered. Otherwise:
  //  - the handler is called with the user data (by reference) followed by
  //    subject type, subject, predicate, ordinal, object type, object and
  //    xml:lang; its return value replaces the stored user data;
  //  - if $bag_id is set, the bag is typed as rdf:Bag when $statements is
  //    still zero/empty, a statement id is generated if none was given, and
  //    a membership statement (bag, rdf:_n, statement id) is reported;
  //  - if a statement id is present (given or generated), the rdf:type,
  //    rdf:subject, rdf:predicate and rdf:object statements for it are
  //    reported.
  //
  // NOTE(review): $statements is received by value, so the increment below
  // is not visible to the caller — confirm whether callers are expected to
  // maintain the count themselves.
  private function _report_statement($subject_type, $subject, $predicate, $ordinal, $object_type, $object, $xml_lang, $bag_id, $statements, $statement_id) {
    if (empty($this->rdf_parser["statement_handler"])) {
      return;
    }

    $this->rdf_parser["user_data"] = call_user_func_array(
      $this->rdf_parser["statement_handler"],
      array(&$this->rdf_parser["user_data"], $subject_type, $subject, $predicate, $ordinal, $object_type, $object, $xml_lang)
    );

    $statement_id_type = self::RDF_SUBJECT_TYPE_URI;

    if ($bag_id) {
      // Truthiness test: covers both 0 and the empty string.
      if (!$statements) {
        $this->_report_statement(self::RDF_SUBJECT_TYPE_URI, $bag_id, self::RDF_NAMESPACE_URI . self::RDF_TYPE, 0, self::RDF_OBJECT_TYPE_RESOURCE, self::RDF_NAMESPACE_URI . self::RDF_BAG, self::EMPTY_STRING, self::EMPTY_STRING, self::EMPTY_STRING, self::EMPTY_STRING);
      }

      if (!$statement_id) {
        $statement_id_type = self::RDF_SUBJECT_TYPE_ANONYMOUS;
        $generated_id = self::EMPTY_STRING;
        $this->_generate_anonymous_uri($generated_id, strlen($generated_id));
        $statement_id = $generated_id;
      }

      $statements = (int) $statements + 1;
      // rdf:_n — namespace URI + "_" + n (was a literal "self::..." string).
      $member_predicate = self::RDF_NAMESPACE_URI . self::UNDERSCORE . $statements;
      $this->_report_statement(self::RDF_SUBJECT_TYPE_URI, $bag_id, $member_predicate, $statements, self::RDF_OBJECT_TYPE_RESOURCE, $statement_id, self::EMPTY_STRING, self::EMPTY_STRING, self::EMPTY_STRING, self::EMPTY_STRING);
    }

    if ($statement_id) {
      // local name of the predicate, object type, object
      $reification = array(
        array(self::RDF_TYPE, self::RDF_OBJECT_TYPE_RESOURCE, self::RDF_NAMESPACE_URI . self::RDF_STATEMENT),
        array(self::RDF_SUBJECT, self::RDF_OBJECT_TYPE_RESOURCE, $subject),
        array(self::RDF_PREDICATE, self::RDF_OBJECT_TYPE_RESOURCE, $predicate),
        array(self::RDF_OBJECT, $object_type, $object),
      );

      foreach ($reification as $entry) {
        $this->_report_statement(
          $statement_id_type,
          $statement_id,
          self::RDF_NAMESPACE_URI . $entry[0],
          0,
          $entry[1],
          $entry[2],
          self::EMPTY_STRING, self::EMPTY_STRING, self::EMPTY_STRING, self::EMPTY_STRING
        );
      }
    }
  }
  // Invokes the registered "start parse-type literal" callback with the
  // user data. Does nothing when no callback is registered; the lookup is
  // guarded so that an unset handler does not raise an undefined-key
  // warning.
  private function _report_start_parse_type_literal() {
    if (!empty($this->rdf_parser["start_parse_type_literal_handler"])) {
      $this->rdf_parser["start_parse_type_literal_handler"]($this->rdf_parser["user_data"]);
    }
  }
  // Invokes the registered "end parse-type literal" callback with the user
  // data. Does nothing when no callback is registered; the lookup is
  // guarded so that an unset handler does not raise an undefined-key
  // warning.
  private function _report_end_parse_type_literal() {
    if (!empty($this->rdf_parser["end_parse_type_literal_handler"])) {
      $this->rdf_parser["end_parse_type_literal_handler"]($this->rdf_parser["user_data"]);
    }
  }
  // Walks a flat [name, value, name, value, ...] attribute list and reports
  // one statement about $subject for every attribute that acts as a
  // property:
  //  - RDF namespace, "value"  -> literal object, with $xml_lang
  //  - RDF namespace, "type"   -> resource object, no language
  //  - RDF namespace, ordinal  -> literal object with that ordinal
  //  - XML namespace           -> ignored
  //  - any other namespace     -> literal object, with $xml_lang
  // Attributes without a namespace, and other RDF-namespace attributes, are
  // skipped.
  private function _handle_property_attributes($subject_type, $subject, $attributes, $xml_lang, $bag_id, $statements) {
    $raw_name = self::EMPTY_STRING;
    $ns_uri = self::EMPTY_STRING;
    $name = self::EMPTY_STRING;

    for ($pos = 0; isset($attributes[$pos]); $pos += 2) {
      $this->_split_name($attributes[$pos], $raw_name, strlen($raw_name), $ns_uri, $name);
      $value = $attributes[$pos + 1];
      $property = $ns_uri . $name;

      // Defaults for the most common case: a plain literal property.
      $number = 0;
      $object_type = self::RDF_OBJECT_TYPE_LITERAL;
      $language = $xml_lang;

      if ($ns_uri == self::RDF_NAMESPACE_URI) {
        if ($this->_is_rdf_property_attribute_literal($name)) {
          // defaults apply
        }
        elseif ($this->_is_rdf_property_attribute_resource($name)) {
          $object_type = self::RDF_OBJECT_TYPE_RESOURCE;
          $language = self::EMPTY_STRING;
        }
        else {
          $number = $this->_is_rdf_ordinal($name);
          if ($number == 0) {
            continue;
          }
        }
      }
      elseif ($ns_uri == self::XML_NAMESPACE_URI) {
        continue;
      }
      elseif (!$ns_uri) {
        continue;
      }

      $this->_report_statement($subject_type, $subject, $property, $number, $object_type, $value, $language, $bag_id, $statements, self::EMPTY_STRING);
    }
  }
  // Forwards an element-start event to the callback stored under
  // "start_element_handler", passing the user data, the element name and
  // its attributes. Does nothing when that key is not set.
  private function _report_start_element($name, $attributes) {
    if (!isset($this->rdf_parser["start_element_handler"])) {
      return;
    }

    $callback = $this->rdf_parser["start_element_handler"];
    $callback($this->rdf_parser["user_data"], $name, $attributes);
  }
  // Forwards an element-end event to the callback stored under
  // "end_element_handler", passing the user data and the element name.
  // Does nothing when that key is not set.
  private function _report_end_element($name) {
    if (!isset($this->rdf_parser["end_element_handler"])) {
      return;
    }

    $callback = $this->rdf_parser["end_element_handler"];
    $callback($this->rdf_parser["user_data"], $name);
  }
  // Forwards character data to the callback stored under
  // "character_data_handler", passing the user data, the text and its
  // length. Does nothing when that key is not set.
  private function _report_character_data($s, $len) {
    if (!isset($this->rdf_parser["character_data_handler"])) {
      return;
    }

    $callback = $this->rdf_parser["character_data_handler"];
    $callback($this->rdf_parser["user_data"], $s, $len);
  }
  // Passes $warning to the callback stored under "warning_handler".
  // Does nothing when that key is not set.
  private function _report_warning($warning) {
    if (!isset($this->rdf_parser["warning_handler"])) {
      return;
    }

    $callback = $this->rdf_parser["warning_handler"];
    $callback($warning);
  }
  581. private function _handle_resource_element($namespace_uri, $local_name, $attributes, $parent) {
  582. $subjects_found = 0;
  583. $aux = $attributes;
  584. $aux2 = Array();
  585. foreach ($attributes as $atkey => $atvalue) {
  586. $aux2[] = $atkey;
  587. $aux2[] = $atvalue;
  588. }
  589. $attributes = $aux2;
  590. $id = self::EMPTY_STRING;
  591. $about = self::EMPTY_STRING;
  592. $about_each = self::EMPTY_STRING;
  593. $about_each_prefix = self::EMPTY_STRING;
  594. $bag_id = self::EMPTY_STRING;
  595. $i = 0;
  596. $attribute = self::EMPTY_STRING;
  597. $attribute_namespace_uri = self::EMPTY_STRING;
  598. $attribute_local_name = self::EMPTY_STRING;
  599. $attribute_value = self::EMPTY_STRING;
  600. $id_buffer = self::EMPTY_STRING;
  601. $type = self::EMPTY_STRING;
  602. $this->rdf_parser["top"]["has_property_attributes"] = false;
  603. $this->rdf_parser["top"]["has_member_attributes"] = false;
  604. // examine each attribute for the standard RDF "keywords"
  605. for ($i = 0;isset($attributes[$i]);$i += 2) {
  606. $this->_split_name($attributes[$i], $attribute, strlen($attribute) , $attribute_namespace_uri, $attribute_local_name);
  607. $attribute_value = $attributes[$i + 1];
  608. // if the attribute is not in any namespace
  609. // or the attribute is in the RDF namespace
  610. if (($attribute_namespace_uri == self::EMPTY_STRING) || ($attribute_namespace_uri == self::RDF_NAMESPACE_URI)) {
  611. if ($attribute_local_name == self::RDF_ID) {
  612. $id = $attribute_value;
  613. ++$subjects_found;
  614. }
  615. elseif ($attribute_local_name == self::RDF_ABOUT) {
  616. $about = $attribute_value;
  617. ++$subjects_found;
  618. }
  619. elseif ($attribute_local_name == self::RDF_ABOUT_EACH) {
  620. $about_each = $attribute_value;
  621. ++$subjects_found;
  622. }
  623. elseif ($attribute_local_name == self::RDF_ABOUT_EACH_PREFIX) {
  624. $about_each_prefix = $attribute_value;
  625. ++$subjects_found;
  626. }
  627. elseif ($attribute_local_name == self::RDF_BAG_ID) {
  628. $bag_id = $attribute_value;
  629. }
  630. elseif ($this->_is_rdf_property_attribute($attribute_local_name)) {
  631. $this->rdf_parser["top"]["has_property_attributes"] = true;
  632. }
  633. elseif ($this->_is_rdf_ordinal($attribute_local_name)) {
  634. $this->rdf_parser["top"]["has_property_attributes"] = true;
  635. $this->rdf_parser["top"]["has_member_attributes"] = true;
  636. }
  637. else {
  638. $this->_report_warning("unknown or out of context rdf attribute:" . $attribute_local_name);
  639. }
  640. }
  641. elseif ($attribute_namespace_uri == self::XML_NAMESPACE_URI) {
  642. if ($attribute_local_name == self::XML_LANG) {
  643. $this->rdf_parser["top"]["xml_lang"] = $attribute_value;
  644. }
  645. }
  646. elseif ($attribute_namespace_uri) {
  647. $this->rdf_parser["top"]["has_property_attributes"] = true;
  648. }
  649. }
  650. // if no subjects were found, generate one.
  651. if ($subjects_found == 0) {
  652. $this->_generate_anonymous_uri($id_buffer, strlen($id_buffer));
  653. $this->rdf_parser["top"]["subject"] = $id_buffer;
  654. $this->rdf_parser["top"]["subject_type"] = self::RDF_SUBJECT_TYPE_ANONYMOUS;
  655. }
  656. elseif ($subjects_found > 1) {
  657. $this->_report_warning("ID, about, aboutEach, and aboutEachPrefix are mutually exclusive");
  658. return;
  659. }
  660. elseif ($id) {
  661. $this->_resolve_id($id, $id_buffer, strlen($id_buffer));
  662. $this->rdf_parser["top"]["subject_type"] = self::RDF_SUBJECT_TYPE_URI;
  663. $this->rdf_parser["top"]["subject"] = $id_buffer;
  664. }
  665. elseif ($about) {
  666. $this->_resolve_uri_reference($this->rdf_parser["base_uri"], $about, $id_buffer, strlen($id_buffer));
  667. $this->rdf_parser["top"]["subject_type"] = self::RDF_SUBJECT_TYPE_URI;
  668. $this->rdf_parser["top"]["subject"] = $id_buffer;
  669. }
  670. elseif ($about_each) {
  671. $this->rdf_parser["top"]["subject_type"] = self::RDF_SUBJECT_TYPE_DISTRIBUTED;
  672. $this->rdf_parser["top"]["subject"] = $about_each;
  673. }
  674. elseif ($about_each_prefix) {
  675. $this->rdf_parser["top"]["subject_type"] = self::RDF_SUBJECT_TYPE_PREFIX;
  676. $this->rdf_parser["top"]["subject"] = $about_each_prefix;
  677. }
  678. // if the subject is empty, assign it the document uri
  679. if ($this->rdf_parser["top"]["subject"] == self::EMPTY_STRING) {
  680. $len = 0;
  681. $this->rdf_parser["top"]["subject"] = $this->rdf_parser["base_uri"];
  682. // now remove the trailing '#'
  683. $len = strlen($this->rdf_parser["top"]["subject"]);
  684. }
  685. if ($bag_id) {
  686. $this->_resolve_id($bag_id, $id_buffer, strlen($id_buffer));
  687. $this->rdf_parser["top"]["bag_id"] = $id_buffer;
  688. }
  689. // only report the type for non-rdf:Description elements.
  690. if (($local_name != self::RDF_DESCRIPTION) || ($namespace_uri != self::RDF_NAMESPACE_URI)) {
  691. $type = $namespace_uri;
  692. $type .= $local_name;
  693. $this->_report_statement(
  694. $this->rdf_parser["top"]["subject_type"],
  695. $this->rdf_parser["top"]["subject"],
  696. self::RDF_NAMESPACE_URI . self::RDF_TYPE,
  697. 0,
  698. self::RDF_OBJECT_TYPE_RESOURCE,
  699. $type,
  700. self::EMPTY_STRING,
  701. $this->rdf_parser["top"]["bag_id"],
  702. $this->rdf_parser["top"]["statements"],
  703. self::EMPTY_STRING
  704. );
  705. }
  706. // if this element is the child of some property,
  707. // report the appropriate statement.
  708. if ($parent) {
  709. $this->_report_statement(
  710. $parent["parent"]["subject_type"],
  711. $parent["parent"]["subject"],
  712. $parent["predicate"],
  713. $parent["ordinal"],
  714. self::RDF_OBJECT_TYPE_RESOURCE,
  715. $this->rdf_parser["top"]["subject"],
  716. self::EMPTY_STRING,
  717. $parent["parent"]["bag_id"],
  718. $parent["parent"]["statements"],
  719. $parent["statement_id"]
  720. );
  721. }
  722. if ($this->rdf_parser["top"]["has_property_attributes"]) {
  723. $this->_handle_property_attributes(
  724. $this->rdf_parser["top"]["subject_type"],
  725. $this->rdf_parser["top"]["subject"],
  726. $attributes,
  727. $this->rdf_parser["top"]["xml_lang"],
  728. $this->rdf_parser["top"]["bag_id"],
  729. $this->rdf_parser["top"]["statements"]
  730. );
  731. }
  732. }
  /**
   * Processes the start tag of a property element.
   *
   * Works out the predicate name and container ordinal for the element on
   * top of the element stack, scans the attributes, and then:
   *  - with rdf:parseType: reports the statement and switches the state to
   *    IN_PROPERTY_PARSE_TYPE_RESOURCE (after pushing a new element) or to
   *    IN_PROPERTY_PARSE_TYPE_LITERAL;
   *  - with rdf:resource, rdf:bagID or property attributes: reports a
   *    resource statement and switches to IN_PROPERTY_EMPTY_RESOURCE;
   *  - otherwise leaves the state as the caller set it.
   * Invalid names or attribute combinations are reported through
   * _report_warning() and processing of the element stops.
   *
   * @param string $namespace_uri namespace part of the element name
   * @param string $local_name    local part of the element name
   * @param array  $attributes    attributes, iterated as name => value pairs;
   *                              replaced in place (it is a reference) by the
   *                              flat list name, value, name, value, ...
   */
  private function _handle_property_element(&$namespace_uri, &$local_name, &$attributes) {
      $buffer = self::EMPTY_STRING;

      // Flatten the name => value pairs into name, value, name, value, ...
      // $attributes is a reference parameter, so the caller's array is
      // replaced too; that existing side effect is kept on purpose, and the
      // flat list is what gets handed to _handle_property_attributes() below.
      $flat = array();
      foreach ($attributes as $atkey => $atvalue) {
          $flat[] = $atkey;
          $flat[] = $atvalue;
      }
      $attributes = $flat;

      $attribute_namespace_uri = self::EMPTY_STRING;
      $attribute_local_name = self::EMPTY_STRING;
      $resource = self::EMPTY_STRING;
      $statement_id = self::EMPTY_STRING;
      $bag_id = self::EMPTY_STRING;
      $parse_type = self::EMPTY_STRING;

      $this->rdf_parser["top"]["ordinal"] = 0;
      if ($namespace_uri == self::RDF_NAMESPACE_URI) {
          // Store the helper's result first and compare afterwards. The
          // previous one-liner `$x = (f()) != 0` stored the boolean outcome of
          // the comparison (`!=` binds tighter than `=`), so the ordinal and
          // the parent's member counter ended up as true/false.
          // NOTE(review): assumes _is_rdf_ordinal() returns the member number
          // for "_n" names and 0 otherwise - confirm against its definition.
          $ordinal = $this->_is_rdf_ordinal($local_name);
          $this->rdf_parser["top"]["ordinal"] = $ordinal;
          if ($ordinal != 0) {
              // keep the parent's member counter at the highest ordinal seen
              if ($ordinal > $this->rdf_parser["top"]["parent"]["members"]) {
                  $this->rdf_parser["top"]["parent"]["members"] = $ordinal;
              }
          } elseif (!$this->_is_rdf_property_element($local_name)) {
              $this->_report_warning("unknown or out of context rdf property element: " . $local_name);
              return;
          }
      }

      // Build the predicate: namespace URI followed by the local name, or,
      // for rdf:li, by the underscore constant and the next member number.
      $buffer = $namespace_uri;
      if (($namespace_uri == self::RDF_NAMESPACE_URI) && ($local_name == self::RDF_LI)) {
          $this->rdf_parser["top"]["parent"]["members"]++;
          $this->rdf_parser["top"]["ordinal"] = $this->rdf_parser["top"]["parent"]["members"];
          $buffer .= self::UNDERSCORE . $this->rdf_parser["top"]["ordinal"];
      } else {
          $buffer .= $local_name;
      }
      $this->rdf_parser["top"]["predicate"] = $buffer;
      $this->rdf_parser["top"]["has_property_attributes"] = false;
      $this->rdf_parser["top"]["has_member_attributes"] = false;

      // Walk the flat attribute list two entries (name, value) at a time.
      for ($i = 0; isset($attributes[$i]); $i += 2) {
          $this->_split_name($attributes[$i], $buffer, strlen($buffer), $attribute_namespace_uri, $attribute_local_name);
          $attribute_value = $attributes[$i + 1];
          // if the attribute is not in any namespace
          // or the attribute is in the RDF namespace
          if (($attribute_namespace_uri == self::EMPTY_STRING) || ($attribute_namespace_uri == self::RDF_NAMESPACE_URI)) {
              if ($attribute_local_name == self::RDF_ID) {
                  $statement_id = $attribute_value;
              } elseif ($attribute_local_name == self::RDF_PARSE_TYPE) {
                  $parse_type = $attribute_value;
              } elseif ($attribute_local_name == self::RDF_RESOURCE) {
                  $resource = $attribute_value;
              } elseif ($attribute_local_name == self::RDF_BAG_ID) {
                  $bag_id = $attribute_value;
              } elseif ($this->_is_rdf_property_attribute($attribute_local_name)) {
                  $this->rdf_parser["top"]["has_property_attributes"] = true;
              } else {
                  $this->_report_warning("unknown rdf attribute: " . $attribute_local_name);
                  return;
              }
          } elseif ($attribute_namespace_uri == self::XML_NAMESPACE_URI) {
              if ($attribute_local_name == self::XML_LANG) {
                  $this->rdf_parser["top"]["xml_lang"] = $attribute_value;
              }
          } elseif ($attribute_namespace_uri) {
              // any attribute from another namespace is a property attribute
              $this->rdf_parser["top"]["has_property_attributes"] = true;
          }
      }

      // this isn't allowed by the M&S but I think it should be
      if ($statement_id && $resource) {
          $this->_report_warning("rdf:ID and rdf:resource are mutually exclusive");
          return;
      }
      if ($statement_id) {
          $this->_resolve_id($statement_id, $buffer, strlen($buffer));
          $this->rdf_parser["top"]["statement_id"] = $buffer;
      }

      if ($parse_type) {
          if ($resource) {
              $this->_report_warning("property elements with rdf:parseType do not allow rdf:resource");
              return;
          }
          if ($bag_id) {
              $this->_report_warning("property elements with rdf:parseType do not allow rdf:bagID");
              return;
          }
          if ($this->rdf_parser["top"]["has_property_attributes"]) {
              $this->_report_warning("property elements with rdf:parseType do not allow property attributes");
              return;
          }
          // Test the rdf:parseType value itself. The previous code tested
          // $attribute_value, i.e. whichever attribute happened to be scanned
          // last, so parseType="Resource" followed by e.g. xml:lang was
          // handled as a literal.
          if ($parse_type == self::RDF_PARSE_TYPE_RESOURCE) {
              $this->_generate_anonymous_uri($buffer, strlen($buffer));
              // since we are sure that this is now a resource property we can report it
              $this->_report_statement(
                  $this->rdf_parser["top"]["parent"]["subject_type"],
                  $this->rdf_parser["top"]["parent"]["subject"],
                  $this->rdf_parser["top"]["predicate"],
                  0,
                  self::RDF_OBJECT_TYPE_RESOURCE,
                  $buffer,
                  self::EMPTY_STRING,
                  $this->rdf_parser["top"]["parent"]["bag_id"],
                  $this->rdf_parser["top"]["parent"]["statements"],
                  $statement_id
              );
              // the pushed element stands for the anonymous resource that
              // nested property elements will describe
              $this->_push_element();
              $this->rdf_parser["top"]["state"] = self::IN_PROPERTY_PARSE_TYPE_RESOURCE;
              $this->rdf_parser["top"]["subject_type"] = self::RDF_SUBJECT_TYPE_ANONYMOUS;
              $this->rdf_parser["top"]["subject"] = $buffer;
              $this->rdf_parser["top"]["bag_id"] = self::EMPTY_STRING;
          } else {
              // every other parseType value is handled as an XML literal
              $this->_report_statement(
                  $this->rdf_parser["top"]["parent"]["subject_type"],
                  $this->rdf_parser["top"]["parent"]["subject"],
                  $this->rdf_parser["top"]["predicate"],
                  0,
                  self::RDF_OBJECT_TYPE_XML,
                  self::EMPTY_STRING,
                  self::EMPTY_STRING,
                  $this->rdf_parser["top"]["parent"]["bag_id"],
                  $this->rdf_parser["top"]["parent"]["statements"],
                  $statement_id
              );
              $this->rdf_parser["top"]["state"] = self::IN_PROPERTY_PARSE_TYPE_LITERAL;
              $this->_report_start_parse_type_literal();
          }
      } elseif ($resource || $bag_id || $this->rdf_parser["top"]["has_property_attributes"]) {
          if ($resource != self::EMPTY_STRING) {
              $subject_type = self::RDF_SUBJECT_TYPE_URI;
              $this->_resolve_uri_reference($this->rdf_parser["base_uri"], $resource, $buffer, strlen($buffer));
          } else {
              $subject_type = self::RDF_SUBJECT_TYPE_ANONYMOUS;
              $this->_generate_anonymous_uri($buffer, strlen($buffer));
          }
          $this->rdf_parser["top"]["state"] = self::IN_PROPERTY_EMPTY_RESOURCE;
          // since we are sure that this is now a resource property we can report it.
          $this->_report_statement(
              $this->rdf_parser["top"]["parent"]["subject_type"],
              $this->rdf_parser["top"]["parent"]["subject"],
              $this->rdf_parser["top"]["predicate"],
              $this->rdf_parser["top"]["ordinal"],
              self::RDF_OBJECT_TYPE_RESOURCE,
              $buffer,
              self::EMPTY_STRING,
              $this->rdf_parser["top"]["parent"]["bag_id"],
              $this->rdf_parser["top"]["parent"]["statements"],
              self::EMPTY_STRING
          ); // should we allow IDs?
          if ($bag_id) {
              $this->_resolve_id($bag_id, $buffer, strlen($buffer));
              $this->rdf_parser["top"]["bag_id"] = $buffer;
          }
          if ($this->rdf_parser["top"]["has_property_attributes"]) {
              $this->_handle_property_attributes(
                  $subject_type,
                  $buffer,
                  $attributes,
                  $this->rdf_parser["top"]["xml_lang"],
                  $this->rdf_parser["top"]["bag_id"],
                  $this->rdf_parser["top"]["statements"]
              );
          }
      }
  }
  /**
   * Start-tag callback.
   *
   * Pushes a new element, splits the element name into namespace URI and
   * local name, and then acts on the state found on top of the element
   * stack. The $parser argument is not used.
   *
   * @param mixed  $parser     parser handle passed by the caller (unused)
   * @param string $name       element name as delivered by the parser
   * @param array  $attributes attributes of the element
   */
  private function _start_element_handler($parser, $name, $attributes) {
      $scratch = self::EMPTY_STRING;
      $element_namespace = self::EMPTY_STRING;
      $element_local_name = self::EMPTY_STRING;

      $this->_push_element();
      $this->_split_name($name, $scratch, strlen($scratch), $element_namespace, $element_local_name);

      // The state is read once; the branches below are tested in the same
      // order as before.
      $state = $this->rdf_parser["top"]["state"];

      if ($state == self::IN_TOP_LEVEL) {
          // only the rdf:RDF element switches the parser into RDF mode;
          // any other top-level element is passed through
          if (self::RDF_NAMESPACE_URI . self::NAMESPACE_SEPARATOR_STRING . self::RDF_RDF == $name) {
              $this->rdf_parser["top"]["state"] = self::IN_RDF;
          } else {
              $this->_report_start_element($name, $attributes);
          }
      } elseif ($state == self::IN_RDF) {
          $this->rdf_parser["top"]["state"] = self::IN_DESCRIPTION;
          $this->_handle_resource_element($element_namespace, $element_local_name, $attributes, self::EMPTY_STRING);
      } elseif ($state == self::IN_DESCRIPTION || $state == self::IN_PROPERTY_PARSE_TYPE_RESOURCE) {
          $this->rdf_parser["top"]["state"] = self::IN_PROPERTY_UNKNOWN_OBJECT;
          $this->_handle_property_element($element_namespace, $element_local_name, $attributes);
      } elseif ($state == self::IN_PROPERTY_UNKNOWN_OBJECT) {
          /* an element inside a property whose object type is still unknown
             means the object must be a resource */
          $this->rdf_parser["top"]["data"] = self::EMPTY_STRING;
          $this->rdf_parser["top"]["parent"]["state"] = self::IN_PROPERTY_RESOURCE;
          $this->rdf_parser["top"]["state"] = self::IN_DESCRIPTION;
          $this->_handle_resource_element(
              $element_namespace,
              $element_local_name,
              $attributes,
              $this->rdf_parser["top"]["parent"]
          );
      } elseif ($state == self::IN_PROPERTY_LITERAL) {
          $this->_report_warning("no markup allowed in literals");
      } elseif ($state == self::IN_PROPERTY_PARSE_TYPE_LITERAL || $state == self::IN_XML) {
          // the first element inside a parseType literal moves the new
          // element into the IN_XML state; both states report the start tag
          if ($state == self::IN_PROPERTY_PARSE_TYPE_LITERAL) {
              $this->rdf_parser["top"]["state"] = self::IN_XML;
          }
          $this->_report_start_element($name, $attributes);
      } elseif ($state == self::IN_PROPERTY_RESOURCE) {
          $this->_report_warning("only one element allowed inside a property element");
      } elseif ($state == self::IN_PROPERTY_EMPTY_RESOURCE) {
          $this->_report_warning("no content allowed in property with rdf:resource, rdf:bagID, or property attributes");
      }
      // IN_UNKNOWN and any other state: nothing to do
  }
  967. /*
  968. this is only called when we're in the self::IN_PROPERTY_UNKNOWN_OBJECT state.
  969. the only time we won't know what type of object a statement has is
  970. when we encounter property statements without property attributes or
  971. content:
  972. <foo:property />
  973. <foo:property ></foo:property>
  974. <foo:property> </foo:property>
  975. notice that the state doesn't switch to self::IN_PROPERTY_LITERAL when
  976. there is only whitespace between the start and end tags. this isn't
  977. a very useful statement since the object is anonymous and can't
  978. have any statements with it as the subject but it is allowed.
  979. */
  /**
   * Reports the statement for a property element that ended without
   * content: the object is a freshly generated anonymous resource.
   */
  private function _end_empty_resource_property() {
      // _generate_anonymous_uri() receives the variable together with its
      // current length and is expected to fill it in
      $anonymous_uri = self::EMPTY_STRING;
      $this->_generate_anonymous_uri($anonymous_uri, strlen($anonymous_uri));

      // subject, bag id and statement counter come from the parent element;
      // predicate, ordinal, language and statement id from the property itself
      $this->_report_statement(
          $this->rdf_parser["top"]["parent"]["subject_type"],
          $this->rdf_parser["top"]["parent"]["subject"],
          $this->rdf_parser["top"]["predicate"],
          $this->rdf_parser["top"]["ordinal"],
          self::RDF_OBJECT_TYPE_RESOURCE,
          $anonymous_uri,
          $this->rdf_parser["top"]["xml_lang"],
          $this->rdf_parser["top"]["parent"]["bag_id"],
          $this->rdf_parser["top"]["parent"]["statements"],
          $this->rdf_parser["top"]["statement_id"]
      );
  }
  995. /*
  996. property elements with text only as content set the state to
  997. self::IN_PROPERTY_LITERAL. as character data is received from expat,
  998. it is saved in a buffer and reported when the end tag is
  999. received.
  1000. */
  /**
   * Reports the statement for a property element whose content was plain
   * text; the object is the character data collected in top["data"].
   *
   * Fields of the current element and of its parent that are not set yet
   * are first given defaults (empty string, or 0 for the counters), and
   * those defaults are written back into the parser state.
   */
  private function _end_literal_property() {
      if (!isset($this->rdf_parser["top"]["statement_id"])) {
          $this->rdf_parser["top"]["statement_id"] = self::EMPTY_STRING;
      }

      // defaults for the parent element, applied in this order
      $parent_defaults = array(
          "subject_type" => self::EMPTY_STRING,
          "subject" => self::EMPTY_STRING,
          "bag_id" => self::EMPTY_STRING,
          "statements" => 0,
      );
      foreach ($parent_defaults as $field => $default) {
          if (!isset($this->rdf_parser["top"]["parent"][$field])) {
              $this->rdf_parser["top"]["parent"][$field] = $default;
          }
      }

      // remaining defaults for the property element itself
      $own_defaults = array(
          "predicate" => self::EMPTY_STRING,
          "ordinal" => 0,
      );
      foreach ($own_defaults as $field => $default) {
          if (!isset($this->rdf_parser["top"][$field])) {
              $this->rdf_parser["top"][$field] = $default;
          }
      }

      $this->_report_statement(
          $this->rdf_parser["top"]["parent"]["subject_type"],
          $this->rdf_parser["top"]["parent"]["subject"],
          $this->rdf_parser["top"]["predicate"],
          $this->rdf_parser["top"]["ordinal"],
          self::RDF_OBJECT_TYPE_LITERAL,
          $this->rdf_parser["top"]["data"],
          $this->rdf_parser["top"]["xml_lang"],
          $this->rdf_parser["top"]["parent"]["bag_id"],
          $this->rdf_parser["top"]["parent"]["statements"],
          $this->rdf_parser["top"]["statement_id"]
      );
  }
  /**
   * End-tag callback.
   *
   * Performs the state-specific work for the element that is closing and
   * then pops it from the element stack. The $parser argument is not used.
   *
   * @param mixed  $parser parser handle passed by the caller (unused)
   * @param string $name   name of the element being closed
   */
  private function _end_element_handler($parser, $name) {
      // read once; the handlers called below may change the parser state
      $state = $this->rdf_parser["top"]["state"];

      if ($state == self::IN_TOP_LEVEL || $state == self::IN_XML) {
          $this->_report_end_element($name);
      } elseif ($state == self::IN_PROPERTY_UNKNOWN_OBJECT) {
          $this->_end_empty_resource_property();
      } elseif ($state == self::IN_PROPERTY_LITERAL) {
          $this->_end_literal_property();
      } elseif ($state == self::IN_PROPERTY_PARSE_TYPE_RESOURCE) {
          // extra pop: _handle_property_element() pushes an additional
          // element when it enters this state
          $this->_pop_element();
      } elseif ($state == self::IN_PROPERTY_PARSE_TYPE_LITERAL) {
          $this->_report_end_parse_type_literal();
      }
      // IN_RDF, IN_DESCRIPTION, IN_PROPERTY_RESOURCE,
      // IN_PROPERTY_EMPTY_RESOURCE, IN_UNKNOWN: nothing state-specific

      $this->_pop_element();
  }
  /**
   * Character-data callback.
   *
   * Inside a property whose object is a literal or still unknown, the text
   * is appended to top["data"]; the first chunk containing anything other
   * than space, newline or tab turns an "unknown object" property into a
   * literal property. At top level and inside XML literals the text is
   * passed on to _report_character_data(). In every other state it is
   * ignored. The $parser argument is not used.
   *
   * @param mixed  $parser parser handle passed by the caller (unused)
   * @param string $s      chunk of character data
   */
  private function _character_data_handler($parser, $s) {
      $state = $this->rdf_parser["top"]["state"];

      if ($state == self::IN_PROPERTY_LITERAL || $state == self::IN_PROPERTY_UNKNOWN_OBJECT) {
          // accumulate the text; it is reported when the end tag arrives
          if (!isset($this->rdf_parser["top"]["data"])) {
              $this->rdf_parser["top"]["data"] = $s;
          } else {
              $this->rdf_parser["top"]["data"] .= $s;
          }

          if ($state == self::IN_PROPERTY_UNKNOWN_OBJECT) {
              /* strspn() gives the length of the leading run of space,
                 newline and tab; if that run is shorter than the chunk,
                 the chunk holds non-whitespace and this is a literal */
              if (strspn($s, " \n\t") < strlen($s)) {
                  $this->rdf_parser["top"]["state"] = self::IN_PROPERTY_LITERAL;
              }
          }
      } elseif ($state == self::IN_TOP_LEVEL
          || $state == self::IN_PROPERTY_PARSE_TYPE_LITERAL
          || $state == self::IN_XML) {
          $this->_report_character_data($s, strlen($s));
      }
      // IN_RDF, IN_DESCRIPTION, IN_PROPERTY_RESOURCE,
      // IN_PROPERTY_EMPTY_RESOURCE, IN_PROPERTY_PARSE_TYPE_RESOURCE,
      // IN_UNKNOWN: character data is ignored
  }
  1100. }
  1101. ?>