details.php 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485
  1. <?php
  2. function clean_tag_content($text) {
  3. $text = html_entity_decode(utf8_encode($text), ENT_COMPAT | ENT_XHTML, 'utf-8');
  4. return htmlentities($text, ENT_COMPAT | ENT_HTML5 | ENT_SUBSTITUTE, 'utf-8');
  5. }
  6. function clean_html($html) {
  7. $html = preg_replace('/\s+/', ' ', $html);
  8. $in_tag = false;
  9. $stack = '';
  10. $cleaned = '';
  11. for ($i = 0, $l = strlen($html); $i < $l; $i++) {
  12. switch ($html[$i]) {
  13. case '<':
  14. $in_tag = true;
  15. $cleaned .= clean_tag_content($stack) . '<';
  16. break;
  17. case '>':
  18. $in_tag = false;
  19. $cleaned .= '>';
  20. $stack = '';
  21. break;
  22. default:
  23. if ($in_tag)
  24. $cleaned .= $html[$i];
  25. else
  26. $stack .= $html[$i];
  27. }
  28. }
  29. if (!$in_tag)
  30. $cleaned .= clean_tag_content($stack);
  31. return $cleaned;
  32. }
  33. // Fetch details page
  34. assert(isset($_GET['id']));
  35. assert(is_numeric($_GET['id']));
  36. $url = 'http://www.tvgids.nl/programma/' . $_GET['id'];
  37. $page = file_get_contents($url);
  38. // Parse detailed description, preserving a selected set of HTML tags
  39. assert(preg_match('%<div id="prog-content">\s*(.*?)\s*<br class="brclear"%s', $page, $m1));
  40. $description = strip_tags($m1[1], '<p><strong><em><b><i><font><a><span><img><br>');
  41. $description = str_replace('showVideoPlaybutton()', '', $description);
  42. $description = clean_html($description);
  43. //$description = preg_replace('/\s+/', ' ', $description);
  44. //$description = htmlentities($description, ENT_COMPAT | ENT_HTML5 | ENT_SUBSTITUTE, 'ISO-8859-1');
  45. //$description = str_replace(array('&lt;', '&gt;', '&sol;'), array('<', '>', '/'), $description);
  46. // Parse properties list
  47. assert(preg_match('%<ul\s+id="prog-info-content-colleft">\s*(.*?)\s*</ul>' .
  48. '(?:\s*<ul\s+id="prog-info-content-colright">\s*(.*?)\s*</ul>)?%s', $page, $m2));
  49. assert(preg_match_all('%<li><strong>([\w ]+):</strong>(.*?)</li>%', $m2[1] . $m2[2], $m3));
  50. $properties = array();
  51. $movie_search_url = 'http://www.imdb.com/xml/find?json=1&nr=1&tt=on&q=';
  52. $title = null;
  53. foreach ($m3[1] as $i => $name) {
  54. $value = $m3[2][$i];
  55. // Add IMDB URL for movie/series
  56. if ($value == 'Film' || $value == 'Serie/Soap') {
  57. $results = json_decode(file_get_contents($movie_search_url . urlencode($title)), true);
  58. if (count($results) > 0) {
  59. $lst = reset($results);
  60. $id = $lst[0]['id'];
  61. $value .= ' (<a href="http://www.imdb.com/title/' . $id . '" target="_blank">IMDB</a>)';
  62. }
  63. } elseif ($name == 'Titel') {
  64. $title = $value;
  65. }
  66. $properties[] = array('name' => $name, 'value' => $value);
  67. }
  68. header('Content-Type: application/json; charset=utf-8');
  69. echo json_encode(compact('description', 'properties'), JSON_UNESCAPED_SLASHES);
  70. ?>