details.php 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869
  1. <?php
  /**
   * Normalise a run of plain text (the content between two tags) into
   * HTML5 entity-encoded UTF-8.
   *
   * The input is first passed through utf8_encode(), which treats it as
   * ISO-8859-1 and converts it to UTF-8. Entities already present are then
   * decoded, and the whole string is encoded again, so text that was already
   * escaped is not escaped a second time.
   *
   * Note: utf8_encode() is deprecated as of PHP 8.2.
   *
   * @param string $text Raw text, treated as ISO-8859-1 by utf8_encode()
   * @return string Entity-encoded UTF-8 text
   */
  function clean_tag_content($text) {
      $decode_flags = ENT_COMPAT | ENT_XHTML;
      $encode_flags = ENT_COMPAT | ENT_HTML5 | ENT_SUBSTITUTE;

      $as_utf8 = utf8_encode($text);
      $decoded = html_entity_decode($as_utf8, $decode_flags, 'utf-8');

      return htmlentities($decoded, $encode_flags, 'utf-8');
  }
  /**
   * Clean the text portions of an HTML fragment while leaving tags untouched.
   *
   * Whitespace runs are collapsed to a single space. The fragment is then
   * scanned byte by byte: everything from a '<' up to and including the next
   * '>' is copied verbatim, and every run of text outside such a tag is passed
   * through clean_tag_content() before being appended.
   *
   * Edge cases:
   *  - A '>' that appears outside a tag is treated as ordinary text and goes
   *    through clean_tag_content() with the surrounding text.
   *  - A '<' that appears inside an already open tag is copied verbatim.
   *  - An unterminated tag at the end of the input is copied verbatim.
   *
   * @param string $html HTML fragment to clean
   * @return string The fragment with its text runs cleaned
   */
  function clean_html($html) {
      $html = preg_replace('/\s+/', ' ', $html);

      $in_tag = false;
      $text = '';     // text collected since the last tag, not yet emitted
      $cleaned = '';

      // Byte-wise scanning is safe here: '<' and '>' are single ASCII bytes
      // that never occur inside a multi-byte UTF-8 sequence.
      for ($i = 0, $l = strlen($html); $i < $l; $i++) {
          $char = $html[$i];

          if ($in_tag) {
              // Inside a tag: copy everything as-is until the closing '>'.
              $cleaned .= $char;
              if ($char === '>') {
                  $in_tag = false;
              }
              continue;
          }

          if ($char === '<') {
              // A tag starts: emit the pending text exactly once, then reset
              // the buffer so it cannot be emitted again.
              $cleaned .= clean_tag_content($text) . '<';
              $text = '';
              $in_tag = true;
          } else {
              // Plain text, including a stray '>' outside any tag.
              $text .= $char;
          }
      }

      // Emit the trailing text. The buffer is always empty while inside a
      // tag, so this adds nothing when the input ends with an unterminated tag.
      return $cleaned . clean_tag_content($text);
  }
  // Fetch the tvgids.nl details page for the programme given by ?id=... and
  // respond with a JSON document containing its description and properties.

  /**
   * Send a JSON error document with the given HTTP status and end the script.
   *
   * @param int    $status  HTTP status code to respond with
   * @param string $message Reason for the failure, returned under the "error" key
   */
  function details_fail($status, $message) {
      http_response_code($status);
      header('Content-Type: application/json; charset=utf-8');
      echo json_encode(array('error' => $message));
      exit;
  }

  // Validate the request with real checks: assert() may be disabled through
  // zend.assertions, in which case it would let any value through.
  if (!isset($_GET['id']) || !is_numeric($_GET['id'])) {
      details_fail(400, 'Missing or non-numeric "id" parameter');
  }

  // Fetch details page. The id is URL-encoded so that numeric forms containing
  // characters such as '+' or whitespace still yield a well-formed URL.
  $url = 'http://www.tvgids.nl/programma/' . rawurlencode($_GET['id']);
  $page = file_get_contents($url);
  if ($page === false) {
      details_fail(502, 'Could not fetch the programme page');
  }

  // Parse detailed description, preserving a selected set of HTML tags
  if (!preg_match('/<div\s+id="prog-content">\s*(.*?)\s*<div\s+class="prog-functionbar">/s', $page, $m1)) {
      details_fail(502, 'Programme description not found in page');
  }
  $description = strip_tags($m1[1], '<p><strong><em><b><i><font><a><span><img><br>');
  $description = str_replace('showVideoPlaybutton()', '', $description);
  $description = clean_html($description);

  // Parse properties list
  if (!preg_match('/<ul\s+id="prog-info-content-colleft">\s*(.*?)\s*<\/ul>/s', $page, $m2)) {
      details_fail(502, 'Programme properties not found in page');
  }
  // A list without any matching items simply yields an empty properties array.
  preg_match_all('/<li><strong>(\w+):<\/strong>(.*?)<\/li>/', $m2[1], $m3);
  $properties = array();
  foreach ($m3[1] as $i => $name) {
      $properties[] = array('name' => $name, 'value' => $m3[2][$i]);
  }

  // NOTE(review): property values are emitted exactly as found in the page.
  // If the page is not UTF-8, json_encode() fails on them; confirm whether
  // they need the same conversion as the description.
  $json = json_encode(compact('description', 'properties'), JSON_UNESCAPED_SLASHES);
  if ($json === false) {
      details_fail(500, 'Could not encode the programme details as JSON');
  }

  header('Content-Type: application/json; charset=utf-8');
  echo $json;