fork(11) download
  1. <?php
  2.  
  3. /* アメブロスクレイピング用クラス */
  4. class AmebloRSS {
  5.  
  6. private $theme_id = '';
  7. private $ameba_id = '';
  8.  
  9. public function setAmebaID($ameba_id) {
  10.  
  11. if (!is_string($ameba_id))
  12. throw new InvalidArgumentException('$ameba_id is not string');
  13.  
  14. $this->ameba_id = $ameba_id;
  15.  
  16. }
  17.  
  18. public function setTheme($theme) {
  19.  
  20. if ($this->ameba_id==='')
  21. throw new Exception('AmebaID required');
  22. if (!is_string($theme))
  23. throw new InvalidArgumentException('$theme is not string');
  24.  
  25. $url = 'http://a...content-available-to-author-only...o.jp/'.$this->ameba_id.'/';
  26. $res = file_get_contents($url);
  27. $pattern = '@<li><a href="'.$url.'theme-(\\d+).html">'.$theme.' \\( \\d+ \\)</a></li>@u';
  28.  
  29. if (!preg_match($pattern,$res,$matches))
  30. throw new Exception('Theme not found');
  31.  
  32. $this->theme_id = $matches[1];
  33.  
  34. }
  35.  
  36. public function getArticles($page=1) {
  37.  
  38. $res = $this->getContents($page);
  39.  
  40. if (!preg_match_all('@<div class="entry(?: new)?">(.*?)</div><!--//\\.entry-->@us',$res,$matches,PREG_SET_ORDER))
  41. throw new Exception('Failed to fetch entries');
  42.  
  43. $p_date = '@'.
  44. '<div class="entry_head">\\s*?'.
  45. '<span class="date">(.*?)</span>\\s*?'.
  46. '</div><!--//\\.entry_head-->'.
  47. '@us';
  48. $p_title = '@'.
  49. '<h3 class="title">.*?'.
  50. '<a href="[^"]+">(.*?)</a>.*?'.
  51. '</h3>'.
  52. '@us';
  53. $p_text = '@'.
  54. '<div class="subContents">\\s'.
  55. '(?:\\s<!-- google_ad_section_start\\([^)]+\\) -->\\s|\\s\\s)?'.
  56. '(.*?)'.
  57. '(?:\\s<!-- google_ad_section_end\\([^)]+\\) -->\\s|\\s\\s)?'.
  58. '<!--entryBottom-->'.
  59. '@us';
  60.  
  61. $articles = array();
  62.  
  63. foreach ($matches as $key => $match) {
  64.  
  65. if (!preg_match($p_date,$match[1],$m_date))
  66. throw new Exception('Failed to fetch date on entry No.'.($key+1));
  67.  
  68. if (!preg_match($p_title,$match[1],$m_title))
  69. throw new Exception('Failed to fetch title on entry No.'.($key+1));
  70.  
  71. if (!preg_match($p_text,$match[1],$m_text))
  72. throw new Exception('Failed to fetch text on entry No.'.($key+1));
  73.  
  74. $article = new stdClass;
  75. $article->date = $m_date[1];
  76. $article->title = $m_title[1];
  77. $article->text = $m_text[1];
  78. $articles[] = $article;
  79.  
  80. }
  81.  
  82. return $articles;
  83.  
  84. }
  85.  
  86. private function getContents($page=1) {
  87.  
  88. $page = (int)$page;
  89.  
  90. if ($this->ameba_id==='')
  91. throw new Exception('AmebaID required');
  92.  
  93. $url = 'http://a...content-available-to-author-only...o.jp/'.$this->ameba_id.'/';
  94.  
  95. if ($this->theme_id!=='')
  96. $url .= 'theme'.$page.'-'.$this->theme_id.'.html';
  97. else
  98. $url .= 'page-'.$page.'.html';
  99.  
  100. return file_get_contents($url);
  101.  
  102. }
  103.  
  104. }
  105.  
  106. /* しょこたんのブログのテーマ「イラスト」で1ページ目を取得 */
  107. try {
  108.  
  109. $a = new AmebloRSS;
  110. $a->setAmebaID('nakagawa-shoko');
  111. $a->setTheme('イラスト');
  112. var_dump($a->getArticles(1));
  113.  
  114. } catch (Exception $e) {
  115.  
  116. echo 'Error: '.$e->getMessage();
  117.  
  118. }
Not running #stdin #stdout 0s 0KB
stdin
Standard input is empty
stdout
Standard output is empty