<?php
/**
 * Scraper for Ameblo (Ameba blog) pages.
 *
 * Typical use: setAmebaID(), optionally setTheme(), then getArticles().
 * Every failure is reported as an Exception (or a subclass of it), so a
 * single `catch (Exception $e)` at the call site covers all error paths.
 */
class AmebloRSS {
    /**
     * Root URL under which each blog lives; the Ameba ID is appended as a path segment.
     * The literal previously found here was a paste-site redaction placeholder
     * ('a...content-available-to-author-only...o.jp'), which is not a resolvable host.
     */
    const BASE_URL = 'http://ameblo.jp/';

    /** Numeric theme ID as a string; '' means "no theme filter". */
    private $theme_id = '';

    /** Blog owner's Ameba ID; '' means "not configured yet". */
    private $ameba_id = '';

    /**
     * Selects the blog to scrape.
     *
     * @param string $ameba_id Ameba ID of the blog owner.
     * @throws InvalidArgumentException If $ameba_id is not a string.
     */
    public function setAmebaID($ameba_id) {
        if (!is_string($ameba_id)) {
            throw new InvalidArgumentException('$ameba_id is not string');
        }
        $this->ameba_id = $ameba_id;
    }

    /**
     * Restricts getArticles() to one theme, resolving the theme name to its
     * numeric ID by looking for the theme link on the blog's top page.
     *
     * @param string $theme Theme name exactly as displayed on the blog.
     * @throws Exception                If no Ameba ID is set, the page cannot be
     *                                  fetched, or the theme link is not found.
     * @throws InvalidArgumentException If $theme is not a string.
     */
    public function setTheme($theme) {
        if ($this->ameba_id === '') {
            throw new Exception('AmebaID required');
        }
        if (!is_string($theme)) {
            throw new InvalidArgumentException('$theme is not string');
        }
        $url = $this->blogUrl();
        // Quote the dynamic parts so that '.', '(', '@' etc. in the URL or the
        // theme name are matched literally instead of corrupting the pattern.
        $pattern = '@<li><a href="'.preg_quote($url, '@').'theme-(\\d+)\\.html">'
            .preg_quote($theme, '@').' \\( \\d+ \\)</a></li>@u';
        if (!preg_match($pattern, $this->fetch($url), $matches)) {
            throw new Exception('Theme not found');
        }
        $this->theme_id = $matches[1];
    }

    /**
     * Fetches one listing page and extracts its entries.
     *
     * @param int $page 1-based page number (cast to int).
     * @return stdClass[] One object per entry with string properties
     *                    `date`, `title` and `text` (raw HTML fragments).
     * @throws Exception If no Ameba ID is set, the page cannot be fetched, or
     *                   the expected markup is not found.
     */
    public function getArticles($page=1) {
        $res = $this->getContents($page);
        if (!preg_match_all(
            '@<div class="entry(?: new)?">(.*?)</div><!--//\\.entry-->@us',
            $res,
            $matches,
            PREG_SET_ORDER
        )) {
            throw new Exception('Failed to fetch entries');
        }
        $p_date = '@'.
            '<div class="entry_head">\\s*?'.
            '<span class="date">(.*?)</span>\\s*?'.
            '</div><!--//\\.entry_head-->'.
            '@us';
        $p_title = '@'.
            '<h3 class="title">.*?'.
            '<a href="[^"]+">(.*?)</a>.*?'.
            '</h3>'.
            '@us';
        $p_text = '@'.
            '<div class="subContents">\\s'.
            '(?:\\s<!-- google_ad_section_start\\([^)]+\\) -->\\s|\\s\\s)?'.
            '(.*?)'.
            '(?:\\s<!-- google_ad_section_end\\([^)]+\\) -->\\s|\\s\\s)?'.
            '<!--entryBottom-->'.
            '@us';
        $articles = [];
        foreach ($matches as $key => $match) {
            // $match[1] is the inner HTML of a single entry <div>.
            $entry = $match[1];
            if (!preg_match($p_date, $entry, $m_date)) {
                throw new Exception('Failed to fetch date on entry No.'.($key+1));
            }
            if (!preg_match($p_title, $entry, $m_title)) {
                throw new Exception('Failed to fetch title on entry No.'.($key+1));
            }
            if (!preg_match($p_text, $entry, $m_text)) {
                throw new Exception('Failed to fetch text on entry No.'.($key+1));
            }
            $article = new stdClass;
            $article->date = $m_date[1];
            $article->title = $m_title[1];
            $article->text = $m_text[1];
            $articles[] = $article;
        }
        return $articles;
    }

    /**
     * Downloads the HTML of a listing page, honouring the theme filter if set.
     *
     * @param int $page 1-based page number (cast to int).
     * @return string Raw HTML of the page.
     * @throws Exception If no Ameba ID is set or the download fails.
     */
    private function getContents($page=1) {
        $page = (int)$page;
        if ($this->ameba_id === '') {
            throw new Exception('AmebaID required');
        }
        $url = $this->blogUrl();
        if ($this->theme_id !== '') {
            $url .= 'theme'.$page.'-'.$this->theme_id.'.html';
        } else {
            $url .= 'page-'.$page.'.html';
        }
        return $this->fetch($url);
    }

    /** Returns the top-page URL of the configured blog, with a trailing slash. */
    private function blogUrl() {
        return self::BASE_URL.$this->ameba_id.'/';
    }

    /**
     * Downloads $url and returns its body, converting a failed download into
     * an Exception instead of a PHP warning plus a `false` return value.
     *
     * @param string $url Absolute URL to download.
     * @return string Response body.
     * @throws Exception If the download fails.
     */
    private function fetch($url) {
        $warning = null;
        // Capture the warning text locally so it can be attached to the exception
        // rather than leaking into the output.
        set_error_handler(function ($errno, $errstr) use (&$warning) {
            $warning = $errstr;
            return true;
        });
        $body = file_get_contents($url);
        restore_error_handler();
        if ($body === false) {
            throw new Exception('Failed to fetch '.$url.($warning !== null ? ': '.$warning : ''));
        }
        return $body;
    }
}
/*
 * Demo: target the first page of the theme "イラスト" (illustrations) on
 * Shoko Nakagawa's ("Shokotan") blog.
 * NOTE(review): only the blog ID and theme are configured below; getArticles()
 * is never called in this snippet, so no entries are actually retrieved.
 */
try {
    $a = new AmebloRSS();
    $a->setAmebaID('nakagawa-shoko');
    $a->setTheme('イラスト');
}
catch (Exception $e) {
    // Any failure from the scraper ends up here and is printed as-is.
    echo 'Error: ', $e->getMessage();
}