using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Xml;
using System.Xml.Linq;
// System.Net (WebClient)
using System.Net;
// SGMLReader
// http://d...content-available-to-author-only...h.com/SgmlReader
using Sgml;
namespace Ranking
{
/// <summary>
/// Downloads the ranking page, collects the id of the first video link found in
/// each <c>div class="ranking_box"</c>, fetches the tags of every such video and
/// saves the result as <c>ranking.xml</c> in the current directory.
/// </summary>
class Program
{
    // Path segment that precedes the video id in a ranking link,
    // e.g. href="watch/sm10000000" -> "sm10000000".
    const string WatchPrefix = "watch/";

    /// <summary>
    /// Entry point: builds the ranking cache and writes it to <c>ranking.xml</c>.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static void Main(string[] args)
    {
        string rankingUrl = "http://w...content-available-to-author-only...o.jp/ranking";
        var doc = LoadHtml(rankingUrl);

        // Boxes without a usable watch link are skipped instead of crashing the run.
        var ranking = from x in doc.Descendants("div")
                      let a = x.Attribute("class")
                      where a != null && a.Value == "ranking_box"
                      let videoId = ExtractVideoId(x)
                      where videoId != null
                      select videoId;

        /* ranking.xml
         * <ranking>
         * <video id="sm*">
         * <tag>*</tag>...
         * </video>...
         * </ranking>
         */
        var cache = new XDocument(
            new XElement("ranking",
                from id in ranking
                select new XElement("video",
                    new XAttribute("id", id),
                    from tag in GetTags(id)
                    select new XElement("tag", tag))));
        cache.Save("ranking.xml");
    }

    /// <summary>
    /// Extracts the video id from the first anchor carrying an <c>href</c>
    /// inside a ranking box.
    /// </summary>
    /// <param name="rankingBox">The ranking box element to search.</param>
    /// <returns>
    /// The text following <c>watch/</c> in the href, or <c>null</c> when the box
    /// has no anchor with an href, the href contains no <c>watch/</c> segment,
    /// or nothing follows that segment.
    /// </returns>
    static string ExtractVideoId(XElement rankingBox)
    {
        // The explicit XAttribute -> string conversion yields null for a missing attribute.
        string href = rankingBox.Descendants("a")
                                .Select(anchor => (string)anchor.Attribute("href"))
                                .FirstOrDefault(value => value != null);
        if (href == null)
        {
            return null;
        }

        // Search for the prefix rather than assuming it sits at index 0, so that
        // "/watch/sm1" or an absolute URL is handled as well as "watch/sm1".
        int prefixStart = href.IndexOf(WatchPrefix, System.StringComparison.Ordinal);
        if (prefixStart < 0)
        {
            return null;
        }

        string videoId = href.Substring(prefixStart + WatchPrefix.Length);
        return videoId.Length == 0 ? null : videoId;
    }

    /// <summary>
    /// Loads the getthumbinfo XML for a video and returns the text of every
    /// <c>tag</c> element it contains.
    /// </summary>
    /// <param name="videoId">Video id appended to the getthumbinfo URL.</param>
    /// <returns>The values of all <c>tag</c> descendants, in document order.</returns>
    static IEnumerable<string> GetTags(string videoId)
    {
        string getthumbinfoUrl = "http://e...content-available-to-author-only...o.jp/api/getthumbinfo/";
        var doc = XDocument.Load(getthumbinfoUrl + videoId);
        return from x in doc.Descendants("tag")
               select x.Value;
    }

    /// <summary>
    /// Downloads the resource at <paramref name="url"/>, decodes it as UTF-8 and
    /// parses it as HTML into an <see cref="XDocument"/>.
    /// </summary>
    /// <param name="url">Address of the HTML page to download.</param>
    /// <returns>The parsed document.</returns>
    static XDocument LoadHtml(string url)
    {
        // WebClient is disposable; own it here so it is released together with the stream.
        using (var client = new WebClient())
        using (var stream = client.OpenRead(url))
        using (var reader = new StreamReader(stream, Encoding.UTF8))
        {
            return ParseHtml(reader);
        }
    }

    /// <summary>
    /// Parses HTML text with SgmlReader and loads the result into an <see cref="XDocument"/>.
    /// </summary>
    /// <param name="reader">Source of the HTML text; the caller keeps ownership of it.</param>
    /// <returns>The document built from the reader's content, with names folded to lower case.</returns>
    static XDocument ParseHtml(TextReader reader)
    {
        var sgmlReader = new SgmlReader()
        {
            DocType = "HTML",
            WhitespaceHandling = WhitespaceHandling.All,
            CaseFolding = CaseFolding.ToLower,
            InputStream = reader,
        };
        return XDocument.Load(sgmlReader);
    }
}
}