fork download
  1. using System;
  2. using System.IO;
  3. using mshtml;
  4. using System.Xml;
  5.  
  /// <summary>
  /// Loads an HTML file through the mshtml parser and mirrors the resulting
  /// DOM tree into an <see cref="XmlDocument"/>, which is printed to stdout.
  /// </summary>
  class Html2Xml
  {
      /// <summary>
      /// Reads <c>test.html</c> (decoded as windows-1251), parses it with mshtml,
      /// copies the DOM below the parent of <c>body</c> into a <c>document</c>
      /// root element and writes that element's outer XML to the console.
      /// </summary>
      static void Main()
      {
          string html;

          // "using" disposes the reader (and the underlying file stream) even
          // when ReadToEnd throws; the manual Close() call leaked in that case.
          using (StreamReader reader = new StreamReader(
              File.OpenRead("test.html"),
              System.Text.Encoding.GetEncoding("windows-1251")))
          {
              html = reader.ReadToEnd();
          }

          IHTMLDocument2 document = new HTMLDocumentClass();
          document.write(html);

          XmlDocument doc = new XmlDocument();
          XmlNode root = doc.CreateElement("document");

          // Start from the parent of <body> so that siblings of body are copied too.
          getChild(root, (IHTMLDOMNode)document.body.parentElement);
          doc.AppendChild(root);
          Console.Write(root.OuterXml);
      }

      /// <summary>
      /// Recursively converts <paramref name="domnode"/> (and its subtree) into
      /// XML nodes appended to <paramref name="node"/>.
      /// </summary>
      /// <param name="node">XML node that receives the converted node as a child.</param>
      /// <param name="domnode">mshtml DOM node to convert.</param>
      static void getChild(XmlNode node, IHTMLDOMNode domnode)
      {
          XmlDocument owner = node.OwnerDocument;

          // Invariant lowering keeps names stable regardless of the current
          // culture (e.g. Turkish casing rules would turn "I" into a dotless i).
          string nodeName = domnode.nodeName.ToLowerInvariant();

          if (nodeName == "#comment")
          {
              XmlNode comment = owner.CreateNode(XmlNodeType.Comment, "", "");
              // Side effect: "--" is not allowed inside an XML comment, so every
              // occurrence is rewritten to "==" and the text is not preserved verbatim.
              comment.InnerText = domnode.nodeValue.ToString().Replace("--", "==");
              node.AppendChild(comment);
          }
          else if (nodeName == "#text")
          {
              XmlNode text = owner.CreateNode(XmlNodeType.Text, "", "");
              text.InnerText = domnode.nodeValue.ToString();
              node.AppendChild(text);
          }
          else if (nodeName.IndexOf('/') >= 0)
          {
              // A node name containing '/' cannot be used as an element name;
              // record it as an [ERROR] comment instead of creating an element.
              // NOTE(review): presumably these are stray closing tags reported
              // by mshtml - confirm.
              XmlNode comment = owner.CreateNode(XmlNodeType.Comment, "", "");
              comment.InnerText = "[ERROR]" + nodeName + "[/ERROR]";
              node.AppendChild(comment);
          }
          else
          {
              XmlNode nodeCurrent = owner.CreateElement(nodeName);

              IHTMLAttributeCollection collectionAttr = (IHTMLAttributeCollection)domnode.attributes;
              if (collectionAttr != null)
              {
                  for (int i = 0; i < collectionAttr.length; i++)
                  {
                      // item() takes its index by reference as a boxed object.
                      object index = i;
                      IHTMLDOMAttribute attribute = (IHTMLDOMAttribute)collectionAttr.item(ref index);

                      // Only attributes with a non-null, non-empty value are copied.
                      if (attribute.nodeValue != null && attribute.nodeValue.ToString() != "")
                      {
                          XmlAttribute attr = owner.CreateAttribute(attribute.nodeName.ToLowerInvariant());
                          // Side effect: every "about:blank" substring is removed from
                          // the value. NOTE(review): presumably mshtml injects it into
                          // URL attributes - confirm.
                          attr.Value = attribute.nodeValue.ToString().Replace("about:blank", "");
                          nodeCurrent.Attributes.Append(attr);
                      }
                  }
              }

              // Convert the children first, then attach the finished element.
              IHTMLDOMChildrenCollection collection = (IHTMLDOMChildrenCollection)domnode.childNodes;
              for (int i = 0; i < collection.length; i++)
              {
                  getChild(nodeCurrent, (IHTMLDOMNode)collection.item(i));
              }
              node.AppendChild(nodeCurrent);
          }
      }
  }
Compilation error #stdin compilation error #stdout 0s 0KB
stdin
Standard input is empty
compilation info
prog.cs(10,85): error CS1012: Too many characters in character literal
prog.cs(10,132): error CS1012: Too many characters in character literal
prog.cs(19,42): error CS1012: Too many characters in character literal
prog.cs(29,24): error CS1012: Too many characters in character literal
prog.cs(31,82): error CS1011: Empty character literal
prog.cs(31,86): error CS1011: Empty character literal
prog.cs(32,70): error CS1012: Too many characters in character literal
prog.cs(32,76): error CS1012: Too many characters in character literal
prog.cs(35,29): error CS1012: Too many characters in character literal
prog.cs(37,76): error CS1011: Empty character literal
prog.cs(37,80): error CS1011: Empty character literal
prog.cs(43,82): error CS1011: Empty character literal
prog.cs(43,86): error CS1011: Empty character literal
prog.cs(44,33): error CS1012: Too many characters in character literal
prog.cs(44,56): error CS1012: Too many characters in character literal
prog.cs(59,89): error CS1011: Empty character literal
prog.cs(62,77): error CS1012: Too many characters in character literal
prog.cs(62,92): error CS1011: Empty character literal
Compilation failed: 18 error(s), 0 warnings
stdout
Standard output is empty