fork download
  # Extract per-page word text and bounding-box attributes from an XML file.
  #
  # Results (all top-level objects, one element per <Page number="..."> node):
  #   PDF    - the parsed XML document (internal C-level nodes)
  #   pages  - list of <Page> nodes that carry a "number" attribute
  #   page   - list of the "number" attribute of each page
  #   values - list (per page) of the text of each Word/Text node
  #   pos    - list (per page) of the attributes of each Word/Box node
  library(XML)

  # Parse the XML file (presumably an XML export of a PDF - confirm).
  xml_path <- "CRF/Datasets/test.xml"

  # xmlTreeParse() treats a path that does not exist as literal XML content and
  # then fails with "XML content does not seem to be XML". Check for the file
  # up front so the real problem is reported.
  if (!file.exists(xml_path)) {
    stop(
      "Input file not found: '", xml_path, "' (working directory: ",
      getwd(), ")",
      call. = FALSE
    )
  }

  PDF <- xmlTreeParse(xml_path, useInternalNodes = TRUE)

  # Get the page/text/location information.
  pages <- getNodeSet(PDF, "//Page[@number]")
  page <- lapply(pages, function(node) xmlAttrs(node)["number"])

  # The XPath expressions below are relative to each page node ("./...").
  # A path starting with "//" is evaluated from the document root even when a
  # single node is supplied as context, which would return the words of every
  # page for each element of `pages`.
  values <- lapply(
    pages,
    xpathApply,
    path = "./Content/Para/Box/Word/Text",
    xmlValue
  )

  # Keep only Box elements that carry at least one attribute ([@*]).
  pos <- lapply(
    pages,
    xpathApply,
    path = "./Content/Para/Box/Word/Box[@*]",
    xmlAttrs
  )
Success #stdin #stdout #stderr 0.35s 47440KB
stdin
Standard input is empty
stdout
Standard output is empty
stderr
Error: XML content does not seem to be XML: 'CRF/Datasets/test.xml'
Execution halted