library( stringr)
library( dplyr)
library( purrr)
# Tally the atoms in a chemical formula.
#
# Splits `formula` into element symbols ("C", "Cl"), integer counts and
# parentheses, then resolves how many atoms each symbol occurrence contributes
# once the multipliers of all enclosing groups are applied.
#
# Returns a named numeric vector of totals, one entry per distinct element,
# ordered by element symbol. Returns `numeric(0)` when the formula contains no
# element symbols. Stops with an error when the formula cannot be parsed.
tally_formula <- function(formula) {
  # perl = TRUE keeps [A-Z] / [a-z] as plain ASCII ranges in every locale.
  tokens <- regmatches(
    formula,
    gregexpr("[A-Z][a-z]?|[0-9]+|[()]", formula, perl = TRUE)
  )[[1]]
  # Every character must belong to a token; anything left over is not part of
  # a formula (stray lowercase letters, whitespace, punctuation, ...).
  if (sum(nchar(tokens)) != nchar(formula)) {
    stop("Cannot parse chemical formula: ", formula, call. = FALSE)
  }

  is_number <- grepl("^[0-9]+$", tokens)
  amounts <- rep(NA_real_, length(tokens)) # filled in at symbol positions only
  scale <- 1           # stack of cumulative group multipliers, innermost last
  pending <- NA_real_  # count already read, waiting for its symbol or group

  # Walk right to left: a count always follows the symbol or ")" it applies
  # to, so it has been seen by the time that symbol or ")" is reached.
  for (i in rev(seq_along(tokens))) {
    token <- tokens[i]
    multiplier <- if (is.na(pending)) 1 else pending
    if (is_number[i]) {
      pending <- as.numeric(token)
    } else if (token == ")") {
      # Entering a group (from its right edge): everything inside is scaled
      # by the group's own count times the scale of the enclosing groups.
      scale <- c(scale, scale[length(scale)] * multiplier)
      pending <- NA_real_
    } else if (token == "(") {
      if (!is.na(pending) || length(scale) == 1L) {
        stop("Malformed chemical formula: ", formula, call. = FALSE)
      }
      scale <- scale[-length(scale)]
    } else {
      amounts[i] <- scale[length(scale)] * multiplier
      pending <- NA_real_
    }
  }
  # A leftover count (formula starts with a number) or an unclosed ")" group.
  if (!is.na(pending) || length(scale) != 1L) {
    stop("Malformed chemical formula: ", formula, call. = FALSE)
  }

  is_symbol <- !is.na(amounts)
  if (!any(is_symbol)) {
    return(numeric(0))
  }
  totals <- tapply(amounts[is_symbol], tokens[is_symbol], sum)
  setNames(as.vector(totals), names(totals))
}

#' Print the number of atoms of each element in a chemical formula
#'
#' Element symbols are one uppercase letter optionally followed by one
#' lowercase letter. A symbol or a parenthesised group may be followed by an
#' integer count (default 1); groups may be nested and their counts may have
#' several digits.
#'
#' One line is printed per distinct element, ordered by symbol, in the form
#' `<symbol>:\t <count>`. Counts are right-aligned to a common width. Nothing
#' is printed when the formula contains no element symbols.
#'
#' @param x A single character string holding the formula, e.g.
#'   `"C4H8(OH)2"`.
#' @return `NULL`, invisibly. The function is called for its printed output.
#' @examples
#' count("C6H12O6")
#' count("PbCl(NH3)2(COOH)2")
count <- function(x) {
  stopifnot(is.character(x), length(x) == 1L, !is.na(x))

  totals <- tally_formula(x)
  if (length(totals) > 0L) {
    # format() pads the counts to equal width so the column lines up.
    cat(paste0(names(totals), ":\t ", format(totals), "\n "), sep = "")
  }
  invisible(NULL)
}
# Demo run: report each sample formula in turn, writing a separator after
# every report. Wrapped in invisible() so the list returned by lapply() is not
# auto-printed at top level.
invisible(lapply(
  c("C6H12O6", "CCl2F2", "NaHCO3", "C4H8(OH)2", "PbCl(NH3)2(COOH)2"),
  function(sample_formula) {
    count(sample_formula)
    cat("\n ")
  }
))
bGlicmFyeShzdHJpbmdyKQpsaWJyYXJ5KGRwbHlyKQpsaWJyYXJ5KHB1cnJyKQoKY291bnQgPC0gZnVuY3Rpb24oeCkgewogIGxpdHRsZSA8LSBzdHJfZXh0cmFjdF9hbGwoeCwgIltBLVpdW2Etel0iKVtbMV1dCiAgb25lcyA8LSBzdHJfcmVwbGFjZV9hbGwoeCwgIltBLVpdW2Etel0/WzAtOV0rIiwgIiIpCiAgaWYob25lcyAhPSAiIikgewogICAgbm90b25lcyA8LSBzdHJfZXh0cmFjdF9hbGwoeCwgIltBLVpdW2Etel0/WzAtOV0rIilbWzFdXQogICAgZm9yKGkgaW4gc2VxX2Fsb25nKG5vdG9uZXMpKQogICAgICB4IDwtIHN0cl9yZXBsYWNlKHgsIG5vdG9uZXNbaV0sIHRvbG93ZXIobm90b25lc1tpXSkpCiAgICBvbmVzIDwtIHN0cl9leHRyYWN0X2FsbChvbmVzLCAiW0EtWl1bYS16XT8iKVtbMV1dCiAgICBvbmUgPC0gbWFwX2NocihvbmVzLCBmdW5jdGlvbih4KSByZXR1cm4oc3RyX2MoeCwgIjEiKSkpCiAgICBmb3IoaSBpbiBzZXFfYWxvbmcob25lcykpCiAgICAgIHggPC0gc3RyX3JlcGxhY2UoeCwgb25lc1tpXSwgdG9sb3dlcihvbmVbaV0pKQogICAgeCA8LSB0b3VwcGVyKHgpCiAgICBmb3IoaSBpbiBzZXFfYWxvbmcobGl0dGxlKSkKICAgICAgeCA8LSBzdHJfcmVwbGFjZSh4LCB0b3VwcGVyKGxpdHRsZVtpXSksIGxpdHRsZVtpXSkKICB9CiAgcmVwZWF0IHsKICAgIHBhcmVuIDwtIHN0cl9leHRyYWN0KHgsICJbXFwoXVtBLVphLXowLTldK1tcXCldWzAtOV0rIikKICAgIGlmKGlzLm5hKHBhcmVuKSkKICAgICAgYnJlYWsKICAgIG5vcGFyZW4gPC0gc3RyX3JlcGxhY2VfYWxsKHBhcmVuLCAiXFwofFxcKSIsICIiKQogICAgbm9wYXJlbiA8LSBzdHJfc3ViKG5vcGFyZW4sIDEsIC0yKQogICAgbnVtIDwtIGFzLm51bWVyaWMoc3RyX2V4dHJhY3RfYWxsKHBhcmVuLCAiWzAtOV0rIilbWzFdXSkKICAgIG51bTIgPC0gbnVtWy1sZW5ndGgobnVtKV0gKiBudW1bbGVuZ3RoKG51bSldCiAgICBudW0gPC0gYXMuY2hhcmFjdGVyKG51bSk7IG51bTIgPC0gYXMuY2hhcmFjdGVyKG51bTIpCiAgICBmb3IoaSBpbiBzZXFfYWxvbmcobnVtMikpCiAgICAgIG5vcGFyZW4gPC0gc3RyX3JlcGxhY2Uobm9wYXJlbiwgbnVtW2ldLCBudW0yW2ldKQogICAgcGFyZW4gPC0gc3RyX3JlcGxhY2VfYWxsKHBhcmVuLCBjKCJcXCgiID0gIlxcXFwoIiwgIlxcKSIgPSAiXFxcXCkiKSkKICAgIHggPC0gc3RyX3JlcGxhY2UoeCwgcGFyZW4sIG5vcGFyZW4pCiAgfQogIHggPC0gc3RyX2V4dHJhY3RfYWxsKHgsICIoW0EtWl1bYS16XT8pfChbMC05XSspIilbWzFdXQogIHggPC0gZGF0YS5mcmFtZShtYXRyaXgoeCwgbmNvbCA9IDIsIGJ5cm93ID0gVCksIHN0cmluZ3NBc0ZhY3RvcnMgPSBGKQogIHhbLDJdIDwtIGFzLm51bWVyaWMoeFssMl0pCiAgeCAlPD4lIGdyb3VwX2J5KFgxKSAlPiUgc3VtbWFyaXNlKHN1bShYMikpCiAgcmV0dXJuKGludmlzaWJsZShhcHBseSh4LCAxLCBjYXQsIHNlcCA9IGMoIjpcdCIsICIiKSwgIlxuIikpKQp9Cgpjb3VudCgiQzZI
MTJPNiIpOyBjYXQoIlxuIikKY291bnQoIkNDbDJGMiIpOyBjYXQoIlxuIikKY291bnQoIk5hSENPMyIpOyBjYXQoIlxuIikKY291bnQoIkM0SDgoT0gpMiIpOyBjYXQoIlxuIikKY291bnQoIlBiQ2woTkgzKTIoQ09PSCkyIik7IGNhdCgiXG4iKQ==