import pandas as pd
def analyze_zero_variance(df_list, *, threshold=0.7):
    """Report and drop columns that have near-zero variance in most DataFrames.

    Only columns present in every DataFrame are analyzed. For each such
    column, the number of DataFrames in which its variance is below 1e-8 is
    counted and printed. Columns whose count is strictly greater than
    ``len(df_list) * threshold`` are dropped from every DataFrame.

    Args:
        df_list: Mutable list of Pandas DataFrames. Entries are replaced with
            new DataFrames lacking the dropped columns; the original
            DataFrame objects themselves are not modified.
        threshold: Fraction of DataFrames (default 0.7) in which a column
            must have near-zero variance before it is dropped.

    Returns:
        List of dropped column names, in the column order of the first
        DataFrame. Empty if nothing was dropped or ``df_list`` is empty.
    """
    # Guard: without this, indexing df_list[0] below raises IndexError.
    if not df_list:
        return []

    # Get common columns across all DataFrames
    common_columns = set(df_list[0].columns).intersection(
        *[set(df.columns) for df in df_list[1:]])
    print(common_columns)

    # Set iteration order is arbitrary; follow the first DataFrame's column
    # order so the report and the returned list are deterministic.
    ordered_columns = [
        column for column in df_list[0].columns if column in common_columns
    ]

    # Store statistics for common columns
    column_stats = {}
    for column in ordered_columns:
        variances = [df[column].var() for df in df_list]
        # A NaN variance compares False here, so it is not counted as zero.
        column_stats[column] = {
            'zero_variance_count': sum(
                variance < 1e-8 for variance in variances)
        }

    # Print analysis for common columns
    print("Zero Variance Analysis across DataFrames:")
    for column, stats in column_stats.items():
        print(
            f"Column: {column}, Zero Variance Count: {stats['zero_variance_count']}")

    # Drop columns with zero variance in more than the threshold of DataFrames
    cutoff = len(df_list) * threshold
    columns_to_drop = [
        column for column, stats in column_stats.items()
        if stats['zero_variance_count'] > cutoff
    ]
    if columns_to_drop:
        print(
            f"Dropping columns with zero variance in more than {threshold * 100}% of DataFrames: {columns_to_drop}")
        # DataFrame.drop returns a new frame, so rebind each list slot.
        for i, df in enumerate(df_list):
            df_list[i] = df.drop(columns=columns_to_drop)
    return columns_to_drop
# Example usage: two frames with identical contents, where column 'A' varies
# and column 'B' is constant.
df1 = pd.DataFrame({'A': [1, 2, 3], 'B': [0, 0, 0]})
df2 = df1.copy()

# The call prints the set of common columns, the zero-variance count for each
# of them, and a message naming the columns it drops.
columns_to_drop = analyze_zero_variance([df1, df2])

# 'B' has zero variance in both frames, so it is the column reported here.
print("Columns to drop:", columns_to_drop)
aW1wb3J0IHBhbmRhcyBhcyBwZAoKCmRlZiBhbmFseXplX3plcm9fdmFyaWFuY2UoZGZfbGlzdCk6CiAgICAiIiIKICAgIEFuYWx5emVzIGNvbHVtbnMgd2l0aCB6ZXJvIHZhcmlhbmNlIGluIGVhY2ggRGF0YUZyYW1lLCBjYWxjdWxhdGluZyBzdGF0aXN0aWNzIGFjcm9zcyBhbGwgRGF0YUZyYW1lcyBmb3IgY29tbW9uIGNvbHVtbnMuCgogICAgQXJnczoKICAgICAgICBkZl9saXN0OiBMaXN0IG9mIFBhbmRhcyBEYXRhRnJhbWVzLgoKICAgIFJldHVybnM6CiAgICAgICAgTm9uZS4gUHJpbnRzIHRoZSBhbmFseXNpcyBhbmQgbW9kaWZpZXMgRGF0YUZyYW1lcyBpbiBwbGFjZS4KICAgICIiIgogICAgUkVQT1NJVE9SWV9aRVJPVkFSX1RIUkVTSE9MRF9WQUxVRSA9IDAuNwoKICAgICMgR2V0IGNvbW1vbiBjb2x1bW5zIGFjcm9zcyBhbGwgRGF0YUZyYW1lcwogICAgY29tbW9uX2NvbHVtbnMgPSBzZXQoZGZfbGlzdFswXS5jb2x1bW5zKS5pbnRlcnNlY3Rpb24oCiAgICAgICAgKltzZXQoZGYuY29sdW1ucykgZm9yIGRmIGluIGRmX2xpc3RbMTpdXSkKICAgIHByaW50KGNvbW1vbl9jb2x1bW5zKQoKICAgICMgU3RvcmUgc3RhdGlzdGljcyBmb3IgY29tbW9uIGNvbHVtbnMKICAgIGNvbHVtbl9zdGF0cyA9IHt9CiAgICBmb3IgY29sdW1uIGluIGNvbW1vbl9jb2x1bW5zOgogICAgICAgIHZhcmlhbmNlcyA9IFtkZltjb2x1bW5dLnZhcigpIGZvciBkZiBpbiBkZl9saXN0XQogICAgICAgIGNvbHVtbl9zdGF0c1tjb2x1bW5dID0geyd6ZXJvX3ZhcmlhbmNlX2NvdW50Jzogc3VtKAogICAgICAgICAgICB2YXJpYW5jZSA8IDFlLTggZm9yIHZhcmlhbmNlIGluIHZhcmlhbmNlcyl9CgogICAgIyBQcmludCBhbmFseXNpcyBmb3IgY29tbW9uIGNvbHVtbnMKICAgIHByaW50KCJaZXJvIFZhcmlhbmNlIEFuYWx5c2lzIGFjcm9zcyBEYXRhRnJhbWVzOiIpCiAgICBmb3IgY29sdW1uLCBzdGF0cyBpbiBjb2x1bW5fc3RhdHMuaXRlbXMoKToKICAgICAgICBwcmludCgKICAgICAgICAgICAgZiJDb2x1bW46IHtjb2x1bW59LCBaZXJvIFZhcmlhbmNlIENvdW50OiB7c3RhdHNbJ3plcm9fdmFyaWFuY2VfY291bnQnXX0iKQoKICAgICMgRHJvcCBjb2x1bW5zIHdpdGggemVybyB2YXJpYW5jZSBpbiBtb3JlIHRoYW4gdGhlIHRocmVzaG9sZCBvZiBEYXRhRnJhbWVzCiAgICBjb2x1bW5zX3RvX2Ryb3AgPSBbY29sdW1uIGZvciBjb2x1bW4sIHN0YXRzIGluIGNvbHVtbl9zdGF0cy5pdGVtcygKICAgICkgaWYgc3RhdHNbJ3plcm9fdmFyaWFuY2VfY291bnQnXSA+IGxlbihkZl9saXN0KSAqIFJFUE9TSVRPUllfWkVST1ZBUl9USFJFU0hPTERfVkFMVUVdCiAgICBpZiBjb2x1bW5zX3RvX2Ryb3A6CiAgICAgICAgcHJpbnQoCiAgICAgICAgICAgIGYiRHJvcHBpbmcgY29sdW1ucyB3aXRoIHplcm8gdmFyaWFuY2UgaW4gbW9yZSB0aGFuIHtSRVBPU0lUT1JZX1pFUk9WQVJfVEhSRVNIT0xEX1ZBTFVFICogMTAwfSUgb2YgRGF0YUZyYW1lczoge2NvbHVtbnNfdG9fZHJvcH0i
KQogICAgICAgIGZvciBpLCBkZiBpbiBlbnVtZXJhdGUoZGZfbGlzdCk6CiAgICAgICAgICAgIGRmX2xpc3RbaV0gPSBkZi5kcm9wKGNvbHVtbnM9Y29sdW1uc190b19kcm9wKQogICAgcmV0dXJuIGNvbHVtbnNfdG9fZHJvcAoKCiMgRXhhbXBsZSB1c2FnZToKZGYxID0gcGQuRGF0YUZyYW1lKHsnQSc6IFsxLCAyLCAzXSwgJ0InOiBbMCwgMCwgMF19KQpkZjIgPSBwZC5EYXRhRnJhbWUoeydBJzogWzEsIDIsIDNdLCAnQic6IFswLCAwLCAwXX0pCgpjb2x1bW5zX3RvX2Ryb3AgPSBhbmFseXplX3plcm9fdmFyaWFuY2UoW2RmMSwgZGYyXSkKCiMgT3V0cHV0OgojIENvbW1vbiBjb2x1bW5zOiB7J0InLCAnQSd9CgpwcmludCgiQ29sdW1ucyB0byBkcm9wOiIsIGNvbHVtbnNfdG9fZHJvcCkK