In [1]:
import os
import pandas
import pandas_profiling
import pandas as pd
In [2]:
from rdkit import Chem
from rdkit import RDConfig
from rdkit.Chem import rdBase 
from rdkit.Chem import Descriptors
from rdkit.Chem.Descriptors import _descList
from rdkit.ML.Descriptors import MoleculeDescriptors
In [3]:
# Show which RDKit release is in use.
print(rdBase.rdkitVersion)

# NOTE: despite its name, `datadir` holds the path of a single file
# (cdk2.sdf), located under RDConfig.RDDocsDir.
datadir = os.path.join(
    RDConfig.RDDocsDir,
    "Book/data/cdk2.sdf",
)
2018.03.1
In [4]:
# Load the molecules from the SD file at `datadir`. Entries that the supplier
# yields as None are dropped. The filter uses an identity check
# (`is not None`) instead of `!= None`, which is the idiomatic way to test
# for None and does not go through the object's comparison operators.
mols = [mol for mol in Chem.SDMolSupplier(datadir) if mol is not None]
In [5]:
# Descriptor names: the first field of every entry in RDKit's _descList.
desc_name = [entry[0] for entry in _descList]

# A single calculator configured with all of those descriptor names.
calc = MoleculeDescriptors.MolecularDescriptorCalculator(desc_name)

# One result per molecule, in the same order as `mols`.
descs_list = list(map(calc.CalcDescriptors, mols))
In [6]:
# Sanity check: number of molecules processed ...
n_molecules = len(descs_list)
print(n_molecules)

# ... and number of descriptor values computed for the first molecule.
n_descriptors = len(descs_list[0])
print(n_descriptors)
47
200
In [7]:
# Column store for the descriptor table: one empty list per descriptor name,
# each list being a distinct object.
data = {name: [] for name in desc_name}
In [8]:
# Transpose the per-molecule descriptor tuples into per-descriptor columns.
#
# The column lists are re-created here first so that this cell is idempotent:
# previously, running it a second time appended every value again and silently
# doubled the number of rows in the resulting DataFrame.
data = {name: [] for name in desc_name}
for descs in descs_list:
    # The i-th value of each result belongs to the i-th descriptor name.
    for i, desc in enumerate(descs):
        data[desc_name[i]].append(desc)
In [9]:
# Build the descriptor table: dict keys become columns, list items become rows.
df = pd.DataFrame(data=data)
In [10]:
# Confirm the table dimensions as (rows, columns).
n_rows, n_cols = df.shape
print((n_rows, n_cols))
(47, 200)
In [11]:
# Generate the pandas_profiling report for `df`. Being the last expression of
# the cell, the report object is what the notebook displays as Out[11].
pandas_profiling.ProfileReport(df)
Out[11]:

Overview

Dataset info

Number of variables 200
Number of observations 47
Total Missing (%) 0.0%
Total size in memory 73.5 KiB
Average record size in memory 1.6 KiB

Variables types

Numeric 88
Categorical 0
Boolean 30
Date 0
Text (Unique) 0
Rejected 82
Unsupported 0

Warnings

Variables

BalabanJ
Numeric

Distinct count 47
Unique (%) 100.0%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 1.8103
Minimum 1.3819
Maximum 2.2301
Zeros (%) 0.0%

Quantile statistics

Minimum 1.3819
5-th percentile 1.4422
Q1 1.7177
Median 1.8139
Q3 1.9602
95-th percentile 2.1122
Maximum 2.2301
Range 0.84812
Interquartile range 0.24256

Descriptive statistics

Standard deviation 0.2222
Coef of variation 0.12274
Kurtosis -0.66544
Mean 1.8103
MAD 0.17499
Skewness -0.19796
Sum 85.084
Variance 0.049374
Memory size 456.0 B
Value Count Frequency (%)  
1.3819373749903585 1 2.1%
 
1.7615013546589138 1 2.1%
 
1.958285072599852 1 2.1%
 
1.9152512300224747 1 2.1%
 
1.7558787168786358 1 2.1%
 
1.9027960101581969 1 2.1%
 
1.772708398629469 1 2.1%
 
2.2094714831600495 1 2.1%
 
1.888811319292155 1 2.1%
 
1.4616715821922848 1 2.1%
 
Other values (37) 37 78.7%
 

Minimum 5 values

Value Count Frequency (%)  
1.3819373749903585 1 2.1%
 
1.4197594996856846 1 2.1%
 
1.4403844276954734 1 2.1%
 
1.4465903403303453 1 2.1%
 
1.4575107872011897 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
2.1033169853464777 1 2.1%
 
2.11063432318103 1 2.1%
 
2.112830775176637 1 2.1%
 
2.2094714831600495 1 2.1%
 
2.230053773831562 1 2.1%
 

BertzCT
Numeric

Distinct count 47
Unique (%) 100.0%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 910.07
Minimum 431.69
Maximum 1429.2
Zeros (%) 0.0%

Quantile statistics

Minimum 431.69
5-th percentile 537.16
Q1 779.62
Median 923.54
Q3 1067.9
95-th percentile 1183.5
Maximum 1429.2
Range 997.48
Interquartile range 288.25

Descriptive statistics

Standard deviation 214.91
Coef of variation 0.23615
Kurtosis -0.11151
Mean 910.07
MAD 167.91
Skewness -0.27822
Sum 42773
Variance 46186
Memory size 456.0 B
Value Count Frequency (%)  
1190.5524115796081 1 2.1%
 
919.782813876305 1 2.1%
 
863.473476037215 1 2.1%
 
751.5230858248627 1 2.1%
 
595.0654715822411 1 2.1%
 
1073.5346388880255 1 2.1%
 
1010.8350980656253 1 2.1%
 
1010.997808105413 1 2.1%
 
919.8612768919813 1 2.1%
 
576.8514995553256 1 2.1%
 
Other values (37) 37 78.7%
 

Minimum 5 values

Value Count Frequency (%)  
431.6869031615997 1 2.1%
 
520.3906562729576 1 2.1%
 
532.2473335919425 1 2.1%
 
548.6198751811987 1 2.1%
 
576.8514995553256 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
1156.4204088488125 1 2.1%
 
1167.0019697152761 1 2.1%
 
1190.5524115796081 1 2.1%
 
1244.460132560447 1 2.1%
 
1429.1689099763698 1 2.1%
 

Chi0
Numeric

Distinct count 44
Unique (%) 93.6%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 17.254
Minimum 11.665
Maximum 21.999
Zeros (%) 0.0%

Quantile statistics

Minimum 11.665
5-th percentile 12.385
Q1 15.315
Median 17.552
Q3 19.427
95-th percentile 21.677
Maximum 21.999
Range 10.334
Interquartile range 4.1118

Descriptive statistics

Standard deviation 2.9364
Coef of variation 0.17018
Kurtosis -0.8809
Mean 17.254
MAD 2.4626
Skewness -0.1985
Sum 810.95
Variance 8.6224
Memory size 456.0 B
Value Count Frequency (%)  
18.026733258565955 2 4.3%
 
15.97432701575866 2 4.3%
 
12.372032720186702 2 4.3%
 
14.535169427003234 1 2.1%
 
19.562267164498692 1 2.1%
 
21.69794165489025 1 2.1%
 
15.6480536021256 1 2.1%
 
15.363596551749426 1 2.1%
 
17.551677284948287 1 2.1%
 
21.79431797206757 1 2.1%
 
Other values (34) 34 72.3%
 

Minimum 5 values

Value Count Frequency (%)  
11.664925939000154 1 2.1%
 
12.372032720186702 2 4.3%
 
12.41384908344359 1 2.1%
 
12.535169427003233 1 2.1%
 
12.535169427003234 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
21.424074484064487 1 2.1%
 
21.629027554137913 1 2.1%
 
21.69794165489025 1 2.1%
 
21.79431797206757 1 2.1%
 
21.99927104214099 1 2.1%
 

Chi0n
Highly correlated

This variable is highly correlated with Chi0 and should be ignored for analysis

Correlation 0.95439

Chi0v
Highly correlated

This variable is highly correlated with Chi0n and should be ignored for analysis

Correlation 0.97055

Chi1
Highly correlated

This variable is highly correlated with Chi0v and should be ignored for analysis

Correlation 0.96555

Chi1n
Highly correlated

This variable is highly correlated with Chi1 and should be ignored for analysis

Correlation 0.93096

Chi1v
Highly correlated

This variable is highly correlated with Chi1 and should be ignored for analysis

Correlation 0.92115

Chi2n
Highly correlated

This variable is highly correlated with Chi1n and should be ignored for analysis

Correlation 0.9855

Chi2v
Highly correlated

This variable is highly correlated with Chi1v and should be ignored for analysis

Correlation 0.98354

Chi3n
Highly correlated

This variable is highly correlated with Chi2n and should be ignored for analysis

Correlation 0.96028

Chi3v
Highly correlated

This variable is highly correlated with Chi2v and should be ignored for analysis

Correlation 0.96444

Chi4n
Highly correlated

This variable is highly correlated with Chi3n and should be ignored for analysis

Correlation 0.97588

Chi4v
Highly correlated

This variable is highly correlated with Chi3v and should be ignored for analysis

Correlation 0.97064

EState_VSA1
Numeric

Distinct count 16
Unique (%) 34.0%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 5.4634
Minimum 0
Maximum 22.981
Zeros (%) 48.9%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 0
Median 4.9233
Q3 10.023
95-th percentile 16.531
Maximum 22.981
Range 22.981
Interquartile range 10.023

Descriptive statistics

Standard deviation 6.3781
Coef of variation 1.1674
Kurtosis -0.21888
Mean 5.4634
MAD 5.3701
Skewness 0.86892
Sum 256.78
Variance 40.68
Memory size 456.0 B
Value Count Frequency (%)  
0.0 23 48.9%
 
10.023291153407584 8 17.0%
 
6.031114512338072 3 6.4%
 
6.103966387748303 1 2.1%
 
17.450662825810237 1 2.1%
 
16.02530658838506 1 2.1%
 
4.923311048817671 1 2.1%
 
9.837253136417502 1 2.1%
 
22.981138120153723 1 2.1%
 
15.982845722151419 1 2.1%
 
Other values (6) 6 12.8%
 

Minimum 5 values

Value Count Frequency (%)  
0.0 23 48.9%
 
4.923311048817671 1 2.1%
 
5.817220841045895 1 2.1%
 
5.907179729351506 1 2.1%
 
5.969305287951849 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
15.982845722151419 1 2.1%
 
16.02530658838506 1 2.1%
 
16.74839682433057 1 2.1%
 
17.450662825810237 1 2.1%
 
22.981138120153723 1 2.1%
 

EState_VSA10
Numeric

Distinct count 20
Unique (%) 42.6%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 9.2329
Minimum 0
Maximum 23.425
Zeros (%) 17.0%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 4.7945
Median 9.5891
Q3 13.212
95-th percentile 19.991
Maximum 23.425
Range 23.425
Interquartile range 8.4178

Descriptive statistics

Standard deviation 6.3439
Coef of variation 0.68709
Kurtosis -0.48901
Mean 9.2329
MAD 5.026
Skewness 0.26443
Sum 433.95
Variance 40.245
Memory size 456.0 B
Value Count Frequency (%)  
0.0 8 17.0%
 
9.589074368143644 7 14.9%
 
13.212334168400758 7 14.9%
 
4.794537184071822 6 12.8%
 
5.106527394840706 3 6.4%
 
9.184952231746642 2 4.3%
 
14.908855452837393 1 2.1%
 
18.00687135247258 1 2.1%
 
22.559621226162943 1 2.1%
 
12.808212032003757 1 2.1%
 
Other values (10) 10 21.3%
 

Minimum 5 values

Value Count Frequency (%)  
0.0 8 17.0%
 
4.794537184071822 6 12.8%
 
4.907065243988282 1 2.1%
 
5.106527394840706 3 6.4%
 
8.417796984328938 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
18.53211525309451 1 2.1%
 
19.703392636909214 1 2.1%
 
20.11411936859394 1 2.1%
 
22.559621226162943 1 2.1%
 
23.425388958082166 1 2.1%
 

EState_VSA11
Constant

This variable is constant and should be ignored for analysis

Constant value 0

EState_VSA2
Numeric

Distinct count 34
Unique (%) 72.3%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 13.552
Minimum 0
Maximum 39.385
Zeros (%) 8.5%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 5.9278
Median 11.724
Q3 17.939
95-th percentile 29.564
Maximum 39.385
Range 39.385
Interquartile range 12.011

Descriptive statistics

Standard deviation 9.1961
Coef of variation 0.67858
Kurtosis 0.42712
Mean 13.552
MAD 7.2973
Skewness 0.79539
Sum 636.94
Variance 84.568
Memory size 456.0 B
Value Count Frequency (%)  
5.783244946364939 5 10.6%
 
0.0 4 8.5%
 
10.80266320486928 3 6.4%
 
23.333576669940456 2 4.3%
 
5.907179729351506 2 4.3%
 
5.948339280986494 2 4.3%
 
6.606881964512918 2 4.3%
 
11.594566004035068 1 2.1%
 
15.785682840813315 1 2.1%
 
30.13636057446112 1 2.1%
 
Other values (24) 24 51.1%
 

Minimum 5 values

Value Count Frequency (%)  
0.0 4 8.5%
 
4.895483475517775 1 2.1%
 
5.783244946364939 5 10.6%
 
5.907179729351506 2 4.3%
 
5.948339280986494 2 4.3%
 

Maximum 5 values

Value Count Frequency (%)  
27.51232375670481 1 2.1%
 
28.22851544154929 1 2.1%
 
30.13636057446112 1 2.1%
 
35.194554474408555 1 2.1%
 
39.38454138488406 1 2.1%
 

EState_VSA3
Numeric

Distinct count 41
Unique (%) 87.2%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 23.795
Minimum 5.5731
Maximum 47.768
Zeros (%) 0.0%

Quantile statistics

Minimum 5.5731
5-th percentile 11.138
Q1 16.938
Median 23.651
Q3 29.993
95-th percentile 41.157
Maximum 47.768
Range 42.195
Interquartile range 13.054

Descriptive statistics

Standard deviation 10.13
Coef of variation 0.42571
Kurtosis -0.57013
Mean 23.795
MAD 8.4334
Skewness 0.25848
Sum 1118.3
Variance 102.61
Memory size 456.0 B
Value Count Frequency (%)  
33.76559673947147 3 6.4%
 
11.26049080475283 3 6.4%
 
29.568654285493672 2 4.3%
 
16.93822404106412 2 4.3%
 
29.115114325973902 1 2.1%
 
22.02328619531015 1 2.1%
 
17.19917704736705 1 2.1%
 
24.855714870646676 1 2.1%
 
28.202145247774475 1 2.1%
 
23.65074823933228 1 2.1%
 
Other values (31) 31 66.0%
 

Minimum 5 values

Value Count Frequency (%)  
5.573104530069267 1 2.1%
 
5.948339280986494 1 2.1%
 
11.126902983393991 1 2.1%
 
11.16387793838399 1 2.1%
 
11.26049080475283 3 6.4%
 

Maximum 5 values

Value Count Frequency (%)  
35.51699356648017 1 2.1%
 
41.04689981183387 1 2.1%
 
41.20437984116373 1 2.1%
 
42.564349214943235 1 2.1%
 
47.76806352096108 1 2.1%
 

EState_VSA4
Numeric

Distinct count 31
Unique (%) 66.0%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 14.665
Minimum 0
Maximum 48.234
Zeros (%) 14.9%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 5.5635
Median 11.335
Q3 21.988
95-th percentile 35.546
Maximum 48.234
Range 48.234
Interquartile range 16.424

Descriptive statistics

Standard deviation 11.92
Coef of variation 0.81286
Kurtosis 0.84907
Mean 14.665
MAD 9.3584
Skewness 0.98201
Sum 689.25
Variance 142.1
Memory size 456.0 B
Value Count Frequency (%)  
0.0 7 14.9%
 
5.563451491696996 4 8.5%
 
12.841643245852019 3 6.4%
 
11.250837766380558 3 6.4%
 
11.3129633249809 3 6.4%
 
28.416347479996674 2 4.3%
 
12.611123474374185 1 2.1%
 
11.33456349545129 1 2.1%
 
19.448525210364938 1 2.1%
 
17.58469956131 1 2.1%
 
Other values (21) 21 44.7%
 

Minimum 5 values

Value Count Frequency (%)  
0.0 7 14.9%
 
4.472719515832414 1 2.1%
 
4.877147193701299 1 2.1%
 
5.563451491696996 4 8.5%
 
5.687386274683562 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
28.450472974722597 1 2.1%
 
33.62857851615511 1 2.1%
 
36.368140367983614 1 2.1%
 
47.321176000749816 1 2.1%
 
48.23438569513149 1 2.1%
 

EState_VSA5
Numeric

Distinct count 31
Unique (%) 66.0%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 15.165
Minimum 0
Maximum 51.367
Zeros (%) 19.1%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 5.8149
Median 12.133
Q3 21.764
95-th percentile 43.254
Maximum 51.367
Range 51.367
Interquartile range 15.949

Descriptive statistics

Standard deviation 13.288
Coef of variation 0.87623
Kurtosis 0.31663
Mean 15.165
MAD 10.672
Skewness 0.9114
Sum 712.77
Variance 176.58
Memory size 456.0 B
Value Count Frequency (%)  
0.0 9 19.1%
 
6.3273200747645415 3 6.4%
 
19.18040611960041 2 4.3%
 
19.26246486877803 2 4.3%
 
32.10410811463005 2 4.3%
 
24.26546827384644 2 4.3%
 
11.336785877934737 2 4.3%
 
12.13273413692322 2 4.3%
 
17.533629449547814 1 2.1%
 
44.23684225155327 1 2.1%
 
Other values (21) 21 44.7%
 

Minimum 5 values

Value Count Frequency (%)  
0.0 9 19.1%
 
4.899909730850478 1 2.1%
 
5.008912523954532 1 2.1%
 
5.563451491696996 1 2.1%
 
6.06636706846161 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
32.10410811463005 2 4.3%
 
41.307765179992415 1 2.1%
 
44.08838122925306 1 2.1%
 
44.23684225155327 1 2.1%
 
51.36657298340809 1 2.1%
 

EState_VSA6
Numeric

Distinct count 29
Unique (%) 61.7%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 15.512
Minimum 0
Maximum 42.042
Zeros (%) 17.0%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 6.3273
Median 18.33
Q3 24.265
95-th percentile 30.423
Maximum 42.042
Range 42.042
Interquartile range 17.938

Descriptive statistics

Standard deviation 10.945
Coef of variation 0.70558
Kurtosis -0.82097
Mean 15.512
MAD 9.3732
Skewness 0.13124
Sum 729.06
Variance 119.79
Memory size 456.0 B
Value Count Frequency (%)  
0.0 8 17.0%
 
6.3273200747645415 6 12.8%
 
24.26546827384644 3 6.4%
 
18.33268902674367 2 4.3%
 
12.13273413692322 2 4.3%
 
30.33183534230805 2 4.3%
 
6.196843571613076 2 4.3%
 
24.526421280149375 1 2.1%
 
18.3295777085363 1 2.1%
 
18.329577708536295 1 2.1%
 
Other values (19) 19 40.4%
 

Minimum 5 values

Value Count Frequency (%)  
0.0 8 17.0%
 
6.06636706846161 1 2.1%
 
6.196843571613076 2 4.3%
 
6.3273200747645415 6 12.8%
 
12.13273413692322 2 4.3%
 

Maximum 5 values

Value Count Frequency (%)  
29.77600754746382 1 2.1%
 
30.33183534230805 2 4.3%
 
30.462311845459517 1 2.1%
 
33.33787564355514 1 2.1%
 
42.04232950574588 1 2.1%
 

EState_VSA7
Numeric

Distinct count 26
Unique (%) 55.3%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 17.535
Minimum 0
Maximum 80.148
Zeros (%) 29.8%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 0
Median 12.394
Q3 28.91
95-th percentile 46.776
Maximum 80.148
Range 80.148
Interquartile range 28.91

Descriptive statistics

Standard deviation 18.307
Coef of variation 1.044
Kurtosis 2.8178
Mean 17.535
MAD 14.077
Skewness 1.4641
Sum 824.15
Variance 335.13
Memory size 456.0 B
Value Count Frequency (%)  
0.0 14 29.8%
 
12.13273413692322 4 8.5%
 
24.26546827384644 3 6.4%
 
6.923737199690624 2 4.3%
 
18.19910120538483 2 4.3%
 
30.33183534230805 2 4.3%
 
42.680042321019044 1 2.1%
 
23.579163975850747 1 2.1%
 
80.14755589061275 1 2.1%
 
20.771211599071872 1 2.1%
 
Other values (16) 16 34.0%
 

Minimum 5 values

Value Count Frequency (%)  
0.0 14 29.8%
 
6.06636706846161 1 2.1%
 
6.3273200747645415 1 2.1%
 
6.923737199690624 2 4.3%
 
7.04767198267719 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
36.39820241076966 1 2.1%
 
42.680042321019044 1 2.1%
 
48.53093654769288 1 2.1%
 
72.11857899371999 1 2.1%
 
80.14755589061275 1 2.1%
 

EState_VSA8
Numeric

Distinct count 33
Unique (%) 70.2%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 21.262
Minimum 0
Maximum 51.566
Zeros (%) 2.1%

Quantile statistics

Minimum 0
5-th percentile 10.301
Q1 15.32
Median 20.269
Q3 25.437
95-th percentile 38.919
Maximum 51.566
Range 51.566
Interquartile range 10.117

Descriptive statistics

Standard deviation 9.9968
Coef of variation 0.47017
Kurtosis 1.207
Mean 21.262
MAD 7.4506
Skewness 0.8501
Sum 999.33
Variance 99.936
Memory size 456.0 B
Value Count Frequency (%)  
10.30076712495354 4 8.5%
 
15.514152220608405 3 6.4%
 
19.935914083788834 3 6.4%
 
25.252702687795164 3 6.4%
 
10.633577208012662 2 4.3%
 
20.268724166847957 2 4.3%
 
20.339650593411957 2 4.3%
 
15.284745645900749 2 4.3%
 
15.617555728959871 2 4.3%
 
7.04767198267719 1 2.1%
 
Other values (23) 23 48.9%
 

Minimum 5 values

Value Count Frequency (%)  
0.0 1 2.1%
 
7.04767198267719 1 2.1%
 
10.30076712495354 4 8.5%
 
10.633577208012662 2 4.3%
 
15.14483896975307 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
34.1161985662292 1 2.1%
 
37.71824690777751 1 2.1%
 
39.432987170235535 1 2.1%
 
45.19981260681264 1 2.1%
 
51.56572130715875 1 2.1%
 

EState_VSA9
Numeric

Distinct count 15
Unique (%) 31.9%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 6.4339
Minimum 0
Maximum 16.204
Zeros (%) 25.5%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 2.2086
Median 4.7369
Q3 10.471
95-th percentile 15.775
Maximum 16.204
Range 16.204
Interquartile range 8.262

Descriptive statistics

Standard deviation 5.2172
Coef of variation 0.8109
Kurtosis -0.94327
Mean 6.4339
MAD 4.4307
Skewness 0.36317
Sum 302.39
Variance 27.219
Memory size 456.0 B
Value Count Frequency (%)  
0.0 12 25.5%
 
4.736862953800049 10 21.3%
 
10.470530430962235 6 12.8%
 
5.138973737607942 3 6.4%
 
16.20419790812442 2 4.3%
 
4.417150937053347 2 4.3%
 
11.600939890232516 2 4.3%
 
9.87583669140799 2 4.3%
 
14.951935562841626 2 4.3%
 
15.207393384762284 1 2.1%
 
Other values (5) 5 10.6%
 

Minimum 5 values

Value Count Frequency (%)  
0.0 12 25.5%
 
4.417150937053347 2 4.3%
 
4.736862953800049 10 21.3%
 
5.106527394840706 1 2.1%
 
5.138973737607942 3 6.4%
 

Maximum 5 values

Value Count Frequency (%)  
11.600939890232516 2 4.3%
 
14.951935562841626 2 4.3%
 
15.207393384762284 1 2.1%
 
16.018090827285864 1 2.1%
 
16.20419790812442 2 4.3%
 

ExactMolWt
Highly correlated

This variable is highly correlated with Chi2v and should be ignored for analysis

Correlation 0.91876

FpDensityMorgan1
Numeric

Distinct count 38
Unique (%) 80.9%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 1.1857
Minimum 0.875
Maximum 1.5294
Zeros (%) 0.0%

Quantile statistics

Minimum 0.875
5-th percentile 0.97667
Q1 1.1001
Median 1.16
Q3 1.2638
95-th percentile 1.485
Maximum 1.5294
Range 0.65441
Interquartile range 0.16366

Descriptive statistics

Standard deviation 0.15282
Coef of variation 0.12889
Kurtosis 0.084907
Mean 1.1857
MAD 0.11605
Skewness 0.49546
Sum 55.727
Variance 0.023353
Memory size 456.0 B
Value Count Frequency (%)  
1.1851851851851851 3 6.4%
 
1.0 3 6.4%
 
1.1304347826086956 2 4.3%
 
1.1071428571428572 2 4.3%
 
1.5294117647058822 2 4.3%
 
1.1538461538461537 2 4.3%
 
1.3888888888888888 2 4.3%
 
1.2777777777777777 1 2.1%
 
1.103448275862069 1 2.1%
 
1.0833333333333333 1 2.1%
 
Other values (28) 28 59.6%
 

Minimum 5 values

Value Count Frequency (%)  
0.875 1 2.1%
 
0.9130434782608695 1 2.1%
 
0.9666666666666667 1 2.1%
 
1.0 3 6.4%
 
1.0384615384615385 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
1.3888888888888888 2 4.3%
 
1.4 1 2.1%
 
1.45 1 2.1%
 
1.5 1 2.1%
 
1.5294117647058822 2 4.3%
 

FpDensityMorgan2
Highly correlated

This variable is highly correlated with FpDensityMorgan1 and should be ignored for analysis

Correlation 0.94207

FpDensityMorgan3
Highly correlated

This variable is highly correlated with FpDensityMorgan2 and should be ignored for analysis

Correlation 0.95303

FractionCSP3
Numeric

Distinct count 28
Unique (%) 59.6%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 0.24271
Minimum 0
Maximum 0.75
Zeros (%) 14.9%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 0.060662
Median 0.23529
Q3 0.4
95-th percentile 0.56486
Maximum 0.75
Range 0.75
Interquartile range 0.33934

Descriptive statistics

Standard deviation 0.19734
Coef of variation 0.81309
Kurtosis -0.59846
Mean 0.24271
MAD 0.16869
Skewness 0.48978
Sum 11.407
Variance 0.038945
Memory size 456.0 B
Value Count Frequency (%)  
0.0 7 14.9%
 
0.3888888888888889 3 6.4%
 
0.4 3 6.4%
 
0.2857142857142857 3 6.4%
 
0.05555555555555555 2 4.3%
 
0.11764705882352941 2 4.3%
 
0.17647058823529413 2 4.3%
 
0.23529411764705882 2 4.3%
 
0.42105263157894735 2 4.3%
 
0.0625 2 4.3%
 
Other values (18) 19 40.4%
 

Minimum 5 values

Value Count Frequency (%)  
0.0 7 14.9%
 
0.05 1 2.1%
 
0.05555555555555555 2 4.3%
 
0.058823529411764705 2 4.3%
 
0.0625 2 4.3%
 

Maximum 5 values

Value Count Frequency (%)  
0.5 1 2.1%
 
0.5217391304347826 1 2.1%
 
0.5833333333333334 1 2.1%
 
0.6363636363636364 1 2.1%
 
0.75 1 2.1%
 

HallKierAlpha
Numeric

Distinct count 43
Unique (%) 91.5%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean -2.79
Minimum -3.82
Maximum -1.34
Zeros (%) 0.0%

Quantile statistics

Minimum -3.82
5-th percentile -3.638
Q1 -3.19
Median -2.88
Q3 -2.38
95-th percentile -1.89
Maximum -1.34
Range 2.48
Interquartile range 0.81

Descriptive statistics

Standard deviation 0.55408
Coef of variation -0.1986
Kurtosis -0.14428
Mean -2.79
MAD 0.4417
Skewness 0.33081
Sum -131.13
Variance 0.30701
Memory size 456.0 B
Value Count Frequency (%)  
-2.82 2 4.3%
 
-2.91 2 4.3%
 
-1.89 2 4.3%
 
-2.67 2 4.3%
 
-2.6699999999999995 1 2.1%
 
-2.3799999999999994 1 2.1%
 
-2.3800000000000003 1 2.1%
 
-2.63 1 2.1%
 
-2.2299999999999995 1 2.1%
 
-2.8799999999999994 1 2.1%
 
Other values (33) 33 70.2%
 

Minimum 5 values

Value Count Frequency (%)  
-3.8200000000000007 1 2.1%
 
-3.759999999999999 1 2.1%
 
-3.6499999999999995 1 2.1%
 
-3.61 1 2.1%
 
-3.4899999999999998 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
-2.11 1 2.1%
 
-1.92 1 2.1%
 
-1.89 2 4.3%
 
-1.8499999999999999 1 2.1%
 
-1.34 1 2.1%
 

HeavyAtomCount
Highly correlated

This variable is highly correlated with ExactMolWt and should be ignored for analysis

Correlation 0.95542

HeavyAtomMolWt
Highly correlated

This variable is highly correlated with HeavyAtomCount and should be ignored for analysis

Correlation 0.93953

Ipc
Numeric

Distinct count 46
Unique (%) 97.9%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 2605100
Minimum 8906.1
Maximum 19964000
Zeros (%) 0.0%

Quantile statistics

Minimum 8906.1
5-th percentile 19126
Q1 167480
Median 549150
Q3 2700500
95-th percentile 15678000
Maximum 19964000
Range 19955000
Interquartile range 2533000

Descriptive statistics

Standard deviation 4731100
Coef of variation 1.8161
Kurtosis 6.1848
Mean 2605100
MAD 3051900
Skewness 2.6
Sum 122440000
Variance 22384000000000
Memory size 456.0 B
Value Count Frequency (%)  
26268.36467421516 2 4.3%
 
210862.91909084813 1 2.1%
 
490184.0314000329 1 2.1%
 
12924894.787254732 1 2.1%
 
4335492.234215635 1 2.1%
 
549153.7430934445 1 2.1%
 
13745.612645350257 1 2.1%
 
1253536.0756285046 1 2.1%
 
44578.78719746831 1 2.1%
 
3163133.936023198 1 2.1%
 
Other values (36) 36 76.6%
 

Minimum 5 values

Value Count Frequency (%)  
8906.082188589473 1 2.1%
 
13745.612645350257 1 2.1%
 
17122.061661363517 1 2.1%
 
23801.971190025786 1 2.1%
 
26205.446935544096 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
8057941.354783535 1 2.1%
 
12924894.787254732 1 2.1%
 
16857273.83867562 1 2.1%
 
17146666.61370909 1 2.1%
 
19964091.21966194 1 2.1%
 

Kappa1
Highly correlated

This variable is highly correlated with HeavyAtomMolWt and should be ignored for analysis

Correlation 0.92616

Kappa2
Highly correlated

This variable is highly correlated with Kappa1 and should be ignored for analysis

Correlation 0.92852

Kappa3
Highly correlated

This variable is highly correlated with Kappa2 and should be ignored for analysis

Correlation 0.93825

LabuteASA
Highly correlated

This variable is highly correlated with Kappa1 and should be ignored for analysis

Correlation 0.9632

MaxAbsEStateIndex
Numeric

Distinct count 47
Unique (%) 100.0%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 11.053
Minimum 4.6384
Maximum 14.487
Zeros (%) 0.0%

Quantile statistics

Minimum 4.6384
5-th percentile 5.1329
Q1 10.31
Median 12.002
Q3 12.761
95-th percentile 14.075
Maximum 14.487
Range 9.8488
Interquartile range 2.4518

Descriptive statistics

Standard deviation 2.6954
Coef of variation 0.24387
Kurtosis 0.63909
Mean 11.053
MAD 2.0424
Skewness -1.293
Sum 519.49
Variance 7.2654
Memory size 456.0 B
Value Count Frequency (%)  
11.902578892668178 1 2.1%
 
12.263774644116907 1 2.1%
 
12.749223172293608 1 2.1%
 
11.56040299056675 1 2.1%
 
9.939345202212733 1 2.1%
 
12.388163081590662 1 2.1%
 
5.750416666666666 1 2.1%
 
5.60375 1 2.1%
 
14.487162462207102 1 2.1%
 
12.991849332325524 1 2.1%
 
Other values (37) 37 78.7%
 

Minimum 5 values

Value Count Frequency (%)  
4.638408919123204 1 2.1%
 
4.918883692365836 1 2.1%
 
4.931147329302092 1 2.1%
 
5.60375 1 2.1%
 
5.750416666666666 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
13.429094456729738 1 2.1%
 
13.85597556836481 1 2.1%
 
14.168993034733024 1 2.1%
 
14.195828845427059 1 2.1%
 
14.487162462207102 1 2.1%
 

MaxAbsPartialCharge
Numeric

Distinct count 46
Unique (%) 97.9%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 0.42703
Minimum 0.31988
Maximum 0.54476
Zeros (%) 0.0%

Quantile statistics

Minimum 0.31988
5-th percentile 0.3246
Q1 0.36086
Median 0.46794
Q3 0.49192
95-th percentile 0.50419
Maximum 0.54476
Range 0.22488
Interquartile range 0.13106

Descriptive statistics

Standard deviation 0.07015
Coef of variation 0.16427
Kurtosis -1.6377
Mean 0.42703
MAD 0.065711
Skewness -0.22502
Sum 20.071
Variance 0.004921
Memory size 456.0 B
Value Count Frequency (%)  
0.49676780293422446 2 4.3%
 
0.3942429776457718 1 2.1%
 
0.475649792651802 1 2.1%
 
0.5073749843389314 1 2.1%
 
0.36396344145897525 1 2.1%
 
0.47377150619523367 1 2.1%
 
0.49452310882520173 1 2.1%
 
0.33512722119697014 1 2.1%
 
0.5073731770211586 1 2.1%
 
0.36117679620892407 1 2.1%
 
Other values (36) 36 76.6%
 

Minimum 5 values

Value Count Frequency (%)  
0.319875074570425 1 2.1%
 
0.3199298862486836 1 2.1%
 
0.3241691545836485 1 2.1%
 
0.3256216644678915 1 2.1%
 
0.3314884111964799 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
0.49676780293422446 2 4.3%
 
0.4967682000196908 1 2.1%
 
0.5073731770211586 1 2.1%
 
0.5073749843389314 1 2.1%
 
0.5447586967593686 1 2.1%
 

MaxEStateIndex
Highly correlated

This variable is highly correlated with MaxAbsEStateIndex and should be ignored for analysis

Correlation 1

MaxPartialCharge
Numeric

Distinct count 47
Unique (%) 100.0%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 0.25511
Minimum 0.19659
Maximum 0.33949
Zeros (%) 0.0%

Quantile statistics

Minimum 0.19659
5-th percentile 0.20886
Q1 0.22727
Median 0.24718
Q3 0.2663
95-th percentile 0.33059
Maximum 0.33949
Range 0.1429
Interquartile range 0.039029

Descriptive statistics

Standard deviation 0.035007
Coef of variation 0.13723
Kurtosis 0.40272
Mean 0.25511
MAD 0.026073
Skewness 0.87782
Sum 11.99
Variance 0.0012255
Memory size 456.0 B
Value Count Frequency (%)  
0.3243585838266846 1 2.1%
 
0.24674795609732067 1 2.1%
 
0.24683321626963028 1 2.1%
 
0.226584694111752 1 2.1%
 
0.2467477700118147 1 2.1%
 
0.23754770438026707 1 2.1%
 
0.3394868459051923 1 2.1%
 
0.26896746349428763 1 2.1%
 
0.2272748198382875 1 2.1%
 
0.20375863775158506 1 2.1%
 
Other values (37) 37 78.7%
 

Minimum 5 values

Value Count Frequency (%)  
0.1965913936066256 1 2.1%
 
0.19784209098929767 1 2.1%
 
0.20375863775158506 1 2.1%
 
0.22076670748686214 1 2.1%
 
0.22636219130311688 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
0.31614116540951925 1 2.1%
 
0.3243585838266846 1 2.1%
 
0.33325471439319077 1 2.1%
 
0.3332795472970587 1 2.1%
 
0.3394868459051923 1 2.1%
 

MinAbsEStateIndex
Numeric

Distinct count 47
Unique (%) 100.0%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 0.1263
Minimum 0.00043514
Maximum 0.5685
Zeros (%) 0.0%

Quantile statistics

Minimum 0.00043514
5-th percentile 0.0063875
Q1 0.025311
Median 0.099763
Q3 0.17353
95-th percentile 0.44703
Maximum 0.5685
Range 0.56807
Interquartile range 0.14822

Descriptive statistics

Standard deviation 0.13397
Coef of variation 1.0607
Kurtosis 3.1728
Mean 0.1263
MAD 0.095823
Skewness 1.7752
Sum 5.9363
Variance 0.017948
Memory size 456.0 B
Value Count Frequency (%)  
0.19934258734357946 1 2.1%
 
0.0778170141513399 1 2.1%
 
0.05385601550443608 1 2.1%
 
0.027342559319922355 1 2.1%
 
0.005501753170404022 1 2.1%
 
0.1404626293686535 1 2.1%
 
0.13897014361300064 1 2.1%
 
0.00043514406068534406 1 2.1%
 
0.17398289871504136 1 2.1%
 
0.11772864701436103 1 2.1%
 
Other values (37) 37 78.7%
 

Minimum 5 values

Value Count Frequency (%)  
0.00043514406068534406 1 2.1%
 
0.0033228752168148468 1 2.1%
 
0.005501753170404022 1 2.1%
 
0.008454270597127334 1 2.1%
 
0.008618863517784181 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
0.26564319097254896 1 2.1%
 
0.4215512256836208 1 2.1%
 
0.4579482132359113 1 2.1%
 
0.5107353342571599 1 2.1%
 
0.5685010393046108 1 2.1%
 

MinAbsPartialCharge
Highly correlated

This variable is highly correlated with MaxPartialCharge and should be ignored for analysis

Correlation 1

MinEStateIndex
Numeric

Distinct count 47
Unique (%) 100.0%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean -1.1712
Minimum -4.423
Maximum 0.5685
Zeros (%) 0.0%

Quantile statistics

Minimum -4.423
5-th percentile -3.7964
Q1 -3.1858
Median -0.40149
Q3 -0.074142
95-th percentile 0.44703
Maximum 0.5685
Range 4.9915
Interquartile range 3.1116

Descriptive statistics

Standard deviation 1.6197
Coef of variation -1.383
Kurtosis -0.94199
Mean -1.1712
MAD 1.3992
Skewness -0.90899
Sum -55.047
Variance 2.6235
Memory size 456.0 B
Value Count Frequency (%)  
0.19934258734357946 1 2.1%
 
-3.7366326423246066 1 2.1%
 
-0.42344381620900173 1 2.1%
 
-0.4901570767195764 1 2.1%
 
-0.5121725101220622 1 2.1%
 
-0.706792345539039 1 2.1%
 
-0.21605552818260243 1 2.1%
 
-0.3286177324592723 1 2.1%
 
0.027342559319922355 1 2.1%
 
-1.3454907659967874 1 2.1%
 
Other values (37) 37 78.7%
 

Minimum 5 values

Value Count Frequency (%)  
-4.422979484231964 1 2.1%
 
-3.862497818514457 1 2.1%
 
-3.815147725160698 1 2.1%
 
-3.7527354218202444 1 2.1%
 
-3.7459285144410086 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
0.21332792684975233 1 2.1%
 
0.4215512256836208 1 2.1%
 
0.4579482132359113 1 2.1%
 
0.5107353342571599 1 2.1%
 
0.5685010393046108 1 2.1%
 

MinPartialCharge
Numeric

Distinct count 46
Unique (%) 97.9%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean -0.42703
Minimum -0.54476
Maximum -0.31988
Zeros (%) 0.0%

Quantile statistics

Minimum -0.54476
5-th percentile -0.50419
Q1 -0.49192
Median -0.46794
Q3 -0.36086
95-th percentile -0.3246
Maximum -0.31988
Range 0.22488
Interquartile range 0.13106

Descriptive statistics

Standard deviation 0.07015
Coef of variation -0.16427
Kurtosis -1.6377
Mean -0.42703
MAD 0.065711
Skewness 0.22502
Sum -20.071
Variance 0.004921
Memory size 456.0 B
Value Count Frequency (%)  
-0.49676780293422446 2 4.3%
 
-0.3942429776457718 1 2.1%
 
-0.475649792651802 1 2.1%
 
-0.5073749843389314 1 2.1%
 
-0.36396344145897525 1 2.1%
 
-0.47377150619523367 1 2.1%
 
-0.49452310882520173 1 2.1%
 
-0.33512722119697014 1 2.1%
 
-0.5073731770211586 1 2.1%
 
-0.36117679620892407 1 2.1%
 
Other values (36) 36 76.6%
 

Minimum 5 values

Value Count Frequency (%)  
-0.5447586967593686 1 2.1%
 
-0.5073749843389314 1 2.1%
 
-0.5073731770211586 1 2.1%
 
-0.4967682000196908 1 2.1%
 
-0.49676780293422446 2 4.3%
 

Maximum 5 values

Value Count Frequency (%)  
-0.3314884111964799 1 2.1%
 
-0.3256216644678915 1 2.1%
 
-0.3241691545836485 1 2.1%
 
-0.3199298862486836 1 2.1%
 
-0.319875074570425 1 2.1%
 

MolLogP
Numeric

Distinct count 47
Unique (%) 100.0%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 2.4404
Minimum -0.4075
Maximum 4.4085
Zeros (%) 0.0%

Quantile statistics

Minimum -0.4075
5-th percentile 0.58772
Q1 1.8761
Median 2.5965
Q3 3.1886
95-th percentile 4.0081
Maximum 4.4085
Range 4.816
Interquartile range 1.3125

Descriptive statistics

Standard deviation 1.0694
Coef of variation 0.43821
Kurtosis -0.034565
Mean 2.4404
MAD 0.86096
Skewness -0.39568
Sum 114.7
Variance 1.1436
Memory size 456.0 B
Value Count Frequency (%)  
3.767900000000002 1 2.1%
 
2.3758999999999992 1 2.1%
 
4.233000000000002 1 2.1%
 
1.8941999999999999 1 2.1%
 
1.8188999999999997 1 2.1%
 
2.1258999999999997 1 2.1%
 
2.2289 1 2.1%
 
2.6415000000000006 1 2.1%
 
2.1138 1 2.1%
 
4.055600000000002 1 2.1%
 
Other values (37) 37 78.7%
 

Minimum 5 values

Value Count Frequency (%)  
-0.40750000000000036 1 2.1%
 
0.4928999999999992 1 2.1%
 
0.5389999999999999 1 2.1%
 
0.7013999999999998 1 2.1%
 
0.7535000000000005 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
3.843320000000001 1 2.1%
 
3.8972000000000016 1 2.1%
 
4.055600000000002 1 2.1%
 
4.233000000000002 1 2.1%
 
4.408500000000003 1 2.1%
 

MolMR
Highly correlated

This variable is highly correlated with LabuteASA and should be ignored for analysis

Correlation 0.99153

MolWt
Highly correlated

This variable is highly correlated with MolMR and should be ignored for analysis

Correlation 0.94531

NHOHCount
Numeric

Distinct count 6
Unique (%) 12.8%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 3.1489
Minimum 1
Maximum 6
Zeros (%) 0.0%

Quantile statistics

Minimum 1
5-th percentile 1
Q1 2
Median 3
Q3 4
95-th percentile 5
Maximum 6
Range 5
Interquartile range 2

Descriptive statistics

Standard deviation 1.2155
Coef of variation 0.38599
Kurtosis -0.35013
Mean 3.1489
MAD 0.94975
Skewness 0.0060791
Sum 148
Variance 1.4773
Memory size 456.0 B
Value Count Frequency (%)  
3 16 34.0%
 
4 12 25.5%
 
2 8 17.0%
 
5 5 10.6%
 
1 5 10.6%
 
6 1 2.1%
 

Minimum 5 values

Value Count Frequency (%)  
1 5 10.6%
 
2 8 17.0%
 
3 16 34.0%
 
4 12 25.5%
 
5 5 10.6%
 

Maximum 5 values

Value Count Frequency (%)  
2 8 17.0%
 
3 16 34.0%
 
4 12 25.5%
 
5 5 10.6%
 
6 1 2.1%
 

NOCount
Numeric

Distinct count 7
Unique (%) 14.9%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 6.9362
Minimum 4
Maximum 10
Zeros (%) 0.0%

Quantile statistics

Minimum 4
5-th percentile 5
Q1 6
Median 7
Q3 8
95-th percentile 9
Maximum 10
Range 6
Interquartile range 2

Descriptive statistics

Standard deviation 1.3895
Coef of variation 0.20032
Kurtosis -0.3232
Mean 6.9362
MAD 1.0602
Skewness -0.033703
Sum 326
Variance 1.9306
Memory size 456.0 B
Value Count Frequency (%)  
7 15 31.9%
 
6 10 21.3%
 
8 8 17.0%
 
9 6 12.8%
 
5 5 10.6%
 
4 2 4.3%
 
10 1 2.1%
 

Minimum 5 values

Value Count Frequency (%)  
4 2 4.3%
 
5 5 10.6%
 
6 10 21.3%
 
7 15 31.9%
 
8 8 17.0%
 

Maximum 5 values

Value Count Frequency (%)  
6 10 21.3%
 
7 15 31.9%
 
8 8 17.0%
 
9 6 12.8%
 
10 1 2.1%
 

NumAliphaticCarbocycles
Numeric

Distinct count 4
Unique (%) 8.5%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 0.34043
Minimum 0
Maximum 3
Zeros (%) 72.3%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 0
Median 0
Q3 1
95-th percentile 1
Maximum 3
Range 3
Interquartile range 1

Descriptive statistics

Standard deviation 0.63508
Coef of variation 1.8656
Kurtosis 5.9535
Mean 0.34043
MAD 0.49253
Skewness 2.2336
Sum 16
Variance 0.40333
Memory size 456.0 B
Value Count Frequency (%)  
0 34 72.3%
 
1 11 23.4%
 
3 1 2.1%
 
2 1 2.1%
 

Minimum 5 values

Value Count Frequency (%)  
0 34 72.3%
 
1 11 23.4%
 
2 1 2.1%
 
3 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
0 34 72.3%
 
1 11 23.4%
 
2 1 2.1%
 
3 1 2.1%
 

NumAliphaticHeterocycles
Numeric

Distinct count 3
Unique (%) 6.4%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 0.59574
Minimum 0
Maximum 2
Zeros (%) 55.3%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 0
Median 0
Q3 1
95-th percentile 2
Maximum 2
Range 2
Interquartile range 1

Descriptive statistics

Standard deviation 0.7419
Coef of variation 1.2453
Kurtosis -0.68229
Mean 0.59574
MAD 0.65912
Skewness 0.82353
Sum 28
Variance 0.55042
Memory size 456.0 B
Value Count Frequency (%)  
0 26 55.3%
 
1 14 29.8%
 
2 7 14.9%
 

Minimum 5 values

Value Count Frequency (%)  
0 26 55.3%
 
1 14 29.8%
 
2 7 14.9%
 

Maximum 5 values

Value Count Frequency (%)  
0 26 55.3%
 
1 14 29.8%
 
2 7 14.9%
 

NumAliphaticRings
Numeric

Distinct count 4
Unique (%) 8.5%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 0.93617
Minimum 0
Maximum 3
Zeros (%) 29.8%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 0
Median 1
Q3 1
95-th percentile 2
Maximum 3
Range 3
Interquartile range 1

Descriptive statistics

Standard deviation 0.76341
Coef of variation 0.81546
Kurtosis -0.2581
Mean 0.93617
MAD 0.55772
Skewness 0.41571
Sum 44
Variance 0.58279
Memory size 456.0 B
Value Count Frequency (%)  
1 23 48.9%
 
0 14 29.8%
 
2 9 19.1%
 
3 1 2.1%
 

Minimum 5 values

Value Count Frequency (%)  
0 14 29.8%
 
1 23 48.9%
 
2 9 19.1%
 
3 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
0 14 29.8%
 
1 23 48.9%
 
2 9 19.1%
 
3 1 2.1%
 

NumAromaticCarbocycles
Numeric

Distinct count 3
Unique (%) 6.4%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 1.1702
Minimum 0
Maximum 2
Zeros (%) 19.1%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 1
Median 1
Q3 2
95-th percentile 2
Maximum 2
Range 2
Interquartile range 1

Descriptive statistics

Standard deviation 0.73186
Coef of variation 0.62541
Kurtosis -1.0515
Mean 1.1702
MAD 0.60027
Skewness -0.27878
Sum 55
Variance 0.53562
Memory size 456.0 B
Value Count Frequency (%)  
1 21 44.7%
 
2 17 36.2%
 
0 9 19.1%
 

Minimum 5 values

Value Count Frequency (%)  
0 9 19.1%
 
1 21 44.7%
 
2 17 36.2%
 

Maximum 5 values

Value Count Frequency (%)  
0 9 19.1%
 
1 21 44.7%
 
2 17 36.2%
 

NumAromaticHeterocycles
Numeric

Distinct count 4
Unique (%) 8.5%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 1.4681
Minimum 0
Maximum 3
Zeros (%) 10.6%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 1
Median 2
Q3 2
95-th percentile 2.7
Maximum 3
Range 3
Interquartile range 1

Descriptive statistics

Standard deviation 0.77603
Coef of variation 0.5286
Kurtosis -0.31754
Mean 1.4681
MAD 0.67089
Skewness -0.18079
Sum 69
Variance 0.60222
Memory size 456.0 B
Value Count Frequency (%)  
2 21 44.7%
 
1 18 38.3%
 
0 5 10.6%
 
3 3 6.4%
 

Minimum 5 values

Value Count Frequency (%)  
0 5 10.6%
 
1 18 38.3%
 
2 21 44.7%
 
3 3 6.4%
 

Maximum 5 values

Value Count Frequency (%)  
0 5 10.6%
 
1 18 38.3%
 
2 21 44.7%
 
3 3 6.4%
 

NumAromaticRings
Numeric

Distinct count 4
Unique (%) 8.5%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 2.6383
Minimum 1
Maximum 4
Zeros (%) 0.0%

Quantile statistics

Minimum 1
5-th percentile 2
Q1 2
Median 3
Q3 3
95-th percentile 4
Maximum 4
Range 3
Interquartile range 1

Descriptive statistics

Standard deviation 0.67326
Coef of variation 0.25519
Kurtosis -0.25202
Mean 2.6383
MAD 0.58579
Skewness 0.13762
Sum 124
Variance 0.45328
Memory size 456.0 B
Value Count Frequency (%)  
3 23 48.9%
 
2 19 40.4%
 
4 4 8.5%
 
1 1 2.1%
 

Minimum 5 values

Value Count Frequency (%)  
1 1 2.1%
 
2 19 40.4%
 
3 23 48.9%
 
4 4 8.5%
 

Maximum 5 values

Value Count Frequency (%)  
1 1 2.1%
 
2 19 40.4%
 
3 23 48.9%
 
4 4 8.5%
 

NumHAcceptors
Numeric

Distinct count 8
Unique (%) 17.0%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 5.4681
Minimum 2
Maximum 9
Zeros (%) 0.0%

Quantile statistics

Minimum 2
5-th percentile 3
Q1 4
Median 6
Q3 6
95-th percentile 7.7
Maximum 9
Range 7
Interquartile range 2

Descriptive statistics

Standard deviation 1.5583
Coef of variation 0.28498
Kurtosis 0.16816
Mean 5.4681
MAD 1.2467
Skewness -0.046074
Sum 257
Variance 2.4283
Memory size 456.0 B
Value Count Frequency (%)  
6 14 29.8%
 
5 9 19.1%
 
4 9 19.1%
 
7 8 17.0%
 
9 2 4.3%
 
3 2 4.3%
 
2 2 4.3%
 
8 1 2.1%
 

Minimum 5 values

Value Count Frequency (%)  
2 2 4.3%
 
3 2 4.3%
 
4 9 19.1%
 
5 9 19.1%
 
6 14 29.8%
 

Maximum 5 values

Value Count Frequency (%)  
5 9 19.1%
 
6 14 29.8%
 
7 8 17.0%
 
8 1 2.1%
 
9 2 4.3%
 

NumHDonors
Numeric

Distinct count 5
Unique (%) 10.6%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 2.6383
Minimum 1
Maximum 5
Zeros (%) 0.0%

Quantile statistics

Minimum 1
5-th percentile 1
Q1 2
Median 3
Q3 3
95-th percentile 4
Maximum 5
Range 4
Interquartile range 1

Descriptive statistics

Standard deviation 0.87042
Coef of variation 0.32992
Kurtosis 0.39502
Mean 2.6383
MAD 0.70167
Skewness -0.033012
Sum 124
Variance 0.75763
Memory size 456.0 B
Value Count Frequency (%)  
3 24 51.1%
 
2 13 27.7%
 
1 5 10.6%
 
4 4 8.5%
 
5 1 2.1%
 

Minimum 5 values

Value Count Frequency (%)  
1 5 10.6%
 
2 13 27.7%
 
3 24 51.1%
 
4 4 8.5%
 
5 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
1 5 10.6%
 
2 13 27.7%
 
3 24 51.1%
 
4 4 8.5%
 
5 1 2.1%
 

NumHeteroatoms
Numeric

Distinct count 6
Unique (%) 12.8%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 7.617
Minimum 5
Maximum 10
Zeros (%) 0.0%

Quantile statistics

Minimum 5
5-th percentile 5
Q1 7
Median 7
Q3 9
95-th percentile 10
Maximum 10
Range 5
Interquartile range 2

Descriptive statistics

Standard deviation 1.5259
Coef of variation 0.20033
Kurtosis -0.95524
Mean 7.617
MAD 1.2947
Skewness 0.07476
Sum 358
Variance 2.3284
Memory size 456.0 B
Value Count Frequency (%)  
7 14 29.8%
 
9 8 17.0%
 
10 7 14.9%
 
8 7 14.9%
 
6 7 14.9%
 
5 4 8.5%
 

Minimum 5 values

Value Count Frequency (%)  
5 4 8.5%
 
6 7 14.9%
 
7 14 29.8%
 
8 7 14.9%
 
9 8 17.0%
 

Maximum 5 values

Value Count Frequency (%)  
6 7 14.9%
 
7 14 29.8%
 
8 7 14.9%
 
9 8 17.0%
 
10 7 14.9%
 

NumRadicalElectrons
Constant

This variable is constant and should be ignored for analysis

Constant value 0

NumRotatableBonds
Numeric

Distinct count 9
Unique (%) 19.1%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 4.3617
Minimum 1
Maximum 10
Zeros (%) 0.0%

Quantile statistics

Minimum 1
5-th percentile 2
Q1 3
Median 4
Q3 6
95-th percentile 8
Maximum 10
Range 9
Interquartile range 3

Descriptive statistics

Standard deviation 1.9828
Coef of variation 0.4546
Kurtosis 0.11394
Mean 4.3617
MAD 1.6342
Skewness 0.67728
Sum 205
Variance 3.9315
Memory size 456.0 B
Value Count Frequency (%)  
3 12 25.5%
 
5 8 17.0%
 
6 7 14.9%
 
2 7 14.9%
 
4 6 12.8%
 
8 3 6.4%
 
7 2 4.3%
 
10 1 2.1%
 
1 1 2.1%
 

Minimum 5 values

Value Count Frequency (%)  
1 1 2.1%
 
2 7 14.9%
 
3 12 25.5%
 
4 6 12.8%
 
5 8 17.0%
 

Maximum 5 values

Value Count Frequency (%)  
5 8 17.0%
 
6 7 14.9%
 
7 2 4.3%
 
8 3 6.4%
 
10 1 2.1%
 

NumSaturatedCarbocycles
Numeric

Distinct count 4
Unique (%) 8.5%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 0.21277
Minimum 0
Maximum 3
Zeros (%) 85.1%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 0
Median 0
Q3 0
95-th percentile 1
Maximum 3
Range 3
Interquartile range 0

Descriptive statistics

Standard deviation 0.58741
Coef of variation 2.7608
Kurtosis 11.889
Mean 0.21277
MAD 0.36215
Skewness 3.2952
Sum 10
Variance 0.34505
Memory size 456.0 B
Value Count Frequency (%)  
0 40 85.1%
 
1 5 10.6%
 
3 1 2.1%
 
2 1 2.1%
 

Minimum 5 values

Value Count Frequency (%)  
0 40 85.1%
 
1 5 10.6%
 
2 1 2.1%
 
3 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
0 40 85.1%
 
1 5 10.6%
 
2 1 2.1%
 
3 1 2.1%
 

NumSaturatedHeterocycles
Boolean

Distinct count 2
Unique (%) 4.3%
Missing (%) 0.0%
Missing (n) 0
Mean 0.14894
0
40
1
 
7
Value Count Frequency (%)  
0 40 85.1%
 
1 7 14.9%
 

NumSaturatedRings
Numeric

Distinct count 4
Unique (%) 8.5%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 0.3617
Minimum 0
Maximum 3
Zeros (%) 70.2%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 0
Median 0
Q3 1
95-th percentile 1
Maximum 3
Range 3
Interquartile range 1

Descriptive statistics

Standard deviation 0.64016
Coef of variation 1.7699
Kurtosis 5.3794
Mean 0.3617
MAD 0.50792
Skewness 2.1049
Sum 17
Variance 0.40981
Memory size 456.0 B
Value Count Frequency (%)  
0 33 70.2%
 
1 12 25.5%
 
3 1 2.1%
 
2 1 2.1%
 

Minimum 5 values

Value Count Frequency (%)  
0 33 70.2%
 
1 12 25.5%
 
2 1 2.1%
 
3 1 2.1%
 

Maximum 5 values

Value Count Frequency (%)  
0 33 70.2%
 
1 12 25.5%
 
2 1 2.1%
 
3 1 2.1%
 

NumValenceElectrons
Highly correlated

This variable is highly correlated with MolWt and should be ignored for analysis

Correlation 0.94986

PEOE_VSA1
Numeric

Distinct count 27
Unique (%) 57.4%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 14.831
Minimum 4.7369
Maximum 30.208
Zeros (%) 0.0%

Quantile statistics

Minimum 4.7369
5-th percentile 4.9108
Q1 10.425
Median 15.051
Q3 20.126
95-th percentile 25.361
Maximum 30.208
Range 25.471
Interquartile range 9.7006

Descriptive statistics

Standard deviation 6.5659
Coef of variation 0.44272
Kurtosis -0.33004
Mean 14.831
MAD 5.1797
Skewness 0.23423
Sum 697.06
Variance 43.111
Memory size 456.0 B
Value Count Frequency (%)  
5.316788604006331 5 10.6%
 
15.037630078753587 5 10.6%
 
10.633577208012662 5 10.6%
 
20.307204250644723 3 6.4%
 
15.454508951909443 3 6.4%
 
4.736862953800049 3 6.4%
 
20.9343443329662 2 4.3%
 
10.216698334856808 2 4.3%
 
19.774493032553636 1 2.1%
 
29.733715726129624 1 2.1%
 
Other values (17) 17 36.2%
 

Minimum 5 values

Value Count Frequency (%)  
4.736862953800049 3 6.4%
 
5.316788604006331 5 10.6%
 
9.733939541059678 1 2.1%
 
10.05365155780638 1 2.1%
 
10.216698334856808 2 4.3%
 

Maximum 5 values

Value Count Frequency (%)  
21.52098651213075 1 2.1%
 
24.636642852425943 1 2.1%
 
25.67120728676625 1 2.1%
 
29.733715726129624 1 2.1%
 
30.208268829557248 1 2.1%
 

PEOE_VSA10
Numeric

Distinct count 18
Unique (%) 38.3%
Missing (%) 0.0%
Missing (n) 0
Infinite (%) 0.0%
Infinite (n) 0
Mean 5.9961
Minimum 0
Maximum 40.877
Zeros (%) 42.6%

Quantile statistics

Minimum 0
5-th percentile 0
Q1 0
Median 5.6939
Q3 6.6069
95-th percentile 21.184
Maximum 40.877
Range 40.877
Interquartile range 6.6069

Descriptive statistics

Standard deviation 8.2757
Coef of variation 1.3802
Kurtosis 7.2434
Mean 5.9961
MAD 5.2544
Skewness 2.4214
Sum 281.81
Variance 68.488
Memory size 456.0 B