Frequency Tables
The method .freq()
produces tables with relative frequencies, joint
relative frequencies, and conditional relative frequencies. The joint
and conditional cases can combine multiple variables. The number of
cases (N), the standard deviation (Std.Dev.), and the lower (low) and
upper (high) bounds of the 95% confidence interval, based on a normal
approximation, are provided.
import tidypolars4sci as tp
from tidypolars4sci.data import mtcars as df
# Notes:
# - .arrange() below is just sorting the rows to facilitate visualization.
# - .print() is used just to display the full table.
# Relative frequencies: P(cyl)
df.freq('cyl').print()
shape: (3, 6)
┌──────────────────────────────────────────────┐
│ cyl N Freq Std.Dev. low high │
│ i64 i64 f64 f64 f64 f64 │
╞══════════════════════════════════════════════╡
│ 4 11 34.38 14.32 6.31 62.44 │
│ 6 7 21.88 15.62 -8.75 52.50 │
│ 8 14 43.75 13.26 17.76 69.74 │
└──────────────────────────────────────────────┘
# Joint relative frequencies: P(cyl, am)
df.freq(['cyl', 'am']).arrange('cyl', 'am').print()
shape: (6, 7)
┌─────────────────────────────────────────────────────┐
│ cyl am N Freq Std.Dev. low high │
│ i64 i64 i64 f64 f64 f64 f64 │
╞═════════════════════════════════════════════════════╡
│ 4 0 3 9.38 16.83 -23.61 42.36 │
│ 4 1 8 25.00 15.31 -5.01 55.01 │
│ 6 0 4 12.50 16.54 -19.91 44.91 │
│ 6 1 3 9.38 16.83 -23.61 42.36 │
│ 8 0 12 37.50 13.98 10.11 64.89 │
│ 8 1 2 6.25 17.12 -27.30 39.80 │
└─────────────────────────────────────────────────────┘
# Conditional relative frequencies given one variable: P(cyl | am)
df.freq('cyl', 'am').arrange('am').print()
shape: (6, 7)
┌─────────────────────────────────────────────────────┐
│ am cyl N Freq Std.Dev. low high │
│ i64 i64 i64 f64 f64 f64 f64 │
╞═════════════════════════════════════════════════════╡
│ 0 4 3 15.79 21.05 -25.47 57.05 │
│ 0 6 4 21.05 20.38 -18.90 61.01 │
│ 0 8 12 63.16 13.93 35.86 90.45 │
│ 1 4 8 61.54 17.20 27.83 95.25 │
│ 1 6 3 23.08 24.33 -24.60 70.75 │
│ 1 8 2 15.38 25.51 -34.62 65.39 │
└─────────────────────────────────────────────────────┘
# Conditional relative frequencies given two variables: P(cyl | am, carb)
df.freq('cyl', ['am', 'carb']).arrange('am', 'carb').print()
shape: (13, 8)
┌──────────────────────────────────────────────────────────────┐
│ am carb cyl N Freq Std.Dev. low high │
│ i64 i64 i64 i64 f64 f64 f64 f64 │
╞══════════════════════════════════════════════════════════════╡
│ 0 1 4 1 33.33 47.14 -59.06 125.73 │
│ 0 1 6 2 66.67 33.33 1.33 132.00 │
│ 0 2 4 2 33.33 33.33 -32.00 98.67 │
│ 0 2 8 4 66.67 23.57 20.47 112.86 │
│ 0 3 8 3 100.00 0.00 100.00 100.00 │
│ 0 4 6 2 28.57 31.94 -34.04 91.18 │
│ 0 4 8 5 71.43 20.20 31.83 111.03 │
│ 1 1 4 4 100.00 0.00 100.00 100.00 │
│ 1 2 4 4 100.00 0.00 100.00 100.00 │
│ 1 4 6 2 66.67 33.33 1.33 132.00 │
│ 1 4 8 1 33.33 47.14 -59.06 125.73 │
│ 1 6 6 1 100.00 0.00 100.00 100.00 │
│ 1 8 8 1 100.00 0.00 100.00 100.00 │
└──────────────────────────────────────────────────────────────┘
# Joint conditional relative frequencies given two variables: P(cyl, vs | am, carb)
df.freq(['cyl', 'vs'], ['am', 'carb']).arrange('am', 'carb').print()
shape: (14, 9)
┌────────────────────────────────────────────────────────────────────┐
│ am carb cyl vs N Freq Std.Dev. low high │
│ i64 i64 i64 i64 i64 f64 f64 f64 f64 │
╞════════════════════════════════════════════════════════════════════╡
│ 0 1 4 1 1 33.33 47.14 -59.06 125.73 │
│ 0 1 6 1 2 66.67 33.33 1.33 132.00 │
│ 0 2 4 1 2 33.33 33.33 -32.00 98.67 │
│ 0 2 8 0 4 66.67 23.57 20.47 112.86 │
│ 0 3 8 0 3 100.00 0.00 100.00 100.00 │
│ 0 4 6 1 2 28.57 31.94 -34.04 91.18 │
│ 0 4 8 0 5 71.43 20.20 31.83 111.03 │
│ 1 1 4 1 4 100.00 0.00 100.00 100.00 │
│ 1 2 4 0 1 25.00 43.30 -59.87 109.87 │
│ 1 2 4 1 3 75.00 25.00 26.00 124.00 │
│ 1 4 6 0 2 66.67 33.33 1.33 132.00 │
│ 1 4 8 0 1 33.33 47.14 -59.06 125.73 │
│ 1 6 6 0 1 100.00 0.00 100.00 100.00 │
│ 1 8 8 0 1 100.00 0.00 100.00 100.00 │
└────────────────────────────────────────────────────────────────────┘