# Dave Jarvis' Repositories
#
# git clone https://repo.autonoma.ca/repo/segmenter.git
# Given a CSV file of the form <text,natural number>, this writes a CSV
# file of relative probabilities, based on the maximum natural number found
# in the CSV file.
#
# Based on code by Dennis Williamson.
#
# Usage: awk -f probability.awk < filename.csv | sort -t, -k2,2n -k1,1
#
# Read and write comma-separated fields.
BEGIN {
  OFS = FS = ","
}

# For every record, remember the count for the word in field 1 and track
# the largest count seen so far.
#
# The count is forced to a number with "+ 0". Without that, a field that
# does not look purely numeric (for example "10\r" from a CRLF file, or a
# header row) makes awk compare as strings, so "10\r" > "5\r" is false and
# the wrong maximum is chosen. Non-numeric text converts to 0.
#
# If a word occurs more than once, the last count read wins.
{
  count = $2 + 0
  a[$1] = count

  if( count > max ) max = count
}

# Write each word with its count divided by the largest count. The order
# of "for( word in a )" is unspecified, so pipe the output through sort
# when a stable order is needed.
END {
  # Records were read but none had a positive count: dividing by max
  # would abort with a division-by-zero error, so report it clearly.
  # Empty input (NR == 0) still produces empty output and exit status 0.
  if( NR > 0 && max <= 0 ) {
    print "probability.awk: no positive count found; cannot scale" | "cat 1>&2"
    exit 1
  }

  for( word in a ) print word, a[word] / max
}