#!/usr/bin/ruby -w
# -*- ruby -*-

require 'glark/log'

# -------------------------------------------------------
# File tester
# -------------------------------------------------------

# Classifies a path as a directory, text file, binary file, etc.
#
# A regular file is judged by its extension when that extension is registered
# (see set_text / set_nontext); otherwise the first @@TEST_LENGTH bytes are
# inspected for NUL and non-ASCII bytes.
class FileTester 
  include Loggable

  # The values returned by FileTester.type:
  BINARY     = "binary"
  DIRECTORY  = "directory"
  NONE       = "none"
  TEXT       = "text"
  UNKNOWN    = "unknown"
  UNREADABLE = "unreadable"

  # the fraction (0.3 == 30%) of tested bytes at or above which a file with
  # that many non-ASCII bytes is no longer considered text
  @@ODD_FACTOR = 0.3

  # how many bytes (characters) of a file we test
  @@TEST_LENGTH = 1024

  # maps an extension (without the leading dot, case-sensitive) to true when
  # such files are always text, and to false when they never are
  @@KNOWN = Hash.new

  # extensions associated with files that are always text:
  %w{ 
    c
    cpp
    css
    h
    f
    for
    fpp
    hpp
    html
    java
    mk
    php
    pl
    pm
    rb
    rbw
    txt
  }.each { |suf| @@KNOWN[suf] = true }

  # extensions associated with files that are never text:
  %w{ 
    Z
    a
    bz2
    elc
    gif
    gz
    jar
    jpeg
    jpg
    o
    obj
    pdf
    png
    ps
    tar
    zip
  }.each { |suf| @@KNOWN[suf] = false }

  # Returns true if +c+ (converted with to_i) has no bits set above the low
  # seven, i.e., lies in the 7-bit ASCII range.
  def self.ascii?(c)
    # from ctype.h
    (c.to_i & ~0x7f) == 0
  end

  # Returns one of the type constants for +file+:
  #
  #   DIRECTORY     - it is a directory
  #   TEXT / BINARY - it is a readable regular file, as judged by text?
  #   UNREADABLE    - it is a regular file that File.readable? rejects
  #   UNKNOWN       - it is some other kind of entry (any other ftype)
  #   NONE          - File.stat raised Errno::ENOENT
  #
  # Any other exception from File.stat is not rescued here.
  def self.type(file)
    case File.stat(file).ftype
    when "directory"
      DIRECTORY
    when "file"
      if File.readable?(file)
        FileTester.text?(file) ? TEXT : BINARY
      else
        UNREADABLE
      end
    else
      UNKNOWN
    end
  rescue Errno::ENOENT
    NONE
  end

  # Registers +ext+ (no leading dot) as an extension of files that are always
  # text.
  def self.set_text(ext)
    @@KNOWN[ext] = true
  end

  # Registers +ext+ (no leading dot) as an extension of files that are never
  # text.
  def self.set_nontext(ext)
    @@KNOWN[ext] = false
  end

  # Returns the extensions currently registered as always text.
  def self.text_extensions
    @@KNOWN.keys.select { |suf| @@KNOWN[suf] }
  end

  # Returns the extensions currently registered as never text.
  def self.nontext_extensions
    @@KNOWN.keys.reject { |suf| @@KNOWN[suf] }
  end

  # Returns whether +file+ should be treated as text.
  #
  # Result:
  #   false   - the file does not exist, has a registered non-text extension,
  #             contains a NUL byte in its first @@TEST_LENGTH bytes, or too
  #             many of those bytes are non-ASCII (see summary)
  #   true    - the file has a registered text extension, or passes the byte
  #             test
  #   UNKNOWN - the file is empty. Note that this is a (truthy) string, not a
  #             boolean, so FileTester.type reports empty files as TEXT; it is
  #             kept as is for backward compatibility.
  def self.text?(file)
    # Don't waste our time if it doesn't even exist. (File.exists? was removed
    # in Ruby 3.2; File.exist? is the supported spelling.)
    return false unless File.exist?(file)

    # A registered extension settles the question without reading the file.
    # to_s lets Pathname-like arguments through as well as Strings.
    suffix = file.to_s[/\.(\w+)\s*$/, 1]
    return @@KNOWN[suffix] if suffix && @@KNOWN.include?(suffix)

    ntested = 0
    nodd = 0

    # Binary mode: we are inspecting raw bytes, so no newline or end-of-file
    # translation should be applied to them.
    File.open(file, "rb") do |f|
      buf = f.read(@@TEST_LENGTH)

      # file had length of 0:
      return UNKNOWN unless buf

      buf.each_byte do |ch|
        ntested += 1

        # never allow null in a text file
        return false if ch.to_i == 0

        nodd += 1 unless FileTester.ascii?(ch)
      end
    end
    FileTester.summary(nodd, ntested)
  end

  # Returns true if +nodd+ (the number of non-ASCII bytes) is less than
  # @@ODD_FACTOR times +ntested+ (the number of bytes examined).
  def self.summary(nodd, ntested)
    nodd < ntested * @@ODD_FACTOR
  end

end
