<?php
namespace com\whitemagicsoftware;
require "constants.php";
use Imagick;
/**
 * Converts a scanned image into text by resampling the image, running an
 * external OCR executable on it, and stitching the recognized lines together.
 */
class Scan extends Obj {
  /**
   * Runs OCR on an image file and returns the reassembled text.
   *
   * @param string $path Path to the image file. The file is overwritten in
   * place by the resampling step.
   * @return string The text produced by interpret() from the OCR output.
   */
  public function distill( $path ) {
    return $this->interpret( $this->recognize( $path ) );
  }

  /**
   * Resamples the image at the given path, then runs the OCR executable on
   * it and collects the lines written to standard output.
   *
   * @param string $path Path to the image file (overwritten by resample()).
   * @return array Non-empty output lines from the OCR command. Original
   * array keys are preserved, so the keys may be non-contiguous.
   */
  public function recognize( $path ) {
    // Presumably defined by constants.php; verify against that file.
    global $EXECUTABLE_OCR;

    $language = "eng";

    $this->resample( $path );

    // Quote the arguments so that spaces or shell metacharacters in the
    // path cannot break or alter the command line.
    // NOTE(review): "-psm" is the single-dash spelling; newer OCR releases
    // may expect "--psm" -- confirm against the installed executable.
    $command = sprintf(
      "%s %s stdout -l %s -psm 1",
      $EXECUTABLE_OCR,
      escapeshellarg( $path ),
      escapeshellarg( $language )
    );

    $output = array();
    $status = 0;
    exec( $command, $output, $status );

    // Stay best-effort (return whatever was produced), but leave a trace
    // when the executable reports a failure instead of ignoring it.
    if( $status !== 0 ) {
      error_log( "OCR command exited with status $status: $command" );
    }

    // Drop only truly empty lines. A bare array_filter() would also discard
    // a recognized line consisting solely of "0".
    return array_filter( $output, function( $line ) {
      return $line !== "";
    } );
  }

  /**
   * Rewrites the image at the given path, in place, as a trimmed PNG.
   * Images narrower than 1024 pixels are additionally desaturated and
   * enlarged to three times their width.
   *
   * @param string $path Path to the image file to read and overwrite.
   */
  private function resample( $path ) {
    $im = new Imagick();

    try {
      // Resolution is set before reading the file.
      $im->setResolution( 300, 300 );
      $im->readImage( $path );
      $im->trimImage( 0 );

      $w = $im->getImageWidth();

      if( $w < 1024 ) {
        $im->setImageColorspace( Imagick::COLORSPACE_REC601LUMA );
        // Saturation of 0 with brightness and hue left at 100.
        $im->modulateImage( 100, 0, 100 );
        // A height of 0 lets the library derive it from the new width.
        // NOTE(review): the third argument is documented as a FILTER_*
        // constant, yet an INTERPOLATE_* constant is passed -- confirm the
        // intended filter before changing it.
        $im->resizeImage( $w * 3, 0, Imagick::INTERPOLATE_NEARESTNEIGHBOR, 1 );
      }

      $im->setFormat( "png24" );

      // Write the encoded bytes explicitly rather than relying on an
      // implicit object-to-string conversion.
      file_put_contents( $path, $im->getImageBlob() );
    }
    finally {
      // Release the image resources even when reading or processing throws.
      $im->destroy();
    }
  }

  /**
   * Joins OCR output lines into a single string.
   *
   * Each line is trimmed. Lines are joined with a single space, except that
   * a line ending in "-" has the hyphen removed and is joined to the next
   * line without a space. A line that is empty after trimming appends a
   * newline. Because the separator precedes every line, the result begins
   * with a space when the first line is not empty.
   *
   * @param array $rawOcr Lines of text to join.
   * @return string The joined text.
   */
  public function interpret( $rawOcr ) {
    $result = "";
    $space = " ";

    foreach( $rawOcr as $line ) {
      $line = trim( $line );

      // Compare against "" explicitly: empty() would also treat a line
      // consisting solely of "0" as blank.
      if( $line === "" ) {
        $result .= "\n";
        continue;
      }

      $hyphenated = mb_substr( $line, -1 ) === "-";

      if( $hyphenated ) {
        $line = mb_substr( $line, 0, -1 );
      }

      $result .= $space . $line;

      // After a hyphenated line, the next line continues the same word.
      $space = $hyphenated ? "" : " ";
    }

    return $result;
  }
}