<?php
namespace com\whitemagicsoftware;
/**
 * Syntax-based URL normalizer.
 *
 * A URL is split with parse_url() into its components, each component is
 * normalized (lower-cased scheme and host, default port removed, dot
 * segments removed from the path, percent-encoding made consistent) and the
 * pieces are then reassembled.
 *
 * Known limitations of the approach used here:
 *  - The query string is round-tripped through parse_str() and
 *    http_build_query(), so PHP's usual handling of parameter names applies
 *    (see the PHP manual for parse_str()).
 *  - A percent-encoded slash ("%2F") in the path comes out as a literal "/",
 *    because the path is fully decoded, re-encoded, and every "%2F" is then
 *    turned back into "/".
 */
class Normalizer {

    /** @var string|null URL as last stored by setUrl(); '' after a failed parse. */
    private $url;

    /** @var string|null Scheme as reported by parse_url() (no trailing colon). */
    private $scheme;

    /** @var string|null */
    private $host;

    /** @var int|string|null Port as reported by parse_url(), or '' when absent. */
    private $port;

    /** @var string|null */
    private $user;

    /** @var string|null */
    private $pass;

    /** @var string|null */
    private $path;

    /** @var string|null Query string without the leading "?". */
    private $query;

    /** @var string|null Fragment without the leading "#". */
    private $fragment;

    /** Default ports, keyed by the scheme *including* its trailing colon. */
    private $default_scheme_ports = array( 'http:' => 80, 'https:' => 443, );

    /** Names of the component properties filled in by setUrl(). */
    private $components = array( 'scheme', 'host', 'port', 'user', 'pass', 'path', 'query', 'fragment', );

    /**
     * @param string|null $url Optional URL to load immediately via setUrl().
     */
    public function __construct( $url=null ) {
        // A plain truthiness test would silently ignore the string "0".
        if ( $this->isPresent( $url ) ) {
            $this->setUrl( $url );
        }
    }

    /**
     * Tells whether a URL component holds a value.
     *
     * Components are null before any URL was set and '' when missing from the
     * parsed URL. "0" and 0 are legitimate values and count as present.
     *
     * @param mixed $value
     * @return bool
     */
    private function isPresent( $value ) {
        return $value !== null && $value !== false && $value !== '';
    }

    /**
     * @return string|null The stored URL (normalized once normalize() has run).
     */
    public function getUrl() {
        return $this->url;
    }

    /**
     * Prepends a scheme when parse_url() reports that the URL has none.
     *
     * @param string $url
     * @param string $scheme Prefix to prepend, including the "://" separator.
     * @return string
     */
    private function addScheme( $url, $scheme = "http://" ) {
        if ( parse_url( $url, PHP_URL_SCHEME ) === null ) {
            return $scheme . $url;
        }

        return $url;
    }

    /**
     * Stores a URL (adding "http://" if it has no scheme) and splits it into
     * its components.
     *
     * @param string $url
     * @return bool True when the URL could be parsed. On failure the stored
     *              URL and every component are reset to '' and false is
     *              returned.
     */
    public function setUrl( $url ) {
        $this->url = $this->addScheme( $url );
        $url_components = parse_url( $this->url );

        if ( ! $url_components ) {
            $this->url = '';
            foreach ( $this->components as $key ) {
                $this->$key = '';
            }
            return false;
        }

        // Components that parse_url() did not report are stored as ''.
        foreach ( $this->components as $key ) {
            $this->$key = isset( $url_components[$key] ) ? $url_components[$key] : '';
        }

        return true;
    }

    /**
     * Normalizes the stored URL, stores the result through setUrl() and
     * returns it.
     *
     * @return string|null The normalized URL as returned by getUrl().
     */
    public function normalize() {
        // The trailing colon is part of the key used in $default_scheme_ports.
        $scheme = '';
        if ( $this->isPresent( $this->scheme ) ) {
            $scheme = strtolower( $this->scheme ) . ':';
        }

        $authority = '';
        if ( $this->isPresent( $this->host ) ) {
            $authority = '//';

            if ( $this->isPresent( $this->user ) ) {
                $authority .= $this->user;
                if ( $this->isPresent( $this->pass ) ) {
                    $authority .= ':' . $this->pass;
                }
                $authority .= '@';
            }

            $authority .= strtolower( $this->host );

            // Drop the port when it is the default one for the scheme.
            $port = $this->port;
            if ( $this->isPresent( $port )
                && isset( $this->default_scheme_ports[$scheme] )
                && (int) $port === $this->default_scheme_ports[$scheme] ) {
                $port = '';
            }
            if ( $this->isPresent( $port ) ) {
                $authority .= ':' . $port;
            }
        }

        $path = $this->path;
        if ( $this->isPresent( $path ) ) {
            // Decode first so that percent-encoded dots ("%2E") take part in
            // dot-segment removal; otherwise a second normalize() call would
            // yield a different URL than the first one.
            $path = $this->urlDecodeUnreservedChars( $path );
            $path = $this->urlDecodeReservedSubDelimChars( $path );
            $path = $this->removeDotSegments( $path );
        }
        elseif ( $this->isPresent( $this->url ) ) {
            $path = '/';
        }

        $query = '';
        if ( $this->isPresent( $this->query ) ) {
            // parse_str() already percent-decodes names and values; decoding
            // them again would turn e.g. "%2541" into "A".
            parse_str( $this->query, $params );
            $encoded = http_build_query( $params, '', '&' );
            // http_build_query() emits "+" for spaces and "%7E" for "~".
            $encoded = str_replace( array( '+', '%7E' ), array( '%20', '~' ), $encoded );
            // Parameters with an empty value are written without the "=".
            $query = '?' . str_replace( '=&', '&', rtrim( $encoded, '=' ) );
        }

        $fragment = '';
        if ( $this->isPresent( $this->fragment ) ) {
            $fragment = '#' . rawurlencode( rawurldecode( $this->fragment ) );
        }

        $this->setUrl( $scheme . $authority . $path . $query . $fragment );

        return $this->getUrl();
    }

    /**
     * Removes "." and ".." segments from a path, following the
     * remove_dot_segments steps of RFC 3986, section 5.2.4.
     *
     * @param string $path
     * @return string The path without dot segments.
     */
    public function removeDotSegments( $path ) {
        $input  = (string) $path;
        $output = '';

        // Compare against '' rather than using empty(): a remaining "0"
        // segment is data, not the end of the input.
        while ( $input !== '' ) {
            if ( preg_match( '!^\.\.?/!', $input, $matches ) ) {
                // A: leading "../" or "./" is dropped.
                $input = (string) substr( $input, strlen( $matches[0] ) );
            }
            elseif ( preg_match( '!^/\.(?:/|$)!', $input, $matches ) ) {
                // B: leading "/./", or a final "/.", becomes "/".
                $input = '/' . substr( $input, strlen( $matches[0] ) );
            }
            elseif ( preg_match( '!^/\.\.(?:/|$)!', $input, $matches ) ) {
                // C: leading "/../", or a final "/..", becomes "/" and the
                // last output segment is removed. ".." must be a complete
                // segment, so "/..foo" is left alone.
                $input  = '/' . substr( $input, strlen( $matches[0] ) );
                $output = preg_replace( '!/[^/]+$!', '', $output );
            }
            elseif ( $input === '.' || $input === '..' ) {
                // D: nothing but "." or ".." is left.
                $input = '';
            }
            else {
                // E: move the first segment (with its leading slashes) to
                // the output. The match is never empty for non-empty input.
                preg_match( '!^/*[^/]*!', $input, $matches );
                $output .= $matches[0];
                $input   = (string) substr( $input, strlen( $matches[0] ) );
            }
        }

        return $output;
    }

    /**
     * @return string|null The scheme of the stored URL, without a colon.
     */
    public function getScheme() {
        return $this->scheme;
    }

    /**
     * Decodes the string completely, re-encodes it with rawurlencode() and
     * then restores "/", ":" and "@" as literal characters.
     *
     * @param string $string
     * @return string
     */
    public function urlDecodeUnreservedChars( $string ) {
        $string = rawurlencode( rawurldecode( $string ) );

        return str_replace( array( '%2F', '%3A', '%40' ), array( '/', ':', '@' ), $string );
    }

    /**
     * Replaces the percent-encoded forms of the sub-delimiters
     * ! $ & ' ( ) * + , ; = with the literal characters.
     *
     * @param string $string
     * @return string
     */
    public function urlDecodeReservedSubDelimChars( $string ) {
        return str_replace(
            array( '%21', '%24', '%26', '%27', '%28', '%29', '%2A', '%2B', '%2C', '%3B', '%3D' ),
            array( '!', '$', '&', "'", '(', ')', '*', '+', ',', ';', '=' ),
            $string
        );
    }
}