Skip to content

Commit

Permalink
Merge pull request #111 from gitlost/issue_106
Browse files Browse the repository at this point in the history
PR 109 (double-width wrapping) and issue 106 (pre-colorization).
  • Loading branch information
miya0001 authored Jul 25, 2017
2 parents 2a15154 + 7466f68 commit 2d2b582
Show file tree
Hide file tree
Showing 6 changed files with 433 additions and 85 deletions.
83 changes: 42 additions & 41 deletions lib/cli/Colors.php
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ static public function color($color) {

$colors = array();
foreach (array('color', 'style', 'background') as $type) {
$code = @$color[$type];
$code = $color[$type];
if (isset(self::$_colors[$type][$code])) {
$colors[] = self::$_colors[$type][$code];
}
Expand All @@ -115,26 +115,25 @@ static public function color($color) {
static public function colorize($string, $colored = null) {
$passed = $string;

if (isset(self::$_string_cache[md5($passed)]['colorized'])) {
return self::$_string_cache[md5($passed)]['colorized'];
}

if (!self::shouldColorize($colored)) {
$colors = self::getColors();
$search = array_keys( $colors );
$return = str_replace( $search, '', $string );
self::cacheString($passed, $return, $colored);
$return = self::decolorize( $passed, 2 /*keep_encodings*/ );
self::cacheString($passed, $return);
return $return;
}

$md5 = md5($passed);
if (isset(self::$_string_cache[$md5]['colorized'])) {
return self::$_string_cache[$md5]['colorized'];
}

$string = str_replace('%%', '', $string);

foreach (self::getColors() as $key => $value) {
$string = str_replace($key, self::color($value), $string);
}

$string = str_replace('', '%', $string);
self::cacheString($passed, $string, $colored);
self::cacheString($passed, $string);

return $string;
}
Expand All @@ -143,15 +142,22 @@ static public function colorize($string, $colored = null) {
* Remove color information from a string.
*
* @param string $string A string with color information.
* @param int $keep Optional. If the 1 bit is set, color tokens (eg "%n") won't be stripped. If the 2 bit is set, color encodings (ANSI escapes) won't be stripped. Default 0.
* @return string A string with color information removed.
*/
static public function decolorize($string) {
// Get rid of color tokens if they exist
$string = str_replace(array_keys(self::getColors()), '', $string);
static public function decolorize( $string, $keep = 0 ) {
if ( ! ( $keep & 1 ) ) {
// Get rid of color tokens if they exist
$string = str_replace('%%', '', $string);
$string = str_replace(array_keys(self::getColors()), '', $string);
$string = str_replace('', '%', $string);
}

// Remove color encoding if it exists
foreach (self::getColors() as $key => $value) {
$string = str_replace(self::color($value), '', $string);
if ( ! ( $keep & 2 ) ) {
// Remove color encoding if it exists
foreach (self::getColors() as $key => $value) {
$string = str_replace(self::color($value), '', $string);
}
}

return $string;
Expand All @@ -162,13 +168,13 @@ static public function decolorize($string) {
*
* @param string $passed The original string before colorization.
* @param string $colorized The string after running through self::colorize.
* @param string $colored The string without any color information.
* @param string $deprecated Optional. Not used. Default null.
*/
static public function cacheString($passed, $colorized, $colored) {
static public function cacheString( $passed, $colorized, $deprecated = null ) {
self::$_string_cache[md5($passed)] = array(
'passed' => $passed,
'colorized' => $colorized,
'decolorized' => self::decolorize($passed)
'decolorized' => self::decolorize($passed), // Not very useful but keep for BC.
);
}

Expand All @@ -179,41 +185,36 @@ static public function cacheString($passed, $colorized, $colored) {
* @return int
*/
static public function length($string) {
if (isset(self::$_string_cache[md5($string)]['decolorized'])) {
$test_string = self::$_string_cache[md5($string)]['decolorized'];
} else {
$test_string = self::decolorize($string);
}

return safe_strlen($test_string);
return safe_strlen( self::decolorize( $string ) );
}

/**
* Return the width (length in characters) of the string without color codes.
* Return the width (length in characters) of the string without color codes if enabled.
*
* @param string $string the string to measure
* @param string $string The string to measure.
* @param bool $pre_colorized Optional. Set if the string is pre-colorized. Default false.
* @param string|bool $encoding Optional. The encoding of the string. Default false.
* @return int
*/
static public function width($string) {
$md5 = md5($string);
if (isset(self::$_string_cache[$md5]['decolorized'])) {
$test_string = self::$_string_cache[$md5]['decolorized'];
} else {
$test_string = self::decolorize($string);
}

return strwidth($test_string);
static public function width( $string, $pre_colorized = false, $encoding = false ) {
return strwidth( $pre_colorized || self::shouldColorize() ? self::decolorize( $string, $pre_colorized ? 1 /*keep_tokens*/ : 0 ) : $string, $encoding );
}

/**
* Pad the string to a certain display length.
*
* @param string $string the string to pad
* @param integer $length the display length
* @param string $string The string to pad.
* @param int $length The display length.
* @param bool $pre_colorized Optional. Set if the string is pre-colorized. Default false.
* @param string|bool $encoding Optional. The encoding of the string. Default false.
* @return string
*/
static public function pad($string, $length) {
return safe_str_pad( $string, $length );
static public function pad( $string, $length, $pre_colorized = false, $encoding = false ) {
$real_length = self::width( $string, $pre_colorized, $encoding );
$diff = strlen( $string ) - $real_length;
$length += $diff;

return str_pad( $string, $length );
}

/**
Expand Down
28 changes: 27 additions & 1 deletion lib/cli/Table.php
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ public function setRenderer(Renderer $renderer) {
*/
protected function checkRow(array $row) {
foreach ($row as $column => $str) {
$width = Colors::shouldColorize() ? Colors::width($str) : strwidth($str);
$width = Colors::width( $str, $this->isAsciiPreColorized( $column ) );
if (!isset($this->_width[$column]) || $width > $this->_width[$column]) {
$this->_width[$column] = $width;
}
Expand Down Expand Up @@ -228,4 +228,30 @@ public function setRows(array $rows) {
public function countRows() {
return count($this->_rows);
}

/**
 * Mark the items of an Ascii-rendered table as pre-colorized.
 *
 * The flag is handed to the renderer's `setPreColorized()`; the call is a no-op
 * when the current renderer is not an `Ascii` instance.
 *
 * @param bool|array $pre_colorized A boolean to set all columns in the table as pre-colorized, or an array of booleans keyed by column index (number) to set individual columns as pre-colorized.
 * @see cli\Ascii::setPreColorized()
 */
public function setAsciiPreColorized( $pre_colorized ) {
	if ( ! ( $this->_renderer instanceof Ascii ) ) {
		// Nothing to configure unless the renderer is an Ascii instance.
		return;
	}

	$this->_renderer->setPreColorized( $pre_colorized );
}

/**
 * Report whether a column of an Ascii-rendered table is pre-colorized.
 *
 * @param int $column Column index to check.
 * @return bool The result of the renderer's `isPreColorized()` for the column when the renderer is an `Ascii` instance; otherwise false.
 * @see cli\Ascii::isPreColorized()
 */
private function isAsciiPreColorized( $column ) {
	$renderer = $this->_renderer;

	// Renderers other than Ascii are always reported as not pre-colorized.
	return $renderer instanceof Ascii ? $renderer->isPreColorized( $column ) : false;
}
}
117 changes: 86 additions & 31 deletions lib/cli/cli.php
Original file line number Diff line number Diff line change
Expand Up @@ -156,15 +156,19 @@ function menu( $items, $default = null, $title = 'Choose an item' ) {
* Attempts an encoding-safe way of getting string length. If the mbstring functions aren't
* available, falls back to a byte-wise strlen() of the string after an iconv() conversion to ASCII.
*
* @param string The string to check
* @return int Numeric value that represents the string's length
* @param string $str The string to check.
* @param string|bool $encoding Optional. The encoding of the string. Default false.
* @return int Numeric value that represents the string's length
*/
function safe_strlen( $str ) {
if ( function_exists( 'mb_strlen' ) && function_exists( 'mb_detect_encoding' ) ) {
$length = mb_strlen( $str, mb_detect_encoding( $str ) );
function safe_strlen( $str, $encoding = false ) {
if ( function_exists( 'mb_strlen' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) ) ) {
if ( ! $encoding ) {
$encoding = mb_detect_encoding( $str, null, true /*strict*/ );
}
$length = mb_strlen( $str, $encoding );
} else {
// iconv will return PHP notice if non-ascii characters are present in input string
$str = iconv( 'ASCII' , 'ASCII', $str );
$str = iconv( $encoding ? $encoding : 'ASCII', 'ASCII', $str );

$length = strlen( $str );
}
Expand All @@ -176,17 +180,43 @@ function safe_strlen( $str ) {
* Attempts an encoding-safe way of getting a substring. If the mbstring functions aren't
* available, falls back to a byte-wise substr() of the string after an iconv() conversion to ASCII.
*
* @param string $str The input string
* @param int $start The starting position of the substring
* @param boolean $length Maximum length of the substring
* @return string Substring of string specified by start and length parameters
*/
function safe_substr( $str, $start, $length = false ) {
if ( function_exists( 'mb_substr' ) && function_exists( 'mb_detect_encoding' ) ) {
$substr = mb_substr( $str, $start, $length, mb_detect_encoding( $str ) );
* @param string $str The input string.
* @param int $start The starting position of the substring.
* @param int|bool|null $length Optional. Maximum length of the substring. Default false.
* @param int|bool $is_width Optional. If set and encoding is UTF-8, $length is interpreted as spacing width. Default false.
* @param string|bool $encoding Optional. The encoding of the string. Default false.
* @return string Substring of string specified by start and length parameters
*/
function safe_substr( $str, $start, $length = false, $is_width = false, $encoding = false ) {
// PHP 5.3 substr takes false as full length, PHP > 5.3 takes null - for compat. do `safe_strlen()`.
if ( null === $length || false === $length ) {
$length = safe_strlen( $str, $encoding );
}
if ( function_exists( 'mb_substr' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) ) ) {
if ( ! $encoding ) {
$encoding = mb_detect_encoding( $str, null, true /*strict*/ );
}
$substr = mb_substr( $str, $start, $length, $encoding );

if ( $is_width && 'UTF-8' === $encoding ) {
// Set the East Asian Width regex.
$eaw_regex = get_unicode_regexs( 'eaw' );
// If there's any East Asian double-width chars...
if ( preg_match( $eaw_regex, $substr ) ) {
// Explode string into an array of UTF-8 chars. Based on core `_mb_substr()` in "wp-includes/compat.php".
$chars = preg_split( '/([\x00-\x7f\xc2-\xf4][^\x00-\x7f\xc2-\xf4]*)/', $substr, $length + 1, PREG_SPLIT_DELIM_CAPTURE | PREG_SPLIT_NO_EMPTY );
$cnt = min( count( $chars ), $length );
$width = $length;

for ( $length = 0; $length < $cnt && $width > 0; $length++ ) {
$width -= preg_match( $eaw_regex, $chars[ $length ] ) ? 2 : 1;
}
return join( '', array_slice( $chars, 0, $length ) );
}
}
} else {
// iconv will return PHP notice if non-ascii characters are present in input string
$str = iconv( 'ASCII' , 'ASCII', $str );
$str = iconv( $encoding ? $encoding : 'ASCII', 'ASCII', $str );

$substr = substr( $str, $start, $length );
}
Expand All @@ -197,13 +227,13 @@ function safe_substr( $str, $start, $length = false ) {
/**
* An encoding-safe way of padding string length for display
*
* @param string $string The string to pad
* @param int $length The length to pad it to
* @param string $string The string to pad.
* @param int $length The length to pad it to.
* @param string|bool $encoding Optional. The encoding of the string. Default false.
* @return string
*/
function safe_str_pad( $string, $length ) {
$cleaned_string = Colors::shouldColorize() ? Colors::decolorize( $string ) : $string;
$real_length = strwidth( $cleaned_string );
function safe_str_pad( $string, $length, $encoding = false ) {
$real_length = strwidth( $string, $encoding );
$diff = strlen( $string ) - $real_length;
$length += $diff;

Expand All @@ -213,16 +243,13 @@ function safe_str_pad( $string, $length ) {
/**
* Get the width of a string, i.e. its length in characters, taking into account multi-byte and mark characters for UTF-8, and multi-byte characters for non-UTF-8 encodings.
*
* @param string The string to check
* @return int The string's width.
* @param string $string The string to check.
* @param string|bool $encoding Optional. The encoding of the string. Default false.
* @return int The string's width.
*/
function strwidth( $string ) {
static $eaw_regex; // East Asian Width regex. Characters that count as 2 characters as they're "wide" or "fullwidth". See http://www.unicode.org/reports/tr11/tr11-19.html
static $m_regex; // Mark characters regex (Unicode property "M") - mark combining "Mc", mark enclosing "Me" and mark non-spacing "Mn" chars that should be ignored for spacing purposes.
if ( null === $eaw_regex ) {
// Load both regexs generated from Unicode data.
require __DIR__ . '/unicode/regex.php';
}
function strwidth( $string, $encoding = false ) {
// Set the East Asian Width and Mark regexs.
list( $eaw_regex, $m_regex ) = get_unicode_regexs();

// Allow for selective testings - "1" bit set tests grapheme_strlen(), "2" preg_match_all( '/\X/u' ), "4" mb_strwidth(), "other" safe_strlen().
$test_strwidth = getenv( 'PHP_CLI_TOOLS_TEST_STRWIDTH' );
Expand All @@ -239,8 +266,10 @@ function strwidth( $string ) {
return $width + preg_match_all( $eaw_regex, $string, $dummy /*needed for PHP 5.3*/ );
}
}
if ( function_exists( 'mb_strwidth' ) && function_exists( 'mb_detect_encoding' ) ) {
$encoding = mb_detect_encoding( $string, null, true /*strict*/ );
if ( function_exists( 'mb_strwidth' ) && ( $encoding || function_exists( 'mb_detect_encoding' ) ) ) {
if ( ! $encoding ) {
$encoding = mb_detect_encoding( $string, null, true /*strict*/ );
}
$width = mb_strwidth( $string, $encoding );
if ( 'UTF-8' === $encoding ) {
// Subtract combining characters.
Expand All @@ -252,3 +281,29 @@ function strwidth( $string ) {
}
return safe_strlen( $string );
}

/**
 * Fetch the regular expressions generated from Unicode data.
 *
 * The regexs are pulled in from "unicode/regex.php" whenever the East Asian Width
 * regex has not been set yet, and are held in static variables between calls.
 *
 * @param string $idx Optional. 'eaw' or 'm' to return that specific regex only. Default null.
 * @return array|string The specific regex string when $idx is 'eaw' or 'm'; otherwise an array holding the East Asian Width regex followed by the Mark regex.
 */
function get_unicode_regexs( $idx = null ) {
	// NOTE(review): the names of these statics are presumably assigned by the required file (not visible here) - do not rename without checking it.
	static $eaw_regex; // East Asian Width regex. Matches "wide"/"fullwidth" characters that count as 2 characters. See http://www.unicode.org/reports/tr11/tr11-19.html
	static $m_regex; // Mark characters regex (Unicode property "M") - "Mc", "Me" and "Mn" chars that should be ignored for spacing purposes.

	if ( null === $eaw_regex ) {
		// First use: load both regexs generated from Unicode data.
		require __DIR__ . '/unicode/regex.php';
	}

	// Strict comparisons: anything other than the exact strings 'eaw' / 'm' (including null) falls through to the full list.
	if ( 'eaw' === $idx ) {
		return $eaw_regex;
	}
	if ( 'm' === $idx ) {
		return $m_regex;
	}

	return array( $eaw_regex, $m_regex );
}
Loading

0 comments on commit 2d2b582

Please sign in to comment.