mirror of
https://github.com/smogon/pokemon-showdown-client.git
synced 2026-03-21 17:50:29 -05:00
parent
f0d9f53ded
commit
2ef97f83f1
|
|
@ -11,7 +11,7 @@ use Wikimedia\CSS\Util;
|
|||
|
||||
/**
|
||||
* Matcher that matches one out of a set of Matchers ("|" combiner).
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#comb-one
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#comb-one
|
||||
*/
|
||||
class Alternative extends Matcher {
|
||||
/** @var Matcher[] */
|
||||
|
|
@ -25,6 +25,7 @@ class Alternative extends Matcher {
|
|||
$this->matchers = $matchers;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
$used = [];
|
||||
foreach ( $this->matchers as $matcher ) {
|
||||
|
|
|
|||
|
|
@ -6,6 +6,8 @@
|
|||
|
||||
namespace Wikimedia\CSS\Grammar;
|
||||
|
||||
use InvalidArgumentException;
|
||||
use UnexpectedValueException;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\CSSFunction;
|
||||
use Wikimedia\CSS\Objects\SimpleBlock;
|
||||
|
|
@ -15,7 +17,7 @@ use Wikimedia\CSS\Objects\Token;
|
|||
* Matcher that matches anything except bad strings, bad urls, and unmatched
|
||||
* left-paren, left-brace, or left-bracket.
|
||||
* @warning Be very careful using this!
|
||||
* @see https://drafts.csswg.org/css-syntax/#any-value for where this roughly comes from.
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#any-value
|
||||
*/
|
||||
class AnythingMatcher extends Matcher {
|
||||
|
||||
|
|
@ -42,9 +44,9 @@ class AnythingMatcher extends Matcher {
|
|||
*/
|
||||
public function __construct( array $options = [] ) {
|
||||
$this->toplevel = !empty( $options['toplevel'] );
|
||||
$this->quantifier = isset( $options['quantifier'] ) ? $options['quantifier'] : '';
|
||||
$this->quantifier = $options['quantifier'] ?? '';
|
||||
if ( !in_array( $this->quantifier, [ '', '+', '*' ], true ) ) {
|
||||
throw new \InvalidArgumentException( 'Invalid quantifier' );
|
||||
throw new InvalidArgumentException( 'Invalid quantifier' );
|
||||
}
|
||||
|
||||
$recurse = !$this->toplevel && $this->quantifier === '*'
|
||||
|
|
@ -55,12 +57,13 @@ class AnythingMatcher extends Matcher {
|
|||
}
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
$origStart = $start;
|
||||
$lastMatch = $this->quantifier === '*' ? $this->makeMatch( $values, $start, $start ) : null;
|
||||
do {
|
||||
$newMatch = null;
|
||||
$cv = isset( $values[$start] ) ? $values[$start] : null;
|
||||
$cv = $values[$start] ?? null;
|
||||
if ( $cv instanceof Token ) {
|
||||
switch ( $cv->type() ) {
|
||||
case Token::T_BAD_STRING:
|
||||
|
|
@ -92,7 +95,7 @@ class AnythingMatcher extends Matcher {
|
|||
// If we encounter whitespace, assume it's significant.
|
||||
$newMatch = $this->makeMatch(
|
||||
$values, $origStart, $this->next( $values, $start, $options ),
|
||||
new Match( $values, $start, 1, 'significantWhitespace' ),
|
||||
new GrammarMatch( $values, $start, 1, 'significantWhitespace' ),
|
||||
[ [ $lastMatch ] ]
|
||||
);
|
||||
break;
|
||||
|
|
@ -103,7 +106,7 @@ class AnythingMatcher extends Matcher {
|
|||
case Token::T_LEFT_BRACKET:
|
||||
// Should never happen
|
||||
// @codeCoverageIgnoreStart
|
||||
throw new \UnexpectedValueException( "How did a \"{$cv->type()}\" token get here?" );
|
||||
throw new UnexpectedValueException( "How did a \"{$cv->type()}\" token get here?" );
|
||||
// @codeCoverageIgnoreEnd
|
||||
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -6,9 +6,9 @@
|
|||
|
||||
namespace Wikimedia\CSS\Grammar;
|
||||
|
||||
use InvalidArgumentException;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\SimpleBlock;
|
||||
use Wikimedia\CSS\Objects\Token;
|
||||
|
||||
/**
|
||||
* Matcher that matches a SimpleBlock
|
||||
|
|
@ -34,7 +34,7 @@ class BlockMatcher extends Matcher {
|
|||
*/
|
||||
public function __construct( $blockType, Matcher $matcher ) {
|
||||
if ( SimpleBlock::matchingDelimiter( $blockType ) === null ) {
|
||||
throw new \InvalidArgumentException(
|
||||
throw new InvalidArgumentException(
|
||||
'A block is delimited by either {}, [], or ().'
|
||||
);
|
||||
}
|
||||
|
|
@ -42,8 +42,9 @@ class BlockMatcher extends Matcher {
|
|||
$this->matcher = $matcher;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
$cv = isset( $values[$start] ) ? $values[$start] : null;
|
||||
$cv = $values[$start] ?? null;
|
||||
if ( $cv instanceof SimpleBlock && $cv->getStartTokenType() === $this->blockType ) {
|
||||
// To successfully match, our sub-Matcher needs to match the whole
|
||||
// content of the block.
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ use Wikimedia\CSS\Objects\ComponentValueList;
|
|||
*/
|
||||
class CheckedMatcher extends Matcher {
|
||||
/** @var Matcher */
|
||||
private $matcher = null;
|
||||
private $matcher;
|
||||
|
||||
/** @var callable */
|
||||
protected $check;
|
||||
|
|
@ -21,13 +21,14 @@ class CheckedMatcher extends Matcher {
|
|||
/**
|
||||
* @param Matcher $matcher Base matcher
|
||||
* @param callable $check Function to check the match is really valid.
|
||||
* Prototype is bool func( ComponentValueList $values, Match $match, array $options )
|
||||
* Prototype is bool func( ComponentValueList $values, GrammarMatch $match, array $options )
|
||||
*/
|
||||
public function __construct( Matcher $matcher, callable $check ) {
|
||||
$this->matcher = $matcher;
|
||||
$this->check = $check;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
foreach ( $this->matcher->generateMatches( $values, $start, $options ) as $match ) {
|
||||
if ( call_user_func( $this->check, $values, $match, $options ) ) {
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ use Wikimedia\CSS\Objects\Token;
|
|||
* other types (case-sensitively) too. For the more common case-insensitive
|
||||
* identifier matching, use KeywordMatcher.
|
||||
*
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#component-types
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#component-types
|
||||
*/
|
||||
class DelimMatcher extends Matcher {
|
||||
/** @var string One of the Token::T_* constants */
|
||||
|
|
@ -39,8 +39,9 @@ class DelimMatcher extends Matcher {
|
|||
$this->type = $options['type'];
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
$cv = isset( $values[$start] ) ? $values[$start] : null;
|
||||
$cv = $values[$start] ?? null;
|
||||
if ( $cv instanceof Token && $cv->type() === $this->type &&
|
||||
in_array( $cv->value(), $this->values, true )
|
||||
) {
|
||||
|
|
|
|||
|
|
@ -6,9 +6,10 @@
|
|||
|
||||
namespace Wikimedia\CSS\Grammar;
|
||||
|
||||
use Closure;
|
||||
use InvalidArgumentException;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\CSSFunction;
|
||||
use Wikimedia\CSS\Objects\Token;
|
||||
|
||||
/**
|
||||
* Matcher that matches a CSSFunction
|
||||
|
|
@ -29,25 +30,26 @@ class FunctionMatcher extends Matcher {
|
|||
protected $matcher;
|
||||
|
||||
/**
|
||||
* @param string|callable|null $name Function name, case-insensitive, or a
|
||||
* @param string|Closure|null $name Function name, case-insensitive, or a
|
||||
* function to check the name.
|
||||
* @param Matcher $matcher Matcher for the contents of the function
|
||||
*/
|
||||
public function __construct( $name, Matcher $matcher ) {
|
||||
if ( is_string( $name ) ) {
|
||||
$this->nameCheck = function ( $s ) use ( $name ) {
|
||||
$this->nameCheck = static function ( $s ) use ( $name ) {
|
||||
return !strcasecmp( $s, $name );
|
||||
};
|
||||
} elseif ( is_callable( $name ) || $name === null ) {
|
||||
$this->nameCheck = $name;
|
||||
} else {
|
||||
throw new \InvalidArgumentException( '$name must be a string, callable, or null' );
|
||||
throw new InvalidArgumentException( '$name must be a string, callable, or null' );
|
||||
}
|
||||
$this->matcher = $matcher;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
$cv = isset( $values[$start] ) ? $values[$start] : null;
|
||||
$cv = $values[$start] ?? null;
|
||||
if ( $cv instanceof CSSFunction &&
|
||||
( !$this->nameCheck || call_user_func( $this->nameCheck, $cv->getName() ) )
|
||||
) {
|
||||
|
|
|
|||
|
|
@ -8,18 +8,19 @@ namespace Wikimedia\CSS\Grammar;
|
|||
|
||||
use Wikimedia\CSS\Objects\ComponentValue;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\CSSFunction;
|
||||
use Wikimedia\CSS\Objects\SimpleBlock;
|
||||
use Wikimedia\CSS\Objects\Token;
|
||||
use Wikimedia\CSS\Util;
|
||||
|
||||
/**
|
||||
* Represent a match from a Matcher.
|
||||
*/
|
||||
class Match {
|
||||
class GrammarMatch {
|
||||
|
||||
/** @var int */
|
||||
protected $start, $length;
|
||||
protected $start;
|
||||
|
||||
/** @var int */
|
||||
protected $length;
|
||||
|
||||
/** @var ComponentValue[] Matched ComponentValues */
|
||||
protected $values;
|
||||
|
|
@ -27,7 +28,7 @@ class Match {
|
|||
/** @var string|null */
|
||||
protected $name = null;
|
||||
|
||||
/** @var Match[] Captured submatches */
|
||||
/** @var GrammarMatch[] Captured submatches */
|
||||
protected $capturedMatches = [];
|
||||
|
||||
/**
|
||||
|
|
@ -35,12 +36,12 @@ class Match {
|
|||
* @param int $start Starting index of the match.
|
||||
* @param int $length Number of tokens in the match.
|
||||
* @param string|null $name Give a name to this match.
|
||||
* @param Match[] $capturedMatches Captured submatches of this match.
|
||||
* @param GrammarMatch[] $capturedMatches Captured submatches of this match.
|
||||
*/
|
||||
public function __construct(
|
||||
ComponentValueList $list, $start, $length, $name = null, array $capturedMatches = []
|
||||
) {
|
||||
Util::assertAllInstanceOf( $capturedMatches, Match::class, '$capturedMatches' );
|
||||
Util::assertAllInstanceOf( $capturedMatches, self::class, '$capturedMatches' );
|
||||
|
||||
$this->values = $list->slice( $start, $length );
|
||||
$this->start = $start;
|
||||
|
|
@ -95,21 +96,21 @@ class Match {
|
|||
* This returns the matches from capturing submatchers (see
|
||||
* Matcher::capture()) that matched during the matching of the top-level
|
||||
* matcher that returned this match. If capturing submatchers were nested,
|
||||
* the Match objects returned here will themselves have captured submatches to
|
||||
* return.
|
||||
* the GrammarMatch objects returned here will themselves have captured sub-
|
||||
* matches to return.
|
||||
*
|
||||
* To borrow PCRE regular expression syntax, if the "pattern" described by
|
||||
* the Matchers resembled `www(?<A>xxx(?<B>yyy)xxx)(?<C>zzz)*` then the
|
||||
* top-level Match's getCapturedMatches() would return a Match named "A"
|
||||
* (containing the "xxxyyyxxx" bit) and zero or more matches named "C" (for
|
||||
* each "zzz"), and that "A" Match's getCapturedMatches() would return a Match
|
||||
* named "B" (containing just the "yyy").
|
||||
* top-level GrammarMatch's getCapturedMatches() would return a GrammarMatch
|
||||
* named "A" (containing the "xxxyyyxxx" bit) and zero or more matches named
|
||||
* "C" (for each "zzz"), and that "A" GrammarMatch's getCapturedMatches()
|
||||
* would return a GrammarMatch named "B" (containing just the "yyy").
|
||||
*
|
||||
* Note that the start and end positions reported by captured matches may be
|
||||
* relative to a containing SimpleBlock or CSSFunction's value rather than
|
||||
* to the ComponentValueList passed to the top-level Matcher.
|
||||
*
|
||||
* @return Match[]
|
||||
* @return GrammarMatch[]
|
||||
*/
|
||||
public function getCapturedMatches() {
|
||||
return $this->capturedMatches;
|
||||
|
|
@ -124,7 +125,7 @@ class Match {
|
|||
foreach ( $this->capturedMatches as $m ) {
|
||||
$data[] = $m->getUniqueId();
|
||||
}
|
||||
return md5( join( "\n", $data ) );
|
||||
return md5( implode( "\n", $data ) );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -143,4 +144,8 @@ class Match {
|
|||
$m->fixWhitespace( $old, $new );
|
||||
}
|
||||
}
|
||||
|
||||
public function __toString() {
|
||||
return Util::stringify( $this->getValues() );
|
||||
}
|
||||
}
|
||||
|
|
@ -6,14 +6,15 @@
|
|||
|
||||
namespace Wikimedia\CSS\Grammar;
|
||||
|
||||
use Iterator;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\Token;
|
||||
use Wikimedia\CSS\Util;
|
||||
|
||||
/**
|
||||
* Matcher that groups other matchers (juxtaposition)
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#component-combinators
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#comb-comma
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#component-combinators
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#comb-comma
|
||||
*/
|
||||
class Juxtaposition extends Matcher {
|
||||
/** @var Matcher[] */
|
||||
|
|
@ -32,25 +33,25 @@ class Juxtaposition extends Matcher {
|
|||
$this->commas = (bool)$commas;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
$used = [];
|
||||
|
||||
// Match each of our matchers in turn, pushing each one onto a stack as
|
||||
// we process it and popping a match once its exhausted.
|
||||
// we process it and popping a match once it's exhausted.
|
||||
$stack = [
|
||||
[
|
||||
new Match( $values, $start, 0 ),
|
||||
new GrammarMatch( $values, $start, 0 ),
|
||||
$start,
|
||||
$this->matchers[0]->generateMatches( $values, $start, $options ),
|
||||
false
|
||||
]
|
||||
];
|
||||
do {
|
||||
/** @var $lastMatch Match */
|
||||
/** @var $lastEnd int */
|
||||
/** @var $iter \Iterator<Match> */
|
||||
/** @var $iter Iterator<GrammarMatch> */
|
||||
/** @var $needEmpty bool */
|
||||
list( $lastMatch, $lastEnd, $iter, $needEmpty ) = $stack[count( $stack ) - 1];
|
||||
[ , $lastEnd, $iter, $needEmpty ] = $stack[count( $stack ) - 1];
|
||||
|
||||
// If the top of the stack has no more matches, pop it and loop.
|
||||
if ( !$iter->valid() ) {
|
||||
|
|
@ -72,30 +73,29 @@ class Juxtaposition extends Matcher {
|
|||
$thisEnd = $nextFrom = $match->getNext();
|
||||
|
||||
// Dealing with commas is a bit tricky. There are three cases:
|
||||
// 1. If the current match is empty, don't look for a following
|
||||
// comma now and reset $thisEnd to $lastEnd.
|
||||
// 2. If there is a comma following, update $nextFrom to be after
|
||||
// the comma.
|
||||
// 3. If there's no comma following, every subsequent Matcher must
|
||||
// be empty in order for the group as a whole to match, so set
|
||||
// the flag.
|
||||
// 1. If the current match is empty, don't look for a following
|
||||
// comma now and reset $thisEnd to $lastEnd.
|
||||
// 2. If there is a comma following, update $nextFrom to be after
|
||||
// the comma.
|
||||
// 3. If there's no comma following, every subsequent Matcher must
|
||||
// be empty in order for the group as a whole to match, so set
|
||||
// the flag.
|
||||
// Unlike '#', this doesn't specify skipping whitespace around the
|
||||
// commas if the production isn't already skipping whitespace.
|
||||
if ( $this->commas ) {
|
||||
if ( $match->getLength() === 0 ) {
|
||||
$thisEnd = $lastEnd;
|
||||
} elseif ( isset( $values[$nextFrom] ) && $values[$nextFrom] instanceof Token &&
|
||||
// @phan-suppress-next-line PhanNonClassMethodCall False positive
|
||||
$values[$nextFrom]->type() === Token::T_COMMA
|
||||
) {
|
||||
$nextFrom = $this->next( $values, $nextFrom, $options );
|
||||
} else {
|
||||
if ( isset( $values[$nextFrom] ) && $values[$nextFrom] instanceof Token &&
|
||||
$values[$nextFrom]->type() === Token::T_COMMA
|
||||
) {
|
||||
$nextFrom = $this->next( $values, $nextFrom, $options );
|
||||
} else {
|
||||
$needEmpty = true;
|
||||
}
|
||||
$needEmpty = true;
|
||||
}
|
||||
}
|
||||
|
||||
// If we ran out of Matchers, yield the final position. Otherwise
|
||||
// If we ran out of Matchers, yield the final position. Otherwise,
|
||||
// push the next matcher onto the stack.
|
||||
if ( count( $stack ) >= count( $this->matchers ) ) {
|
||||
$newMatch = $this->makeMatch( $values, $start, $thisEnd, $match, $stack );
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ use Wikimedia\CSS\Objects\Token;
|
|||
* other types (case-insensitively) too. For delimiter (or case-sensitive)
|
||||
* matching, use DelimMatcher.
|
||||
*
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#component-types
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#component-types
|
||||
*/
|
||||
class KeywordMatcher extends Matcher {
|
||||
/** @var string One of the Token::T_* constants */
|
||||
|
|
@ -39,8 +39,9 @@ class KeywordMatcher extends Matcher {
|
|||
$this->type = $options['type'];
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
$cv = isset( $values[$start] ) ? $values[$start] : null;
|
||||
$cv = $values[$start] ?? null;
|
||||
if ( $cv instanceof Token && $cv->type() === $this->type &&
|
||||
isset( $this->values[strtolower( $cv->value() )] )
|
||||
) {
|
||||
|
|
|
|||
|
|
@ -6,10 +6,11 @@
|
|||
|
||||
namespace Wikimedia\CSS\Grammar;
|
||||
|
||||
use Iterator;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\Token;
|
||||
use Wikimedia\CSS\Objects\SimpleBlock;
|
||||
use Wikimedia\CSS\Objects\CSSFunction;
|
||||
use Wikimedia\CSS\Objects\SimpleBlock;
|
||||
use Wikimedia\CSS\Objects\Token;
|
||||
|
||||
/**
|
||||
* Base class for grammar matchers.
|
||||
|
|
@ -20,16 +21,16 @@ use Wikimedia\CSS\Objects\CSSFunction;
|
|||
* object that will determine whether a ComponentValueList actually matches
|
||||
* this grammar.
|
||||
*
|
||||
* [SYN3]: https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/
|
||||
* [VAL3]: https://www.w3.org/TR/2016/CR-css-values-3-20160929/
|
||||
* [SYN3]: https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/
|
||||
* [VAL3]: https://www.w3.org/TR/2019/CR-css-values-3-20190606/
|
||||
*/
|
||||
abstract class Matcher {
|
||||
|
||||
/** @var string|null Name to set on Match objects */
|
||||
/** @var string|null Name to set on GrammarMatch objects */
|
||||
protected $captureName = null;
|
||||
|
||||
/**
|
||||
* @var array Default options for self::match()
|
||||
* @var array Default options for self::matchAgainst()
|
||||
* - skip-whitespace: (bool) Allow whitespace in between any two tokens
|
||||
* - nonterminal: (bool) Don't require the whole of $values is matched
|
||||
* - mark-significance: (bool) On a successful match, replace T_WHITESPACE
|
||||
|
|
@ -43,49 +44,29 @@ abstract class Matcher {
|
|||
|
||||
/**
|
||||
* Create an instance.
|
||||
* @param mixed ... See static::__construct()
|
||||
* @param mixed ...$args See static::__construct()
|
||||
* @return static
|
||||
*/
|
||||
public static function create() {
|
||||
// @todo Once we drop support for PHP 5.5, just do this:
|
||||
// public static function create( ...$args ) {
|
||||
// return new static( ...$args );
|
||||
// }
|
||||
|
||||
$args = func_get_args();
|
||||
switch ( count( $args ) ) {
|
||||
case 0:
|
||||
return new static();
|
||||
case 1:
|
||||
return new static( $args[0] );
|
||||
case 2:
|
||||
return new static( $args[0], $args[1] );
|
||||
case 3:
|
||||
return new static( $args[0], $args[1], $args[2] );
|
||||
case 4:
|
||||
return new static( $args[0], $args[1], $args[2], $args[3] );
|
||||
default:
|
||||
// Slow, but all the existing Matchers have a max of 4 args.
|
||||
$rc = new \ReflectionClass( static::class );
|
||||
return $rc->newInstanceArgs( $args );
|
||||
}
|
||||
public static function create( ...$args ) {
|
||||
// @phan-suppress-next-line PhanParamTooManyUnpack,PhanTypeInstantiateAbstractStatic
|
||||
return new static( ...$args );
|
||||
}
|
||||
|
||||
/**
|
||||
* Return a copy of this matcher that will capture its matches
|
||||
*
|
||||
* A "capturing" Matcher will produce Matches that return a value from the
|
||||
* Match::getName() method. The Match::getCapturedMatches() method may be
|
||||
* used to retrieve them from the top-level Match.
|
||||
* A "capturing" Matcher will produce GrammarMatches that return a value from
|
||||
* the GrammarMatch::getName() method. The GrammarMatch::getCapturedMatches()
|
||||
* method may be used to retrieve them from the top-level GrammarMatch.
|
||||
*
|
||||
* The concept is similar to capturing groups in PCRE and other regex
|
||||
* languages.
|
||||
*
|
||||
* @param string|null $captureName Name to apply to captured Match objects
|
||||
* @param string|null $captureName Name to apply to captured GrammarMatch objects
|
||||
* @return static
|
||||
*/
|
||||
public function capture( $captureName ) {
|
||||
$ret = clone( $this );
|
||||
$ret = clone $this;
|
||||
$ret->captureName = $captureName;
|
||||
return $ret;
|
||||
}
|
||||
|
|
@ -94,14 +75,14 @@ abstract class Matcher {
|
|||
* Match against a list of ComponentValues
|
||||
* @param ComponentValueList $values
|
||||
* @param array $options Matching options, see self::$defaultOptions
|
||||
* @return Match|null
|
||||
* @return GrammarMatch|null
|
||||
*/
|
||||
public function match( ComponentValueList $values, array $options = [] ) {
|
||||
public function matchAgainst( ComponentValueList $values, array $options = [] ) {
|
||||
$options += $this->getDefaultOptions();
|
||||
$start = $this->next( $values, -1, $options );
|
||||
$l = count( $values );
|
||||
foreach ( $this->generateMatches( $values, $start, $options ) as $match ) {
|
||||
if ( $match->getNext() === $l || $options['nonterminal'] ) {
|
||||
if ( $options['nonterminal'] || $match->getNext() === $l ) {
|
||||
if ( $options['mark-significance'] ) {
|
||||
$significantWS = self::collectSignificantWhitespace( $match );
|
||||
self::markSignificantWhitespace( $values, $match, $significantWS, $match->getNext() );
|
||||
|
|
@ -114,11 +95,11 @@ abstract class Matcher {
|
|||
|
||||
/**
|
||||
* Collect any 'significantWhitespace' matches
|
||||
* @param Match $match
|
||||
* @param Token[]|null &$ret
|
||||
* @param GrammarMatch $match
|
||||
* @param Token[] &$ret
|
||||
* @return Token[]
|
||||
*/
|
||||
private static function collectSignificantWhitespace( Match $match, &$ret = [] ) {
|
||||
private static function collectSignificantWhitespace( GrammarMatch $match, &$ret = [] ) {
|
||||
if ( $match->getName() === 'significantWhitespace' ) {
|
||||
$ret = array_merge( $ret, $match->getValues() );
|
||||
}
|
||||
|
|
@ -131,7 +112,7 @@ abstract class Matcher {
|
|||
/**
|
||||
* Mark whitespace as significant or not
|
||||
* @param ComponentValueList $list
|
||||
* @param Match $match
|
||||
* @param GrammarMatch $match
|
||||
* @param Token[] $significantWS
|
||||
* @param int $end
|
||||
*/
|
||||
|
|
@ -141,8 +122,9 @@ abstract class Matcher {
|
|||
if ( $cv instanceof Token && $cv->type() === Token::T_WHITESPACE ) {
|
||||
$significant = in_array( $cv, $significantWS, true );
|
||||
if ( $significant !== $cv->significant() ) {
|
||||
$list[$i] = $cv->copyWithSignificance( $significant );
|
||||
$match->fixWhitespace( $cv, $list[$i] );
|
||||
$newCv = $cv->copyWithSignificance( $significant );
|
||||
$match->fixWhitespace( $cv, $newCv );
|
||||
$list[$i] = $newCv;
|
||||
}
|
||||
} elseif ( $cv instanceof CSSFunction || $cv instanceof SimpleBlock ) {
|
||||
self::markSignificantWhitespace(
|
||||
|
|
@ -186,27 +168,28 @@ abstract class Matcher {
|
|||
do {
|
||||
$i++;
|
||||
} while ( $skipWS && $i < $l &&
|
||||
// @phan-suppress-next-line PhanNonClassMethodCall False positive
|
||||
$values[$i] instanceof Token && $values[$i]->type() === Token::T_WHITESPACE
|
||||
);
|
||||
return $i;
|
||||
}
|
||||
|
||||
/**
|
||||
* Create a Match
|
||||
* Create a GrammarMatch
|
||||
* @param ComponentValueList $list
|
||||
* @param int $start
|
||||
* @param int $end First position after the match
|
||||
* @param Match|null $submatch Submatch, for capturing. If $submatch itself
|
||||
* named it will be kept as a capture in the returned Match, otherwise its
|
||||
* captured matches (if any) as returned by getCapturedMatches() will be
|
||||
* kept as captures in the returned Match.
|
||||
* @param GrammarMatch|null $submatch Sub-match, for capturing. If $submatch
|
||||
* itself named it will be kept as a capture in the returned GrammarMatch,
|
||||
* otherwise its captured matches (if any) as returned by getCapturedMatches()
|
||||
* will be kept as captures in the returned GrammarMatch.
|
||||
* @param array $stack Stack from which to fetch more submatches for
|
||||
* capturing (see $submatch). The stack is expected to be an array of
|
||||
* arrays, with the first element of each subarray being a Match.
|
||||
* @return Match
|
||||
* arrays, with the first element of each subarray being a GrammarMatch.
|
||||
* @return GrammarMatch
|
||||
*/
|
||||
protected function makeMatch(
|
||||
ComponentValueList $list, $start, $end, Match $submatch = null, array $stack = []
|
||||
ComponentValueList $list, $start, $end, GrammarMatch $submatch = null, array $stack = []
|
||||
) {
|
||||
$matches = array_column( $stack, 0 );
|
||||
$matches[] = $submatch;
|
||||
|
|
@ -214,7 +197,7 @@ abstract class Matcher {
|
|||
$keptMatches = [];
|
||||
while ( $matches ) {
|
||||
$m = array_shift( $matches );
|
||||
if ( !$m instanceof Match ) {
|
||||
if ( !$m instanceof GrammarMatch ) {
|
||||
// skip it, probably null
|
||||
} elseif ( $m->getName() !== null ) {
|
||||
$keptMatches[] = $m;
|
||||
|
|
@ -223,7 +206,7 @@ abstract class Matcher {
|
|||
}
|
||||
}
|
||||
|
||||
return new Match( $list, $start, $end - $start, $this->captureName, $keptMatches );
|
||||
return new GrammarMatch( $list, $start, $end - $start, $this->captureName, $keptMatches );
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -231,18 +214,18 @@ abstract class Matcher {
|
|||
*
|
||||
* The job of a Matcher is to determine all the ways its particular grammar
|
||||
* fragment can consume ComponentValues starting at a particular location
|
||||
* in the ComponentValueList, represented by returning Match objects. For
|
||||
* example, a matcher implementing `IDENT*` at a starting position where
|
||||
* in the ComponentValueList, represented by returning GrammarMatch objects.
|
||||
* For example, a matcher implementing `IDENT*` at a starting position where
|
||||
* there are three IDENT tokens in a row would be able to match 0, 1, 2, or
|
||||
* all 3 of those IDENT tokens, and therefore should return an iterator
|
||||
* over that set of Match objects.
|
||||
* over that set of GrammarMatch objects.
|
||||
*
|
||||
* Some matchers take other matchers as input, for example `IDENT*` is
|
||||
* probably going to be implemented as a matcher for `*` that repeatedly
|
||||
* applies a matcher for `IDENT`. The `*` matcher would call the `IDENT`
|
||||
* matcher's generateMatches() method directly.
|
||||
*
|
||||
* Most Matchers implement this method as a generator so as to not build up
|
||||
* Most Matchers implement this method as a generator to not build up
|
||||
* the full set of results when it's reasonably likely the caller is going
|
||||
* to terminate early.
|
||||
*
|
||||
|
|
@ -250,8 +233,8 @@ abstract class Matcher {
|
|||
* @param int $start Starting position in $values
|
||||
* @param array $options See self::$defaultOptions.
|
||||
* Always use the options passed in, don't use $this->defaultOptions yourself.
|
||||
* @return \Iterator<Match> Iterates over the set of Match objects
|
||||
* defining all the ways this matcher can match.
|
||||
* @return Iterator<GrammarMatch> Iterates over the set of GrammarMatch
|
||||
* objects defining all the ways this matcher can match.
|
||||
*/
|
||||
abstract protected function generateMatches( ComponentValueList $values, $start, array $options );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,10 @@
|
|||
|
||||
namespace Wikimedia\CSS\Grammar;
|
||||
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\Token;
|
||||
use Wikimedia\CSS\Parser\Parser;
|
||||
use Wikimedia\CSS\Sanitizer\PropertySanitizer;
|
||||
|
||||
/**
|
||||
* Factory for predefined Grammar matchers
|
||||
|
|
@ -16,12 +19,14 @@ class MatcherFactory {
|
|||
/** @var MatcherFactory|null */
|
||||
private static $instance = null;
|
||||
|
||||
/** @var Matcher[] Cache of constructed matchers */
|
||||
/** @var (Matcher|Matcher[])[] Cache of constructed matchers */
|
||||
protected $cache = [];
|
||||
|
||||
/** @var string[] length units */
|
||||
protected static $lengthUnits = [ 'em', 'ex', 'ch', 'rem', 'vw', 'vh',
|
||||
'vmin', 'vmax', 'cm', 'mm', 'Q', 'in', 'pc', 'pt', 'px' ];
|
||||
protected static $lengthUnits = [
|
||||
'em', 'ex', 'ch', 'rem', 'vw', 'vh', 'vmin', 'vmax',
|
||||
'cm', 'mm', 'Q', 'in', 'pc', 'pt', 'px'
|
||||
];
|
||||
|
||||
/** @var string[] angle units */
|
||||
protected static $angleUnits = [ 'deg', 'grad', 'rad', 'turn' ];
|
||||
|
|
@ -87,9 +92,32 @@ class MatcherFactory {
|
|||
return $this->cache[__METHOD__];
|
||||
}
|
||||
|
||||
/**
|
||||
* Matcher for a <custom-ident>
|
||||
*
|
||||
* Note this doesn't implement the semantic restriction about assigning
|
||||
* meaning to various idents in a complex value, as CSS Sanitizer doesn't
|
||||
* deal with semantics on that level.
|
||||
*
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#identifier-value
|
||||
* @param string[] $exclude Additional values to exclude, all-lowercase.
|
||||
* @return Matcher
|
||||
*/
|
||||
public function customIdent( array $exclude = [] ) {
|
||||
$exclude = array_merge( [
|
||||
// https://www.w3.org/TR/2019/CR-css-values-3-20190606/#common-keywords
|
||||
'initial', 'inherit', 'unset', 'default',
|
||||
// https://www.w3.org/TR/2018/CR-css-cascade-4-20180828/#all-shorthand
|
||||
'revert'
|
||||
], $exclude );
|
||||
return new TokenMatcher( Token::T_IDENT, static function ( Token $t ) use ( $exclude ) {
|
||||
return !in_array( strtolower( $t->value() ), $exclude, true );
|
||||
} );
|
||||
}
|
||||
|
||||
/**
|
||||
* Matcher for a string
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#strings
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#strings
|
||||
* @warning If the string will be used as a URL, use self::urlstring() instead.
|
||||
* @return Matcher
|
||||
*/
|
||||
|
|
@ -112,7 +140,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a URL
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#urls
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#urls
|
||||
* @param string $type Type of resource referenced, e.g. "image" or "audio".
|
||||
* Not used here, but might be used by a subclass to validate the URL more strictly.
|
||||
* @return Matcher
|
||||
|
|
@ -126,24 +154,28 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* CSS-wide value keywords
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#common-keywords
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#common-keywords
|
||||
* @return Matcher
|
||||
*/
|
||||
public function cssWideKeywords() {
|
||||
if ( !isset( $this->cache[__METHOD__] ) ) {
|
||||
$this->cache[__METHOD__] = new KeywordMatcher( [ 'initial', 'inherit', 'unset' ] );
|
||||
$this->cache[__METHOD__] = new KeywordMatcher( [
|
||||
// https://www.w3.org/TR/2019/CR-css-values-3-20190606/#common-keywords
|
||||
'initial', 'inherit', 'unset',
|
||||
// added by https://www.w3.org/TR/2018/CR-css-cascade-4-20180828/#all-shorthand
|
||||
'revert'
|
||||
] );
|
||||
}
|
||||
return $this->cache[__METHOD__];
|
||||
}
|
||||
|
||||
/**
|
||||
* Add calc() support to a basic type matcher
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#calc-notation
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#calc-notation
|
||||
* @param Matcher $typeMatcher Matcher for the type
|
||||
* @param string $type Type being matched
|
||||
* @return Matcher
|
||||
* @return Matcher[]
|
||||
*/
|
||||
public function calc( Matcher $typeMatcher, $type ) {
|
||||
protected function calcInternal( Matcher $typeMatcher, $type ) {
|
||||
if ( $type === 'integer' ) {
|
||||
$num = $this->rawInteger();
|
||||
} else {
|
||||
|
|
@ -164,13 +196,23 @@ class MatcherFactory {
|
|||
&$calcValue,
|
||||
Quantifier::star( new Juxtaposition( [ $ows, new DelimMatcher( '*' ), $ows, &$calcValue ] ) )
|
||||
] );
|
||||
} else {
|
||||
} elseif ( $typeMatcher === $this->rawNumber() ) {
|
||||
$calcProduct = new Juxtaposition( [
|
||||
&$calcValue,
|
||||
Quantifier::star( new Alternative( [
|
||||
new Juxtaposition( [ $ows, new DelimMatcher( '*' ), $ows, &$calcValue ] ),
|
||||
new Juxtaposition( [ $ows, new DelimMatcher( '/' ), $ows, $this->rawNumber() ] ),
|
||||
] ) ),
|
||||
Quantifier::star(
|
||||
new Juxtaposition( [ $ows, new DelimMatcher( [ '*', '/' ] ), $ows, &$calcValue ] )
|
||||
),
|
||||
] );
|
||||
} else {
|
||||
$calcNumValue = $this->calcInternal( $this->rawNumber(), 'number' )[1];
|
||||
$calcProduct = new Juxtaposition( [
|
||||
&$calcValue,
|
||||
Quantifier::star(
|
||||
new Alternative( [
|
||||
new Juxtaposition( [ $ows, new DelimMatcher( '*' ), $ows, &$calcValue ] ),
|
||||
new Juxtaposition( [ $ows, new DelimMatcher( '/' ), $ows, $calcNumValue, ] ),
|
||||
] )
|
||||
),
|
||||
] );
|
||||
}
|
||||
|
||||
|
|
@ -200,17 +242,31 @@ class MatcherFactory {
|
|||
] );
|
||||
}
|
||||
|
||||
return new Alternative( [ $typeMatcher, $calcFunc ] );
|
||||
return [
|
||||
new Alternative( [ $typeMatcher, $calcFunc ] ),
|
||||
$calcValue,
|
||||
];
|
||||
}
|
||||
|
||||
/**
 * Extend a basic type matcher so that calc() expressions are accepted too
 * @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#calc-notation
 * @param Matcher $typeMatcher Matcher for the type
 * @param string $type Type being matched
 * @return Matcher
 */
public function calc( Matcher $typeMatcher, $type ) {
	// calcInternal() hands back a list of matchers; the public API only
	// exposes the first entry.
	$matchers = $this->calcInternal( $typeMatcher, $type );
	return $matchers[0];
}
|
||||
|
||||
/**
|
||||
* Matcher for an integer value, without calc()
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#integers
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#integers
|
||||
* @return Matcher
|
||||
*/
|
||||
protected function rawInteger() {
|
||||
if ( !isset( $this->cache[__METHOD__] ) ) {
|
||||
$this->cache[__METHOD__] = new TokenMatcher( Token::T_NUMBER, function ( Token $t ) {
|
||||
$this->cache[__METHOD__] = new TokenMatcher( Token::T_NUMBER, static function ( Token $t ) {
|
||||
// The spec says it must match /^[+-]\d+$/, but the tokenizer
|
||||
// should have marked any other number token as a 'number'
|
||||
// anyway so let's not bother checking.
|
||||
|
|
@ -222,7 +278,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for an integer value
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#integers
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#integers
|
||||
* @return Matcher
|
||||
*/
|
||||
public function integer() {
|
||||
|
|
@ -234,7 +290,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a real number, without calc()
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#numbers
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#numbers
|
||||
* @return Matcher
|
||||
*/
|
||||
public function rawNumber() {
|
||||
|
|
@ -246,7 +302,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a real number
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#numbers
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#numbers
|
||||
* @return Matcher
|
||||
*/
|
||||
public function number() {
|
||||
|
|
@ -258,7 +314,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a percentage value, without calc()
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#percentages
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#percentages
|
||||
* @return Matcher
|
||||
*/
|
||||
public function rawPercentage() {
|
||||
|
|
@ -270,7 +326,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a percentage value
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#percentages
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#percentages
|
||||
* @return Matcher
|
||||
*/
|
||||
public function percentage() {
|
||||
|
|
@ -282,7 +338,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a length-percentage value
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#typedef-length-percentage
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#typedef-length-percentage
|
||||
* @return Matcher
|
||||
*/
|
||||
public function lengthPercentage() {
|
||||
|
|
@ -297,7 +353,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a frequency-percentage value
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#typedef-frequency-percentage
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#typedef-frequency-percentage
|
||||
* @return Matcher
|
||||
*/
|
||||
public function frequencyPercentage() {
|
||||
|
|
@ -311,8 +367,8 @@ class MatcherFactory {
|
|||
}
|
||||
|
||||
/**
|
||||
* Matcher for a angle-percentage value
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#typedef-angle-percentage
|
||||
* Matcher for an angle-percentage value
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#typedef-angle-percentage
|
||||
* @return Matcher
|
||||
*/
|
||||
public function anglePercentage() {
|
||||
|
|
@ -327,7 +383,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a time-percentage value
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#typedef-time-percentage
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#typedef-time-percentage
|
||||
* @return Matcher
|
||||
*/
|
||||
public function timePercentage() {
|
||||
|
|
@ -342,7 +398,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a number-percentage value
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#typedef-number-percentage
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#typedef-number-percentage
|
||||
* @return Matcher
|
||||
*/
|
||||
public function numberPercentage() {
|
||||
|
|
@ -357,7 +413,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a dimension value
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#dimensions
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#dimensions
|
||||
* @return Matcher
|
||||
*/
|
||||
public function dimension() {
|
||||
|
|
@ -371,9 +427,9 @@ class MatcherFactory {
|
|||
* Matches the number 0
|
||||
* @return Matcher
|
||||
*/
|
||||
protected function zero() {
|
||||
public function zero() {
|
||||
if ( !isset( $this->cache[__METHOD__] ) ) {
|
||||
$this->cache[__METHOD__] = new TokenMatcher( Token::T_NUMBER, function ( Token $t ) {
|
||||
$this->cache[__METHOD__] = new TokenMatcher( Token::T_NUMBER, static function ( Token $t ) {
|
||||
return $t->value() === 0 || $t->value() === 0.0;
|
||||
} );
|
||||
}
|
||||
|
|
@ -382,16 +438,16 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a length value, without calc()
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#lengths
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#lengths
|
||||
* @return Matcher
|
||||
*/
|
||||
protected function rawLength() {
|
||||
if ( !isset( $this->cache[__METHOD__] ) ) {
|
||||
$unitsRe = '/^(' . join( '|', self::$lengthUnits ) . ')$/i';
|
||||
$unitsRe = '/^(' . implode( '|', self::$lengthUnits ) . ')$/i';
|
||||
|
||||
$this->cache[__METHOD__] = new Alternative( [
|
||||
$this->zero(),
|
||||
new TokenMatcher( Token::T_DIMENSION, function ( Token $t ) use ( $unitsRe ) {
|
||||
new TokenMatcher( Token::T_DIMENSION, static function ( Token $t ) use ( $unitsRe ) {
|
||||
return preg_match( $unitsRe, $t->unit() );
|
||||
} ),
|
||||
] );
|
||||
|
|
@ -401,7 +457,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a length value
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#lengths
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#lengths
|
||||
* @return Matcher
|
||||
*/
|
||||
public function length() {
|
||||
|
|
@ -413,26 +469,25 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for an angle value, without calc()
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#angles
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#angles
|
||||
* @return Matcher
|
||||
*/
|
||||
protected function rawAngle() {
|
||||
if ( !isset( $this->cache[__METHOD__] ) ) {
|
||||
$unitsRe = '/^(' . join( '|', self::$angleUnits ) . ')$/i';
|
||||
$unitsRe = '/^(' . implode( '|', self::$angleUnits ) . ')$/i';
|
||||
|
||||
$this->cache[__METHOD__] = new Alternative( [
|
||||
$this->zero(),
|
||||
new TokenMatcher( Token::T_DIMENSION, function ( Token $t ) use ( $unitsRe ) {
|
||||
$this->cache[__METHOD__] = new TokenMatcher( Token::T_DIMENSION,
|
||||
static function ( Token $t ) use ( $unitsRe ) {
|
||||
return preg_match( $unitsRe, $t->unit() );
|
||||
} ),
|
||||
] );
|
||||
}
|
||||
);
|
||||
}
|
||||
return $this->cache[__METHOD__];
|
||||
}
|
||||
|
||||
/**
|
||||
* Matcher for an angle value
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#angles
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#angles
|
||||
* @return Matcher
|
||||
*/
|
||||
public function angle() {
|
||||
|
|
@ -444,15 +499,15 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a duration (time) value, without calc()
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#time
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#time
|
||||
* @return Matcher
|
||||
*/
|
||||
protected function rawTime() {
|
||||
if ( !isset( $this->cache[__METHOD__] ) ) {
|
||||
$unitsRe = '/^(' . join( '|', self::$timeUnits ) . ')$/i';
|
||||
$unitsRe = '/^(' . implode( '|', self::$timeUnits ) . ')$/i';
|
||||
|
||||
$this->cache[__METHOD__] = new TokenMatcher( Token::T_DIMENSION,
|
||||
function ( Token $t ) use ( $unitsRe ) {
|
||||
static function ( Token $t ) use ( $unitsRe ) {
|
||||
return preg_match( $unitsRe, $t->unit() );
|
||||
}
|
||||
);
|
||||
|
|
@ -462,7 +517,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a duration (time) value
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#time
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#time
|
||||
* @return Matcher
|
||||
*/
|
||||
public function time() {
|
||||
|
|
@ -474,15 +529,15 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a frequency value, without calc()
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#frequency
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#frequency
|
||||
* @return Matcher
|
||||
*/
|
||||
protected function rawFrequency() {
|
||||
if ( !isset( $this->cache[__METHOD__] ) ) {
|
||||
$unitsRe = '/^(' . join( '|', self::$frequencyUnits ) . ')$/i';
|
||||
$unitsRe = '/^(' . implode( '|', self::$frequencyUnits ) . ')$/i';
|
||||
|
||||
$this->cache[__METHOD__] = new TokenMatcher( Token::T_DIMENSION,
|
||||
function ( Token $t ) use ( $unitsRe ) {
|
||||
static function ( Token $t ) use ( $unitsRe ) {
|
||||
return preg_match( $unitsRe, $t->unit() );
|
||||
}
|
||||
);
|
||||
|
|
@ -492,7 +547,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a frequency value
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#frequency
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#frequency
|
||||
* @return Matcher
|
||||
*/
|
||||
public function frequency() {
|
||||
|
|
@ -504,12 +559,12 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a resolution value
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#resolution
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#resolution
|
||||
* @return Matcher
|
||||
*/
|
||||
public function resolution() {
|
||||
if ( !isset( $this->cache[__METHOD__] ) ) {
|
||||
$this->cache[__METHOD__] = new TokenMatcher( Token::T_DIMENSION, function ( Token $t ) {
|
||||
$this->cache[__METHOD__] = new TokenMatcher( Token::T_DIMENSION, static function ( Token $t ) {
|
||||
return preg_match( '/^(dpi|dpcm|dppx)$/i', $t->unit() );
|
||||
} );
|
||||
}
|
||||
|
|
@ -543,7 +598,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a color value
|
||||
* @see https://www.w3.org/TR/2011/REC-css3-color-20110607/#colorunits
|
||||
* @see https://www.w3.org/TR/2018/REC-css-color-3-20180619/#colorunits
|
||||
* @return Matcher
|
||||
*/
|
||||
public function color() {
|
||||
|
|
@ -592,7 +647,7 @@ class MatcherFactory {
|
|||
// Other keywords. Intentionally omitting the deprecated system colors.
|
||||
'transparent', 'currentColor',
|
||||
] ),
|
||||
new TokenMatcher( Token::T_HASH, function ( Token $t ) {
|
||||
new TokenMatcher( Token::T_HASH, static function ( Token $t ) {
|
||||
return preg_match( '/^([0-9a-f]{3}|[0-9a-f]{6})$/i', $t->value() );
|
||||
} ),
|
||||
], $this->colorFuncs() ) );
|
||||
|
|
@ -602,31 +657,33 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for an image value
|
||||
* @see https://www.w3.org/TR/2012/CR-css3-images-20120417/#image-values
|
||||
* @see https://www.w3.org/TR/2019/CR-css-images-3-20191010/#image-values
|
||||
* @return Matcher
|
||||
*/
|
||||
public function image() {
|
||||
if ( !isset( $this->cache[__METHOD__] ) ) {
|
||||
// https://www.w3.org/TR/2012/CR-css3-images-20120417/#image-list-type
|
||||
// Note the undefined <element-reference> production has been dropped from the Editor's Draft.
|
||||
$imageDecl = new Alternative( [
|
||||
$this->url( 'image' ),
|
||||
$this->urlstring( 'image' ),
|
||||
] );
|
||||
|
||||
// https://www.w3.org/TR/2012/CR-css3-images-20120417/#gradients
|
||||
// https://www.w3.org/TR/2019/CR-css-images-3-20191010/#gradients
|
||||
$c = $this->comma();
|
||||
$colorStops = Quantifier::hash( new Juxtaposition( [
|
||||
$colorStop = UnorderedGroup::allOf( [
|
||||
$this->color(),
|
||||
// Not really <length-percentage>, but grammatically the same
|
||||
Quantifier::optional( $this->lengthPercentage() ),
|
||||
] ), 2, INF );
|
||||
] );
|
||||
$colorStopList = new Juxtaposition( [
|
||||
$colorStop,
|
||||
Quantifier::hash( new Juxtaposition( [
|
||||
Quantifier::optional( $this->lengthPercentage() ),
|
||||
$colorStop
|
||||
], true ) ),
|
||||
], true );
|
||||
$atPosition = new Juxtaposition( [ new KeywordMatcher( 'at' ), $this->position() ] );
|
||||
|
||||
$linearGradient = new Juxtaposition( [
|
||||
Quantifier::optional( new Juxtaposition( [
|
||||
new Alternative( [
|
||||
$this->angle(),
|
||||
new Alternative( [
|
||||
$this->zero(),
|
||||
$this->angle(),
|
||||
] ),
|
||||
new Juxtaposition( [ new KeywordMatcher( 'to' ), UnorderedGroup::someOf( [
|
||||
new KeywordMatcher( [ 'left', 'right' ] ),
|
||||
new KeywordMatcher( [ 'top', 'bottom' ] ),
|
||||
|
|
@ -634,7 +691,7 @@ class MatcherFactory {
|
|||
] ),
|
||||
$c
|
||||
] ) ),
|
||||
$colorStops,
|
||||
$colorStopList,
|
||||
] );
|
||||
$radialGradient = new Juxtaposition( [
|
||||
Quantifier::optional( new Juxtaposition( [
|
||||
|
|
@ -644,13 +701,12 @@ class MatcherFactory {
|
|||
UnorderedGroup::someOf( [ new KeywordMatcher( 'circle' ), $this->length() ] ),
|
||||
UnorderedGroup::someOf( [
|
||||
new KeywordMatcher( 'ellipse' ),
|
||||
// Not really <length-percentage>, but grammatically the same
|
||||
Quantifier::count( $this->lengthPercentage(), 2, 2 )
|
||||
] ),
|
||||
UnorderedGroup::someOf( [
|
||||
new KeywordMatcher( [ 'circle', 'ellipse' ] ),
|
||||
new KeywordMatcher( [
|
||||
'closest-side', 'farthest-side', 'closest-corner', 'farthest-corner'
|
||||
'closest-corner', 'closest-side', 'farthest-corner', 'farthest-side',
|
||||
] ),
|
||||
] ),
|
||||
] ),
|
||||
|
|
@ -660,16 +716,12 @@ class MatcherFactory {
|
|||
] ),
|
||||
$c
|
||||
] ) ),
|
||||
$colorStops,
|
||||
$colorStopList,
|
||||
] );
|
||||
|
||||
// Putting it all together
|
||||
$this->cache[__METHOD__] = new Alternative( [
|
||||
$this->url( 'image' ),
|
||||
new FunctionMatcher( 'image', new Juxtaposition( [
|
||||
Quantifier::star( new Juxtaposition( [ $imageDecl, $c ] ) ),
|
||||
new Alternative( [ $imageDecl, $this->color() ] ),
|
||||
] ) ),
|
||||
new FunctionMatcher( 'linear-gradient', $linearGradient ),
|
||||
new FunctionMatcher( 'radial-gradient', $radialGradient ),
|
||||
new FunctionMatcher( 'repeating-linear-gradient', $linearGradient ),
|
||||
|
|
@ -681,10 +733,41 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a position value
|
||||
* @see https://www.w3.org/TR/2014/CR-css3-background-20140909/#ltpositiongt
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#typedef-position
|
||||
* @return Matcher
|
||||
*/
|
||||
public function position() {
	// Built once per factory instance, then served from the cache.
	if ( isset( $this->cache[__METHOD__] ) ) {
		return $this->cache[__METHOD__];
	}

	$lengthPct = $this->lengthPercentage();
	$kwCenter = new KeywordMatcher( 'center' );
	$kwHorizontal = new KeywordMatcher( [ 'left', 'right' ] );
	$kwVertical = new KeywordMatcher( [ 'top', 'bottom' ] );

	// Keyword-only form: a horizontal and/or a vertical keyword.
	$keywordForm = UnorderedGroup::someOf( [
		new Alternative( [ $kwCenter, $kwHorizontal ] ),
		new Alternative( [ $kwCenter, $kwVertical ] ),
	] );

	// One horizontal value, optionally followed by a vertical value.
	$valueForm = new Juxtaposition( [
		new Alternative( [ $kwCenter, $kwHorizontal, $lengthPct ] ),
		Quantifier::optional( new Alternative( [ $kwCenter, $kwVertical, $lengthPct ] ) ),
	] );

	// Edge keyword plus offset, once for each axis.
	$edgeOffsetForm = UnorderedGroup::allOf( [
		new Juxtaposition( [ $kwHorizontal, $lengthPct ] ),
		new Juxtaposition( [ $kwVertical, $lengthPct ] ),
	] );

	$matcher = new Alternative( [ $keywordForm, $valueForm, $edgeOffsetForm ] );
	$this->cache[__METHOD__] = $matcher;

	return $matcher;
}
|
||||
|
||||
/**
|
||||
* Matcher for a bg-position value
|
||||
* @see https://www.w3.org/TR/2017/CR-css-backgrounds-3-20171017/#typedef-bg-position
|
||||
* @return Matcher
|
||||
*/
|
||||
public function bgPosition() {
|
||||
if ( !isset( $this->cache[__METHOD__] ) ) {
|
||||
$lp = $this->lengthPercentage();
|
||||
$olp = Quantifier::optional( $lp );
|
||||
|
|
@ -709,7 +792,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a CSS media query
|
||||
* @see https://www.w3.org/TR/2016/WD-mediaqueries-4-20160706/#mq-syntax
|
||||
* @see https://www.w3.org/TR/2017/CR-mediaqueries-4-20170905/#mq-syntax
|
||||
* @param bool $strict Only allow defined query types
|
||||
* @return Matcher
|
||||
*/
|
||||
|
|
@ -736,10 +819,10 @@ class MatcherFactory {
|
|||
];
|
||||
$mfName = new KeywordMatcher( array_merge(
|
||||
$rangeFeatures,
|
||||
array_map( function ( $f ) {
|
||||
array_map( static function ( $f ) {
|
||||
return "min-$f";
|
||||
}, $rangeFeatures ),
|
||||
array_map( function ( $f ) {
|
||||
array_map( static function ( $f ) {
|
||||
return "max-$f";
|
||||
}, $rangeFeatures ),
|
||||
$discreteFeatures
|
||||
|
|
@ -757,7 +840,7 @@ class MatcherFactory {
|
|||
}
|
||||
|
||||
$posInt = $this->calc(
|
||||
new TokenMatcher( Token::T_NUMBER, function ( Token $t ) {
|
||||
new TokenMatcher( Token::T_NUMBER, static function ( Token $t ) {
|
||||
return $t->typeFlag() === 'integer' && preg_match( '/^\+?\d+$/', $t->representation() );
|
||||
} ),
|
||||
'integer'
|
||||
|
|
@ -777,25 +860,38 @@ class MatcherFactory {
|
|||
new Juxtaposition( [ $posInt, new DelimMatcher( '/' ), $posInt ] ),
|
||||
] );
|
||||
|
||||
$mediaInParens = new NothingMatcher(); // temporary
|
||||
// temporary
|
||||
$mediaInParens = new NothingMatcher();
|
||||
$mediaNot = new Juxtaposition( [ new KeywordMatcher( 'not' ), &$mediaInParens ] );
|
||||
$mediaAnd = new Juxtaposition( [
|
||||
&$mediaInParens,
|
||||
Quantifier::plus( new Juxtaposition( [ new KeywordMatcher( 'and' ), &$mediaInParens ] ) )
|
||||
$mediaAnd = new Juxtaposition( [ new KeywordMatcher( 'and' ), &$mediaInParens ] );
|
||||
$mediaOr = new Juxtaposition( [ new KeywordMatcher( 'or' ), &$mediaInParens ] );
|
||||
$mediaCondition = new Alternative( [
|
||||
$mediaNot,
|
||||
new Juxtaposition( [
|
||||
&$mediaInParens,
|
||||
new Alternative( [
|
||||
Quantifier::star( $mediaAnd ),
|
||||
Quantifier::star( $mediaOr ),
|
||||
] )
|
||||
] ),
|
||||
] );
|
||||
$mediaOr = new Juxtaposition( [
|
||||
&$mediaInParens,
|
||||
Quantifier::plus( new Juxtaposition( [ new KeywordMatcher( 'or' ), &$mediaInParens ] ) )
|
||||
$mediaConditionWithoutOr = new Alternative( [
|
||||
$mediaNot,
|
||||
new Juxtaposition( [ &$mediaInParens, Quantifier::star( $mediaAnd ) ] ),
|
||||
] );
|
||||
$mediaCondition = new Alternative( [ $mediaNot, $mediaAnd, $mediaOr, &$mediaInParens ] );
|
||||
$mediaConditionWithoutOr = new Alternative( [ $mediaNot, $mediaAnd, &$mediaInParens ] );
|
||||
$mediaFeature = new BlockMatcher( Token::T_LEFT_PAREN, new Alternative( [
|
||||
new Juxtaposition( [ $mfName, new TokenMatcher( Token::T_COLON ), $mfValue ] ), // <mf-plain>
|
||||
$mfName, // <mf-boolean>
|
||||
new Juxtaposition( [ $mfName, $ltgteq, $mfValue ] ), // <mf-range>, 1st alternative
|
||||
new Juxtaposition( [ $mfValue, $ltgteq, $mfName ] ), // <mf-range>, 2nd alternative
|
||||
new Juxtaposition( [ $mfValue, $lteq, $mfName, $lteq, $mfValue ] ), // <mf-range>, 3rd alt
|
||||
new Juxtaposition( [ $mfValue, $gteq, $mfName, $gteq, $mfValue ] ), // <mf-range>, 4th alt
|
||||
// <mf-plain>
|
||||
new Juxtaposition( [ $mfName, new TokenMatcher( Token::T_COLON ), $mfValue ] ),
|
||||
// <mf-boolean>
|
||||
$mfName,
|
||||
// <mf-range>, 1st alternative
|
||||
new Juxtaposition( [ $mfName, $ltgteq, $mfValue ] ),
|
||||
// <mf-range>, 2nd alternative
|
||||
new Juxtaposition( [ $mfValue, $ltgteq, $mfName ] ),
|
||||
// <mf-range>, 3rd alt
|
||||
new Juxtaposition( [ $mfValue, $lteq, $mfName, $lteq, $mfValue ] ),
|
||||
// <mf-range>, 4th alt
|
||||
new Juxtaposition( [ $mfValue, $gteq, $mfName, $gteq, $mfValue ] ),
|
||||
] ) );
|
||||
$mediaInParens = new Alternative( [
|
||||
new BlockMatcher( Token::T_LEFT_PAREN, $mediaCondition ),
|
||||
|
|
@ -821,7 +917,7 @@ class MatcherFactory {
|
|||
|
||||
/**
|
||||
* Matcher for a CSS media query list
|
||||
* @see https://www.w3.org/TR/2016/WD-mediaqueries-4-20160706/#mq-syntax
|
||||
* @see https://www.w3.org/TR/2017/CR-mediaqueries-4-20170905/#mq-syntax
|
||||
* @param bool $strict Only allow defined query types
|
||||
* @return Matcher
|
||||
*/
|
||||
|
|
@ -834,15 +930,114 @@ class MatcherFactory {
|
|||
return $this->cache[$key];
|
||||
}
|
||||
|
||||
/************************************************************************//**
|
||||
/**
 * Matcher for a "supports-condition"
 *
 * The returned matcher accepts a negated condition ("not"), a chain of
 * conditions joined by "and", a chain joined by "or", or a single
 * parenthesized condition. A parenthesized condition is either a nested
 * supports-condition, a declaration, or (non-strict mode only) any
 * function or parenthesized block starting with an identifier.
 *
 * @see https://www.w3.org/TR/2013/CR-css3-conditional-20130404/#supports_condition
 * @param PropertySanitizer|null $declarationSanitizer Check declarations against this Sanitizer
 * @param bool $strict Only accept defined syntax. Default true.
 * @return Matcher
 */
public function cssSupportsCondition(
	?PropertySanitizer $declarationSanitizer = null, $strict = true
) {
	$ws = $this->significantWhitespace();
	$anythingPlus = new AnythingMatcher( [ 'quantifier' => '+' ] );

	if ( $strict ) {
		// Strict mode: the catch-all production never matches.
		$generalEnclosed = new NothingMatcher();
	} else {
		$generalEnclosed = new Alternative( [
			new FunctionMatcher( null, $anythingPlus ),
			new BlockMatcher( Token::T_LEFT_PAREN, new Juxtaposition( [ $this->ident(), $anythingPlus ] ) ),
		] );
	}

	// The grammar is recursive. A placeholder is inserted by reference here
	// and replaced with the real nested-condition matcher once
	// $supportsCondition exists (see the assignment at the end).
	$supportsConditionBlock = new NothingMatcher();
	$supportsConditionInParens = new Alternative( [
		&$supportsConditionBlock,
		new BlockMatcher( Token::T_LEFT_PAREN, $this->cssDeclaration( $declarationSanitizer ) ),
		$generalEnclosed,
	] );
	$supportsCondition = new Alternative( [
		// "not" <in-parens>
		new Juxtaposition( [ new KeywordMatcher( 'not' ), $ws, $supportsConditionInParens ] ),
		// <in-parens> followed by one or more "and" <in-parens>
		new Juxtaposition( [ $supportsConditionInParens, Quantifier::plus( new Juxtaposition( [
			$ws, new KeywordMatcher( 'and' ), $ws, $supportsConditionInParens
		] ) ) ] ),
		// <in-parens> followed by one or more "or" <in-parens>
		new Juxtaposition( [ $supportsConditionInParens, Quantifier::plus( new Juxtaposition( [
			$ws, new KeywordMatcher( 'or' ), $ws, $supportsConditionInParens
		] ) ) ] ),
		// A lone <in-parens>
		$supportsConditionInParens,
	] );
	// Close the recursion: the placeholder now matches "( <supports-condition> )".
	$supportsConditionBlock = new BlockMatcher( Token::T_LEFT_PAREN, $supportsCondition );

	return $supportsCondition;
}
|
||||
|
||||
/**
 * Matcher for a declaration
 *
 * Matches one or more component values, then accepts the match only if
 * those values parse as a declaration without parse errors. When a
 * sanitizer is supplied, the declaration must additionally pass through it
 * unchanged and without sanitization errors.
 *
 * @param PropertySanitizer|null $declarationSanitizer Check declarations against this Sanitizer
 * @return Matcher
 */
public function cssDeclaration( ?PropertySanitizer $declarationSanitizer = null ) {
	$anythingPlus = new AnythingMatcher( [ 'quantifier' => '+' ] );

	return new CheckedMatcher(
		$anythingPlus,
		static function ( ComponentValueList $list, GrammarMatch $match, array $options )
			use ( $declarationSanitizer )
		{
			// Re-parse just the matched values as a standalone declaration.
			$cvlist = new ComponentValueList( $match->getValues() );
			$parser = Parser::newFromTokens( $cvlist->toTokenArray() );
			$declaration = $parser->parseDeclaration();
			if ( !$declaration || $parser->getParseErrors() ) {
				return false;
			}
			if ( !$declarationSanitizer ) {
				// No sanitizer: being syntactically a declaration is enough.
				return true;
			}
			// NOTE(review): $reset looks like a scope guard that restores the
			// sanitizer's previous error list when released - confirm against
			// stashSanitizationErrors().
			$reset = $declarationSanitizer->stashSanitizationErrors();
			$ret = $declarationSanitizer->sanitize( $declaration );
			$errors = $declarationSanitizer->getSanitizationErrors();
			unset( $reset );
			// Valid only if the sanitizer returned the very same declaration
			// object and reported nothing.
			return $ret === $declaration && !$errors;
		}
	);
}
|
||||
|
||||
/**
 * Matcher for one easing function as defined by CSS Easing Functions Level 1
 *
 * Accepts the easing keywords as well as the steps() and cubic-bezier()
 * functional notations.
 *
 * @see https://www.w3.org/TR/2019/CR-css-easing-1-20190430/#typedef-easing-function
 * @return Matcher
 */
public function cssSingleEasingFunction() {
	// Built once per factory instance, then served from the cache.
	if ( isset( $this->cache[__METHOD__] ) ) {
		return $this->cache[__METHOD__];
	}

	$keywords = new KeywordMatcher( [
		'ease', 'linear', 'ease-in', 'ease-out', 'ease-in-out', 'step-start', 'step-end'
	] );

	// steps(): an integer, optionally followed by a step position keyword.
	$stepsArgs = new Juxtaposition( [
		$this->integer(),
		Quantifier::optional( new KeywordMatcher( [
			'jump-start', 'jump-end', 'jump-none', 'jump-both', 'start', 'end'
		] ) ),
	], true );
	$steps = new FunctionMatcher( 'steps', $stepsArgs );

	// cubic-bezier(): exactly four numbers.
	$cubicBezier = new FunctionMatcher( 'cubic-bezier', Quantifier::hash( $this->number(), 4, 4 ) );

	$matcher = new Alternative( [ $keywords, $steps, $cubicBezier ] );
	$this->cache[__METHOD__] = $matcher;

	return $matcher;
}
|
||||
|
||||
/**
|
||||
* @name CSS Selectors Level 3
|
||||
* @{
|
||||
*
|
||||
* https://www.w3.org/TR/2011/REC-css3-selectors-20110929/#w3cselgrammar
|
||||
* https://www.w3.org/TR/2018/REC-selectors-3-20181106/#w3cselgrammar
|
||||
*/
|
||||
|
||||
/**
|
||||
* List of selectors
|
||||
* List of selectors (selectors_group)
|
||||
*
|
||||
* selector [ COMMA S* selector ]*
|
||||
*
|
||||
|
|
@ -862,7 +1057,7 @@ class MatcherFactory {
|
|||
}
|
||||
|
||||
/**
|
||||
* A single selector
|
||||
* A single selector (selector)
|
||||
*
|
||||
* simple_selector_sequence [ combinator simple_selector_sequence ]*
|
||||
*
|
||||
|
|
@ -886,7 +1081,7 @@ class MatcherFactory {
|
|||
}
|
||||
|
||||
/**
|
||||
* A CSS combinator
|
||||
* A CSS combinator (combinator)
|
||||
*
|
||||
* PLUS S* | GREATER S* | TILDE S* | S+
|
||||
*
|
||||
|
|
@ -910,7 +1105,7 @@ class MatcherFactory {
|
|||
}
|
||||
|
||||
/**
|
||||
* A simple selector sequence
|
||||
* A simple selector sequence (simple_selector_sequence)
|
||||
*
|
||||
* [ type_selector | universal ]
|
||||
* [ HASH | class | attrib | pseudo | negation ]*
|
||||
|
|
@ -952,7 +1147,7 @@ class MatcherFactory {
|
|||
}
|
||||
|
||||
/**
|
||||
* A type selector (i.e. a tag name)
|
||||
* A type selector, i.e. a tag name (type_selector)
|
||||
*
|
||||
* [ namespace_prefix ] ? element_name
|
||||
*
|
||||
|
|
@ -974,7 +1169,7 @@ class MatcherFactory {
|
|||
}
|
||||
|
||||
/**
|
||||
* A namespace prefix
|
||||
* A namespace prefix (namespace_prefix)
|
||||
*
|
||||
* [ IDENT | '*' ]? '|'
|
||||
*
|
||||
|
|
@ -1010,7 +1205,7 @@ class MatcherFactory {
|
|||
}
|
||||
|
||||
/**
|
||||
* The universal selector
|
||||
* The universal selector (universal)
|
||||
*
|
||||
* [ namespace_prefix ]? '*'
|
||||
*
|
||||
|
|
@ -1036,7 +1231,7 @@ class MatcherFactory {
|
|||
*/
|
||||
public function cssID() {
|
||||
if ( !isset( $this->cache[__METHOD__] ) ) {
|
||||
$this->cache[__METHOD__] = new TokenMatcher( Token::T_HASH, function ( Token $t ) {
|
||||
$this->cache[__METHOD__] = new TokenMatcher( Token::T_HASH, static function ( Token $t ) {
|
||||
return $t->typeFlag() === 'id';
|
||||
} );
|
||||
$this->cache[__METHOD__]->setDefaultOptions( [ 'skip-whitespace' => false ] );
|
||||
|
|
@ -1045,7 +1240,7 @@ class MatcherFactory {
|
|||
}
|
||||
|
||||
/**
|
||||
* A class selector
|
||||
* A class selector (class)
|
||||
*
|
||||
* '.' IDENT
|
||||
*
|
||||
|
|
@ -1063,7 +1258,7 @@ class MatcherFactory {
|
|||
}
|
||||
|
||||
/**
|
||||
* An attribute selector
|
||||
* An attribute selector (attrib)
|
||||
*
|
||||
* '[' S* [ namespace_prefix ]? IDENT S*
|
||||
* [ [ PREFIXMATCH |
|
||||
|
|
@ -1094,14 +1289,12 @@ class MatcherFactory {
|
|||
] )->capture( 'attribute' ),
|
||||
$this->optionalWhitespace(),
|
||||
Quantifier::optional( new Juxtaposition( [
|
||||
Alternative::create( [
|
||||
new TokenMatcher( Token::T_PREFIX_MATCH ),
|
||||
new TokenMatcher( Token::T_SUFFIX_MATCH ),
|
||||
new TokenMatcher( Token::T_SUBSTRING_MATCH ),
|
||||
// Sigh. They removed various tokens from CSS Syntax 3, but didn't update the grammar
|
||||
// in CSS Selectors 3. Wing it with a hint from CSS Selectors 4's <attr-matcher>
|
||||
( new Juxtaposition( [
|
||||
Quantifier::optional( new DelimMatcher( [ '^', '$', '*', '~', '|' ] ) ),
|
||||
new DelimMatcher( [ '=' ] ),
|
||||
new TokenMatcher( Token::T_INCLUDE_MATCH ),
|
||||
new TokenMatcher( Token::T_DASH_MATCH ),
|
||||
] )->capture( 'test' ),
|
||||
] ) )->capture( 'test' ),
|
||||
$this->optionalWhitespace(),
|
||||
Alternative::create( [
|
||||
$this->ident(),
|
||||
|
|
@ -1117,14 +1310,18 @@ class MatcherFactory {
|
|||
}
|
||||
|
||||
/**
|
||||
* A pseudo-class or pseudo-element
|
||||
* A pseudo-class or pseudo-element (pseudo)
|
||||
*
|
||||
* ':' ':'? [ IDENT | functional_pseudo ]
|
||||
*
|
||||
* Where functional_pseudo is
|
||||
*
|
||||
* FUNCTION S* expression ')'
|
||||
*
|
||||
* Although this actually only matches the pseudo-selectors defined in the
|
||||
* following sources:
|
||||
* - https://www.w3.org/TR/2011/REC-css3-selectors-20110929/#pseudo-classes
|
||||
* - https://www.w3.org/TR/2016/WD-css-pseudo-4-20160607/
|
||||
* - https://www.w3.org/TR/2018/REC-selectors-3-20181106/#pseudo-classes
|
||||
* - https://www.w3.org/TR/2019/WD-css-pseudo-4-20190225/
|
||||
*
|
||||
* @return Matcher
|
||||
*/
|
||||
|
|
@ -1156,7 +1353,7 @@ class MatcherFactory {
|
|||
$colon,
|
||||
new KeywordMatcher( [
|
||||
'first-line', 'first-letter', 'before', 'after', 'selection', 'inactive-selection',
|
||||
'spelling-error', 'grammar-error', 'placeholder'
|
||||
'spelling-error', 'grammar-error', 'marker', 'placeholder'
|
||||
] ),
|
||||
] ),
|
||||
] );
|
||||
|
|
@ -1168,44 +1365,43 @@ class MatcherFactory {
|
|||
/**
|
||||
* An "AN+B" form
|
||||
*
|
||||
* https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#anb
|
||||
* https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#anb-microsyntax
|
||||
*
|
||||
* @return Matcher
|
||||
*/
|
||||
public function cssANplusB() {
|
||||
if ( !isset( $this->cache[__METHOD__] ) ) {
|
||||
// Quoth the spec:
|
||||
// > The An+B notation was originally defined using a slightly
|
||||
// > different tokenizer than the rest of CSS, resulting in a
|
||||
// > somewhat odd definition when expressed in terms of CSS tokens.
|
||||
// > The An+B notation was originally defined using a slightly
|
||||
// > different tokenizer than the rest of CSS, resulting in a
|
||||
// > somewhat odd definition when expressed in terms of CSS tokens.
|
||||
// That's a bit of an understatement
|
||||
|
||||
$plus = new DelimMatcher( [ '+' ] );
|
||||
$plusQ = Quantifier::optional( new DelimMatcher( [ '+' ] ) );
|
||||
$n = new KeywordMatcher( [ 'n' ] );
|
||||
$dashN = new KeywordMatcher( [ '-n' ] );
|
||||
$nDash = new KeywordMatcher( [ 'n-' ] );
|
||||
$plusQN = new Juxtaposition( [ $plusQ, $n ] );
|
||||
$plusQNDash = new Juxtaposition( [ $plusQ, $nDash ] );
|
||||
$nDimension = new TokenMatcher( Token::T_DIMENSION, function ( Token $t ) {
|
||||
$nDimension = new TokenMatcher( Token::T_DIMENSION, static function ( Token $t ) {
|
||||
return $t->typeFlag() === 'integer' && !strcasecmp( $t->unit(), 'n' );
|
||||
} );
|
||||
$nDashDimension = new TokenMatcher( Token::T_DIMENSION, function ( Token $t ) {
|
||||
$nDashDimension = new TokenMatcher( Token::T_DIMENSION, static function ( Token $t ) {
|
||||
return $t->typeFlag() === 'integer' && !strcasecmp( $t->unit(), 'n-' );
|
||||
} );
|
||||
$nDashDigitDimension = new TokenMatcher( Token::T_DIMENSION, function ( Token $t ) {
|
||||
$nDashDigitDimension = new TokenMatcher( Token::T_DIMENSION, static function ( Token $t ) {
|
||||
return $t->typeFlag() === 'integer' && preg_match( '/^n-\d+$/i', $t->unit() );
|
||||
} );
|
||||
$nDashDigitIdent = new TokenMatcher( Token::T_IDENT, function ( Token $t ) {
|
||||
$nDashDigitIdent = new TokenMatcher( Token::T_IDENT, static function ( Token $t ) {
|
||||
return preg_match( '/^n-\d+$/i', $t->value() );
|
||||
} );
|
||||
$dashNDashDigitIdent = new TokenMatcher( Token::T_IDENT, function ( Token $t ) {
|
||||
$dashNDashDigitIdent = new TokenMatcher( Token::T_IDENT, static function ( Token $t ) {
|
||||
return preg_match( '/^-n-\d+$/i', $t->value() );
|
||||
} );
|
||||
$signedInt = new TokenMatcher( Token::T_NUMBER, function ( Token $t ) {
|
||||
$signedInt = new TokenMatcher( Token::T_NUMBER, static function ( Token $t ) {
|
||||
return $t->typeFlag() === 'integer' && preg_match( '/^[+-]/', $t->representation() );
|
||||
} );
|
||||
$signlessInt = new TokenMatcher( Token::T_NUMBER, function ( Token $t ) {
|
||||
$signlessInt = new TokenMatcher( Token::T_NUMBER, static function ( Token $t ) {
|
||||
return $t->typeFlag() === 'integer' && preg_match( '/^\d/', $t->representation() );
|
||||
} );
|
||||
$plusOrMinus = new DelimMatcher( [ '+', '-' ] );
|
||||
|
|
@ -1213,7 +1409,7 @@ class MatcherFactory {
|
|||
|
||||
$this->cache[__METHOD__] = new Alternative( [
|
||||
new KeywordMatcher( [ 'odd', 'even' ] ),
|
||||
new TokenMatcher( Token::T_NUMBER, function ( Token $t ) {
|
||||
new TokenMatcher( Token::T_NUMBER, static function ( Token $t ) {
|
||||
return $t->typeFlag() === 'integer';
|
||||
} ),
|
||||
$nDimension,
|
||||
|
|
@ -1238,7 +1434,7 @@ class MatcherFactory {
|
|||
}
|
||||
|
||||
/**
|
||||
* A negation
|
||||
* A negation (negation)
|
||||
*
|
||||
* ':' not( S* [ type_selector | universal | HASH | class | attrib | pseudo ] S* ')'
|
||||
*
|
||||
|
|
@ -1272,7 +1468,7 @@ class MatcherFactory {
|
|||
return $this->cache[__METHOD__];
|
||||
}
|
||||
|
||||
/**@}*/
|
||||
/** @} */
|
||||
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -14,8 +14,9 @@ use Wikimedia\CSS\Objects\Token;
|
|||
*/
|
||||
class NoWhitespace extends Matcher {
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
$cv = isset( $values[$start-1] ) ? $values[$start-1] : null;
|
||||
$cv = $values[$start - 1] ?? null;
|
||||
if ( !$cv instanceof Token || $cv->type() !== Token::T_WHITESPACE ) {
|
||||
yield $this->makeMatch( $values, $start, $start );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,8 +9,8 @@ namespace Wikimedia\CSS\Grammar;
|
|||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
|
||||
/**
|
||||
* Matcher that requires its sub-Matcher has only non-empty matches ("!" multipier)
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#mult-req
|
||||
* Matcher that requires its sub-Matcher has only non-empty matches ("!" multiplier)
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#mult-req
|
||||
*/
|
||||
class NonEmpty extends Matcher {
|
||||
/** @var Matcher */
|
||||
|
|
@ -23,6 +23,7 @@ class NonEmpty extends Matcher {
|
|||
$this->matcher = $matcher;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
foreach ( $this->matcher->generateMatches( $values, $start, $options ) as $match ) {
|
||||
if ( $match->getLength() !== 0 ) {
|
||||
|
|
|
|||
|
|
@ -6,14 +6,15 @@
|
|||
|
||||
namespace Wikimedia\CSS\Grammar;
|
||||
|
||||
use Wikimedia\CSS\Objects\ComponentValue;
|
||||
use EmptyIterator;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
|
||||
/**
|
||||
* Matcher that matches nothing
|
||||
*/
|
||||
class NothingMatcher extends Matcher {
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
return new \EmptyIterator;
|
||||
return new EmptyIterator;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,20 +6,25 @@
|
|||
|
||||
namespace Wikimedia\CSS\Grammar;
|
||||
|
||||
use Iterator;
|
||||
use UnexpectedValueException;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\Token;
|
||||
|
||||
/**
|
||||
* Matcher that matches a sub-Matcher a certain number of times
|
||||
* ("?", "*", "+", "#", "{A,B}" multipliers)
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#component-multipliers
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#component-multipliers
|
||||
*/
|
||||
class Quantifier extends Matcher {
|
||||
/** @var Matcher */
|
||||
protected $matcher;
|
||||
|
||||
/** @var int */
|
||||
protected $min, $max;
|
||||
protected $min;
|
||||
|
||||
/** @var int */
|
||||
protected $max;
|
||||
|
||||
/** @var bool Whether matches are comma-separated */
|
||||
protected $commas;
|
||||
|
|
@ -39,7 +44,7 @@ class Quantifier extends Matcher {
|
|||
|
||||
/**
|
||||
* Implements "?": 0 or 1 matches
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#mult-opt
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#mult-opt
|
||||
* @param Matcher $matcher
|
||||
* @return static
|
||||
*/
|
||||
|
|
@ -49,7 +54,7 @@ class Quantifier extends Matcher {
|
|||
|
||||
/**
|
||||
* Implements "*": 0 or more matches
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#mult-zero-plus
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#mult-zero-plus
|
||||
* @param Matcher $matcher
|
||||
* @return static
|
||||
*/
|
||||
|
|
@ -59,7 +64,7 @@ class Quantifier extends Matcher {
|
|||
|
||||
/**
|
||||
* Implements "+": 1 or more matches
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#mult-one-plus
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#mult-one-plus
|
||||
* @param Matcher $matcher
|
||||
* @return static
|
||||
*/
|
||||
|
|
@ -69,7 +74,7 @@ class Quantifier extends Matcher {
|
|||
|
||||
/**
|
||||
* Implements "{A,B}": Between A and B matches
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#mult-num-range
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#mult-num-range
|
||||
* @param Matcher $matcher
|
||||
* @param int|float $min Minimum number of matches
|
||||
* @param int|float $max Maximum number of matches
|
||||
|
|
@ -81,7 +86,7 @@ class Quantifier extends Matcher {
|
|||
|
||||
/**
|
||||
* Implements "#" and "#{A,B}": Between A and B matches, comma-separated
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#mult-comma
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#mult-comma
|
||||
* @param Matcher $matcher
|
||||
* @param int|float $min Minimum number of matches
|
||||
* @param int|float $max Maximum number of matches
|
||||
|
|
@ -91,17 +96,21 @@ class Quantifier extends Matcher {
|
|||
return new static( $matcher, $min, $max, true );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
$used = [];
|
||||
|
||||
// Maintain a stack of matches for backtracking purposes.
|
||||
$stack = [
|
||||
[ new Match( $values, $start, 0 ), $this->matcher->generateMatches( $values, $start, $options ) ]
|
||||
[
|
||||
new GrammarMatch( $values, $start, 0 ),
|
||||
$this->matcher->generateMatches( $values, $start, $options )
|
||||
]
|
||||
];
|
||||
do {
|
||||
/** @var $lastMatch Match */
|
||||
/** @var $iter \Iterator<Match> */
|
||||
list( $lastMatch, $iter ) = $stack[count( $stack ) - 1];
|
||||
/** @var $lastMatch GrammarMatch */
|
||||
/** @var $iter Iterator<GrammarMatch> */
|
||||
[ $lastMatch, $iter ] = $stack[count( $stack ) - 1];
|
||||
|
||||
// If the top of the stack has no more matches, pop it, maybe
|
||||
// yield the last matched position, and loop.
|
||||
|
|
@ -126,7 +135,7 @@ class Quantifier extends Matcher {
|
|||
|
||||
// Quantifiers don't work well when the quantified thing can be empty.
|
||||
if ( $match->getLength() === 0 ) {
|
||||
throw new \UnexpectedValueException( 'Empty match in quantifier!' );
|
||||
throw new UnexpectedValueException( 'Empty match in quantifier!' );
|
||||
}
|
||||
|
||||
$nextFrom = $match->getNext();
|
||||
|
|
@ -136,17 +145,19 @@ class Quantifier extends Matcher {
|
|||
$canBeMore = count( $stack ) < $this->max;
|
||||
|
||||
// Commas are slightly tricky:
|
||||
// 1. If there is a following comma, start the next Matcher after it.
|
||||
// 2. If not, there can't be any more Matchers following.
|
||||
// 1. If there is a following comma, start the next Matcher after it.
|
||||
// 2. If not, there can't be any more Matchers following.
|
||||
// And in either case optional whitespace is always allowed.
|
||||
if ( $this->commas ) {
|
||||
$n = $nextFrom;
|
||||
if ( isset( $values[$n] ) && $values[$n] instanceof Token &&
|
||||
// @phan-suppress-next-line PhanNonClassMethodCall False positive
|
||||
$values[$n]->type() === Token::T_WHITESPACE
|
||||
) {
|
||||
$n = $this->next( $values, $n, [ 'skip-whitespace' => true ] + $options );
|
||||
}
|
||||
if ( isset( $values[$n] ) && $values[$n] instanceof Token &&
|
||||
// @phan-suppress-next-line PhanNonClassMethodCall False positive
|
||||
$values[$n]->type() === Token::T_COMMA
|
||||
) {
|
||||
$nextFrom = $this->next( $values, $n, [ 'skip-whitespace' => true ] + $options );
|
||||
|
|
@ -156,7 +167,7 @@ class Quantifier extends Matcher {
|
|||
}
|
||||
|
||||
// If there can be more matches, push another one onto the stack
|
||||
// and try it. Otherwise yield and continue with the current match.
|
||||
// and try it. Otherwise, yield and continue with the current match.
|
||||
if ( $canBeMore ) {
|
||||
$stack[] = [ $match, $this->matcher->generateMatches( $values, $nextFrom, $options ) ];
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ use Wikimedia\CSS\Objects\Token;
|
|||
|
||||
/**
|
||||
* Matcher that matches a token of a particular type
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#component-types
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#component-types
|
||||
*/
|
||||
class TokenMatcher extends Matcher {
|
||||
/** @var string One of the Token::T_* constants */
|
||||
|
|
@ -30,8 +30,9 @@ class TokenMatcher extends Matcher {
|
|||
$this->callback = $callback;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
$cv = isset( $values[$start] ) ? $values[$start] : null;
|
||||
$cv = $values[$start] ?? null;
|
||||
if ( $cv instanceof Token && $cv->type() === $this->type &&
|
||||
( !$this->callback || call_user_func( $this->callback, $cv ) )
|
||||
) {
|
||||
|
|
|
|||
|
|
@ -6,12 +6,15 @@
|
|||
|
||||
namespace Wikimedia\CSS\Grammar;
|
||||
|
||||
use ArrayIterator;
|
||||
use EmptyIterator;
|
||||
use Iterator;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Util;
|
||||
|
||||
/**
|
||||
* Matcher that groups other matchers without ordering ("&&" and "||" combiners)
|
||||
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#component-combinators
|
||||
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#component-combinators
|
||||
*/
|
||||
class UnorderedGroup extends Matcher {
|
||||
/** @var Matcher[] */
|
||||
|
|
@ -48,6 +51,7 @@ class UnorderedGroup extends Matcher {
|
|||
return new static( $matchers, false );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
$used = [];
|
||||
|
||||
|
|
@ -55,20 +59,20 @@ class UnorderedGroup extends Matcher {
|
|||
// of remaining matchers.
|
||||
$stack = [
|
||||
[
|
||||
new Match( $values, $start, 0 ),
|
||||
new GrammarMatch( $values, $start, 0 ),
|
||||
$this->matchers,
|
||||
new \ArrayIterator( $this->matchers ),
|
||||
new ArrayIterator( $this->matchers ),
|
||||
null,
|
||||
new \EmptyIterator
|
||||
new EmptyIterator
|
||||
]
|
||||
];
|
||||
do {
|
||||
/** @var $lastMatch Match */
|
||||
/** @var $lastMatch GrammarMatch */
|
||||
/** @var $matchers Matcher[] */
|
||||
/** @var $matcherIter \Iterator<Matcher> */
|
||||
/** @var $matcherIter Iterator<Matcher> */
|
||||
/** @var $curMatcher Matcher|null */
|
||||
/** @var $iter \Iterator<Match> */
|
||||
list( $lastMatch, $matchers, $matcherIter, $curMatcher, $iter ) = $stack[count( $stack ) - 1];
|
||||
/** @var $iter Iterator<GrammarMatch> */
|
||||
[ $lastMatch, $matchers, $matcherIter, $curMatcher, $iter ] = $stack[count( $stack ) - 1];
|
||||
|
||||
// If the top of the stack has more matches, process the next one.
|
||||
if ( $iter->valid() ) {
|
||||
|
|
@ -76,9 +80,9 @@ class UnorderedGroup extends Matcher {
|
|||
$iter->next();
|
||||
|
||||
// If we have unused matchers to try after this one, do so.
|
||||
// Otherwise yield and continue with the current one.
|
||||
// Otherwise, yield and continue with the current one.
|
||||
if ( $matchers ) {
|
||||
$stack[] = [ $match, $matchers, new \ArrayIterator( $matchers ), null, new \EmptyIterator ];
|
||||
$stack[] = [ $match, $matchers, new ArrayIterator( $matchers ), null, new EmptyIterator ];
|
||||
} else {
|
||||
$newMatch = $this->makeMatch( $values, $start, $match->getNext(), $match, $stack );
|
||||
$mid = $newMatch->getUniqueID();
|
||||
|
|
@ -91,7 +95,7 @@ class UnorderedGroup extends Matcher {
|
|||
}
|
||||
|
||||
// We ran out of matches for the current top of the stack. Pop it,
|
||||
// and put $curMatcher back into $matchers so it can be tried again
|
||||
// and put $curMatcher back into $matchers, so it can be tried again
|
||||
// at a later position.
|
||||
array_pop( $stack );
|
||||
if ( $curMatcher ) {
|
||||
|
|
@ -109,14 +113,12 @@ class UnorderedGroup extends Matcher {
|
|||
unset( $matchers[$matcherIter->key()] );
|
||||
$iter = $curMatcher->generateMatches( $values, $fromPos, $options );
|
||||
$stack[] = [ $lastMatch, $matchers, $matcherIter, $curMatcher, $iter ];
|
||||
} else {
|
||||
if ( $stack && !$this->all ) {
|
||||
$newMatch = $this->makeMatch( $values, $start, $fromPos, $lastMatch, $stack );
|
||||
$mid = $newMatch->getUniqueID();
|
||||
if ( !isset( $used[$mid] ) ) {
|
||||
$used[$mid] = 1;
|
||||
yield $newMatch;
|
||||
}
|
||||
} elseif ( $stack && !$this->all ) {
|
||||
$newMatch = $this->makeMatch( $values, $start, $fromPos, $lastMatch, $stack );
|
||||
$mid = $newMatch->getUniqueID();
|
||||
if ( !isset( $used[$mid] ) ) {
|
||||
$used[$mid] = 1;
|
||||
yield $newMatch;
|
||||
}
|
||||
}
|
||||
} while ( $stack );
|
||||
|
|
|
|||
98
lib/css-sanitizer/Wikimedia/CSS/Grammar/UrangeMatcher.php
Normal file
98
lib/css-sanitizer/Wikimedia/CSS/Grammar/UrangeMatcher.php
Normal file
|
|
@ -0,0 +1,98 @@
|
|||
<?php
|
||||
/**
|
||||
* @file
|
||||
* @license https://opensource.org/licenses/Apache-2.0 Apache-2.0
|
||||
*/
|
||||
|
||||
namespace Wikimedia\CSS\Grammar;
|
||||
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\Token;
|
||||
|
||||
/**
|
||||
* Match the special "<urange>" notation
|
||||
*
|
||||
* If this matcher is marked for capturing, its matches will have submatches
|
||||
* "start" and "end" holding T_NUMBER tokens representing the starting and
|
||||
* ending codepoints in the range.
|
||||
*
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#urange
|
||||
*/
|
||||
class UrangeMatcher extends Matcher {
|
||||
/** @var Matcher Syntax matcher */
|
||||
private $matcher;
|
||||
|
||||
public function __construct() {
|
||||
$u = new KeywordMatcher( [ 'u' ] );
|
||||
$plus = new DelimMatcher( [ '+' ] );
|
||||
$ident = new TokenMatcher( Token::T_IDENT );
|
||||
$number = new TokenMatcher( Token::T_NUMBER );
|
||||
$dimension = new TokenMatcher( Token::T_DIMENSION );
|
||||
$q = new DelimMatcher( [ '?' ] );
|
||||
$qs = Quantifier::count( $q, 0, 6 );
|
||||
|
||||
// This matches a lot of things; we post-process in generateMatches() to limit it to
|
||||
// only what's actually supposed to be accepted.
|
||||
$this->matcher = new Alternative( [
|
||||
new Juxtaposition( [ $u, $plus, $ident, $qs ] ),
|
||||
new Juxtaposition( [ $u, $number, $dimension ] ),
|
||||
new Juxtaposition( [ $u, $number, $number ] ),
|
||||
new Juxtaposition( [ $u, $dimension, $qs ] ),
|
||||
new Juxtaposition( [ $u, $number, $qs ] ),
|
||||
new Juxtaposition( [ $u, $plus, Quantifier::count( $q, 1, 6 ) ] ),
|
||||
] );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
foreach ( $this->matcher->generateMatches( $values, $start, $options ) as $match ) {
|
||||
// <urange> is basically defined as a series of tokens that happens to have a certain string
|
||||
// representation. So stringify and regex it to see if it actually matches.
|
||||
$v = trim( $match->__toString(), "\n\t " );
|
||||
// Strip interpolated comments
|
||||
$v = strtr( $v, [ '/**/' => '' ] );
|
||||
$l = strlen( $v );
|
||||
if ( preg_match( '/^u\+([0-9a-f]{1,6})-([0-9a-f]{1,6})$/iD', $v, $m ) ) {
|
||||
$ustart = intval( $m[1], 16 );
|
||||
$uend = intval( $m[2], 16 );
|
||||
} elseif ( $l > 2 && $l <= 8 && preg_match( '/^u\+([0-9a-f]*\?*)$/iD', $v, $m ) ) {
|
||||
$ustart = intval( strtr( $m[1], [ '?' => '0' ] ), 16 );
|
||||
$uend = intval( strtr( $m[1], [ '?' => 'f' ] ), 16 );
|
||||
} else {
|
||||
continue;
|
||||
}
|
||||
if ( $ustart >= 0 && $ustart <= $uend && $uend <= 0x10ffff ) {
|
||||
$len = $match->getNext() - $start;
|
||||
$matches = [];
|
||||
if ( $this->captureName !== null ) {
|
||||
$tstart = new Token( Token::T_NUMBER, [ 'value' => $ustart, 'typeFlag' => 'integer' ] );
|
||||
$tend = new Token( Token::T_NUMBER, [ 'value' => $uend, 'typeFlag' => 'integer' ] );
|
||||
$matches = [
|
||||
new GrammarMatch(
|
||||
new ComponentValueList( $tstart->toComponentValueArray() ),
|
||||
0,
|
||||
1,
|
||||
'start',
|
||||
[]
|
||||
),
|
||||
new GrammarMatch(
|
||||
new ComponentValueList( $tend->toComponentValueArray() ),
|
||||
0,
|
||||
1,
|
||||
'end',
|
||||
[]
|
||||
),
|
||||
];
|
||||
}
|
||||
|
||||
// Mark the 'U' T_IDENT beginning a <urange>, to later avoid
|
||||
// serializing it with extraneous comments.
|
||||
// @see Wikimedia\CSS\Util::stringify()
|
||||
// @phan-suppress-next-line PhanNonClassMethodCall False positive
|
||||
$values[$start]->urangeHack( $len );
|
||||
|
||||
yield new GrammarMatch( $values, $start, $len, $this->captureName, $matches );
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -6,12 +6,13 @@
|
|||
|
||||
namespace Wikimedia\CSS\Grammar;
|
||||
|
||||
use InvalidArgumentException;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\CSSFunction;
|
||||
use Wikimedia\CSS\Objects\Token;
|
||||
|
||||
/**
|
||||
* Matcher that matches a CSSFunction for a url or a T_URL token
|
||||
* Matcher that matches a CSSFunction for a URL or a T_URL token
|
||||
*/
|
||||
class UrlMatcher extends FunctionMatcher {
|
||||
/** @var callable|null */
|
||||
|
|
@ -28,7 +29,7 @@ class UrlMatcher extends FunctionMatcher {
|
|||
if ( isset( $options['modifierMatcher'] ) ) {
|
||||
$modifierMatcher = $options['modifierMatcher'];
|
||||
if ( !$modifierMatcher instanceof Matcher ) {
|
||||
throw new \InvalidArgumentException( 'modifierMatcher must be a Matcher' );
|
||||
throw new InvalidArgumentException( 'modifierMatcher must be a Matcher' );
|
||||
}
|
||||
} else {
|
||||
$modifierMatcher = new NothingMatcher;
|
||||
|
|
@ -54,13 +55,14 @@ class UrlMatcher extends FunctionMatcher {
|
|||
] );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
// First, is it a URL token?
|
||||
$cv = isset( $values[$start] ) ? $values[$start] : null;
|
||||
$cv = $values[$start] ?? null;
|
||||
if ( $cv instanceof Token && $cv->type() === Token::T_URL ) {
|
||||
$url = $cv->value();
|
||||
if ( !$this->urlCheck || call_user_func( $this->urlCheck, $url, [] ) ) {
|
||||
$match = new Match( $values, $start, 1, 'url' );
|
||||
$match = new GrammarMatch( $values, $start, 1, 'url' );
|
||||
yield $this->makeMatch( $values, $start, $this->next( $values, $start, $options ), $match );
|
||||
}
|
||||
return;
|
||||
|
|
@ -73,12 +75,12 @@ class UrlMatcher extends FunctionMatcher {
|
|||
$modifiers = [];
|
||||
foreach ( $match->getCapturedMatches() as $submatch ) {
|
||||
$cvs = $submatch->getValues();
|
||||
if ( $submatch->getName() === 'url' ) {
|
||||
if ( $cvs[0] instanceof Token && $submatch->getName() === 'url' ) {
|
||||
$url = $cvs[0]->value();
|
||||
} elseif ( $submatch->getName() === 'modifier' ) {
|
||||
if ( $cvs[0] instanceof CSSFunction ) {
|
||||
$modifiers[] = $cvs[0];
|
||||
} elseif ( $cvs[0]->type() === Token::T_IDENT ) {
|
||||
} elseif ( $cvs[0] instanceof Token && $cvs[0]->type() === Token::T_IDENT ) {
|
||||
$modifiers[] = $cvs[0];
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -26,9 +26,11 @@ class WhitespaceMatcher extends Matcher {
|
|||
$this->significant = !empty( $options['significant'] );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
|
||||
$end = $start;
|
||||
while ( isset( $values[$end] ) &&
|
||||
// @phan-suppress-next-line PhanNonClassMethodCall False positive
|
||||
$values[$end] instanceof Token && $values[$end]->type() === Token::T_WHITESPACE
|
||||
) {
|
||||
$end++;
|
||||
|
|
@ -46,6 +48,7 @@ class WhitespaceMatcher extends Matcher {
|
|||
if ( $end === $start ) {
|
||||
$start--;
|
||||
if ( !$options['skip-whitespace'] || !isset( $values[$start] ) ||
|
||||
// @phan-suppress-next-line PhanNonClassMethodCall False positive
|
||||
!$values[$start] instanceof Token || $values[$start]->type() !== Token::T_WHITESPACE
|
||||
) {
|
||||
return;
|
||||
|
|
@ -54,7 +57,7 @@ class WhitespaceMatcher extends Matcher {
|
|||
|
||||
// Return the match. Include a 'significantWhitespace' capture.
|
||||
yield $this->makeMatch( $values, $start, $end,
|
||||
new Match( $values, $start, 1, 'significantWhitespace' )
|
||||
new GrammarMatch( $values, $start, 1, 'significantWhitespace' )
|
||||
);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
namespace Wikimedia\CSS\Objects;
|
||||
|
||||
use InvalidArgumentException;
|
||||
use Wikimedia\CSS\Util;
|
||||
|
||||
/**
|
||||
|
|
@ -27,7 +28,7 @@ class AtRule extends Rule implements DeclarationOrAtRule {
|
|||
*/
|
||||
public function __construct( Token $token ) {
|
||||
if ( $token->type() !== Token::T_AT_KEYWORD ) {
|
||||
throw new \InvalidArgumentException(
|
||||
throw new InvalidArgumentException(
|
||||
"At rule must begin with an at-keyword token, got {$token->type()}"
|
||||
);
|
||||
}
|
||||
|
|
@ -38,9 +39,9 @@ class AtRule extends Rule implements DeclarationOrAtRule {
|
|||
}
|
||||
|
||||
public function __clone() {
|
||||
$this->prelude = clone( $this->prelude );
|
||||
$this->prelude = clone $this->prelude;
|
||||
if ( $this->block ) {
|
||||
$this->block = clone( $this->block );
|
||||
$this->block = clone $this->block;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -83,13 +84,14 @@ class AtRule extends Rule implements DeclarationOrAtRule {
|
|||
*/
|
||||
public function setBlock( SimpleBlock $block = null ) {
|
||||
if ( $block->getStartTokenType() !== Token::T_LEFT_BRACE ) {
|
||||
throw new \InvalidArgumentException( 'At-rule block must be delimited by {}' );
|
||||
throw new InvalidArgumentException( 'At-rule block must be delimited by {}' );
|
||||
}
|
||||
$this->block = $block;
|
||||
}
|
||||
|
||||
/**
|
||||
* @param string $function Function to call, toTokenArray() or toComponentValueArray()
|
||||
* @return Token[]|ComponentValue[]
|
||||
*/
|
||||
private function toTokenOrCVArray( $function ) {
|
||||
$ret = [];
|
||||
|
|
@ -97,7 +99,7 @@ class AtRule extends Rule implements DeclarationOrAtRule {
|
|||
$ret[] = new Token(
|
||||
Token::T_AT_KEYWORD, [ 'value' => $this->name, 'position' => [ $this->line, $this->pos ] ]
|
||||
);
|
||||
// Manually looping and appending turns out to be noticably faster than array_merge.
|
||||
// Manually looping and appending turns out to be noticeably faster than array_merge.
|
||||
foreach ( $this->prelude->$function() as $v ) {
|
||||
$ret[] = $v;
|
||||
}
|
||||
|
|
@ -112,10 +114,12 @@ class AtRule extends Rule implements DeclarationOrAtRule {
|
|||
return $ret;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function toTokenArray() {
|
||||
return $this->toTokenOrCVArray( __FUNCTION__ );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function toComponentValueArray() {
|
||||
return $this->toTokenOrCVArray( __FUNCTION__ );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
namespace Wikimedia\CSS\Objects;
|
||||
|
||||
use InvalidArgumentException;
|
||||
use Wikimedia\CSS\Util;
|
||||
|
||||
/**
|
||||
|
|
@ -24,18 +25,18 @@ class CSSFunction extends ComponentValue {
|
|||
*/
|
||||
public function __construct( Token $token ) {
|
||||
if ( $token->type() !== Token::T_FUNCTION ) {
|
||||
throw new \InvalidArgumentException(
|
||||
throw new InvalidArgumentException(
|
||||
"CSS function must begin with a function token, got {$token->type()}"
|
||||
);
|
||||
}
|
||||
|
||||
list( $this->line, $this->pos ) = $token->getPosition();
|
||||
[ $this->line, $this->pos ] = $token->getPosition();
|
||||
$this->name = $token->value();
|
||||
$this->value = new ComponentValueList();
|
||||
}
|
||||
|
||||
public function __clone() {
|
||||
$this->value = clone( $this->value );
|
||||
$this->value = clone $this->value;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -48,7 +49,7 @@ class CSSFunction extends ComponentValue {
|
|||
}
|
||||
|
||||
/**
|
||||
* Return the functions's name
|
||||
* Return the function's name
|
||||
* @return string
|
||||
*/
|
||||
public function getName() {
|
||||
|
|
@ -74,7 +75,7 @@ class CSSFunction extends ComponentValue {
|
|||
Token::T_FUNCTION,
|
||||
[ 'value' => $this->name, 'position' => [ $this->line, $this->pos ] ]
|
||||
);
|
||||
// Manually looping and appending turns out to be noticably faster than array_merge.
|
||||
// Manually looping and appending turns out to be noticeably faster than array_merge.
|
||||
foreach ( $this->value->toTokenArray() as $v ) {
|
||||
$ret[] = $v;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,12 +6,17 @@
|
|||
|
||||
namespace Wikimedia\CSS\Objects;
|
||||
|
||||
use ArrayAccess;
|
||||
use Countable;
|
||||
use InvalidArgumentException;
|
||||
use OutOfBoundsException;
|
||||
use SeekableIterator;
|
||||
use Wikimedia\CSS\Util;
|
||||
|
||||
/**
|
||||
* Represent a list of CSS objects
|
||||
*/
|
||||
class CSSObjectList implements \Countable, \SeekableIterator, \ArrayAccess, CSSObject {
|
||||
class CSSObjectList implements Countable, SeekableIterator, ArrayAccess, CSSObject {
|
||||
|
||||
/** @var string The specific class of object contained */
|
||||
protected static $objectType;
|
||||
|
|
@ -41,7 +46,7 @@ class CSSObjectList implements \Countable, \SeekableIterator, \ArrayAccess, CSSO
|
|||
/**
|
||||
* Insert one or more objects into the list
|
||||
* @param CSSObject|CSSObject[]|CSSObjectList $objects An object to add, or an array of objects.
|
||||
* @param int $index Insert the objects at this index. If omitted, the
|
||||
* @param int|null $index Insert the objects at this index. If omitted, the
|
||||
* objects are added at the end.
|
||||
*/
|
||||
public function add( $objects, $index = null ) {
|
||||
|
|
@ -53,7 +58,7 @@ class CSSObjectList implements \Countable, \SeekableIterator, \ArrayAccess, CSSO
|
|||
static::testObjects( $objects );
|
||||
} else {
|
||||
if ( !$objects instanceof static::$objectType ) {
|
||||
throw new \InvalidArgumentException(
|
||||
throw new InvalidArgumentException(
|
||||
static::class . ' may only contain instances of ' . static::$objectType . '.'
|
||||
);
|
||||
}
|
||||
|
|
@ -64,7 +69,7 @@ class CSSObjectList implements \Countable, \SeekableIterator, \ArrayAccess, CSSO
|
|||
if ( $index === null ) {
|
||||
$index = count( $this->objects );
|
||||
} elseif ( $index < 0 || $index > count( $this->objects ) ) {
|
||||
throw new \OutOfBoundsException( 'Index is out of range.' );
|
||||
throw new OutOfBoundsException( 'Index is out of range.' );
|
||||
}
|
||||
|
||||
array_splice( $this->objects, $index, 0, $objects );
|
||||
|
|
@ -80,7 +85,7 @@ class CSSObjectList implements \Countable, \SeekableIterator, \ArrayAccess, CSSO
|
|||
*/
|
||||
public function remove( $index ) {
|
||||
if ( $index < 0 || $index >= count( $this->objects ) ) {
|
||||
throw new \OutOfBoundsException( 'Index is out of range.' );
|
||||
throw new OutOfBoundsException( 'Index is out of range.' );
|
||||
}
|
||||
$ret = $this->objects[$index];
|
||||
array_splice( $this->objects, $index, 1 );
|
||||
|
|
@ -111,88 +116,101 @@ class CSSObjectList implements \Countable, \SeekableIterator, \ArrayAccess, CSSO
|
|||
$this->offset = 0;
|
||||
}
|
||||
|
||||
// \Countable interface
|
||||
// Countable interface
|
||||
|
||||
public function count() {
|
||||
/** @inheritDoc */
|
||||
public function count(): int {
|
||||
return count( $this->objects );
|
||||
}
|
||||
|
||||
// \SeekableIterator interface
|
||||
// SeekableIterator interface
|
||||
|
||||
public function seek( $offset ) {
|
||||
/** @inheritDoc */
|
||||
public function seek( int $offset ): void {
|
||||
if ( $offset < 0 || $offset >= count( $this->objects ) ) {
|
||||
throw new \OutOfBoundsException( 'Offset is out of range.' );
|
||||
throw new OutOfBoundsException( 'Offset is out of range.' );
|
||||
}
|
||||
$this->offset = $offset;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
#[\ReturnTypeWillChange]
|
||||
public function current() {
|
||||
return isset( $this->objects[$this->offset] ) ? $this->objects[$this->offset] : null;
|
||||
return $this->objects[$this->offset] ?? null;
|
||||
}
|
||||
|
||||
public function key() {
|
||||
/** @inheritDoc */
|
||||
public function key(): int {
|
||||
return $this->offset;
|
||||
}
|
||||
|
||||
public function next() {
|
||||
/** @inheritDoc */
|
||||
public function next(): void {
|
||||
$this->offset++;
|
||||
}
|
||||
|
||||
public function rewind() {
|
||||
/** @inheritDoc */
|
||||
public function rewind(): void {
|
||||
$this->offset = 0;
|
||||
}
|
||||
|
||||
public function valid() {
|
||||
/** @inheritDoc */
|
||||
public function valid(): bool {
|
||||
return isset( $this->objects[$this->offset] );
|
||||
}
|
||||
|
||||
// \ArrayAccess interface
|
||||
// ArrayAccess interface
|
||||
|
||||
public function offsetExists( $offset ) {
|
||||
/** @inheritDoc */
|
||||
public function offsetExists( $offset ): bool {
|
||||
return isset( $this->objects[$offset] );
|
||||
}
|
||||
|
||||
/**
 * Fetch the object stored at the given list index (ArrayAccess).
 *
 * @param mixed $offset Integer-valued index; numeric strings and floats are
 *  accepted as long as they have no fractional part.
 * @return CSSObject
 * @throws InvalidArgumentException If $offset is not integer-valued
 * @throws OutOfBoundsException If $offset is negative or not below the list length
 */
public function offsetGet( $offset ): CSSObject {
	if ( !is_numeric( $offset ) || (float)(int)$offset !== (float)$offset ) {
		throw new InvalidArgumentException( 'Offset must be an integer.' );
	}
	// Readable indexes are 0 .. count - 1. An index equal to count() has no
	// element behind it, so it must be rejected here rather than falling
	// through to an undefined-element read that cannot satisfy the declared
	// CSSObject return type.
	if ( $offset < 0 || $offset >= count( $this->objects ) ) {
		throw new OutOfBoundsException( 'Offset is out of range.' );
	}
	return $this->objects[$offset];
}
|
||||
|
||||
public function offsetSet( $offset, $value ) {
|
||||
/** @inheritDoc */
|
||||
public function offsetSet( $offset, $value ): void {
|
||||
if ( !$value instanceof static::$objectType ) {
|
||||
throw new \InvalidArgumentException(
|
||||
throw new InvalidArgumentException(
|
||||
static::class . ' may only contain instances of ' . static::$objectType . '.'
|
||||
);
|
||||
}
|
||||
static::testObjects( [ $value ] );
|
||||
if ( !is_numeric( $offset ) || (float)(int)$offset !== (float)$offset ) {
|
||||
throw new \InvalidArgumentException( 'Offset must be an integer.' );
|
||||
throw new InvalidArgumentException( 'Offset must be an integer.' );
|
||||
}
|
||||
if ( $offset < 0 || $offset > count( $this->objects ) ) {
|
||||
throw new \OutOfBoundsException( 'Offset is out of range.' );
|
||||
throw new OutOfBoundsException( 'Offset is out of range.' );
|
||||
}
|
||||
$this->objects[$offset] = $value;
|
||||
}
|
||||
|
||||
/**
 * Remove the element at the given index (ArrayAccess).
 *
 * Only the last element may be removed this way, so that the list never
 * ends up with a gap in its indexes. Unsetting an index that does not
 * exist is a no-op.
 *
 * @param mixed $offset Index to remove
 * @throws OutOfBoundsException If $offset refers to an existing element that is not the last one
 */
public function offsetUnset( $offset ): void {
	if ( !isset( $this->objects[$offset] ) ) {
		// Nothing stored under this offset, so there is nothing to remove.
		return;
	}
	// isset() succeeded, so PHP's array-key casting already mapped $offset onto
	// an integer key. Compare as int so that integer-valued strings or floats
	// (e.g. '2') are not mistaken for "not the last index" by the strict check.
	if ( (int)$offset !== count( $this->objects ) - 1 ) {
		throw new OutOfBoundsException( 'Cannot leave holes in the list.' );
	}
	unset( $this->objects[$offset] );
}
|
||||
|
||||
// CSSObject interface
|
||||
|
||||
/** @inheritDoc */
|
||||
public function getPosition() {
|
||||
$ret = null;
|
||||
foreach ( $this->objects as $obj ) {
|
||||
$pos = $obj->getPosition();
|
||||
if ( $pos[0] >= 0 && (
|
||||
!$ret || $pos[0] < $ret[0] || $pos[0] === $ret[0] && $pos[1] < $ret[1]
|
||||
!$ret || $pos[0] < $ret[0] || ( $pos[0] === $ret[0] && $pos[1] < $ret[1] )
|
||||
) ) {
|
||||
$ret = $pos;
|
||||
}
|
||||
|
|
@ -212,27 +230,31 @@ class CSSObjectList implements \Countable, \SeekableIterator, \ArrayAccess, CSSO
|
|||
|
||||
/**
|
||||
* @param string $function Function to call, toTokenArray() or toComponentValueArray()
|
||||
* @return Token[]|ComponentValue[]
|
||||
*/
|
||||
private function toTokenOrCVArray( $function ) {
|
||||
$ret = [];
|
||||
$l = count( $this->objects );
|
||||
for ( $i = 0; $i < $l; $i++ ) {
|
||||
// Manually looping and appending turns out to be noticably faster than array_merge.
|
||||
foreach ( $this->objects[$i]->$function() as $v ) {
|
||||
foreach ( $this->objects as $i => $iValue ) {
|
||||
// Manually looping and appending turns out to be noticeably faster than array_merge.
|
||||
foreach ( $iValue->$function() as $v ) {
|
||||
$ret[] = $v;
|
||||
}
|
||||
$sep = $this->getSeparator( $this->objects[$i], $i + 1 < $l ? $this->objects[$i + 1] : null );
|
||||
$sep = $this->getSeparator( $iValue, $i + 1 < $l ? $this->objects[$i + 1] : null );
|
||||
foreach ( $sep as $v ) {
|
||||
$ret[] = $v;
|
||||
}
|
||||
}
|
||||
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function toTokenArray() {
|
||||
return $this->toTokenOrCVArray( __FUNCTION__ );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function toComponentValueArray() {
|
||||
return $this->toTokenOrCVArray( __FUNCTION__ );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,8 +11,11 @@ namespace Wikimedia\CSS\Objects;
|
|||
*/
|
||||
abstract class ComponentValue implements CSSObject {
|
||||
|
||||
/** @var int Line and position in the input where this component value starts */
|
||||
protected $line = -1, $pos = -1;
|
||||
/** @var int Line in the input where this component value starts */
|
||||
protected $line = -1;
|
||||
|
||||
/** @var int Position in the input where this component value starts */
|
||||
protected $pos = -1;
|
||||
|
||||
/**
|
||||
* Get the position of this ComponentValue in the input stream
|
||||
|
|
@ -22,6 +25,7 @@ abstract class ComponentValue implements CSSObject {
|
|||
return [ $this->line, $this->pos ];
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function toComponentValueArray() {
|
||||
return [ $this ];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,12 +6,18 @@
|
|||
|
||||
namespace Wikimedia\CSS\Objects;
|
||||
|
||||
use InvalidArgumentException;
|
||||
|
||||
/**
|
||||
* Represent a list of CSS component values
|
||||
*/
|
||||
class ComponentValueList extends CSSObjectList {
|
||||
/**
|
||||
* @var string
|
||||
*/
|
||||
protected static $objectType = ComponentValue::class;
|
||||
|
||||
/** @inheritDoc */
|
||||
protected static function testObjects( array $objects ) {
|
||||
foreach ( $objects as $object ) {
|
||||
$type = $object instanceof Token ? $object->type() : 'n/a';
|
||||
|
|
@ -20,15 +26,16 @@ class ComponentValueList extends CSSObjectList {
|
|||
case Token::T_LEFT_BRACKET:
|
||||
case Token::T_LEFT_PAREN:
|
||||
case Token::T_LEFT_BRACE:
|
||||
throw new \InvalidArgumentException(
|
||||
throw new InvalidArgumentException(
|
||||
static::class . " may not contain tokens of type \"$type\"."
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Much simpler
|
||||
/** @inheritDoc */
|
||||
public function toComponentValueArray() {
|
||||
// Much simpler
|
||||
return $this->objects;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
namespace Wikimedia\CSS\Objects;
|
||||
|
||||
use InvalidArgumentException;
|
||||
use Wikimedia\CSS\Util;
|
||||
|
||||
/**
|
||||
|
|
@ -13,8 +14,11 @@ use Wikimedia\CSS\Util;
|
|||
*/
|
||||
class Declaration implements DeclarationOrAtRule {
|
||||
|
||||
/** @var int Line and position in the input where this declaration starts */
|
||||
protected $line = -1, $pos = -1;
|
||||
/** @var int Line in the input where this declaration starts */
|
||||
protected $line = -1;
|
||||
|
||||
/** @var int Position in the input where this declaration starts */
|
||||
protected $pos = -1;
|
||||
|
||||
/** @var string */
|
||||
protected $name;
|
||||
|
|
@ -30,18 +34,18 @@ class Declaration implements DeclarationOrAtRule {
|
|||
*/
|
||||
public function __construct( Token $token ) {
|
||||
if ( $token->type() !== Token::T_IDENT ) {
|
||||
throw new \InvalidArgumentException(
|
||||
throw new InvalidArgumentException(
|
||||
"Declaration must begin with an ident token, got {$token->type()}"
|
||||
);
|
||||
}
|
||||
|
||||
list( $this->line, $this->pos ) = $token->getPosition();
|
||||
[ $this->line, $this->pos ] = $token->getPosition();
|
||||
$this->name = $token->value();
|
||||
$this->value = new ComponentValueList();
|
||||
}
|
||||
|
||||
public function __clone() {
|
||||
$this->value = clone( $this->value );
|
||||
$this->value = clone $this->value;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -86,6 +90,7 @@ class Declaration implements DeclarationOrAtRule {
|
|||
|
||||
/**
|
||||
* @param string $function Function to call, toTokenArray() or toComponentValueArray()
|
||||
* @return Token[]|ComponentValue[]
|
||||
*/
|
||||
private function toTokenOrCVArray( $function ) {
|
||||
$ret = [];
|
||||
|
|
@ -95,7 +100,7 @@ class Declaration implements DeclarationOrAtRule {
|
|||
[ 'value' => $this->name, 'position' => [ $this->line, $this->pos ] ]
|
||||
);
|
||||
$ret[] = $v = new Token( Token::T_COLON );
|
||||
// Manually looping and appending turns out to be noticably faster than array_merge.
|
||||
// Manually looping and appending turns out to be noticeably faster than array_merge.
|
||||
foreach ( $this->value->$function() as $v ) {
|
||||
$ret[] = $v;
|
||||
}
|
||||
|
|
@ -109,10 +114,12 @@ class Declaration implements DeclarationOrAtRule {
|
|||
return $ret;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function toTokenArray() {
|
||||
return $this->toTokenOrCVArray( __FUNCTION__ );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function toComponentValueArray() {
|
||||
return $this->toTokenOrCVArray( __FUNCTION__ );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,16 +10,20 @@ namespace Wikimedia\CSS\Objects;
|
|||
* Represent a list of declarations
|
||||
*/
|
||||
class DeclarationList extends CSSObjectList {
|
||||
/**
|
||||
* @var string
|
||||
*/
|
||||
protected static $objectType = Declaration::class;
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function getSeparator( CSSObject $left, CSSObject $right = null ) {
|
||||
if ( $right ) {
|
||||
return [
|
||||
new Token( Token::T_SEMICOLON ),
|
||||
new Token( Token::T_WHITESPACE, [ 'significant' => false ] ),
|
||||
];
|
||||
} else {
|
||||
return [ new Token( Token::T_SEMICOLON, [ 'significant' => false ] ) ];
|
||||
}
|
||||
|
||||
return [ new Token( Token::T_SEMICOLON, [ 'significant' => false ] ) ];
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,8 +10,12 @@ namespace Wikimedia\CSS\Objects;
|
|||
* Represent a list of CSS declarations and at-rules
|
||||
*/
|
||||
class DeclarationOrAtRuleList extends CSSObjectList {
|
||||
/**
|
||||
* @var string
|
||||
*/
|
||||
protected static $objectType = DeclarationOrAtRule::class;
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function getSeparator( CSSObject $left, CSSObject $right = null ) {
|
||||
$ret = [];
|
||||
if ( $left instanceof Declaration ) {
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
namespace Wikimedia\CSS\Objects;
|
||||
|
||||
use InvalidArgumentException;
|
||||
use Wikimedia\CSS\Util;
|
||||
|
||||
/**
|
||||
|
|
@ -19,6 +20,7 @@ class QualifiedRule extends Rule {
|
|||
/** @var SimpleBlock */
|
||||
protected $block;
|
||||
|
||||
/** @inheritDoc */
|
||||
public function __construct( Token $token = null ) {
|
||||
parent::__construct( $token ?: new Token( Token::T_EOF ) );
|
||||
$this->prelude = new ComponentValueList();
|
||||
|
|
@ -26,8 +28,8 @@ class QualifiedRule extends Rule {
|
|||
}
|
||||
|
||||
public function __clone() {
|
||||
$this->prelude = clone( $this->prelude );
|
||||
$this->block = clone( $this->block );
|
||||
$this->prelude = clone $this->prelude;
|
||||
$this->block = clone $this->block;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -48,22 +50,23 @@ class QualifiedRule extends Rule {
|
|||
|
||||
/**
 * Set the block
 *
 * The parameter is declared nullable, but null is not a usable value: the
 * block is dereferenced here and cloned unconditionally in __clone(). Null
 * is therefore rejected with the same exception as a wrongly delimited
 * block, instead of failing with a fatal "member function on null" error.
 *
 * @param SimpleBlock|null $block
 * @throws InvalidArgumentException If $block is null or is not delimited by {}
 */
public function setBlock( SimpleBlock $block = null ) {
	if ( $block === null || $block->getStartTokenType() !== Token::T_LEFT_BRACE ) {
		throw new InvalidArgumentException( 'Qualified rule block must be delimited by {}' );
	}
	$this->block = $block;
}
|
||||
|
||||
/**
|
||||
* @param string $function Function to call, toTokenArray() or toComponentValueArray()
|
||||
* @return Token[]|ComponentValue[]
|
||||
*/
|
||||
private function toTokenOrCVArray( $function ) {
|
||||
$ret = [];
|
||||
|
||||
// Manually looping and appending turns out to be noticably faster than array_merge.
|
||||
// Manually looping and appending turns out to be noticeably faster than array_merge.
|
||||
foreach ( $this->prelude->$function() as $v ) {
|
||||
$ret[] = $v;
|
||||
}
|
||||
|
|
@ -73,10 +76,12 @@ class QualifiedRule extends Rule {
|
|||
return $ret;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function toTokenArray() {
|
||||
return $this->toTokenOrCVArray( __FUNCTION__ );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function toComponentValueArray() {
|
||||
return $this->toTokenOrCVArray( __FUNCTION__ );
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,21 +6,22 @@
|
|||
|
||||
namespace Wikimedia\CSS\Objects;
|
||||
|
||||
use Wikimedia\CSS\Util;
|
||||
|
||||
/**
|
||||
* Represent an abstract CSS rule
|
||||
*/
|
||||
abstract class Rule implements CSSObject {
|
||||
|
||||
/** @var int Line and position in the input where this rule starts */
|
||||
protected $line = -1, $pos = -1;
|
||||
/** @var int Line in the input where this rule starts */
|
||||
protected $line = -1;
|
||||
|
||||
/** @var int Position in the input where this rule starts */
|
||||
protected $pos = -1;
|
||||
|
||||
/**
|
||||
* @param Token $token Token starting the rule
|
||||
*/
|
||||
public function __construct( Token $token ) {
|
||||
list( $this->line, $this->pos ) = $token->getPosition();
|
||||
[ $this->line, $this->pos ] = $token->getPosition();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
|||
|
|
@ -10,8 +10,12 @@ namespace Wikimedia\CSS\Objects;
|
|||
* Represent a list of CSS rules
|
||||
*/
|
||||
class RuleList extends CSSObjectList {
|
||||
/**
|
||||
* @var string
|
||||
*/
|
||||
protected static $objectType = Rule::class;
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function getSeparator( CSSObject $left, CSSObject $right = null ) {
|
||||
return $right ? [ new Token( Token::T_WHITESPACE, [ 'significant' => false ] ) ] : [];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
namespace Wikimedia\CSS\Objects;
|
||||
|
||||
use InvalidArgumentException;
|
||||
use Wikimedia\CSS\Util;
|
||||
|
||||
/**
|
||||
|
|
@ -14,7 +15,10 @@ use Wikimedia\CSS\Util;
|
|||
class SimpleBlock extends ComponentValue {
|
||||
|
||||
/** @var string */
|
||||
protected $startTokenType, $endTokenType;
|
||||
protected $startTokenType;
|
||||
|
||||
/** @var string */
|
||||
protected $endTokenType;
|
||||
|
||||
/** @var ComponentValueList */
|
||||
protected $value;
|
||||
|
|
@ -25,18 +29,18 @@ class SimpleBlock extends ComponentValue {
|
|||
public function __construct( Token $token ) {
|
||||
$this->endTokenType = static::matchingDelimiter( $token->type() );
|
||||
if ( $this->endTokenType === null ) {
|
||||
throw new \InvalidArgumentException(
|
||||
throw new InvalidArgumentException(
|
||||
'A SimpleBlock is delimited by either {}, [], or ().'
|
||||
);
|
||||
}
|
||||
|
||||
list( $this->line, $this->pos ) = $token->getPosition();
|
||||
[ $this->line, $this->pos ] = $token->getPosition();
|
||||
$this->startTokenType = $token->type();
|
||||
$this->value = new ComponentValueList();
|
||||
}
|
||||
|
||||
public function __clone() {
|
||||
$this->value = clone( $this->value );
|
||||
$this->value = clone $this->value;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -51,7 +55,7 @@ class SimpleBlock extends ComponentValue {
|
|||
|
||||
/**
|
||||
* Return the ending delimiter for a starting delimiter
|
||||
* @param string Token::T_* constant
|
||||
* @param string $delim Token::T_* constant
|
||||
* @return string|null Matching Token::T_* constant, if any
|
||||
*/
|
||||
public static function matchingDelimiter( $delim ) {
|
||||
|
|
@ -91,12 +95,13 @@ class SimpleBlock extends ComponentValue {
|
|||
return $this->value;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function toTokenArray() {
|
||||
$ret = [
|
||||
new Token( $this->startTokenType, [ 'position' => [ $this->line, $this->pos ] ] ),
|
||||
];
|
||||
|
||||
// Manually looping and appending turns out to be noticably faster than array_merge.
|
||||
// Manually looping and appending turns out to be noticeably faster than array_merge.
|
||||
$tokens = $this->value->toTokenArray();
|
||||
if ( $tokens && $this->startTokenType === Token::T_LEFT_BRACE ) {
|
||||
if ( $tokens[0]->type() !== Token::T_WHITESPACE ) {
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@
|
|||
namespace Wikimedia\CSS\Objects;
|
||||
|
||||
use Wikimedia\CSS\Util;
|
||||
use Wikimedia\CSS\Sanitizer\Sanitizer;
|
||||
|
||||
/**
|
||||
* Represent a stylesheet
|
||||
|
|
@ -23,14 +22,14 @@ class Stylesheet implements CSSObject {
|
|||
protected $ruleList;
|
||||
|
||||
/**
|
||||
* @param RuleList $rules
|
||||
* @param RuleList|null $rules
|
||||
*/
|
||||
public function __construct( RuleList $rules = null ) {
|
||||
$this->ruleList = $rules ?: new RuleList();
|
||||
}
|
||||
|
||||
public function __clone() {
|
||||
$this->ruleList = clone( $this->ruleList );
|
||||
$this->ruleList = clone $this->ruleList;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -40,15 +39,18 @@ class Stylesheet implements CSSObject {
|
|||
return $this->ruleList;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function getPosition() {
|
||||
// Stylesheets don't really have a position
|
||||
return [ 0, 0 ];
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function toTokenArray() {
|
||||
return $this->ruleList->toTokenArray();
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function toComponentValueArray() {
|
||||
return $this->ruleList->toComponentValueArray();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,42 +6,38 @@
|
|||
|
||||
namespace Wikimedia\CSS\Objects;
|
||||
|
||||
use InvalidArgumentException;
|
||||
use UnexpectedValueException;
|
||||
|
||||
/**
|
||||
* Represent a CSS token
|
||||
*/
|
||||
class Token extends ComponentValue {
|
||||
const T_IDENT = "ident";
|
||||
const T_FUNCTION = "function";
|
||||
const T_AT_KEYWORD = "at-keyword";
|
||||
const T_HASH = "hash";
|
||||
const T_STRING = "string";
|
||||
const T_BAD_STRING = "bad-string";
|
||||
const T_URL = "url";
|
||||
const T_BAD_URL = "bad-url";
|
||||
const T_DELIM = "delim";
|
||||
const T_NUMBER = "number";
|
||||
const T_PERCENTAGE = "percentage";
|
||||
const T_DIMENSION = "dimension";
|
||||
const T_UNICODE_RANGE = "unicode-range";
|
||||
const T_INCLUDE_MATCH = "include-match";
|
||||
const T_DASH_MATCH = "dash-match";
|
||||
const T_PREFIX_MATCH = "prefix-match";
|
||||
const T_SUFFIX_MATCH = "suffix-match";
|
||||
const T_SUBSTRING_MATCH = "substring-match";
|
||||
const T_COLUMN = "column";
|
||||
const T_WHITESPACE = "whitespace";
|
||||
const T_CDO = "CDO";
|
||||
const T_CDC = "CDC";
|
||||
const T_COLON = "colon";
|
||||
const T_SEMICOLON = "semicolon";
|
||||
const T_COMMA = "comma";
|
||||
const T_LEFT_BRACKET = "[";
|
||||
const T_RIGHT_BRACKET = "]";
|
||||
const T_LEFT_PAREN = "(";
|
||||
const T_RIGHT_PAREN = ")";
|
||||
const T_LEFT_BRACE = "{";
|
||||
const T_RIGHT_BRACE = "}";
|
||||
const T_EOF = "EOF";
|
||||
public const T_IDENT = "ident";
|
||||
public const T_FUNCTION = "function";
|
||||
public const T_AT_KEYWORD = "at-keyword";
|
||||
public const T_HASH = "hash";
|
||||
public const T_STRING = "string";
|
||||
public const T_BAD_STRING = "bad-string";
|
||||
public const T_URL = "url";
|
||||
public const T_BAD_URL = "bad-url";
|
||||
public const T_DELIM = "delim";
|
||||
public const T_NUMBER = "number";
|
||||
public const T_PERCENTAGE = "percentage";
|
||||
public const T_DIMENSION = "dimension";
|
||||
public const T_WHITESPACE = "whitespace";
|
||||
public const T_CDO = "CDO";
|
||||
public const T_CDC = "CDC";
|
||||
public const T_COLON = "colon";
|
||||
public const T_SEMICOLON = "semicolon";
|
||||
public const T_COMMA = "comma";
|
||||
public const T_LEFT_BRACKET = "[";
|
||||
public const T_RIGHT_BRACKET = "]";
|
||||
public const T_LEFT_PAREN = "(";
|
||||
public const T_RIGHT_PAREN = ")";
|
||||
public const T_LEFT_BRACE = "{";
|
||||
public const T_RIGHT_BRACE = "}";
|
||||
public const T_EOF = "EOF";
|
||||
|
||||
/** @var string One of the T_* constants */
|
||||
protected $type;
|
||||
|
|
@ -58,12 +54,12 @@ class Token extends ComponentValue {
|
|||
/** @var string Unit for dimension tokens */
|
||||
protected $unit = '';
|
||||
|
||||
/** @var int Start and end for unicode-range tokens */
|
||||
protected $start = 0, $end = 0;
|
||||
|
||||
/** @var bool Whether this token is considered "significant" */
|
||||
protected $significant = true;
|
||||
|
||||
/** @var int See ::urangeHack() */
|
||||
private $urangeHack = 0;
|
||||
|
||||
/**
|
||||
* @param string $type One of the T_* constants
|
||||
* @param string|array $value Value of the token, or an array with the
|
||||
|
|
@ -78,8 +74,6 @@ class Token extends ComponentValue {
|
|||
* - representation: (string) String representation of the value for
|
||||
* T_NUMBER, T_PERCENTAGE, and T_DIMENSION.
|
||||
* - unit: (string) Unit for T_DIMENSION.
|
||||
* - start: (int) Start code point for T_UNICODE_RANGE.
|
||||
* - end: (int) End code point for T_UNICODE_RANGE.
|
||||
* - significant: (bool) Whether the token is considered "significant"
|
||||
*/
|
||||
public function __construct( $type, $value = [] ) {
|
||||
|
|
@ -89,11 +83,11 @@ class Token extends ComponentValue {
|
|||
|
||||
if ( isset( $value['position'] ) ) {
|
||||
if ( !is_array( $value['position'] ) || count( $value['position'] ) !== 2 ) {
|
||||
throw new \InvalidArgumentException( 'Position must be an array of two integers' );
|
||||
throw new InvalidArgumentException( 'Position must be an array of two integers' );
|
||||
}
|
||||
list( $this->line, $this->pos ) = $value['position'];
|
||||
[ $this->line, $this->pos ] = $value['position'];
|
||||
if ( !is_int( $this->line ) || !is_int( $this->pos ) ) {
|
||||
throw new \InvalidArgumentException( 'Position must be an array of two integers' );
|
||||
throw new InvalidArgumentException( 'Position must be an array of two integers' );
|
||||
}
|
||||
}
|
||||
if ( isset( $value['significant'] ) ) {
|
||||
|
|
@ -108,20 +102,20 @@ class Token extends ComponentValue {
|
|||
case self::T_STRING:
|
||||
case self::T_URL:
|
||||
if ( !isset( $value['value'] ) ) {
|
||||
throw new \InvalidArgumentException( "Token type $this->type requires a value" );
|
||||
throw new InvalidArgumentException( "Token type $this->type requires a value" );
|
||||
}
|
||||
$this->value = (string)$value['value'];
|
||||
break;
|
||||
|
||||
case self::T_HASH:
|
||||
if ( !isset( $value['value'] ) ) {
|
||||
throw new \InvalidArgumentException( "Token type $this->type requires a value" );
|
||||
throw new InvalidArgumentException( "Token type $this->type requires a value" );
|
||||
}
|
||||
if ( !isset( $value['typeFlag'] ) ) {
|
||||
throw new \InvalidArgumentException( "Token type $this->type requires a typeFlag" );
|
||||
throw new InvalidArgumentException( "Token type $this->type requires a typeFlag" );
|
||||
}
|
||||
if ( !in_array( $value['typeFlag'], [ 'id', 'unrestricted' ], true ) ) {
|
||||
throw new \InvalidArgumentException( "Invalid type flag for Token type $this->type" );
|
||||
throw new InvalidArgumentException( "Invalid type flag for Token type $this->type" );
|
||||
}
|
||||
$this->value = (string)$value['value'];
|
||||
$this->typeFlag = $value['typeFlag'];
|
||||
|
|
@ -129,11 +123,11 @@ class Token extends ComponentValue {
|
|||
|
||||
case self::T_DELIM:
|
||||
if ( !isset( $value['value'] ) ) {
|
||||
throw new \InvalidArgumentException( "Token type $this->type requires a value" );
|
||||
throw new InvalidArgumentException( "Token type $this->type requires a value" );
|
||||
}
|
||||
$this->value = (string)$value['value'];
|
||||
if ( mb_strlen( $this->value, 'UTF-8' ) !== 1 ) {
|
||||
throw new \InvalidArgumentException(
|
||||
throw new InvalidArgumentException(
|
||||
"Value for Token type $this->type must be a single character"
|
||||
);
|
||||
}
|
||||
|
|
@ -145,32 +139,32 @@ class Token extends ComponentValue {
|
|||
if ( !isset( $value['value'] ) ||
|
||||
!is_numeric( $value['value'] ) || !is_finite( $value['value'] )
|
||||
) {
|
||||
throw new \InvalidArgumentException( "Token type $this->type requires a numeric value" );
|
||||
throw new InvalidArgumentException( "Token type $this->type requires a numeric value" );
|
||||
}
|
||||
if ( !isset( $value['typeFlag'] ) ) {
|
||||
throw new \InvalidArgumentException( "Token type $this->type requires a typeFlag" );
|
||||
throw new InvalidArgumentException( "Token type $this->type requires a typeFlag" );
|
||||
}
|
||||
$this->typeFlag = $value['typeFlag'];
|
||||
if ( $this->typeFlag === 'integer' ) {
|
||||
$this->value = (int)$value['value'];
|
||||
if ( (float)$this->value !== (float)$value['value'] ) {
|
||||
throw new \InvalidArgumentException(
|
||||
throw new InvalidArgumentException(
|
||||
"typeFlag is 'integer', but value supplied is not an integer"
|
||||
);
|
||||
}
|
||||
} elseif ( $this->typeFlag === 'number' ) {
|
||||
$this->value = (float)$value['value'];
|
||||
} else {
|
||||
throw new \InvalidArgumentException( "Invalid type flag for Token type $this->type" );
|
||||
throw new InvalidArgumentException( "Invalid type flag for Token type $this->type" );
|
||||
}
|
||||
|
||||
if ( isset( $value['representation'] ) ) {
|
||||
if ( !is_numeric( $value['representation'] ) ) {
|
||||
throw new \InvalidArgumentException( 'Representation must be numeric' );
|
||||
throw new InvalidArgumentException( 'Representation must be numeric' );
|
||||
}
|
||||
$this->representation = $value['representation'];
|
||||
if ( (float)$this->representation !== (float)$this->value ) {
|
||||
throw new \InvalidArgumentException(
|
||||
throw new InvalidArgumentException(
|
||||
"Representation \"$this->representation\" does not match value \"$this->value\""
|
||||
);
|
||||
}
|
||||
|
|
@ -178,36 +172,14 @@ class Token extends ComponentValue {
|
|||
|
||||
if ( $type === self::T_DIMENSION ) {
|
||||
if ( !isset( $value['unit'] ) ) {
|
||||
throw new \InvalidArgumentException( "Token type $this->type requires a unit" );
|
||||
throw new InvalidArgumentException( "Token type $this->type requires a unit" );
|
||||
}
|
||||
$this->unit = $value['unit'];
|
||||
}
|
||||
break;
|
||||
|
||||
case self::T_UNICODE_RANGE:
|
||||
if ( !isset( $value['start'] ) || !is_int( $value['start'] ) ) {
|
||||
throw new \InvalidArgumentException(
|
||||
"Token type $this->type requires a starting code point as an integer"
|
||||
);
|
||||
}
|
||||
$this->start = $value['start'];
|
||||
if ( !isset( $value['end'] ) ) {
|
||||
$this->end = $this->start;
|
||||
} elseif ( !is_int( $value['end'] ) ) {
|
||||
throw new \InvalidArgumentException( 'Ending code point must be an integer' );
|
||||
} else {
|
||||
$this->end = $value['end'];
|
||||
}
|
||||
break;
|
||||
|
||||
case self::T_BAD_STRING:
|
||||
case self::T_BAD_URL:
|
||||
case self::T_INCLUDE_MATCH:
|
||||
case self::T_DASH_MATCH:
|
||||
case self::T_PREFIX_MATCH:
|
||||
case self::T_SUFFIX_MATCH:
|
||||
case self::T_SUBSTRING_MATCH:
|
||||
case self::T_COLUMN:
|
||||
case self::T_WHITESPACE:
|
||||
case self::T_CDO:
|
||||
case self::T_CDC:
|
||||
|
|
@ -228,13 +200,13 @@ class Token extends ComponentValue {
|
|||
if ( isset( $value['typeFlag'] ) && $value['typeFlag'] !== '' ) {
|
||||
$this->typeFlag = $value['typeFlag'];
|
||||
if ( $this->typeFlag !== 'recursion-depth-exceeded' ) {
|
||||
throw new \InvalidArgumentException( "Invalid type flag for Token type $this->type" );
|
||||
throw new InvalidArgumentException( "Invalid type flag for Token type $this->type" );
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
throw new \InvalidArgumentException( "Unknown token type \"$this->type\"." );
|
||||
throw new InvalidArgumentException( "Unknown token type \"$this->type\"." );
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -278,14 +250,6 @@ class Token extends ComponentValue {
|
|||
return $this->unit;
|
||||
}
|
||||
|
||||
/**
|
||||
* Get the unicode range for this T_UNICODE_RANGE token
|
||||
* @return array [ int $start, int $end ]
|
||||
*/
|
||||
public function range() {
|
||||
return [ $this->start, $this->end ];
|
||||
}
|
||||
|
||||
/**
|
||||
* Whether this token is considered "significant"
|
||||
*
|
||||
|
|
@ -309,22 +273,24 @@ class Token extends ComponentValue {
|
|||
if ( $significant === $this->significant ) {
|
||||
return $this;
|
||||
}
|
||||
$ret = clone( $this );
|
||||
$ret = clone $this;
|
||||
$ret->significant = $significant;
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function toTokenArray() {
|
||||
return [ $this ];
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function toComponentValueArray() {
|
||||
switch ( $this->type ) {
|
||||
case self::T_FUNCTION:
|
||||
case self::T_LEFT_BRACKET:
|
||||
case self::T_LEFT_PAREN:
|
||||
case self::T_LEFT_BRACE:
|
||||
throw new \UnexpectedValueException(
|
||||
throw new UnexpectedValueException(
|
||||
"Token type \"$this->type\" is not valid in a ComponentValueList."
|
||||
);
|
||||
|
||||
|
|
@ -341,20 +307,54 @@ class Token extends ComponentValue {
|
|||
private static function escapeIdent( $s ) {
|
||||
return preg_replace_callback(
|
||||
'/
|
||||
[^a-zA-Z0-9_\-\x{80}-\x{10ffff}] # Characters that are never allowed
|
||||
| (?:^|(?<=^-))[0-9] # Digits are not allowed at the start of an identifier
|
||||
| (?<=^-)- # Two dashes are not allowed at the start of an identifier
|
||||
[^a-zA-Z0-9_\-\x{80}-\x{10ffff}] # Characters that are never allowed
|
||||
| (?:^|(?<=^-))[0-9] # Digits are not allowed at the start of an identifier
|
||||
| [\p{Z}\p{Cc}\p{Cf}\p{Co}\p{Cs}] # To be safe, control characters and whitespace
|
||||
/ux',
|
||||
function ( $m ) {
|
||||
if ( $m[0] === "\n" || ctype_xdigit( $m[0] ) ) {
|
||||
return sprintf( '\\%x ', ord( $m[0] ) );
|
||||
}
|
||||
return '\\' . $m[0];
|
||||
},
|
||||
[ __CLASS__, 'escapePregCallback' ],
|
||||
$s
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Escape characters in a string
|
||||
*
|
||||
* - Double quote needs escaping as the string delimiter.
|
||||
* - Backslash needs escaping since it's the escape character.
|
||||
* - Newline (\n) isn't valid in a string, and so needs escaping.
|
||||
* - Carriage return (\r), form feed (\f), and U+0000 would be changed by
|
||||
* CSS's input conversion rules, and so need escaping.
|
||||
* - Other non-space whitespace and controls don't need escaping, but it's
|
||||
* safer to do so.
|
||||
* - Angle brackets are escaped numerically to make it safer to embed in HTML.
|
||||
*
|
||||
* @param string $s
|
||||
* @return string
|
||||
*/
|
||||
private static function escapeString( $s ) {
|
||||
return preg_replace_callback(
|
||||
'/[^ \P{Z}]|[\p{Cc}\p{Cf}\p{Co}\p{Cs}"\x5c<>]/u',
|
||||
[ __CLASS__, 'escapePregCallback' ],
|
||||
$s
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback for escaping functions
|
||||
* @param array $m Matches
|
||||
* @return string
|
||||
*/
|
||||
private static function escapePregCallback( $m ) {
|
||||
// Newlines, carriage returns, form feeds, and hex digits have to be
|
||||
// escaped numerically. Other non-space whitespace and controls don't
|
||||
// have to be, but it's saner to do so. Angle brackets are escaped
|
||||
// numerically too to make it safer to embed in HTML.
|
||||
if ( preg_match( '/[^ \P{Z}]|[\p{Cc}\p{Cf}\p{Co}\p{Cs}0-9a-fA-F<>]/u', $m[0] ) ) {
|
||||
return sprintf( '\\%x ', mb_ord( $m[0] ) );
|
||||
}
|
||||
return '\\' . $m[0];
|
||||
}
|
||||
|
||||
public function __toString() {
|
||||
switch ( $this->type ) {
|
||||
case self::T_IDENT:
|
||||
|
|
@ -369,29 +369,26 @@ class Token extends ComponentValue {
|
|||
case self::T_HASH:
|
||||
if ( $this->typeFlag === 'id' ) {
|
||||
return '#' . self::escapeIdent( $this->value );
|
||||
} else {
|
||||
return '#' . preg_replace_callback( '/[^a-zA-Z0-9_\-\x{80}-\x{10ffff}]/u', function ( $m ) {
|
||||
return $m[0] === "\n" ? '\\a ' : '\\' . $m[0];
|
||||
}, $this->value );
|
||||
}
|
||||
|
||||
return '#' . preg_replace_callback(
|
||||
'/
|
||||
[^a-zA-Z0-9_\-\x{80}-\x{10ffff}] # Characters that are never allowed
|
||||
| [\p{Z}\p{Cc}\p{Cf}\p{Co}\p{Cs}] # To be safe, control characters and whitespace
|
||||
/ux',
|
||||
[ __CLASS__, 'escapePregCallback' ],
|
||||
$this->value
|
||||
);
|
||||
|
||||
case self::T_STRING:
|
||||
// We could try to decide whether single or double quote is
|
||||
// better, but it doesn't seem worth the effort.
|
||||
return '"' . strtr( $this->value, [
|
||||
'"' => '\\"',
|
||||
'\\' => '\\\\',
|
||||
"\n" => '\\a ',
|
||||
] ) . '"';
|
||||
return '"' . self::escapeString( $this->value ) . '"';
|
||||
|
||||
case self::T_URL:
|
||||
// We could try to decide whether single or double quote is
|
||||
// better, but it doesn't seem worth the effort.
|
||||
return 'url("' . strtr( $this->value, [
|
||||
'"' => '\\"',
|
||||
'\\' => '\\\\',
|
||||
"\n" => '\\a ',
|
||||
] ) . '")';
|
||||
return 'url("' . self::escapeString( $this->value ) . '")';
|
||||
|
||||
case self::T_BAD_STRING:
|
||||
// It's supposed to round trip, so...
|
||||
|
|
@ -435,41 +432,6 @@ class Token extends ComponentValue {
|
|||
|
||||
return $number . $unit;
|
||||
|
||||
case self::T_UNICODE_RANGE:
|
||||
if ( $this->start === 0 && $this->end === 0xffffff ) {
|
||||
return 'U+??????';
|
||||
}
|
||||
$fmt = 'U+%x';
|
||||
for ( $b = 0; $b < 24; $b += 4, $fmt .= '?' ) {
|
||||
$mask = ( 1 << $b ) - 1;
|
||||
if (
|
||||
( $this->start & $mask ) === 0 &&
|
||||
( $this->end & $mask ) === $mask &&
|
||||
( $this->start & ~$mask ) === ( $this->end & ~$mask )
|
||||
) {
|
||||
return sprintf( $fmt, $this->start >> $b );
|
||||
}
|
||||
}
|
||||
return sprintf( 'U+%x-%x', $this->start, $this->end );
|
||||
|
||||
case self::T_INCLUDE_MATCH:
|
||||
return '~=';
|
||||
|
||||
case self::T_DASH_MATCH:
|
||||
return '|=';
|
||||
|
||||
case self::T_PREFIX_MATCH:
|
||||
return '^=';
|
||||
|
||||
case self::T_SUFFIX_MATCH:
|
||||
return '$=';
|
||||
|
||||
case self::T_SUBSTRING_MATCH:
|
||||
return '*=';
|
||||
|
||||
case self::T_COLUMN:
|
||||
return '||';
|
||||
|
||||
case self::T_WHITESPACE:
|
||||
return ' ';
|
||||
|
||||
|
|
@ -500,13 +462,13 @@ class Token extends ComponentValue {
|
|||
return '';
|
||||
|
||||
default:
|
||||
throw new \UnexpectedValueException( "Unknown token type \"$this->type\"." );
|
||||
throw new UnexpectedValueException( "Unknown token type \"$this->type\"." );
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicate whether the two tokens need to be separated
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#serialization
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#serialization
|
||||
* @param Token $firstToken
|
||||
* @param Token $secondToken
|
||||
* @return bool
|
||||
|
|
@ -516,53 +478,69 @@ class Token extends ComponentValue {
|
|||
static $sepTable = [
|
||||
self::T_IDENT => [
|
||||
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
|
||||
self::T_PERCENTAGE, self::T_DIMENSION, self::T_UNICODE_RANGE, self::T_CDC, self::T_LEFT_PAREN
|
||||
self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC, self::T_LEFT_PAREN,
|
||||
// Internet Explorer is buggy in some contexts (T191134)
|
||||
self::T_HASH,
|
||||
],
|
||||
self::T_AT_KEYWORD => [
|
||||
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
|
||||
self::T_PERCENTAGE, self::T_DIMENSION, self::T_UNICODE_RANGE, self::T_CDC
|
||||
self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC,
|
||||
],
|
||||
self::T_HASH => [
|
||||
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
|
||||
self::T_PERCENTAGE, self::T_DIMENSION, self::T_UNICODE_RANGE, self::T_CDC
|
||||
self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC,
|
||||
// Internet Explorer is buggy in some contexts (T191134)
|
||||
self::T_HASH,
|
||||
],
|
||||
self::T_DIMENSION => [
|
||||
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
|
||||
self::T_PERCENTAGE, self::T_DIMENSION, self::T_UNICODE_RANGE, self::T_CDC
|
||||
self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC,
|
||||
// Internet Explorer is buggy in some contexts (T191134)
|
||||
self::T_HASH,
|
||||
],
|
||||
'#' => [
|
||||
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
|
||||
self::T_PERCENTAGE, self::T_DIMENSION, self::T_UNICODE_RANGE
|
||||
self::T_PERCENTAGE, self::T_DIMENSION,
|
||||
],
|
||||
'-' => [
|
||||
// Add '-' here from Editor's Draft, to go with the draft's
|
||||
// adding of tokens beginning with "--" that we also picked up.
|
||||
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
|
||||
self::T_PERCENTAGE, self::T_DIMENSION, self::T_UNICODE_RANGE
|
||||
self::T_PERCENTAGE, self::T_DIMENSION,
|
||||
],
|
||||
self::T_NUMBER => [
|
||||
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, self::T_NUMBER,
|
||||
self::T_PERCENTAGE, self::T_DIMENSION, self::T_UNICODE_RANGE
|
||||
self::T_PERCENTAGE, self::T_DIMENSION, '%',
|
||||
// Internet Explorer is buggy in some contexts
|
||||
self::T_HASH,
|
||||
],
|
||||
'@' => [
|
||||
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_UNICODE_RANGE
|
||||
],
|
||||
self::T_UNICODE_RANGE => [
|
||||
self::T_IDENT, self::T_FUNCTION, self::T_NUMBER, self::T_PERCENTAGE, self::T_DIMENSION, '?'
|
||||
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-',
|
||||
],
|
||||
'.' => [ self::T_NUMBER, self::T_PERCENTAGE, self::T_DIMENSION ],
|
||||
'+' => [ self::T_NUMBER, self::T_PERCENTAGE, self::T_DIMENSION ],
|
||||
'$' => [ '=' ],
|
||||
'*' => [ '=' ],
|
||||
'^' => [ '=' ],
|
||||
'~' => [ '=' ],
|
||||
'|' => [ '=', '|' ],
|
||||
'/' => [ '*' ],
|
||||
];
|
||||
|
||||
$t1 = $firstToken->type === Token::T_DELIM ? $firstToken->value : $firstToken->type;
|
||||
$t2 = $secondToken->type === Token::T_DELIM ? $secondToken->value : $secondToken->type;
|
||||
$t1 = $firstToken->type === self::T_DELIM ? $firstToken->value : $firstToken->type;
|
||||
$t2 = $secondToken->type === self::T_DELIM ? $secondToken->value : $secondToken->type;
|
||||
|
||||
return isset( $sepTable[$t1] ) && in_array( $t2, $sepTable[$t1], true );
|
||||
}
|
||||
|
||||
/**
|
||||
* Allow for marking the 'U' T_IDENT beginning a <urange>, to later avoid
|
||||
* serializing it with extraneous comments.
|
||||
* @internal
|
||||
* @see \Wikimedia\CSS\Util::stringify()
|
||||
* @see \Wikimedia\CSS\Grammar\UrangeMatcher
|
||||
* @param int|null $hack Set the hack value
|
||||
* @return int Current/old hack value
|
||||
*/
|
||||
public function urangeHack( $hack = null ) {
|
||||
$ret = $this->urangeHack;
|
||||
if ( $hack !== null ) {
|
||||
$this->urangeHack = max( (int)$this->urangeHack, $hack );
|
||||
}
|
||||
return $ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,25 +6,35 @@
|
|||
|
||||
namespace Wikimedia\CSS\Objects;
|
||||
|
||||
use UnexpectedValueException;
|
||||
use Wikimedia\CSS\Parser\Parser;
|
||||
|
||||
/**
|
||||
* Represent a list of CSS tokens
|
||||
*/
|
||||
class TokenList extends CSSObjectList {
|
||||
/**
|
||||
* @var string
|
||||
*/
|
||||
protected static $objectType = Token::class;
|
||||
|
||||
// We can greatly simplify this, assuming no separator
|
||||
/** @var Token[] The objects contained */
|
||||
protected $objects;
|
||||
|
||||
/** @inheritDoc */
|
||||
public function toTokenArray() {
|
||||
// We can greatly simplify this, assuming no separator
|
||||
return $this->objects;
|
||||
}
|
||||
|
||||
// This one, though, is complicated
|
||||
/** @inheritDoc */
|
||||
public function toComponentValueArray() {
|
||||
// This one, though, is complicated
|
||||
$parser = Parser::newFromTokens( $this->objects );
|
||||
$ret = $parser->parseComponentValueList();
|
||||
if ( $parser->getParseErrors() ) {
|
||||
$ex = new \UnexpectedValueException( 'TokenList cannot be converted to a ComponentValueList' );
|
||||
$ex = new UnexpectedValueException( 'TokenList cannot be converted to a ComponentValueList' );
|
||||
// @phan-suppress-next-line PhanUndeclaredProperty
|
||||
$ex->parseErrors = $parser->getParseErrors();
|
||||
throw $ex;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ namespace Wikimedia\CSS\Parser;
|
|||
*/
|
||||
interface DataSource {
|
||||
|
||||
const EOF = '';
|
||||
public const EOF = '';
|
||||
|
||||
/**
|
||||
* Read a character from the data source.
|
||||
|
|
|
|||
|
|
@ -6,21 +6,28 @@
|
|||
|
||||
namespace Wikimedia\CSS\Parser;
|
||||
|
||||
use InvalidArgumentException;
|
||||
use UnexpectedValueException;
|
||||
use UtfNormal\Constants;
|
||||
use UtfNormal\Utils;
|
||||
use Wikimedia\CSS\Objects\Token;
|
||||
|
||||
/**
|
||||
* Parse CSS into tokens
|
||||
*
|
||||
* This implements the tokenizer from the CSS Syntax Module Level 3 candidate recommendation.
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/
|
||||
*/
|
||||
class DataSourceTokenizer implements Tokenizer {
|
||||
|
||||
/** @var DataSource */
|
||||
protected $source;
|
||||
|
||||
/** @var int position in the input */
|
||||
protected $line = 1, $pos = 0;
|
||||
/** @var int line in the input */
|
||||
protected $line = 1;
|
||||
|
||||
/** @var int position in the line in the input */
|
||||
protected $pos = 0;
|
||||
|
||||
/** @var string|null|object The most recently consumed character */
|
||||
protected $currentCharacter = null;
|
||||
|
|
@ -42,7 +49,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
|
||||
/**
|
||||
* Read a character from the data source
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#input-preprocessing
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#input-preprocessing
|
||||
* @return string One UTF-8 character, or empty string on EOF
|
||||
*/
|
||||
protected function nextChar() {
|
||||
|
|
@ -50,17 +57,19 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
|
||||
// Perform transformations per the spec
|
||||
|
||||
// Any U+0000 becomes U+FFFD
|
||||
if ( $char === "\0" ) {
|
||||
return \UtfNormal\Constants::UTF8_REPLACEMENT;
|
||||
// Any U+0000 or surrogate code point becomes U+FFFD
|
||||
if ( $char === "\0" || ( $char >= "\u{D800}" && $char <= "\u{DFFF}" ) ) {
|
||||
return Constants::UTF8_REPLACEMENT;
|
||||
}
|
||||
|
||||
// Any U+000D, U+000C, or pair of U+000D + U+000A becomes U+000A
|
||||
if ( $char === "\f" ) { // U+000C
|
||||
if ( $char === "\f" ) {
|
||||
// U+000C
|
||||
return "\n";
|
||||
}
|
||||
|
||||
if ( $char === "\r" ) { // Either U+000D + U+000A or a lone U+000D
|
||||
if ( $char === "\r" ) {
|
||||
// Either U+000D + U+000A or a lone U+000D
|
||||
$char2 = $this->source->readCharacter();
|
||||
if ( $char2 !== "\n" ) {
|
||||
$this->source->putBackCharacter( $char2 );
|
||||
|
|
@ -90,13 +99,13 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
/**
|
||||
* Reconsume the next character
|
||||
*
|
||||
* In more normal terms, this pushes a character back onto the data source
|
||||
* In more normal terms, this pushes a character back onto the data source,
|
||||
* so it will be read again for the next call to self::consumeCharacter().
|
||||
*/
|
||||
protected function reconsumeCharacter() {
|
||||
// @codeCoverageIgnoreStart
|
||||
if ( !is_string( $this->currentCharacter ) ) {
|
||||
throw new \UnexpectedValueException( "[$this->line:$this->pos] Can't reconsume" );
|
||||
throw new UnexpectedValueException( "[$this->line:$this->pos] Can't reconsume" );
|
||||
}
|
||||
// @codeCoverageIgnoreEnd
|
||||
|
||||
|
|
@ -128,10 +137,12 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
return $ret;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function getParseErrors() {
|
||||
return $this->parseErrors;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function clearParseErrors() {
|
||||
$this->parseErrors = [];
|
||||
}
|
||||
|
|
@ -162,10 +173,13 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
|
||||
/**
|
||||
* Read a token from the data source
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-token
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-token
|
||||
* @return Token
|
||||
* @suppress PhanPluginDuplicateAdjacentStatement,PhanPluginDuplicateSwitchCaseLooseEquality
|
||||
*/
|
||||
public function consumeToken() {
|
||||
// We "consume comments" inline below, see `case '/'`.
|
||||
|
||||
$this->consumeCharacter();
|
||||
$pos = [ 'position' => [ $this->line, $this->pos ] ];
|
||||
|
||||
|
|
@ -185,7 +199,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
return $this->consumeStringToken( $this->currentCharacter, $pos );
|
||||
|
||||
case '#':
|
||||
list( $next, $next2, $next3 ) = $this->lookAhead();
|
||||
[ $next, $next2, $next3 ] = $this->lookAhead();
|
||||
if ( self::isNameCharacter( $this->nextCharacter ) ||
|
||||
self::isValidEscape( $next, $next2 )
|
||||
) {
|
||||
|
|
@ -197,31 +211,15 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
|
||||
return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );
|
||||
|
||||
case '$':
|
||||
if ( $this->nextCharacter === '=' ) {
|
||||
$this->consumeCharacter();
|
||||
return new Token( Token::T_SUFFIX_MATCH, $pos );
|
||||
}
|
||||
|
||||
return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );
|
||||
|
||||
case '(':
|
||||
return new Token( Token::T_LEFT_PAREN, $pos );
|
||||
|
||||
case ')':
|
||||
return new Token( Token::T_RIGHT_PAREN, $pos );
|
||||
|
||||
case '*':
|
||||
if ( $this->nextCharacter === '=' ) {
|
||||
$this->consumeCharacter();
|
||||
return new Token( Token::T_SUBSTRING_MATCH, $pos );
|
||||
}
|
||||
|
||||
return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );
|
||||
|
||||
case '+':
|
||||
case '.':
|
||||
list( $next, $next2, $next3 ) = $this->lookAhead();
|
||||
[ $next, $next2, ] = $this->lookAhead();
|
||||
if ( self::wouldStartNumber( $this->currentCharacter, $next, $next2 ) ) {
|
||||
$this->reconsumeCharacter();
|
||||
return $this->consumeNumericToken( $pos );
|
||||
|
|
@ -233,7 +231,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
return new Token( Token::T_COMMA, $pos );
|
||||
|
||||
case '-':
|
||||
list( $next, $next2, $next3 ) = $this->lookAhead();
|
||||
[ $next, $next2, ] = $this->lookAhead();
|
||||
if ( self::wouldStartNumber( $this->currentCharacter, $next, $next2 ) ) {
|
||||
$this->reconsumeCharacter();
|
||||
return $this->consumeNumericToken( $pos );
|
||||
|
|
@ -257,15 +255,16 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
$this->consumeCharacter();
|
||||
$this->consumeCharacter();
|
||||
while ( $this->currentCharacter !== DataSource::EOF &&
|
||||
// @phan-suppress-next-line PhanSuspiciousValueComparisonInLoop
|
||||
!( $this->currentCharacter === '*' && $this->nextCharacter === '/' )
|
||||
) {
|
||||
$this->consumeCharacter();
|
||||
}
|
||||
if ( $this->currentCharacter === DataSource::EOF ) {
|
||||
// Parse error from the editor's draft as of 2017-01-06
|
||||
$this->parseError( 'unclosed-comment', $pos );
|
||||
}
|
||||
$this->consumeCharacter();
|
||||
// @phan-suppress-next-line PhanPossiblyInfiniteRecursionSameParams
|
||||
return $this->consumeToken();
|
||||
}
|
||||
|
||||
|
|
@ -278,7 +277,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
return new Token( Token::T_SEMICOLON, $pos );
|
||||
|
||||
case '<':
|
||||
list( $next, $next2, $next3 ) = $this->lookAhead();
|
||||
[ $next, $next2, $next3 ] = $this->lookAhead();
|
||||
if ( $next === '!' && $next2 === '-' && $next3 === '-' ) {
|
||||
$this->consumeCharacter();
|
||||
$this->consumeCharacter();
|
||||
|
|
@ -289,7 +288,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );
|
||||
|
||||
case '@':
|
||||
list( $next, $next2, $next3 ) = $this->lookAhead();
|
||||
[ $next, $next2, $next3 ] = $this->lookAhead();
|
||||
if ( self::wouldStartIdentifier( $next, $next2, $next3 ) ) {
|
||||
return new Token( Token::T_AT_KEYWORD, $pos + [ 'value' => $this->consumeName() ] );
|
||||
}
|
||||
|
|
@ -311,14 +310,6 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
case ']':
|
||||
return new Token( Token::T_RIGHT_BRACKET, $pos );
|
||||
|
||||
case '^':
|
||||
if ( $this->nextCharacter === '=' ) {
|
||||
$this->consumeCharacter();
|
||||
return new Token( Token::T_PREFIX_MATCH, $pos );
|
||||
}
|
||||
|
||||
return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );
|
||||
|
||||
case '{':
|
||||
return new Token( Token::T_LEFT_BRACE, $pos );
|
||||
|
||||
|
|
@ -338,40 +329,6 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
$this->reconsumeCharacter();
|
||||
return $this->consumeNumericToken( $pos );
|
||||
|
||||
case 'u':
|
||||
case 'U':
|
||||
if ( $this->nextCharacter === '+' ) {
|
||||
list( $next, $next2 ) = $this->lookAhead();
|
||||
if ( self::isHexDigit( $next2 ) || $next2 === '?' ) {
|
||||
$this->consumeCharacter();
|
||||
return $this->consumeUnicodeRangeToken( $pos );
|
||||
}
|
||||
}
|
||||
|
||||
$this->reconsumeCharacter();
|
||||
return $this->consumeIdentLikeToken( $pos );
|
||||
|
||||
case '|':
|
||||
if ( $this->nextCharacter === '=' ) {
|
||||
$this->consumeCharacter();
|
||||
return new Token( Token::T_DASH_MATCH, $pos );
|
||||
}
|
||||
|
||||
if ( $this->nextCharacter === '|' ) {
|
||||
$this->consumeCharacter();
|
||||
return new Token( Token::T_COLUMN, $pos );
|
||||
}
|
||||
|
||||
return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );
|
||||
|
||||
case '~':
|
||||
if ( $this->nextCharacter === '=' ) {
|
||||
$this->consumeCharacter();
|
||||
return new Token( Token::T_INCLUDE_MATCH, $pos );
|
||||
}
|
||||
|
||||
return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );
|
||||
|
||||
case DataSource::EOF:
|
||||
return new Token( Token::T_EOF, $pos );
|
||||
|
||||
|
|
@ -387,14 +344,14 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
|
||||
/**
|
||||
* Consume a numeric token
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-numeric-token
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-numeric-token
|
||||
* @param array $data Data for the new token (typically contains just 'position')
|
||||
* @return Token
|
||||
*/
|
||||
protected function consumeNumericToken( array $data ) {
|
||||
list( $data['representation'], $data['value'], $data['typeFlag'] ) = $this->consumeNumber();
|
||||
[ $data['representation'], $data['value'], $data['typeFlag'] ] = $this->consumeNumber();
|
||||
|
||||
list( $next, $next2, $next3 ) = $this->lookAhead();
|
||||
[ $next, $next2, $next3 ] = $this->lookAhead();
|
||||
if ( self::wouldStartIdentifier( $next, $next2, $next3 ) ) {
|
||||
return new Token( Token::T_DIMENSION, $data + [ 'unit' => $this->consumeName() ] );
|
||||
} elseif ( $this->nextCharacter === '%' ) {
|
||||
|
|
@ -407,10 +364,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
|
||||
/**
|
||||
* Consume an ident-like token
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-an-ident-like-token
|
||||
* @note Per the draft as of January 2017, quoted URLs are parsed as
|
||||
* functions named 'url'. This is needed in order to implement the `<url>`
|
||||
* type in the [Values specification](https://www.w3.org/TR/2016/CR-css-values-3-20160929/#urls).
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-ident-like-token
|
||||
* @param array $data Data for the new token (typically contains just 'position')
|
||||
* @return Token
|
||||
*/
|
||||
|
|
@ -422,14 +376,14 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
|
||||
if ( !strcasecmp( $name, 'url' ) ) {
|
||||
while ( true ) {
|
||||
list( $next, $next2 ) = $this->lookAhead();
|
||||
[ $next, $next2 ] = $this->lookAhead();
|
||||
if ( !self::isWhitespace( $next ) || !self::isWhitespace( $next2 ) ) {
|
||||
break;
|
||||
}
|
||||
$this->consumeCharacter();
|
||||
}
|
||||
if ( $next !== '"' && $next !== '\'' &&
|
||||
!( self::isWhitespace( $next ) && ( $next2 === '"' || $next2=== '\'' ) )
|
||||
!( self::isWhitespace( $next ) && ( $next2 === '"' || $next2 === '\'' ) )
|
||||
) {
|
||||
return $this->consumeUrlToken( $data );
|
||||
}
|
||||
|
|
@ -446,7 +400,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
*
|
||||
* This assumes the leading quote or apostrophe has already been consumed.
|
||||
*
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-string-token
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-string-token
|
||||
* @param string $endChar Ending character of the string
|
||||
* @param array $data Data for the new token (typically contains just 'position')
|
||||
* @return Token
|
||||
|
|
@ -458,7 +412,6 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
$this->consumeCharacter();
|
||||
switch ( $this->currentCharacter ) {
|
||||
case DataSource::EOF:
|
||||
// Parse error from the editor's draft as of 2017-01-06
|
||||
$this->parseError( 'unclosed-string', $data );
|
||||
break 2;
|
||||
|
||||
|
|
@ -473,8 +426,6 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
case '\\':
|
||||
if ( $this->nextCharacter === DataSource::EOF ) {
|
||||
// Do nothing
|
||||
// Parse error from the editor's draft as of 2017-01-06
|
||||
$this->parseError( 'bad-escape' );
|
||||
} elseif ( $this->nextCharacter === "\n" ) {
|
||||
// Consume it
|
||||
$this->consumeCharacter();
|
||||
|
|
@ -482,7 +433,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
$data['value'] .= $this->consumeEscape();
|
||||
} else {
|
||||
// @codeCoverageIgnoreStart
|
||||
throw new \UnexpectedValueException( "[$this->line:$this->pos] Unexpected state" );
|
||||
throw new UnexpectedValueException( "[$this->line:$this->pos] Unexpected state" );
|
||||
// @codeCoverageIgnoreEnd
|
||||
}
|
||||
break;
|
||||
|
|
@ -493,6 +444,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
}
|
||||
}
|
||||
|
||||
// @phan-suppress-next-line PhanPluginUnreachableCode Reached by break 2
|
||||
return new Token( Token::T_STRING, $data );
|
||||
}
|
||||
|
||||
|
|
@ -501,8 +453,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
*
|
||||
* This assumes the leading "url(" has already been consumed.
|
||||
*
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-url-token
|
||||
* @note Per the draft as of January 2017, this does not handle quoted URL tokens.
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-url-token
|
||||
* @param array $data Data for the new token (typically contains just 'position')
|
||||
* @return Token
|
||||
*/
|
||||
|
|
@ -516,29 +467,23 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
}
|
||||
|
||||
// 3.
|
||||
if ( $this->nextCharacter === DataSource::EOF ) {
|
||||
// Parse error from the editor's draft as of 2017-01-06
|
||||
$this->parseError( 'unclosed-url', $data );
|
||||
return new Token( Token::T_URL, $data );
|
||||
}
|
||||
|
||||
// 4. (removed in draft, this was formerly the parsing for a quoted URL token)
|
||||
|
||||
// 5. (renumbered as 4 in the draft)
|
||||
while ( true ) {
|
||||
$this->consumeCharacter();
|
||||
switch ( $this->currentCharacter ) {
|
||||
case DataSource::EOF:
|
||||
// Parse error from the editor's draft as of 2017-01-06
|
||||
$this->parseError( 'unclosed-url', $data );
|
||||
break 2;
|
||||
|
||||
// @codeCoverageIgnoreStart
|
||||
case ')':
|
||||
// @codeCoverageIgnoreEnd
|
||||
break 2;
|
||||
|
||||
// @codeCoverageIgnoreStart
|
||||
case "\n":
|
||||
case "\t":
|
||||
case ' ':
|
||||
// @codeCoverageIgnoreEnd
|
||||
while ( self::isWhitespace( $this->nextCharacter ) ) {
|
||||
$this->consumeCharacter();
|
||||
}
|
||||
|
|
@ -546,7 +491,6 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
$this->consumeCharacter();
|
||||
break 2;
|
||||
} elseif ( $this->nextCharacter === DataSource::EOF ) {
|
||||
// Parse error from the editor's draft as of 2017-01-06
|
||||
$this->consumeCharacter();
|
||||
$this->parseError( 'unclosed-url', $data );
|
||||
break 2;
|
||||
|
|
@ -554,16 +498,19 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
$this->consumeBadUrlRemnants();
|
||||
return new Token( Token::T_BAD_URL, [ 'value' => '' ] + $data );
|
||||
}
|
||||
break;
|
||||
|
||||
// @codeCoverageIgnoreStart
|
||||
case '"':
|
||||
case '\'':
|
||||
case '(':
|
||||
// @codeCoverageIgnoreEnd
|
||||
$this->parseError( 'bad-character-in-url' );
|
||||
$this->consumeBadUrlRemnants();
|
||||
return new Token( Token::T_BAD_URL, [ 'value' => '' ] + $data );
|
||||
|
||||
// @codeCoverageIgnoreStart
|
||||
case '\\':
|
||||
// @codeCoverageIgnoreEnd
|
||||
if ( self::isValidEscape( $this->currentCharacter, $this->nextCharacter ) ) {
|
||||
$data['value'] .= $this->consumeEscape();
|
||||
} else {
|
||||
|
|
@ -585,12 +532,13 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
}
|
||||
}
|
||||
|
||||
// @phan-suppress-next-line PhanPluginUnreachableCode Reached by break 2
|
||||
return new Token( Token::T_URL, $data );
|
||||
}
|
||||
|
||||
/**
|
||||
* Clean up after finding an error in a URL
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-the-remnants-of-a-bad-url
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-remnants-of-bad-url
|
||||
*/
|
||||
protected function consumeBadUrlRemnants() {
|
||||
while ( true ) {
|
||||
|
|
@ -604,61 +552,9 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Consume a unicode-range token
|
||||
*
|
||||
* This assumes the initial "u" has been consumed (currentCharacter is the '+'),
|
||||
* and the next codepoint is verfied to be a hex digit or "?".
|
||||
*
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-unicode-range-token
|
||||
* @param array $data Data for the new token (typically contains just 'position')
|
||||
* @return Token
|
||||
*/
|
||||
protected function consumeUnicodeRangeToken( array $data ) {
|
||||
// 1.
|
||||
$v = '';
|
||||
while ( strlen( $v ) < 6 && self::isHexDigit( $this->nextCharacter ) ) {
|
||||
$this->consumeCharacter();
|
||||
$v .= $this->currentCharacter;
|
||||
}
|
||||
$anyQ = false;
|
||||
while ( strlen( $v ) < 6 && $this->nextCharacter === '?' ) {
|
||||
$anyQ = true;
|
||||
$this->consumeCharacter();
|
||||
$v .= $this->currentCharacter;
|
||||
}
|
||||
|
||||
if ( $anyQ ) {
|
||||
return new Token( Token::T_UNICODE_RANGE, $data + [
|
||||
'start' => intval( str_replace( '?', '0', $v ), 16 ),
|
||||
'end' => intval( str_replace( '?', 'F', $v ), 16 ),
|
||||
] );
|
||||
}
|
||||
|
||||
$data['start'] = intval( $v, 16 );
|
||||
|
||||
// 2.
|
||||
list( $next, $next2 ) = $this->lookAhead();
|
||||
if ( $next === '-' && self::isHexDigit( $next2 ) ) {
|
||||
$this->consumeCharacter();
|
||||
$v = '';
|
||||
while ( strlen( $v ) < 6 && self::isHexDigit( $this->nextCharacter ) ) {
|
||||
$this->consumeCharacter();
|
||||
$v .= $this->currentCharacter;
|
||||
}
|
||||
$data['end'] = intval( $v, 16 );
|
||||
} else {
|
||||
// 3.
|
||||
$data['end'] = $data['start'];
|
||||
}
|
||||
|
||||
// 4.
|
||||
return new Token( Token::T_UNICODE_RANGE, $data );
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicate if a character is whitespace
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#whitespace
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#whitespace
|
||||
* @param string $char A single UTF-8 character
|
||||
* @return bool
|
||||
*/
|
||||
|
|
@ -668,7 +564,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
|
||||
/**
|
||||
* Indicate if a character is a name-start code point
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#name-start-code-point
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#name-start-code-point
|
||||
* @param string $char A single UTF-8 character
|
||||
* @return bool
|
||||
*/
|
||||
|
|
@ -676,14 +572,14 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
// Every non-ASCII character is a name start character, so we can just
|
||||
// check the first byte.
|
||||
$char = ord( $char );
|
||||
return $char >= 0x41 && $char <= 0x5a ||
|
||||
$char >= 0x61 && $char <= 0x7a ||
|
||||
return ( $char >= 0x41 && $char <= 0x5a ) ||
|
||||
( $char >= 0x61 && $char <= 0x7a ) ||
|
||||
$char >= 0x80 || $char === 0x5f;
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicate if a character is a name code point
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#name-code-point
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#name-code-point
|
||||
* @param string $char A single UTF-8 character
|
||||
* @return bool
|
||||
*/
|
||||
|
|
@ -691,15 +587,15 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
// Every non-ASCII character is a name character, so we can just check
|
||||
// the first byte.
|
||||
$char = ord( $char );
|
||||
return $char >= 0x41 && $char <= 0x5a ||
|
||||
$char >= 0x61 && $char <= 0x7a ||
|
||||
$char >= 0x30 && $char <= 0x39 ||
|
||||
return ( $char >= 0x41 && $char <= 0x5a ) ||
|
||||
( $char >= 0x61 && $char <= 0x7a ) ||
|
||||
( $char >= 0x30 && $char <= 0x39 ) ||
|
||||
$char >= 0x80 || $char === 0x5f || $char === 0x2d;
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicate if a character is non-printable
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#non-printable-code-point
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#non-printable-code-point
|
||||
* @param string $char A single UTF-8 character
|
||||
* @return bool
|
||||
*/
|
||||
|
|
@ -707,15 +603,15 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
// No non-ASCII character is non-printable, so we can just check the
|
||||
// first byte.
|
||||
$char = ord( $char );
|
||||
return $char >= 0x00 && $char <= 0x08 ||
|
||||
return ( $char >= 0x00 && $char <= 0x08 ) ||
|
||||
$char === 0x0b ||
|
||||
$char >= 0x0e && $char <= 0x1f ||
|
||||
( $char >= 0x0e && $char <= 0x1f ) ||
|
||||
$char === 0x7f;
|
||||
}
|
||||
|
||||
/**
|
||||
* Indicate if a character is a digit
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#digit
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#digit
|
||||
* @param string $char A single UTF-8 character
|
||||
* @return bool
|
||||
*/
|
||||
|
|
@ -728,7 +624,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
|
||||
/**
|
||||
* Indicate if a character is a hex digit
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#hex-digit
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#hex-digit
|
||||
* @param string $char A single UTF-8 character
|
||||
* @return bool
|
||||
*/
|
||||
|
|
@ -736,14 +632,14 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
// No non-ASCII character is a hex digit, so we can just check the
|
||||
// first byte.
|
||||
$char = ord( $char );
|
||||
return $char >= 0x30 && $char <= 0x39 ||
|
||||
$char >= 0x41 && $char <= 0x46 ||
|
||||
$char >= 0x61 && $char <= 0x66;
|
||||
return ( $char >= 0x30 && $char <= 0x39 ) ||
|
||||
( $char >= 0x41 && $char <= 0x46 ) ||
|
||||
( $char >= 0x61 && $char <= 0x66 );
|
||||
}
|
||||
|
||||
/**
|
||||
* Determine if two characters constitute a valid escape
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#starts-with-a-valid-escape
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#starts-with-a-valid-escape
|
||||
* @param string $char1
|
||||
* @param string $char2
|
||||
* @return bool
|
||||
|
|
@ -754,7 +650,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
|
||||
/**
|
||||
* Determine if three characters would start an identifier
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#would-start-an-identifier
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#would-start-an-identifier
|
||||
* @param string $char1
|
||||
* @param string $char2
|
||||
* @param string $char3
|
||||
|
|
@ -762,7 +658,6 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
*/
|
||||
protected static function wouldStartIdentifier( $char1, $char2, $char3 ) {
|
||||
if ( $char1 === '-' ) {
|
||||
// Added the possibility for an identifier beginning with "--" per the draft.
|
||||
return self::isNameStartCharacter( $char2 ) || $char2 === '-' ||
|
||||
self::isValidEscape( $char2, $char3 );
|
||||
} elseif ( self::isNameStartCharacter( $char1 ) ) {
|
||||
|
|
@ -776,7 +671,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
|
||||
/**
|
||||
* Determine if three characters would start a number
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#starts-with-a-number
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#starts-with-a-number
|
||||
* @param string $char1
|
||||
* @param string $char2
|
||||
* @param string $char3
|
||||
|
|
@ -785,7 +680,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
protected static function wouldStartNumber( $char1, $char2, $char3 ) {
|
||||
if ( $char1 === '+' || $char1 === '-' ) {
|
||||
return self::isDigit( $char2 ) ||
|
||||
$char2 === '.' && self::isDigit( $char3 );
|
||||
( $char2 === '.' && self::isDigit( $char3 ) );
|
||||
} elseif ( $char1 === '.' ) {
|
||||
return self::isDigit( $char2 );
|
||||
// @codeCoverageIgnoreStart
|
||||
|
|
@ -801,7 +696,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
*
|
||||
* This assumes the leading backslash is consumed.
|
||||
*
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-an-escaped-code-point
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-escaped-code-point
|
||||
* @return string Escaped character
|
||||
*/
|
||||
protected function consumeEscape() {
|
||||
|
|
@ -809,12 +704,6 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
|
||||
$this->consumeCharacter();
|
||||
|
||||
// @codeCoverageIgnoreStart
|
||||
if ( $this->currentCharacter === "\n" ) {
|
||||
throw new \UnexpectedValueException( "[$this->line:$this->pos] Unexpected newline" );
|
||||
}
|
||||
// @codeCoverageIgnoreEnd
|
||||
|
||||
// 1-6 hexits, plus one optional whitespace character
|
||||
if ( self::isHexDigit( $this->currentCharacter ) ) {
|
||||
$num = $this->currentCharacter;
|
||||
|
|
@ -827,16 +716,15 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
}
|
||||
|
||||
$num = intval( $num, 16 );
|
||||
if ( $num === 0 || $num >= 0xd800 && $num <= 0xdfff || $num > 0x10ffff ) {
|
||||
return \UtfNormal\Constants::UTF8_REPLACEMENT;
|
||||
if ( $num === 0 || ( $num >= 0xd800 && $num <= 0xdfff ) || $num > 0x10ffff ) {
|
||||
return Constants::UTF8_REPLACEMENT;
|
||||
}
|
||||
return \UtfNormal\Utils::codepointToUtf8( $num );
|
||||
return Utils::codepointToUtf8( $num );
|
||||
}
|
||||
|
||||
if ( $this->currentCharacter === DataSource::EOF ) {
|
||||
// Parse error from the editor's draft as of 2017-01-06
|
||||
$this->parseError( 'bad-escape', $position );
|
||||
return \UtfNormal\Constants::UTF8_REPLACEMENT;
|
||||
return Constants::UTF8_REPLACEMENT;
|
||||
}
|
||||
|
||||
return $this->currentCharacter;
|
||||
|
|
@ -849,7 +737,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
* self::wouldStartIdentifier() or the like before calling the method if
|
||||
* necessary.
|
||||
*
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-name
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-name
|
||||
* @return string Name
|
||||
*/
|
||||
protected function consumeName() {
|
||||
|
|
@ -863,13 +751,13 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
} elseif ( self::isValidEscape( $this->currentCharacter, $this->nextCharacter ) ) {
|
||||
$name .= $this->consumeEscape();
|
||||
} else {
|
||||
$this->reconsumeCharacter(); // Doesn't say to, but breaks otherwise
|
||||
return $name;
|
||||
$this->reconsumeCharacter();
|
||||
break;
|
||||
}
|
||||
}
|
||||
// @codeCoverageIgnoreStart
|
||||
|
||||
return $name;
|
||||
}
|
||||
// @codeCoverageIgnoreEnd
|
||||
|
||||
/**
|
||||
* Consume a number
|
||||
|
|
@ -877,8 +765,9 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
* Note this does not do validation on the input stream. Call
|
||||
* self::wouldStartNumber() before calling the method if necessary.
|
||||
*
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-number
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-number
|
||||
* @return array [ string $value, int|float $number, string $type ('integer' or 'number') ]
|
||||
* @suppress PhanPluginDuplicateAdjacentStatement
|
||||
*/
|
||||
protected function consumeNumber() {
|
||||
// 1.
|
||||
|
|
@ -899,7 +788,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
|
||||
// 4.
|
||||
if ( $this->nextCharacter === '.' ) {
|
||||
list( $next, $next2, $next3 ) = $this->lookAhead();
|
||||
[ $next, $next2, ] = $this->lookAhead();
|
||||
if ( self::isDigit( $next2 ) ) {
|
||||
// 4.1.
|
||||
$this->consumeCharacter();
|
||||
|
|
@ -918,7 +807,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
|
||||
// 5.
|
||||
if ( $this->nextCharacter === 'e' || $this->nextCharacter === 'E' ) {
|
||||
list( $next, $next2, $next3 ) = $this->lookAhead();
|
||||
[ $next, $next2, $next3 ] = $this->lookAhead();
|
||||
$ok = false;
|
||||
if ( ( $next2 === '+' || $next2 === '-' ) && self::isDigit( $next3 ) ) {
|
||||
$ok = true;
|
||||
|
|
@ -948,7 +837,7 @@ class DataSourceTokenizer implements Tokenizer {
|
|||
}
|
||||
|
||||
// 6. We assume PHP's casting follows the same rules as
|
||||
// https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#convert-a-string-to-a-number
|
||||
// https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#convert-string-to-number
|
||||
$value = $type === 'integer' ? (int)$repr : (float)$repr;
|
||||
|
||||
// 7.
|
||||
|
|
|
|||
|
|
@ -6,9 +6,15 @@
|
|||
|
||||
namespace Wikimedia\CSS\Parser;
|
||||
|
||||
use RuntimeException;
|
||||
use UtfNormal\Constants;
|
||||
use UtfNormal\Utils;
|
||||
use Wikimedia\AtEase\AtEase;
|
||||
|
||||
/**
|
||||
* Character set conversion for CSS
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#input-byte-stream
|
||||
*
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#input-byte-stream
|
||||
*/
|
||||
class Encoder {
|
||||
|
||||
|
|
@ -96,9 +102,12 @@ class Encoder {
|
|||
'iso_8859-8' => 'ISO-8859-8',
|
||||
'iso_8859-8:1988' => 'ISO-8859-8',
|
||||
'visual' => 'ISO-8859-8',
|
||||
'csiso88598i' => 'ISO-8859-8', // ISO-8859-8-I?
|
||||
'iso-8859-8-i' => 'ISO-8859-8', // ISO-8859-8-I?
|
||||
'logical' => 'ISO-8859-8', // ISO-8859-8-I?
|
||||
// ISO-8859-8-I?
|
||||
'csiso88598i' => 'ISO-8859-8',
|
||||
// ISO-8859-8-I?
|
||||
'iso-8859-8-i' => 'ISO-8859-8',
|
||||
// ISO-8859-8-I?
|
||||
'logical' => 'ISO-8859-8',
|
||||
'csisolatin6' => 'ISO-8859-10',
|
||||
'iso-8859-10' => 'ISO-8859-10',
|
||||
'iso-ir-157' => 'ISO-8859-10',
|
||||
|
|
@ -188,15 +197,24 @@ class Encoder {
|
|||
'x-cp1258' => 'Windows-1258',
|
||||
'x-mac-cyrillic' => 'mac-cyrillic',
|
||||
'x-mac-ukrainian' => 'mac-cyrillic',
|
||||
'chinese' => 'GB18030', // GBK
|
||||
'csgb2312' => 'GB18030', // GBK
|
||||
'csiso58gb231280' => 'GB18030', // GBK
|
||||
'gb2312' => 'GB18030', // GBK
|
||||
'gb_2312' => 'GB18030', // GBK
|
||||
'gb_2312-80' => 'GB18030', // GBK
|
||||
'gbk' => 'GB18030', // GBK
|
||||
'iso-ir-58' => 'GB18030', // GBK
|
||||
'x-gbk' => 'GB18030', // GBK
|
||||
// GBK
|
||||
'chinese' => 'GB18030',
|
||||
// GBK
|
||||
'csgb2312' => 'GB18030',
|
||||
// GBK
|
||||
'csiso58gb231280' => 'GB18030',
|
||||
// GBK
|
||||
'gb2312' => 'GB18030',
|
||||
// GBK
|
||||
'gb_2312' => 'GB18030',
|
||||
// GBK
|
||||
'gb_2312-80' => 'GB18030',
|
||||
// GBK
|
||||
'gbk' => 'GB18030',
|
||||
// GBK
|
||||
'iso-ir-58' => 'GB18030',
|
||||
// GBK
|
||||
'x-gbk' => 'GB18030',
|
||||
'gb18030' => 'GB18030',
|
||||
'big5' => 'BIG-5',
|
||||
'big5-hkscs' => 'BIG-5',
|
||||
|
|
@ -231,6 +249,7 @@ class Encoder {
|
|||
'iso-2022-cn' => 'replacement',
|
||||
'iso-2022-cn-ext' => 'replacement',
|
||||
'iso-2022-kr' => 'replacement',
|
||||
'replacement' => 'replacement',
|
||||
'utf-16be' => 'UTF-16BE',
|
||||
'utf-16' => 'UTF-16LE',
|
||||
'utf-16le' => 'UTF-16LE',
|
||||
|
|
@ -247,7 +266,7 @@ class Encoder {
|
|||
*/
|
||||
public static function convert( $text, $encodings = [] ) {
|
||||
// First, check for a BOM and honor that if it's present.
|
||||
if ( substr( $text, 0, 3 ) === "\xef\xbb\xbf" ) {
|
||||
if ( strpos( $text, "\xef\xbb\xbf" ) === 0 ) {
|
||||
// UTF-8 with BOM (convert it anyway in case the BOM is a lie)
|
||||
return self::doConvert( 'UTF-8', substr( $text, 3 ) );
|
||||
}
|
||||
|
|
@ -300,13 +319,13 @@ class Encoder {
|
|||
protected static function doConvert( $encoding, $text ) {
|
||||
// Pseudo-encoding that just outputs one replacement character
|
||||
if ( $encoding === 'replacement' ) {
|
||||
return \UtfNormal\Constants::UTF8_REPLACEMENT;
|
||||
return Constants::UTF8_REPLACEMENT;
|
||||
}
|
||||
|
||||
// Pseudo-encoding that shifts non-ASCII bytes to the BMP private use area
|
||||
if ( $encoding === 'x-user-defined' ) {
|
||||
return preg_replace_callback( '/[\x80-\xff]/', function ( $m ) {
|
||||
return \UtfNormal\Utils::codepointToUtf8( 0xf700 + ord( $m[0] ) );
|
||||
return preg_replace_callback( '/[\x80-\xff]/', static function ( $m ) {
|
||||
return Utils::codepointToUtf8( 0xf700 + ord( $m[0] ) );
|
||||
}, $text );
|
||||
}
|
||||
|
||||
|
|
@ -315,15 +334,15 @@ class Encoder {
|
|||
// some encodings mbstring doesn't support.
|
||||
if ( in_array( $encoding, mb_list_encodings(), true ) ) {
|
||||
$old = mb_substitute_character();
|
||||
mb_substitute_character( \UtfNormal\Constants::UNICODE_REPLACEMENT );
|
||||
mb_substitute_character( Constants::UNICODE_REPLACEMENT );
|
||||
$text = mb_convert_encoding( $text, 'UTF-8', $encoding );
|
||||
mb_substitute_character( $old );
|
||||
return $text;
|
||||
}
|
||||
|
||||
$ret = \MediaWiki\quietCall( 'iconv', $encoding, 'UTF-8', $text );
|
||||
$ret = AtEase::quietCall( 'iconv', $encoding, 'UTF-8', $text );
|
||||
if ( $ret === false ) {
|
||||
throw new \RuntimeException( "Cannot convert '$text' from $encoding" );
|
||||
throw new RuntimeException( "Cannot convert '$text' from $encoding" );
|
||||
}
|
||||
return $ret;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,29 +7,28 @@
|
|||
namespace Wikimedia\CSS\Parser;
|
||||
|
||||
use Wikimedia\CSS\Objects\AtRule;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\ComponentValue;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\CSSFunction;
|
||||
use Wikimedia\CSS\Objects\Declaration;
|
||||
use Wikimedia\CSS\Objects\DeclarationList;
|
||||
use Wikimedia\CSS\Objects\DeclarationOrAtRuleList;
|
||||
use Wikimedia\CSS\Objects\Declaration;
|
||||
use Wikimedia\CSS\Objects\QualifiedRule;
|
||||
use Wikimedia\CSS\Objects\Rule;
|
||||
use Wikimedia\CSS\Objects\RuleList;
|
||||
use Wikimedia\CSS\Objects\SimpleBlock;
|
||||
use Wikimedia\CSS\Objects\Stylesheet;
|
||||
use Wikimedia\CSS\Objects\Token;
|
||||
use Wikimedia\CSS\Sanitizer\Sanitizer;
|
||||
|
||||
// Note: While reading the code below, you might find that my calls to
|
||||
// consumeToken() don't match what the spec says and I don't ever "reconsume" a
|
||||
// consumeToken() don't match what the spec says, and I don't ever "reconsume" a
|
||||
// token. It turns out that the spec is overcomplicated and confused with
|
||||
// respect to the "current input token" and the "next input token". It turns
|
||||
// out things are pretty simple: every "consume an X" is called with the
|
||||
// current input token being the first token of X, and returns with the current
|
||||
// input token being the last token of X (or EOF if X ends at EOF).
|
||||
|
||||
// Also of note is that, since our Tokenizer can only return a stream of tokens
|
||||
// Also of note: since our Tokenizer can only return a stream of tokens
|
||||
// rather than a stream of component values, the consume functions here only
|
||||
// consider tokens. ComponentValueList::toTokenArray() may be used to convert a
|
||||
// list of component values to a list of tokens if necessary.
|
||||
|
|
@ -38,15 +37,19 @@ use Wikimedia\CSS\Sanitizer\Sanitizer;
|
|||
* Parse CSS into a structure for further processing.
|
||||
*
|
||||
* This implements the CSS Syntax Module Level 3 candidate recommendation.
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/
|
||||
*
|
||||
* The usual entry points are:
|
||||
* - Parser::parseStylesheet() to parse a stylesheet or the contents of a <style> tag.
|
||||
* - Parser::parseDeclarationList() to parse an inline style attribute
|
||||
*/
|
||||
class Parser {
|
||||
/** Maximum depth of nested ComponentValues */
|
||||
const CV_DEPTH_LIMIT = 100; // Arbitrary number that seems like it should be enough
|
||||
/**
|
||||
* Maximum depth of nested ComponentValues
|
||||
*
|
||||
* Arbitrary number that seems like it should be enough
|
||||
*/
|
||||
private const CV_DEPTH_LIMIT = 100;
|
||||
|
||||
/** @var Tokenizer */
|
||||
protected $tokenizer;
|
||||
|
|
@ -151,57 +154,47 @@ class Parser {
|
|||
* @param array $data Extra data about the error.
|
||||
*/
|
||||
protected function parseError( $tag, Token $token, array $data = [] ) {
|
||||
list( $line, $pos ) = $token->getPosition();
|
||||
[ $line, $pos ] = $token->getPosition();
|
||||
$this->parseErrors[] = array_merge( [ $tag, $line, $pos ], $data );
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a stylesheet
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-stylesheet
|
||||
* @note Per the Editor's Draft, if the first rule is an at-rule named
|
||||
* "charset" it will be silently dropped. If you're not using the provided
|
||||
* Sanitizer classes to further sanitize the CSS, you'll want to manually
|
||||
* filter out any other such rules before stringifying the stylesheet
|
||||
* and/or prepend `@charset "utf-8";` after stringifying it.
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-stylesheet
|
||||
* @return Stylesheet
|
||||
*/
|
||||
public function parseStylesheet() {
|
||||
$this->consumeToken(); // Move to the first token
|
||||
// Move to the first token
|
||||
$this->consumeToken();
|
||||
$list = $this->consumeRuleList( true );
|
||||
|
||||
// Drop @charset per the Editor's Draft
|
||||
if ( isset( $list[0] ) && $list[0] instanceof AtRule &&
|
||||
!strcasecmp( $list[0]->getName(), 'charset' )
|
||||
) {
|
||||
$list->remove( 0 );
|
||||
$list->rewind();
|
||||
}
|
||||
|
||||
return new Stylesheet( $list );
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a list of rules
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-list-of-rules
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-list-of-rules
|
||||
* @return RuleList
|
||||
*/
|
||||
public function parseRuleList() {
|
||||
$this->consumeToken(); // Move to the first token
|
||||
// Move to the first token
|
||||
$this->consumeToken();
|
||||
return $this->consumeRuleList( false );
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a rule
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-rule
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-rule
|
||||
* @return Rule|null
|
||||
*/
|
||||
public function parseRule() {
|
||||
// 1. and 2.
|
||||
// 1.
|
||||
$this->consumeTokenAndWhitespace();
|
||||
|
||||
// 3.
|
||||
// 2.
|
||||
if ( $this->currentToken->type() === Token::T_EOF ) {
|
||||
$this->parseError( 'unexpected-eof', $this->currentToken ); // "return a syntax error"?
|
||||
// "return a syntax error"?
|
||||
$this->parseError( 'unexpected-eof', $this->currentToken );
|
||||
return null;
|
||||
}
|
||||
|
||||
|
|
@ -214,39 +207,39 @@ class Parser {
|
|||
}
|
||||
}
|
||||
|
||||
// 4.
|
||||
// 3.
|
||||
$this->consumeTokenAndWhitespace();
|
||||
|
||||
// 5.
|
||||
// 4.
|
||||
if ( $this->currentToken->type() === Token::T_EOF ) {
|
||||
return $rule;
|
||||
} else {
|
||||
$this->parseError( 'expected-eof', $this->currentToken ); // "return a syntax error"?
|
||||
return null;
|
||||
}
|
||||
|
||||
// "return a syntax error"?
|
||||
$this->parseError( 'expected-eof', $this->currentToken );
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a declaration
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-declaration
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-declaration
|
||||
* @return Declaration|null
|
||||
*/
|
||||
public function parseDeclaration() {
|
||||
// 1. and 2.
|
||||
// 1.
|
||||
$this->consumeTokenAndWhitespace();
|
||||
|
||||
// 3.
|
||||
// 2.
|
||||
if ( $this->currentToken->type() !== Token::T_IDENT ) {
|
||||
$this->parseError( 'expected-ident', $this->currentToken ); // "return a syntax error"?
|
||||
// "return a syntax error"?
|
||||
$this->parseError( 'expected-ident', $this->currentToken );
|
||||
return null;
|
||||
}
|
||||
|
||||
// 4.
|
||||
$declaration = $this->consumeDeclaration();
|
||||
|
||||
// 3.
|
||||
// Declarations always run to EOF, no need to check.
|
||||
|
||||
return $declaration;
|
||||
return $this->consumeDeclaration();
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -256,63 +249,66 @@ class Parser {
|
|||
* @return DeclarationList
|
||||
*/
|
||||
public function parseDeclarationList() {
|
||||
$this->consumeToken(); // Move to the first token
|
||||
// Move to the first token
|
||||
$this->consumeToken();
|
||||
return $this->consumeDeclarationOrAtRuleList( false );
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a list of declarations and at-rules
|
||||
* @note This is the entry point the standard calls "parse a list of declarations"
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-list-of-declarations
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-list-of-declarations
|
||||
* @return DeclarationOrAtRuleList
|
||||
*/
|
||||
public function parseDeclarationOrAtRuleList() {
|
||||
$this->consumeToken(); // Move to the first token
|
||||
// Move to the first token
|
||||
$this->consumeToken();
|
||||
return $this->consumeDeclarationOrAtRuleList();
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a (non-whitespace) component value
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-component-value
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-component-value
|
||||
* @return ComponentValue|null
|
||||
*/
|
||||
public function parseComponentValue() {
|
||||
// 1. and 2.
|
||||
// 1.
|
||||
$this->consumeTokenAndWhitespace();
|
||||
|
||||
// 2.
|
||||
if ( $this->currentToken->type() === Token::T_EOF ) {
|
||||
// "return a syntax error"?
|
||||
$this->parseError( 'unexpected-eof', $this->currentToken );
|
||||
return null;
|
||||
}
|
||||
|
||||
// 3.
|
||||
if ( $this->currentToken->type() === Token::T_EOF ) {
|
||||
$this->parseError( 'unexpected-eof', $this->currentToken ); // "return a syntax error"?
|
||||
return null;
|
||||
}
|
||||
$value = $this->consumeComponentValue();
|
||||
|
||||
// 4.
|
||||
$value = $this->consumeComponentValue();
|
||||
// The spec says to return a syntax error if nothing is returned, but
|
||||
// that can never happen and the Editor's Draft removed that language.
|
||||
|
||||
// 5.
|
||||
$this->consumeTokenAndWhitespace();
|
||||
|
||||
// 6.
|
||||
// 5.
|
||||
if ( $this->currentToken->type() === Token::T_EOF ) {
|
||||
return $value;
|
||||
} else {
|
||||
$this->parseError( 'expected-eof', $this->currentToken ); // "return a syntax error"?
|
||||
return null;
|
||||
}
|
||||
|
||||
// "return a syntax error"?
|
||||
$this->parseError( 'expected-eof', $this->currentToken );
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a list of component values
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-list-of-component-values
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-list-of-component-values
|
||||
* @return ComponentValueList
|
||||
*/
|
||||
public function parseComponentValueList() {
|
||||
$list = new ComponentValueList();
|
||||
while ( true ) {
|
||||
$this->consumeToken(); // Move to the first/next token
|
||||
// Move to the first/next token
|
||||
$this->consumeToken();
|
||||
$value = $this->consumeComponentValue();
|
||||
if ( $value instanceof Token && $value->type() === Token::T_EOF ) {
|
||||
break;
|
||||
|
|
@ -323,14 +319,42 @@ class Parser {
|
|||
return $list;
|
||||
}
|
||||
|
||||
/**
|
||||
* Parse a comma-separated list of component values
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-comma-separated-list-of-component-values
|
||||
* @return ComponentValueList[]
|
||||
*/
|
||||
public function parseCommaSeparatedComponentValueList() {
|
||||
$lists = [];
|
||||
do {
|
||||
$list = new ComponentValueList();
|
||||
while ( true ) {
|
||||
// Move to the first/next token
|
||||
$this->consumeToken();
|
||||
$value = $this->consumeComponentValue();
|
||||
if ( $value instanceof Token &&
|
||||
( $value->type() === Token::T_EOF || $value->type() === Token::T_COMMA )
|
||||
) {
|
||||
break;
|
||||
}
|
||||
$list->add( $value );
|
||||
}
|
||||
$lists[] = $list;
|
||||
} while ( $value->type() === Token::T_COMMA );
|
||||
|
||||
return $lists;
|
||||
}
|
||||
|
||||
/**
|
||||
* Consume a list of rules
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-list-of-rules
|
||||
* @param boolean $topLevel Determines the behavior when CDO and CDC tokens are encountered
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-list-of-rules
|
||||
* @param bool $topLevel Determines the behavior when CDO and CDC tokens are encountered
|
||||
* @return RuleList
|
||||
*/
|
||||
protected function consumeRuleList( $topLevel ) {
|
||||
// @phan-suppress-previous-line PhanPluginNeverReturnMethod
|
||||
$list = new RuleList();
|
||||
// @phan-suppress-next-line PhanInfiniteLoop
|
||||
while ( true ) {
|
||||
$rule = false;
|
||||
switch ( $this->currentToken->type() ) {
|
||||
|
|
@ -342,11 +366,10 @@ class Parser {
|
|||
|
||||
case Token::T_CDO:
|
||||
case Token::T_CDC:
|
||||
if ( $topLevel ) {
|
||||
// Do nothing
|
||||
} else {
|
||||
if ( !$topLevel ) {
|
||||
$rule = $this->consumeQualifiedRule();
|
||||
}
|
||||
// Else, do nothing
|
||||
break;
|
||||
|
||||
case Token::T_AT_KEYWORD:
|
||||
|
|
@ -364,18 +387,21 @@ class Parser {
|
|||
$this->consumeToken();
|
||||
}
|
||||
|
||||
// @phan-suppress-next-line PhanPluginUnreachableCode Reached by break 2
|
||||
return $list;
|
||||
}
|
||||
|
||||
/**
|
||||
* Consume a list of declarations and at-rules
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-list-of-declarations
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-list-of-declarations
|
||||
* @param bool $allowAtRules Whether to allow at-rules. This flag is not in
|
||||
* the spec, and is used to implement the non-spec self::parseDeclarationList().
|
||||
* the spec and is used to implement the non-spec self::parseDeclarationList().
|
||||
* @return DeclarationOrAtRuleList|DeclarationList
|
||||
*/
|
||||
protected function consumeDeclarationOrAtRuleList( $allowAtRules = true ) {
|
||||
// @phan-suppress-previous-line PhanPluginNeverReturnMethod
|
||||
$list = $allowAtRules ? new DeclarationOrAtRuleList() : new DeclarationList();
|
||||
// @phan-suppress-next-line PhanInfiniteLoop
|
||||
while ( true ) {
|
||||
$declaration = false;
|
||||
switch ( $this->currentToken->type() ) {
|
||||
|
|
@ -400,7 +426,6 @@ class Parser {
|
|||
break;
|
||||
|
||||
case Token::T_IDENT:
|
||||
// The draft changes this to ComponentValue instead of Token, which makes more sense.
|
||||
$cvs = [];
|
||||
do {
|
||||
$cvs[] = $this->consumeComponentValue();
|
||||
|
|
@ -411,7 +436,8 @@ class Parser {
|
|||
);
|
||||
$tokens = ( new ComponentValueList( $cvs ) )->toTokenArray();
|
||||
$parser = static::newFromTokens( $tokens, $this->currentToken );
|
||||
$parser->consumeToken(); // Load that first token
|
||||
// Load that first token
|
||||
$parser->consumeToken();
|
||||
$declaration = $parser->consumeDeclaration();
|
||||
// Propagate any errors
|
||||
$this->parseErrors = array_merge( $this->parseErrors, $parser->parseErrors );
|
||||
|
|
@ -436,32 +462,32 @@ class Parser {
|
|||
$this->consumeToken();
|
||||
}
|
||||
|
||||
// @phan-suppress-next-line PhanPluginUnreachableCode Reached by break 2
|
||||
return $list;
|
||||
}
|
||||
|
||||
/**
|
||||
* Consume a declaration
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-declaration
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-declaration
|
||||
* @return Declaration|null
|
||||
*/
|
||||
protected function consumeDeclaration() {
|
||||
$declaration = new Declaration( $this->currentToken );
|
||||
|
||||
// 2.
|
||||
// 1.
|
||||
$this->consumeTokenAndWhitespace();
|
||||
|
||||
// 3.
|
||||
// 2. and 3.
|
||||
if ( $this->currentToken->type() !== Token::T_COLON ) {
|
||||
$this->parseError( 'expected-colon', $this->currentToken );
|
||||
return null;
|
||||
}
|
||||
$this->consumeToken();
|
||||
$this->consumeTokenAndWhitespace();
|
||||
|
||||
// 4.
|
||||
$value = $declaration->getValue();
|
||||
$l1 = $l2 = -1;
|
||||
while ( $this->currentToken->type() !== Token::T_EOF ) {
|
||||
// The draft changes this to ComponentValue instead of Token, which makes more sense.
|
||||
$value->add( $this->consumeComponentValue() );
|
||||
if ( $this->currentToken->type() !== Token::T_WHITESPACE ) {
|
||||
$l1 = $l2;
|
||||
|
|
@ -470,48 +496,62 @@ class Parser {
|
|||
$this->consumeToken();
|
||||
}
|
||||
|
||||
// 5.
|
||||
// 5. and part of 6.
|
||||
// @phan-suppress-next-line PhanSuspiciousValueComparison False positive about $l1 is -1
|
||||
$v1 = $l1 >= 0 ? $value[$l1] : null;
|
||||
$v2 = $l2 >= 0 ? $value[$l2] : null;
|
||||
if ( $v1 instanceof Token && $v1->type() === Token::T_DELIM && $v1->value() === '!' &&
|
||||
$v2 instanceof Token && $v2->type() === Token::T_IDENT &&
|
||||
if ( $v1 instanceof Token &&
|
||||
$v1->type() === Token::T_DELIM &&
|
||||
$v1->value() === '!' &&
|
||||
$v2 instanceof Token &&
|
||||
$v2->type() === Token::T_IDENT &&
|
||||
!strcasecmp( $v2->value(), 'important' )
|
||||
) {
|
||||
// Technically it doesn't say to remove any whitespace within/after
|
||||
// the "!important" too, but it makes sense to do so.
|
||||
// This removes the "!" and "important" (5), and also any whitespace between/after (6)
|
||||
while ( isset( $value[$l1] ) ) {
|
||||
$value->remove( $l1 );
|
||||
}
|
||||
$declaration->setImportant( true );
|
||||
}
|
||||
|
||||
// 6.
|
||||
// Rest of 6.
|
||||
$i = $value->count();
|
||||
// @phan-suppress-next-line PhanNonClassMethodCall False positive
|
||||
while ( --$i >= 0 && $value[$i] instanceof Token && $value[$i]->type() === Token::T_WHITESPACE ) {
|
||||
$value->remove( $i );
|
||||
}
|
||||
|
||||
// 7.
|
||||
return $declaration;
|
||||
}
|
||||
|
||||
/**
|
||||
* Consume an at-rule
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-an-at-rule
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-at-rule
|
||||
* @return AtRule
|
||||
* @suppress PhanPluginNeverReturnMethod due to break 2;
|
||||
*/
|
||||
protected function consumeAtRule() {
|
||||
$rule = new AtRule( $this->currentToken );
|
||||
$this->consumeToken();
|
||||
// @phan-suppress-next-line PhanInfiniteLoop
|
||||
while ( true ) {
|
||||
switch ( $this->currentToken->type() ) {
|
||||
case Token::T_SEMICOLON:
|
||||
return $rule;
|
||||
break 2;
|
||||
|
||||
case Token::T_EOF:
|
||||
// Parse error from the editor's draft as of 2017-01-11
|
||||
if ( $this->currentToken->typeFlag() !== 'recursion-depth-exceeded' ) {
|
||||
$this->parseError( 'unexpected-eof-in-rule', $this->currentToken );
|
||||
}
|
||||
return $rule;
|
||||
break 2;
|
||||
|
||||
case Token::T_LEFT_BRACE:
|
||||
$rule->setBlock( $this->consumeSimpleBlock( true ) );
|
||||
return $rule;
|
||||
$rule->setBlock( $this->consumeSimpleBlock() );
|
||||
break 2;
|
||||
|
||||
// Spec has "simple block with an associated token of <{-token>" here, but that isn't possible
|
||||
// because it's not a Token.
|
||||
|
||||
default:
|
||||
$rule->getPrelude()->add( $this->consumeComponentValue() );
|
||||
|
|
@ -519,13 +559,14 @@ class Parser {
|
|||
}
|
||||
$this->consumeToken();
|
||||
}
|
||||
// @codeCoverageIgnoreStart
|
||||
|
||||
// @phan-suppress-next-line PhanPluginUnreachableCode False positive due to break 2;
|
||||
return $rule;
|
||||
}
|
||||
// @codeCoverageIgnoreEnd
|
||||
|
||||
/**
|
||||
* Consume a qualified rule
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-qualified-rule
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-qualified-rule
|
||||
* @return QualifiedRule|null
|
||||
*/
|
||||
protected function consumeQualifiedRule() {
|
||||
|
|
@ -539,8 +580,11 @@ class Parser {
|
|||
return null;
|
||||
|
||||
case Token::T_LEFT_BRACE:
|
||||
$rule->setBlock( $this->consumeSimpleBlock( true ) );
|
||||
return $rule;
|
||||
$rule->setBlock( $this->consumeSimpleBlock() );
|
||||
break 2;
|
||||
|
||||
// Spec has "simple block with an associated token of <{-token>" here, but that isn't possible
|
||||
// because it's not a Token.
|
||||
|
||||
default:
|
||||
$rule->getPrelude()->add( $this->consumeComponentValue() );
|
||||
|
|
@ -548,13 +592,14 @@ class Parser {
|
|||
}
|
||||
$this->consumeToken();
|
||||
}
|
||||
// @codeCoverageIgnoreStart
|
||||
|
||||
// @phan-suppress-next-line PhanPluginUnreachableCode False positive due to break 2;
|
||||
return $rule;
|
||||
}
|
||||
// @codeCoverageIgnoreEnd
|
||||
|
||||
/**
|
||||
* Consume a component value
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-component-value
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-component-value
|
||||
* @return ComponentValue
|
||||
*/
|
||||
protected function consumeComponentValue() {
|
||||
|
|
@ -562,7 +607,7 @@ class Parser {
|
|||
$this->parseError( 'recursion-depth-exceeded', $this->currentToken );
|
||||
// There's no way to safely recover from this without more recursion.
|
||||
// So just eat the rest of the input, then return a
|
||||
// specially-flagged EOF so we can avoid 100 "unexpected EOF"
|
||||
// specially-flagged EOF, so we can avoid 100 "unexpected EOF"
|
||||
// errors.
|
||||
$position = $this->currentToken->getPosition();
|
||||
while ( $this->currentToken->type() !== Token::T_EOF ) {
|
||||
|
|
@ -591,29 +636,31 @@ class Parser {
|
|||
}
|
||||
|
||||
$this->cvDepth--;
|
||||
// @phan-suppress-next-line PhanTypeMismatchReturnNullable $ret always set
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/**
|
||||
* Consume a simple block
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-simple-block
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-simple-block
|
||||
* @return SimpleBlock
|
||||
* @suppress PhanPluginNeverReturnMethod due to break 2;
|
||||
*/
|
||||
protected function consumeSimpleBlock() {
|
||||
$block = new SimpleBlock( $this->currentToken );
|
||||
$endTokenType = $block->getEndTokenType();
|
||||
$this->consumeToken();
|
||||
// @phan-suppress-next-line PhanInfiniteLoop
|
||||
while ( true ) {
|
||||
switch ( $this->currentToken->type() ) {
|
||||
case Token::T_EOF:
|
||||
// Parse error from the editor's draft as of 2017-01-12
|
||||
if ( $this->currentToken->typeFlag() !== 'recursion-depth-exceeded' ) {
|
||||
$this->parseError( 'unexpected-eof-in-block', $this->currentToken );
|
||||
}
|
||||
return $block;
|
||||
break 2;
|
||||
|
||||
case $endTokenType:
|
||||
return $block;
|
||||
break 2;
|
||||
|
||||
default:
|
||||
$block->getValue()->add( $this->consumeComponentValue() );
|
||||
|
|
@ -621,30 +668,32 @@ class Parser {
|
|||
}
|
||||
$this->consumeToken();
|
||||
}
|
||||
// @codeCoverageIgnoreStart
|
||||
|
||||
// @phan-suppress-next-line PhanPluginUnreachableCode False positive due to break 2;
|
||||
return $block;
|
||||
}
|
||||
// @codeCoverageIgnoreEnd
|
||||
|
||||
/**
|
||||
* Consume a function
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-function
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-function
|
||||
* @return CSSFunction
|
||||
* @suppress PhanPluginNeverReturnMethod due to break 2;
|
||||
*/
|
||||
protected function consumeFunction() {
|
||||
$function = new CSSFunction( $this->currentToken );
|
||||
$this->consumeToken();
|
||||
|
||||
// @phan-suppress-next-line PhanInfiniteLoop
|
||||
while ( true ) {
|
||||
switch ( $this->currentToken->type() ) {
|
||||
case Token::T_EOF:
|
||||
// Parse error from the editor's draft as of 2017-01-12
|
||||
if ( $this->currentToken->typeFlag() !== 'recursion-depth-exceeded' ) {
|
||||
$this->parseError( 'unexpected-eof-in-function', $this->currentToken );
|
||||
}
|
||||
return $function;
|
||||
break 2;
|
||||
|
||||
case Token::T_RIGHT_PAREN:
|
||||
return $function;
|
||||
break 2;
|
||||
|
||||
default:
|
||||
$function->getValue()->add( $this->consumeComponentValue() );
|
||||
|
|
@ -652,7 +701,10 @@ class Parser {
|
|||
}
|
||||
$this->consumeToken();
|
||||
}
|
||||
// @codeCoverageIgnoreStart
|
||||
|
||||
// @phan-suppress-next-line PhanPluginUnreachableCode False positive due to break 2;
|
||||
return $function;
|
||||
}
|
||||
|
||||
// @codeCoverageIgnoreEnd
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,6 +6,9 @@
|
|||
|
||||
namespace Wikimedia\CSS\Parser;
|
||||
|
||||
use InvalidArgumentException;
|
||||
use UnexpectedValueException;
|
||||
|
||||
/**
|
||||
* Read data for the CSS parser
|
||||
*/
|
||||
|
|
@ -15,7 +18,10 @@ class StringDataSource implements DataSource {
|
|||
protected $string;
|
||||
|
||||
/** @var int */
|
||||
protected $len = 0, $pos = 0;
|
||||
protected $len = 0;
|
||||
|
||||
/** @var int */
|
||||
protected $pos = 0;
|
||||
|
||||
/** @var string[] */
|
||||
protected $putBack = [];
|
||||
|
|
@ -24,28 +30,15 @@ class StringDataSource implements DataSource {
|
|||
* @param string $string Input string. Must be valid UTF-8 with no BOM.
|
||||
*/
|
||||
public function __construct( $string ) {
|
||||
static $newPHP;
|
||||
|
||||
$this->string = (string)$string;
|
||||
$this->len = strlen( $this->string );
|
||||
|
||||
// HHVM 3.4 and older come with an outdated version of libmbfl that
|
||||
// incorrectly allows values above U+10FFFF, so we have to check
|
||||
// for them separately. (This issue also exists in PHP 5.3 and
|
||||
// older, which are no longer supported.)
|
||||
// @codeCoverageIgnoreStart
|
||||
if ( $newPHP === null ) {
|
||||
$newPHP = !mb_check_encoding( "\xf4\x90\x80\x80", 'UTF-8' );
|
||||
}
|
||||
// @codeCoverageIgnoreEnd
|
||||
|
||||
if ( !mb_check_encoding( $this->string, 'UTF-8' ) ||
|
||||
!$newPHP && preg_match( "/\xf4[\x90-\xbf]|[\xf5-\xff]/S", $this->string ) !== 0
|
||||
) {
|
||||
throw new \InvalidArgumentException( '$string is not valid UTF-8' );
|
||||
if ( !mb_check_encoding( $this->string, 'UTF-8' ) ) {
|
||||
throw new InvalidArgumentException( '$string is not valid UTF-8' );
|
||||
}
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function readCharacter() {
|
||||
if ( $this->putBack ) {
|
||||
return array_pop( $this->putBack );
|
||||
|
|
@ -61,7 +54,7 @@ class StringDataSource implements DataSource {
|
|||
$c = $this->string[$p];
|
||||
$cc = ord( $this->string[$p] );
|
||||
if ( $cc <= 0x7f ) {
|
||||
$this->pos += 1;
|
||||
$this->pos++;
|
||||
return $c;
|
||||
} elseif ( ( $cc & 0xe0 ) === 0xc0 ) {
|
||||
$this->pos += 2;
|
||||
|
|
@ -76,13 +69,14 @@ class StringDataSource implements DataSource {
|
|||
// WTF? Should never get here because it should have failed
|
||||
// validation in the constructor.
|
||||
// @codeCoverageIgnoreStart
|
||||
throw new \UnexpectedValueException(
|
||||
throw new UnexpectedValueException(
|
||||
sprintf( 'Unexpected byte %02X in string at position %d.', $cc, $this->pos )
|
||||
);
|
||||
// @codeCoverageIgnoreEnd
|
||||
}
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function putBackCharacter( $char ) {
|
||||
if ( $char !== self::EOF ) {
|
||||
$this->putBack[] = $char;
|
||||
|
|
|
|||
|
|
@ -6,9 +6,10 @@
|
|||
|
||||
namespace Wikimedia\CSS\Parser;
|
||||
|
||||
use Wikimedia\CSS\Util;
|
||||
use InvalidArgumentException;
|
||||
use Wikimedia\CSS\Objects\Token;
|
||||
use Wikimedia\CSS\Objects\TokenList;
|
||||
use Wikimedia\CSS\Util;
|
||||
|
||||
/**
|
||||
* Tokenizer that just returns a predefined list of tokens
|
||||
|
|
@ -32,7 +33,7 @@ class TokenListTokenizer implements Tokenizer {
|
|||
Util::assertAllInstanceOf( $tokens, Token::class, '$tokens' );
|
||||
$this->tokens = $tokens;
|
||||
} else {
|
||||
throw new \InvalidArgumentException( '$tokens must be a TokenList or an array of tokens' );
|
||||
throw new InvalidArgumentException( '$tokens must be a TokenList or an array of tokens' );
|
||||
}
|
||||
|
||||
if ( $eof && $eof->type() === Token::T_EOF ) {
|
||||
|
|
@ -46,13 +47,16 @@ class TokenListTokenizer implements Tokenizer {
|
|||
}
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function getParseErrors() {
|
||||
return [];
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function clearParseErrors() {
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function consumeToken() {
|
||||
return array_shift( $this->tokens ) ?: $this->eof;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ use Wikimedia\CSS\Grammar\MatcherFactory;
|
|||
use Wikimedia\CSS\Grammar\Quantifier;
|
||||
use Wikimedia\CSS\Grammar\TokenMatcher;
|
||||
use Wikimedia\CSS\Grammar\UnorderedGroup;
|
||||
use Wikimedia\CSS\Grammar\UrangeMatcher;
|
||||
use Wikimedia\CSS\Objects\AtRule;
|
||||
use Wikimedia\CSS\Objects\CSSObject;
|
||||
use Wikimedia\CSS\Objects\Rule;
|
||||
|
|
@ -22,7 +23,7 @@ use Wikimedia\CSS\Util;
|
|||
|
||||
/**
|
||||
* Sanitizes a CSS \@font-face rule
|
||||
* @see https://www.w3.org/TR/2013/CR-css-fonts-3-20131003/#font-resources
|
||||
* @see https://www.w3.org/TR/2018/REC-css-fonts-3-20180920/#font-resources
|
||||
*/
|
||||
class FontFaceAtRuleSanitizer extends RuleSanitizer {
|
||||
|
||||
|
|
@ -52,13 +53,7 @@ class FontFaceAtRuleSanitizer extends RuleSanitizer {
|
|||
new KeywordMatcher( [ 'normal', 'bold' ] ), $matchData['numWeight']
|
||||
] ),
|
||||
'font-stretch' => $matchData['font-stretch'],
|
||||
'unicode-range' => Quantifier::hash(
|
||||
new TokenMatcher( Token::T_UNICODE_RANGE, function ( Token $t ) {
|
||||
list( $start, $end ) = $t->range();
|
||||
return $start <= $end && $end <= 0x10ffff;
|
||||
} )
|
||||
),
|
||||
'font-variant' => $matchData['font-variant'],
|
||||
'unicode-range' => Quantifier::hash( new UrangeMatcher() ),
|
||||
'font-feature-settings' => $matchData['font-feature-settings'],
|
||||
] );
|
||||
}
|
||||
|
|
@ -69,14 +64,12 @@ class FontFaceAtRuleSanitizer extends RuleSanitizer {
|
|||
* @return array
|
||||
*/
|
||||
public static function fontMatchData( MatcherFactory $matcherFactory ) {
|
||||
$featureValueName = $matcherFactory->ident();
|
||||
$featureValueNameHash = Quantifier::hash( $featureValueName );
|
||||
$ret = [
|
||||
'familyName' => new Alternative( [
|
||||
$matcherFactory->string(),
|
||||
Quantifier::plus( $matcherFactory->ident() ),
|
||||
] ),
|
||||
'numWeight' => new TokenMatcher( Token::T_NUMBER, function ( Token $t ) {
|
||||
'numWeight' => new TokenMatcher( Token::T_NUMBER, static function ( Token $t ) {
|
||||
return $t->typeFlag() === 'integer' && preg_match( '/^[1-9]00$/', $t->representation() );
|
||||
} ),
|
||||
'font-style' => new KeywordMatcher( [ 'normal', 'italic', 'oblique' ] ),
|
||||
|
|
@ -87,7 +80,7 @@ class FontFaceAtRuleSanitizer extends RuleSanitizer {
|
|||
'font-feature-settings' => new Alternative( [
|
||||
new KeywordMatcher( 'normal' ),
|
||||
Quantifier::hash( new Juxtaposition( [
|
||||
new TokenMatcher( Token::T_STRING, function ( Token $t ) {
|
||||
new TokenMatcher( Token::T_STRING, static function ( Token $t ) {
|
||||
return preg_match( '/^[\x20-\x7e]{4}$/', $t->value() );
|
||||
} ),
|
||||
Quantifier::optional( new Alternative( [
|
||||
|
|
@ -102,15 +95,6 @@ class FontFaceAtRuleSanitizer extends RuleSanitizer {
|
|||
new KeywordMatcher( [ 'historical-ligatures', 'no-historical-ligatures' ] ),
|
||||
new KeywordMatcher( [ 'contextual', 'no-contextual' ] )
|
||||
],
|
||||
'alt' => [
|
||||
new FunctionMatcher( 'stylistic', $featureValueName ),
|
||||
new KeywordMatcher( 'historical-forms' ),
|
||||
new FunctionMatcher( 'styleset', $featureValueNameHash ),
|
||||
new FunctionMatcher( 'character-variant', $featureValueNameHash ),
|
||||
new FunctionMatcher( 'swash', $featureValueName ),
|
||||
new FunctionMatcher( 'ornaments', $featureValueName ),
|
||||
new FunctionMatcher( 'annotation', $featureValueName ),
|
||||
],
|
||||
'capsKeywords' => [
|
||||
'small-caps', 'all-small-caps', 'petite-caps', 'all-petite-caps', 'unicase', 'titling-caps'
|
||||
],
|
||||
|
|
@ -125,27 +109,32 @@ class FontFaceAtRuleSanitizer extends RuleSanitizer {
|
|||
new KeywordMatcher( [ 'jis78', 'jis83', 'jis90', 'jis04', 'simplified', 'traditional' ] ),
|
||||
new KeywordMatcher( [ 'full-width', 'proportional-width' ] ),
|
||||
new KeywordMatcher( 'ruby' ),
|
||||
]
|
||||
],
|
||||
'positionKeywords' => [
|
||||
'sub', 'super',
|
||||
],
|
||||
];
|
||||
$ret['font-variant'] = new Alternative( [
|
||||
new KeywordMatcher( [ 'normal', 'none' ] ),
|
||||
UnorderedGroup::someOf( array_merge(
|
||||
$ret['ligatures'],
|
||||
$ret['alt'],
|
||||
[ new KeywordMatcher( $ret['capsKeywords'] ) ],
|
||||
$ret['numeric'],
|
||||
$ret['eastAsian']
|
||||
$ret['eastAsian'],
|
||||
[ new KeywordMatcher( $ret['positionKeywords'] ) ]
|
||||
) )
|
||||
] );
|
||||
return $ret;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function handlesRule( Rule $rule ) {
|
||||
return $rule instanceof AtRule && !strcasecmp( $rule->getName(), 'font-face' );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function doSanitize( CSSObject $object ) {
|
||||
if ( !$object instanceof Rule || !$this->handlesRule( $object ) ) {
|
||||
if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
|
||||
$this->sanitizationError( 'expected-at-rule', $object, [ 'font-face' ] );
|
||||
return null;
|
||||
}
|
||||
|
|
@ -161,7 +150,7 @@ class FontFaceAtRuleSanitizer extends RuleSanitizer {
|
|||
return null;
|
||||
}
|
||||
|
||||
$ret = clone( $object );
|
||||
$ret = clone $object;
|
||||
$this->fixPreludeWhitespace( $ret, false );
|
||||
$this->sanitizeDeclarationBlock( $ret->getBlock(), $this->propertySanitizer );
|
||||
|
||||
|
|
|
|||
|
|
@ -1,83 +0,0 @@
|
|||
<?php
|
||||
/**
|
||||
* @file
|
||||
* @license https://opensource.org/licenses/Apache-2.0 Apache-2.0
|
||||
*/
|
||||
|
||||
namespace Wikimedia\CSS\Sanitizer;
|
||||
|
||||
use Wikimedia\CSS\Grammar\Matcher;
|
||||
use Wikimedia\CSS\Objects\AtRule;
|
||||
use Wikimedia\CSS\Objects\CSSObject;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\DeclarationList;
|
||||
use Wikimedia\CSS\Objects\Rule;
|
||||
use Wikimedia\CSS\Parser\Parser;
|
||||
use Wikimedia\CSS\Util;
|
||||
|
||||
/**
 * Sanitizes a feature-value at-rule inside a CSS \@font-feature-values rule
 *
 * Each instance handles one at-rule name (given to the constructor and compared
 * case-insensitively). Declarations inside the rule's block are kept only when
 * their value is accepted by the Matcher given to the constructor.
 *
 * @see https://www.w3.org/TR/2013/CR-css-fonts-3-20131003/#at-font-feature-values-rule
 */
class FontFeatureValueAtRuleSanitizer extends RuleSanitizer {

	/** @var string Name of the at-rule this sanitizer accepts */
	protected $name;

	/** @var Matcher Matcher each declaration value must satisfy */
	protected $valueMatcher;

	/**
	 * @param string $name Name of the at-rule to handle
	 * @param Matcher $valueMatcher Matcher applied to every declaration value in the block
	 */
	public function __construct( $name, Matcher $valueMatcher ) {
		$this->name = $name;
		$this->valueMatcher = $valueMatcher;
	}

	/** @inheritDoc */
	public function handlesRule( Rule $rule ) {
		return $rule instanceof AtRule && !strcasecmp( $rule->getName(), $this->name );
	}

	/**
	 * Sanitize one feature-value at-rule.
	 *
	 * Records a sanitization error and returns null when the object is not the
	 * expected at-rule, has no block, or has a non-whitespace prelude. Otherwise
	 * returns a clone whose block holds only the declarations accepted by the
	 * value matcher; each rejected declaration is recorded as an error.
	 *
	 * @inheritDoc
	 */
	protected function doSanitize( CSSObject $object ) {
		// Narrow to AtRule (not just Rule) so the getBlock()/getPrelude() calls
		// below are made on a type that declares them. handlesRule() already
		// requires an AtRule, so the set of accepted objects is unchanged.
		if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
			$this->sanitizationError( 'expected-at-rule', $object, [ $this->name ] );
			return null;
		}

		if ( $object->getBlock() === null ) {
			$this->sanitizationError( 'at-rule-block-required', $object, [ $this->name ] );
			return null;
		}

		// No non-whitespace prelude allowed
		if ( Util::findFirstNonWhitespace( $object->getPrelude() ) ) {
			$this->sanitizationError( 'invalid-font-feature-value', $object, [ $this->name ] );
			return null;
		}

		$ret = clone $object;
		$this->fixPreludeWhitespace( $ret, false );

		// Parse the block's contents into a list of declarations, sanitize it,
		// and put it back into the block.
		$blockContents = $ret->getBlock()->getValue();
		$parser = Parser::newFromTokens( $blockContents->toTokenArray() );
		$oldDeclarations = $parser->parseDeclarationList();
		// Parse errors from re-parsing the block are reported alongside sanitization errors.
		$this->sanitizationErrors = array_merge( $this->sanitizationErrors, $parser->getParseErrors() );
		$newDeclarations = new DeclarationList();
		foreach ( $oldDeclarations as $declaration ) {
			if ( $this->valueMatcher->match( $declaration->getValue(), [ 'mark-significance' => true ] ) ) {
				$newDeclarations->add( $declaration );
			} else {
				$this->sanitizationError( 'invalid-font-feature-value-declaration', $declaration,
					[ $this->name ] );
			}
		}
		$blockContents->clear();
		$blockContents->add( $newDeclarations->toComponentValueArray() );

		return $ret;
	}
}
|
||||
|
|
@ -1,84 +0,0 @@
|
|||
<?php
|
||||
/**
|
||||
* @file
|
||||
* @license https://opensource.org/licenses/Apache-2.0 Apache-2.0
|
||||
*/
|
||||
|
||||
namespace Wikimedia\CSS\Sanitizer;
|
||||
|
||||
use Wikimedia\CSS\Grammar\Alternative;
|
||||
use Wikimedia\CSS\Grammar\Matcher;
|
||||
use Wikimedia\CSS\Grammar\MatcherFactory;
|
||||
use Wikimedia\CSS\Grammar\Quantifier;
|
||||
use Wikimedia\CSS\Objects\AtRule;
|
||||
use Wikimedia\CSS\Objects\CSSObject;
|
||||
use Wikimedia\CSS\Objects\Rule;
|
||||
use Wikimedia\CSS\Util;
|
||||
|
||||
/**
 * Sanitizes a CSS \@font-feature-values rule
 *
 * The prelude must match a list of font family names (a string, or one or more
 * identifiers). The rules nested in the block are sanitized by one
 * FontFeatureValueAtRuleSanitizer per feature-value block name.
 *
 * @see https://www.w3.org/TR/2013/CR-css-fonts-3-20131003/#at-font-feature-values-rule
 */
class FontFeatureValuesAtRuleSanitizer extends RuleSanitizer {

	/** @var Matcher Matcher for the font list in the prelude */
	protected $fontListMatcher;

	/** @var FontFeatureValueAtRuleSanitizer[] Sanitizers for the nested feature-value at-rules */
	protected $ruleSanitizers;

	/**
	 * @param MatcherFactory $matcherFactory Factory used to build the prelude and value matchers
	 */
	public function __construct( MatcherFactory $matcherFactory ) {
		$this->fontListMatcher = Quantifier::hash( new Alternative( [
			$matcherFactory->string(),
			Quantifier::plus( $matcherFactory->ident() ),
		] ) );

		// Value matchers shared by the nested sanitizers: exactly one number,
		// one or two numbers, and one or more numbers.
		$n = $matcherFactory->rawNumber();
		$n2 = Quantifier::count( $n, 1, 2 );
		$nPlus = Quantifier::plus( $n );
		$this->ruleSanitizers = [
			new FontFeatureValueAtRuleSanitizer( 'stylistic', $n ),
			new FontFeatureValueAtRuleSanitizer( 'styleset', $nPlus ),
			new FontFeatureValueAtRuleSanitizer( 'character-variant', $n2 ),
			new FontFeatureValueAtRuleSanitizer( 'swash', $n ),
			new FontFeatureValueAtRuleSanitizer( 'ornaments', $n ),
			new FontFeatureValueAtRuleSanitizer( 'annotation', $n ),
		];
	}

	/** @inheritDoc */
	public function handlesRule( Rule $rule ) {
		return $rule instanceof AtRule && !strcasecmp( $rule->getName(), 'font-feature-values' );
	}

	/**
	 * Sanitize a \@font-feature-values rule.
	 *
	 * Records a sanitization error and returns null when the object is not the
	 * expected at-rule, has no block, or its prelude does not match the font
	 * list matcher. Otherwise returns a clone whose block has been run through
	 * the nested feature-value sanitizers.
	 *
	 * @inheritDoc
	 */
	protected function doSanitize( CSSObject $object ) {
		// Narrow to AtRule (not just Rule) so the getBlock()/getPrelude() calls
		// below are made on a type that declares them. handlesRule() already
		// requires an AtRule, so the set of accepted objects is unchanged.
		if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
			$this->sanitizationError( 'expected-at-rule', $object, [ 'font-feature-values' ] );
			return null;
		}

		if ( $object->getBlock() === null ) {
			$this->sanitizationError( 'at-rule-block-required', $object, [ 'font-feature-values' ] );
			return null;
		}

		// Test the font list in the prelude
		if ( !$this->fontListMatcher->match( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
			$cv = Util::findFirstNonWhitespace( $object->getPrelude() );
			if ( $cv ) {
				// Something is there but it is not a valid font list
				$this->sanitizationError( 'invalid-font-feature-values-font-list', $cv );
			} else {
				// Prelude is empty or whitespace only
				$this->sanitizationError( 'missing-font-feature-values-font-list', $object );
			}
			return null;
		}

		$ret = clone $object;
		$this->fixPreludeWhitespace( $ret, false );
		$this->sanitizeRuleBlock( $ret->getBlock(), $this->ruleSanitizers );

		return $ret;
	}
}
|
||||
|
|
@ -7,6 +7,7 @@
|
|||
namespace Wikimedia\CSS\Sanitizer;
|
||||
|
||||
use Wikimedia\CSS\Grammar\Alternative;
|
||||
use Wikimedia\CSS\Grammar\FunctionMatcher;
|
||||
use Wikimedia\CSS\Grammar\Juxtaposition;
|
||||
use Wikimedia\CSS\Grammar\Matcher;
|
||||
use Wikimedia\CSS\Grammar\MatcherFactory;
|
||||
|
|
@ -18,7 +19,7 @@ use Wikimedia\CSS\Util;
|
|||
|
||||
/**
|
||||
* Sanitizes a CSS \@import rule
|
||||
* @see https://www.w3.org/TR/2016/CR-css-cascade-3-20160519/#at-import
|
||||
* @see https://www.w3.org/TR/2018/CR-css-cascade-4-20180828/#at-import
|
||||
*/
|
||||
class ImportAtRuleSanitizer extends RuleSanitizer {
|
||||
|
||||
|
|
@ -27,27 +28,41 @@ class ImportAtRuleSanitizer extends RuleSanitizer {
|
|||
|
||||
/**
|
||||
* @param MatcherFactory $matcherFactory
|
||||
* @param array $options Additional options:
|
||||
* - strict: (bool) Only accept defined syntax in supports(). Default true.
|
||||
* - declarationSanitizer: (PropertySanitizer) Check supports() declarations against this
|
||||
* Sanitizer.
|
||||
*/
|
||||
public function __construct( MatcherFactory $matcherFactory ) {
|
||||
public function __construct( MatcherFactory $matcherFactory, array $options = [] ) {
|
||||
$declarationSanitizer = $options['declarationSanitizer'] ?? null;
|
||||
$strict = $options['strict'] ?? true;
|
||||
|
||||
$this->matcher = new Juxtaposition( [
|
||||
new Alternative( [
|
||||
$matcherFactory->url( 'css' ),
|
||||
$matcherFactory->urlstring( 'css' ),
|
||||
] ),
|
||||
Quantifier::optional( new FunctionMatcher( 'supports', new Alternative( [
|
||||
$matcherFactory->cssSupportsCondition( $declarationSanitizer, $strict ),
|
||||
$matcherFactory->cssDeclaration( $declarationSanitizer ),
|
||||
] ) ) ),
|
||||
$matcherFactory->cssMediaQueryList(),
|
||||
] );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function getIndex() {
|
||||
return -1000;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function handlesRule( Rule $rule ) {
|
||||
return $rule instanceof AtRule && !strcasecmp( $rule->getName(), 'import' );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function doSanitize( CSSObject $object ) {
|
||||
if ( !$object instanceof Rule || !$this->handlesRule( $object ) ) {
|
||||
if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
|
||||
$this->sanitizationError( 'expected-at-rule', $object, [ 'import' ] );
|
||||
return null;
|
||||
}
|
||||
|
|
@ -56,7 +71,7 @@ class ImportAtRuleSanitizer extends RuleSanitizer {
|
|||
$this->sanitizationError( 'at-rule-block-not-allowed', $object->getBlock(), [ 'import' ] );
|
||||
return null;
|
||||
}
|
||||
if ( !$this->matcher->match( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
|
||||
if ( !$this->matcher->matchAgainst( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
|
||||
$cv = Util::findFirstNonWhitespace( $object->getPrelude() );
|
||||
if ( $cv ) {
|
||||
$this->sanitizationError( 'invalid-import-value', $cv );
|
||||
|
|
@ -65,8 +80,6 @@ class ImportAtRuleSanitizer extends RuleSanitizer {
|
|||
}
|
||||
return null;
|
||||
}
|
||||
$object = $this->fixPreludeWhitespace( $object, true );
|
||||
|
||||
return $object;
|
||||
return $this->fixPreludeWhitespace( $object, true );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,12 +18,12 @@ use Wikimedia\CSS\Util;
|
|||
|
||||
/**
|
||||
* Sanitizes a CSS \@keyframes rule
|
||||
* @see https://www.w3.org/TR/2013/WD-css3-animations-20130219/#keyframes
|
||||
* @see https://www.w3.org/TR/2018/WD-css-animations-1-20181011/#keyframes
|
||||
*/
|
||||
class KeyframesAtRuleSanitizer extends RuleSanitizer {
|
||||
|
||||
/** @var Matcher */
|
||||
protected $identMatcher;
|
||||
protected $nameMatcher;
|
||||
|
||||
/** @var Sanitizer */
|
||||
protected $ruleSanitizer;
|
||||
|
|
@ -35,7 +35,10 @@ class KeyframesAtRuleSanitizer extends RuleSanitizer {
|
|||
public function __construct(
|
||||
MatcherFactory $matcherFactory, PropertySanitizer $propertySanitizer
|
||||
) {
|
||||
$this->identMatcher = $matcherFactory->ident();
|
||||
$this->nameMatcher = new Alternative( [
|
||||
$matcherFactory->customIdent( [ 'none' ] ),
|
||||
$matcherFactory->string(),
|
||||
] );
|
||||
$this->ruleSanitizer = new StyleRuleSanitizer(
|
||||
Quantifier::hash( new Alternative( [
|
||||
new KeywordMatcher( [ 'from', 'to' ] ), $matcherFactory->rawPercentage()
|
||||
|
|
@ -44,12 +47,14 @@ class KeyframesAtRuleSanitizer extends RuleSanitizer {
|
|||
);
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function handlesRule( Rule $rule ) {
|
||||
return $rule instanceof AtRule && !strcasecmp( $rule->getName(), 'keyframes' );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function doSanitize( CSSObject $object ) {
|
||||
if ( !$object instanceof Rule || !$this->handlesRule( $object ) ) {
|
||||
if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
|
||||
$this->sanitizationError( 'expected-at-rule', $object, [ 'keyframes' ] );
|
||||
return null;
|
||||
}
|
||||
|
|
@ -60,7 +65,7 @@ class KeyframesAtRuleSanitizer extends RuleSanitizer {
|
|||
}
|
||||
|
||||
// Test the keyframe name
|
||||
if ( !$this->identMatcher->match( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
|
||||
if ( !$this->nameMatcher->matchAgainst( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
|
||||
$cv = Util::findFirstNonWhitespace( $object->getPrelude() );
|
||||
if ( $cv ) {
|
||||
$this->sanitizationError( 'invalid-keyframe-name', $cv );
|
||||
|
|
@ -70,7 +75,7 @@ class KeyframesAtRuleSanitizer extends RuleSanitizer {
|
|||
return null;
|
||||
}
|
||||
|
||||
$ret = clone( $object );
|
||||
$ret = clone $object;
|
||||
$this->fixPreludeWhitespace( $ret, false );
|
||||
$this->sanitizeRuleBlock( $ret->getBlock(), [ $this->ruleSanitizer ] );
|
||||
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ use Wikimedia\CSS\Util;
|
|||
|
||||
/**
|
||||
* Sanitizes the margin at-rules inside a CSS \@page rule
|
||||
* @see https://www.w3.org/TR/2013/WD-css3-page-20130314/
|
||||
* @see https://www.w3.org/TR/2018/WD-css-page-3-20181018/
|
||||
*/
|
||||
class MarginAtRuleSanitizer extends RuleSanitizer {
|
||||
|
||||
|
|
@ -34,13 +34,15 @@ class MarginAtRuleSanitizer extends RuleSanitizer {
|
|||
$this->propertySanitizer = $propertySanitizer;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function handlesRule( Rule $rule ) {
|
||||
return $rule instanceof AtRule &&
|
||||
in_array( strtolower( $rule->getName() ), self::$marginRuleNames, true );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function doSanitize( CSSObject $object ) {
|
||||
if ( !$object instanceof Rule || !$this->handlesRule( $object ) ) {
|
||||
if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
|
||||
$this->sanitizationError( 'expected-page-margin-at-rule', $object );
|
||||
return null;
|
||||
}
|
||||
|
|
@ -56,7 +58,7 @@ class MarginAtRuleSanitizer extends RuleSanitizer {
|
|||
return null;
|
||||
}
|
||||
|
||||
$ret = clone( $object );
|
||||
$ret = clone $object;
|
||||
$this->fixPreludeWhitespace( $ret, false );
|
||||
$this->sanitizeDeclarationBlock( $ret->getBlock(), $this->propertySanitizer );
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@
|
|||
namespace Wikimedia\CSS\Sanitizer;
|
||||
|
||||
use Wikimedia\CSS\Grammar\Matcher;
|
||||
use Wikimedia\CSS\Grammar\MatcherFactory;
|
||||
use Wikimedia\CSS\Objects\AtRule;
|
||||
use Wikimedia\CSS\Objects\CSSObject;
|
||||
use Wikimedia\CSS\Objects\Rule;
|
||||
|
|
@ -50,12 +49,14 @@ class MediaAtRuleSanitizer extends RuleSanitizer {
|
|||
$this->ruleSanitizers = $ruleSanitizers;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function handlesRule( Rule $rule ) {
|
||||
return $rule instanceof AtRule && !strcasecmp( $rule->getName(), 'media' );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function doSanitize( CSSObject $object ) {
|
||||
if ( !$object instanceof Rule || !$this->handlesRule( $object ) ) {
|
||||
if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
|
||||
$this->sanitizationError( 'expected-at-rule', $object, [ 'media' ] );
|
||||
return null;
|
||||
}
|
||||
|
|
@ -66,7 +67,7 @@ class MediaAtRuleSanitizer extends RuleSanitizer {
|
|||
}
|
||||
|
||||
// Test the media query
|
||||
$match = $this->mediaQueryListMatcher->match(
|
||||
$match = $this->mediaQueryListMatcher->matchAgainst(
|
||||
$object->getPrelude(), [ 'mark-significance' => true ]
|
||||
);
|
||||
if ( !$match ) {
|
||||
|
|
@ -75,7 +76,7 @@ class MediaAtRuleSanitizer extends RuleSanitizer {
|
|||
return null;
|
||||
}
|
||||
|
||||
$ret = clone( $object );
|
||||
$ret = clone $object;
|
||||
$this->fixPreludeWhitespace( $ret, false );
|
||||
$this->sanitizeRuleBlock( $ret->getBlock(), $this->ruleSanitizers );
|
||||
|
||||
|
|
|
|||
|
|
@ -38,16 +38,19 @@ class NamespaceAtRuleSanitizer extends RuleSanitizer {
|
|||
] );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function getIndex() {
|
||||
return -900;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function handlesRule( Rule $rule ) {
|
||||
return $rule instanceof AtRule && !strcasecmp( $rule->getName(), 'namespace' );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function doSanitize( CSSObject $object ) {
|
||||
if ( !$object instanceof Rule || !$this->handlesRule( $object ) ) {
|
||||
if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
|
||||
$this->sanitizationError( 'expected-at-rule', $object, [ 'namespace' ] );
|
||||
return null;
|
||||
}
|
||||
|
|
@ -56,7 +59,7 @@ class NamespaceAtRuleSanitizer extends RuleSanitizer {
|
|||
$this->sanitizationError( 'at-rule-block-not-allowed', $object->getBlock(), [ 'namespace' ] );
|
||||
return null;
|
||||
}
|
||||
if ( !$this->matcher->match( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
|
||||
if ( !$this->matcher->matchAgainst( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
|
||||
$cv = Util::findFirstNonWhitespace( $object->getPrelude() );
|
||||
if ( $cv ) {
|
||||
$this->sanitizationError( 'invalid-namespace-value', $cv );
|
||||
|
|
@ -65,8 +68,6 @@ class NamespaceAtRuleSanitizer extends RuleSanitizer {
|
|||
}
|
||||
return null;
|
||||
}
|
||||
$object = $this->fixPreludeWhitespace( $object, true );
|
||||
|
||||
return $object;
|
||||
return $this->fixPreludeWhitespace( $object, true );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ use Wikimedia\CSS\Util;
|
|||
|
||||
/**
|
||||
* Sanitizes a CSS \@page rule
|
||||
* @see https://www.w3.org/TR/2013/WD-css3-page-20130314/
|
||||
* @see https://www.w3.org/TR/2018/WD-css-page-3-20181018/
|
||||
*/
|
||||
class PageAtRuleSanitizer extends RuleSanitizer {
|
||||
|
||||
|
|
@ -63,26 +63,43 @@ class PageAtRuleSanitizer extends RuleSanitizer {
|
|||
] );
|
||||
$this->pageSelectorMatcher->setDefaultOptions( [ 'skip-whitespace' => false ] );
|
||||
|
||||
// Clone the $propertySanitizer and inject the special "size" property
|
||||
$this->propertySanitizer = clone( $propertySanitizer );
|
||||
$this->propertySanitizer->addKnownProperties( [ 'size' => new Alternative( [
|
||||
Quantifier::count( $matcherFactory->length(), 1, 2 ),
|
||||
new KeywordMatcher( 'auto' ),
|
||||
UnorderedGroup::someOf( [
|
||||
new KeywordMatcher( [ 'A5', 'A4', 'A3', 'B5', 'B4', 'letter', 'legal', 'ledger' ] ),
|
||||
new KeywordMatcher( [ 'portrait', 'landscape' ] ),
|
||||
// Clone the $propertySanitizer and inject the special properties
|
||||
$this->propertySanitizer = clone $propertySanitizer;
|
||||
$this->propertySanitizer->addKnownProperties( [
|
||||
'size' => new Alternative( [
|
||||
Quantifier::count( $matcherFactory->length(), 1, 2 ),
|
||||
new KeywordMatcher( 'auto' ),
|
||||
UnorderedGroup::someOf( [
|
||||
new KeywordMatcher( [
|
||||
'A5', 'A4', 'A3', 'B5', 'B4', 'JIS-B5', 'JIS-B4', 'letter', 'legal', 'ledger',
|
||||
] ),
|
||||
new KeywordMatcher( [ 'portrait', 'landscape' ] ),
|
||||
] ),
|
||||
] ),
|
||||
] ) ] );
|
||||
'marks' => new Alternative( [
|
||||
new KeywordMatcher( 'none' ),
|
||||
UnorderedGroup::someOf( [
|
||||
new KeywordMatcher( 'crop' ),
|
||||
new KeywordMatcher( 'cross' ),
|
||||
] ),
|
||||
] ),
|
||||
'bleed' => new Alternative( [
|
||||
new KeywordMatcher( 'auto' ),
|
||||
$matcherFactory->length(),
|
||||
] ),
|
||||
] );
|
||||
|
||||
$this->ruleSanitizer = new MarginAtRuleSanitizer( $propertySanitizer );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function handlesRule( Rule $rule ) {
|
||||
return $rule instanceof AtRule && !strcasecmp( $rule->getName(), 'page' );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function doSanitize( CSSObject $object ) {
|
||||
if ( !$object instanceof Rule || !$this->handlesRule( $object ) ) {
|
||||
if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
|
||||
$this->sanitizationError( 'expected-at-rule', $object, [ 'page' ] );
|
||||
return null;
|
||||
}
|
||||
|
|
@ -93,7 +110,7 @@ class PageAtRuleSanitizer extends RuleSanitizer {
|
|||
}
|
||||
|
||||
// Test the page selector
|
||||
$match = $this->pageSelectorMatcher->match(
|
||||
$match = $this->pageSelectorMatcher->matchAgainst(
|
||||
$object->getPrelude(), [ 'mark-significance' => true ]
|
||||
);
|
||||
if ( !$match ) {
|
||||
|
|
@ -102,7 +119,7 @@ class PageAtRuleSanitizer extends RuleSanitizer {
|
|||
return null;
|
||||
}
|
||||
|
||||
$ret = clone( $object );
|
||||
$ret = clone $object;
|
||||
$this->fixPreludeWhitespace( $ret, false );
|
||||
|
||||
// Parse the block's contents into a list of declarations and at-rules,
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ use InvalidArgumentException;
|
|||
use Wikimedia\CSS\Grammar\Matcher;
|
||||
use Wikimedia\CSS\Grammar\NothingMatcher;
|
||||
use Wikimedia\CSS\Objects\CSSObject;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\Declaration;
|
||||
use Wikimedia\CSS\Util;
|
||||
|
||||
|
|
@ -28,7 +27,7 @@ class PropertySanitizer extends Sanitizer {
|
|||
/**
|
||||
* @param Matcher[] $properties Array mapping declaration names (lowercase)
|
||||
* to Matchers for the values
|
||||
* @param Matcher $cssWideKeywordsMatcher Matcher for keywords that should
|
||||
* @param Matcher|null $cssWideKeywordsMatcher Matcher for keywords that should
|
||||
* be recognized for all known properties.
|
||||
*/
|
||||
public function __construct( array $properties = [], Matcher $cssWideKeywordsMatcher = null ) {
|
||||
|
|
@ -63,20 +62,20 @@ class PropertySanitizer extends Sanitizer {
|
|||
|
||||
/**
|
||||
* Merge a list of matchers into the list of known properties
|
||||
* @param Matcher[] $properties Array mapping declaration names (lowercase)
|
||||
* @param Matcher[] $props Array mapping declaration names (lowercase)
|
||||
* to Matchers for the values
|
||||
* @throws InvalidArgumentException if some property is already defined
|
||||
*/
|
||||
public function addKnownProperties( $props ) {
|
||||
$dups = [];
|
||||
foreach ( $props as $k => $v ) {
|
||||
if ( isset( $this->knownProperties[$k] ) && $props[$k] !== $this->knownProperties[$k] ) {
|
||||
if ( isset( $this->knownProperties[$k] ) && $v !== $this->knownProperties[$k] ) {
|
||||
$dups[] = $k;
|
||||
}
|
||||
}
|
||||
if ( $dups ) {
|
||||
throw new InvalidArgumentException(
|
||||
'Duplicate definitions for properties: ' . join( ' ', $dups )
|
||||
'Duplicate definitions for properties: ' . implode( ' ', $dups )
|
||||
);
|
||||
}
|
||||
$this->setKnownProperties( $this->knownProperties + $props );
|
||||
|
|
@ -98,6 +97,7 @@ class PropertySanitizer extends Sanitizer {
|
|||
$this->cssWideKeywords = $matcher;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function doSanitize( CSSObject $object ) {
|
||||
if ( !$object instanceof Declaration ) {
|
||||
$this->sanitizationError( 'expected-declaration', $object );
|
||||
|
|
@ -112,8 +112,8 @@ class PropertySanitizer extends Sanitizer {
|
|||
}
|
||||
|
||||
$list = $object->getValue();
|
||||
if ( !$knownProperties[$name]->match( $list, [ 'mark-significance' => true ] ) &&
|
||||
!$this->getCssWideKeywordsMatcher()->match( $list, [ 'mark-significance' => true ] )
|
||||
if ( !$knownProperties[$name]->matchAgainst( $list, [ 'mark-significance' => true ] ) &&
|
||||
!$this->getCssWideKeywordsMatcher()->matchAgainst( $list, [ 'mark-significance' => true ] )
|
||||
) {
|
||||
$cv = Util::findFirstNonWhitespace( $list );
|
||||
if ( $cv ) {
|
||||
|
|
|
|||
|
|
@ -6,9 +6,9 @@
|
|||
|
||||
namespace Wikimedia\CSS\Sanitizer;
|
||||
|
||||
use Wikimedia\CSS\Objects\Rule;
|
||||
use Wikimedia\CSS\Objects\AtRule;
|
||||
use Wikimedia\CSS\Objects\CSSFunction;
|
||||
use Wikimedia\CSS\Objects\Rule;
|
||||
use Wikimedia\CSS\Objects\SimpleBlock;
|
||||
use Wikimedia\CSS\Objects\Token;
|
||||
use Wikimedia\CSS\Parser\Parser;
|
||||
|
|
@ -88,14 +88,17 @@ abstract class RuleSanitizer extends Sanitizer {
|
|||
}
|
||||
|
||||
$significant = $cv instanceof CSSFunction ||
|
||||
$cv instanceof Token &&
|
||||
Token::separate( new Token( Token::T_AT_KEYWORD, $rule->getName() ), $cv );
|
||||
( $cv instanceof Token &&
|
||||
Token::separate( new Token( Token::T_AT_KEYWORD, $rule->getName() ), $cv )
|
||||
);
|
||||
|
||||
// @phan-suppress-next-line PhanNonClassMethodCall False positive
|
||||
if ( $prelude[0] instanceof Token && $prelude[0]->type() === Token::T_WHITESPACE ) {
|
||||
// @phan-suppress-next-line PhanNonClassMethodCall False positive
|
||||
$prelude[0] = $prelude[0]->copyWithSignificance( $significant );
|
||||
} elseif ( $significant ) {
|
||||
if ( $cloneIfNecessary ) {
|
||||
$rule = clone( $rule );
|
||||
$rule = clone $rule;
|
||||
$prelude = $rule->getPrelude();
|
||||
}
|
||||
$prelude->add( new Token( Token::T_WHITESPACE ), 0 );
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ namespace Wikimedia\CSS\Sanitizer;
|
|||
use Wikimedia\CSS\Objects\CSSObject;
|
||||
use Wikimedia\CSS\Objects\CSSObjectList;
|
||||
use Wikimedia\CSS\Objects\RuleList;
|
||||
use Wikimedia\ScopedCallback;
|
||||
|
||||
/**
|
||||
* Base class for CSS sanitizers
|
||||
|
|
@ -26,6 +27,22 @@ abstract class Sanitizer {
|
|||
return $this->sanitizationErrors;
|
||||
}
|
||||
|
||||
/**
|
||||
* Temporarily clear sanitization errors
|
||||
*
|
||||
* Errors will be cleared, then restored when the returned ScopedCallback
|
||||
* goes out of scope or is consumed.
|
||||
*
|
||||
* @return ScopedCallback
|
||||
*/
|
||||
public function stashSanitizationErrors() {
|
||||
$reset = new ScopedCallback( function ( $e ) {
|
||||
$this->sanitizationErrors = $e;
|
||||
}, [ $this->sanitizationErrors ] );
|
||||
$this->sanitizationErrors = [];
|
||||
return $reset;
|
||||
}
|
||||
|
||||
/**
|
||||
* Clear sanitization errors
|
||||
*/
|
||||
|
|
@ -40,7 +57,7 @@ abstract class Sanitizer {
|
|||
* @param array $data Extra data about the error.
|
||||
*/
|
||||
protected function sanitizationError( $tag, CSSObject $object, array $data = [] ) {
|
||||
list( $line, $pos ) = $object->getPosition();
|
||||
[ $line, $pos ] = $object->getPosition();
|
||||
$this->sanitizationErrors[] = array_merge( [ $tag, $line, $pos ], $data );
|
||||
}
|
||||
|
||||
|
|
@ -99,7 +116,7 @@ abstract class Sanitizer {
|
|||
if ( $sanitizer->handlesRule( $rule ) ) {
|
||||
$indexes = $sanitizer->getIndex();
|
||||
if ( is_array( $indexes ) ) {
|
||||
list( $testIndex, $setIndex ) = $indexes;
|
||||
[ $testIndex, $setIndex ] = $indexes;
|
||||
} else {
|
||||
$testIndex = $setIndex = $indexes;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,9 +6,9 @@
|
|||
|
||||
namespace Wikimedia\CSS\Sanitizer;
|
||||
|
||||
use Wikimedia\CSS\Grammar\MatcherFactory;
|
||||
use Wikimedia\CSS\Objects\CSSObject;
|
||||
use Wikimedia\CSS\Objects\DeclarationList;
|
||||
use Wikimedia\CSS\Grammar\MatcherFactory;
|
||||
use Wikimedia\CSS\Parser\Parser;
|
||||
|
||||
/**
|
||||
|
|
@ -43,11 +43,10 @@ class StyleAttributeSanitizer extends Sanitizer {
|
|||
$propertySanitizer = new StylePropertySanitizer( $matcherFactory );
|
||||
|
||||
// StyleAttributeSanitizer brings it all together
|
||||
$sanitizer = new StyleAttributeSanitizer( $propertySanitizer );
|
||||
|
||||
return $sanitizer;
|
||||
return new StyleAttributeSanitizer( $propertySanitizer );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function doSanitize( CSSObject $object ) {
|
||||
if ( !$object instanceof DeclarationList ) {
|
||||
$this->sanitizationError( 'expected-declaration-list', $object );
|
||||
|
|
@ -65,6 +64,7 @@ class StyleAttributeSanitizer extends Sanitizer {
|
|||
$parser = Parser::newFromString( $string );
|
||||
$declarations = $parser->parseDeclarationList();
|
||||
$this->sanitizationErrors = array_merge( $this->sanitizationErrors, $parser->getParseErrors() );
|
||||
// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
|
||||
return $this->sanitizeList( $this->propertySanitizer, $declarations );
|
||||
}
|
||||
}
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
|
|
@ -6,11 +6,14 @@
|
|||
|
||||
namespace Wikimedia\CSS\Sanitizer;
|
||||
|
||||
use InvalidArgumentException;
|
||||
use Wikimedia\CSS\Grammar\Juxtaposition;
|
||||
use Wikimedia\CSS\Grammar\Matcher;
|
||||
use Wikimedia\CSS\Grammar\MatcherFactory;
|
||||
use Wikimedia\CSS\Objects\CSSObject;
|
||||
use Wikimedia\CSS\Grammar\Quantifier;
|
||||
use Wikimedia\CSS\Objects\ComponentValue;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\CSSObject;
|
||||
use Wikimedia\CSS\Objects\QualifiedRule;
|
||||
use Wikimedia\CSS\Objects\Rule;
|
||||
use Wikimedia\CSS\Objects\Token;
|
||||
|
|
@ -18,7 +21,7 @@ use Wikimedia\CSS\Util;
|
|||
|
||||
/**
|
||||
* Sanitizes a CSS style rule
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#style-rules
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#style-rules
|
||||
*/
|
||||
class StyleRuleSanitizer extends RuleSanitizer {
|
||||
|
||||
|
|
@ -28,6 +31,9 @@ class StyleRuleSanitizer extends RuleSanitizer {
|
|||
/** @var ComponentValue[] */
|
||||
protected $prependSelectors;
|
||||
|
||||
/** @var Matcher|null */
|
||||
protected $hoistableMatcher;
|
||||
|
||||
/** @var PropertySanitizer */
|
||||
protected $propertySanitizer;
|
||||
|
||||
|
|
@ -37,21 +43,43 @@ class StyleRuleSanitizer extends RuleSanitizer {
|
|||
* @param PropertySanitizer $propertySanitizer Sanitizer to test property declarations.
|
||||
* Probably an instance of StylePropertySanitizer.
|
||||
* @param array $options Additional options
|
||||
* - prependSelectors: (ComponentValue[]) Prepend this to all selectors.
|
||||
* Include trailing whitespace if necessary. Note $selectorMatcher must
|
||||
* capture each selector with the name 'selector'.
|
||||
* - prependSelectors: (ComponentValue[]) Prepend this (and a whitespace) to all selectors.
|
||||
* Note: $selectorMatcher must capture each selector with the name 'selector'.
|
||||
* - hoistableComponentMatcher: (Matcher) Component groups (simple selector sequences,
|
||||
* in CSS3 Selectors terminology) matched by this will be hoisted before the prepended
|
||||
* selector sequence. (To be more precise: the hoisted part is the longest prefix of
|
||||
* the selector that only contains matching simple selector sequences and descendant
|
||||
* combinators, and is not followed by a non-descendant combinator.)
|
||||
* This can be used to allow filtering by top-level conditional classes/IDs emitted by
|
||||
* some framework (e.g. html.no-js) while still jailing selectors into some subsection
|
||||
* of the content. For example, if prependSelectors is equivalent to '#content' and
|
||||
* hoistableComponentMatcher to [html|body]<simple selector>*, this will turn
|
||||
* 'html.no-js body.ltr div.list' into 'html.no-js body.ltr #content div.list'.
|
||||
* Note: $selectorMatcher must capture each simple selector group with the name 'simple'
|
||||
* and the combinators with 'combinator'.
|
||||
*/
|
||||
public function __construct(
|
||||
Matcher $selectorMatcher, PropertySanitizer $propertySanitizer, array $options = []
|
||||
) {
|
||||
$options += [
|
||||
'prependSelectors' => [],
|
||||
'hoistableComponentMatcher' => null,
|
||||
];
|
||||
Util::assertAllInstanceOf(
|
||||
$options['prependSelectors'], ComponentValue::class, 'prependSelectors'
|
||||
);
|
||||
if ( $options['hoistableComponentMatcher'] !== null &&
|
||||
!$options['hoistableComponentMatcher'] instanceof Matcher
|
||||
) {
|
||||
throw new InvalidArgumentException( 'hoistableComponentMatcher must be a Matcher' );
|
||||
}
|
||||
|
||||
$matcherFactory = MatcherFactory::singleton();
|
||||
|
||||
// Add optional whitespace around the selector-matcher, because
|
||||
// selector-matchers don't usually have it.
|
||||
if ( !$selectorMatcher->getDefaultOptions()['skip-whitespace'] ) {
|
||||
$ows = MatcherFactory::singleton()->optionalWhitespace();
|
||||
$ows = $matcherFactory->optionalWhitespace();
|
||||
$this->selectorMatcher = new Juxtaposition( [
|
||||
$ows,
|
||||
$selectorMatcher,
|
||||
|
|
@ -64,12 +92,29 @@ class StyleRuleSanitizer extends RuleSanitizer {
|
|||
|
||||
$this->propertySanitizer = $propertySanitizer;
|
||||
$this->prependSelectors = $options['prependSelectors'];
|
||||
if ( $options['hoistableComponentMatcher'] ) {
|
||||
$hoistablePrefixMatcher = new Juxtaposition( [
|
||||
$options['hoistableComponentMatcher'],
|
||||
Quantifier::star( new Juxtaposition( [
|
||||
$matcherFactory->significantWhitespace(),
|
||||
$options['hoistableComponentMatcher'],
|
||||
] ) )
|
||||
] );
|
||||
$this->hoistableMatcher = new Juxtaposition( [
|
||||
$hoistablePrefixMatcher->capture( 'prefix' ),
|
||||
$matcherFactory->significantWhitespace()->capture( 'ws' ),
|
||||
$matcherFactory->cssSelector()->capture( 'postfix' ),
|
||||
] );
|
||||
$this->hoistableMatcher->setDefaultOptions( [ 'skip-whitespace' => false ] );
|
||||
}
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function handlesRule( Rule $rule ) {
|
||||
return $rule instanceof QualifiedRule;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function doSanitize( CSSObject $object ) {
|
||||
if ( !$object instanceof QualifiedRule ) {
|
||||
$this->sanitizationError( 'expected-qualified-rule', $object );
|
||||
|
|
@ -77,7 +122,7 @@ class StyleRuleSanitizer extends RuleSanitizer {
|
|||
}
|
||||
|
||||
// Test that the prelude is a valid selector list
|
||||
$match = $this->selectorMatcher->match( $object->getPrelude(), [ 'mark-significance' => true ] );
|
||||
$match = $this->selectorMatcher->matchAgainst( $object->getPrelude(), [ 'mark-significance' => true ] );
|
||||
if ( !$match ) {
|
||||
$cv = Util::findFirstNonWhitespace( $object->getPrelude() );
|
||||
if ( $cv ) {
|
||||
|
|
@ -88,26 +133,40 @@ class StyleRuleSanitizer extends RuleSanitizer {
|
|||
return null;
|
||||
}
|
||||
|
||||
$ret = clone( $object );
|
||||
$ret = clone $object;
|
||||
|
||||
// If necessary, munge the selector list
|
||||
if ( $this->prependSelectors ) {
|
||||
$prelude = $ret->getPrelude();
|
||||
$comma = [
|
||||
new Token( Token::T_COMMA ),
|
||||
new Token( Token::T_WHITESPACE, [ 'significant' => false ] )
|
||||
];
|
||||
$oldPrelude = $object->getPrelude();
|
||||
$space = [
|
||||
new Token( Token::T_WHITESPACE, [ 'significant' => true ] )
|
||||
];
|
||||
$prelude->clear();
|
||||
foreach ( $match->getCapturedMatches() as $m ) {
|
||||
if ( $m->getName() === 'selector' ) {
|
||||
foreach ( $match->getCapturedMatches() as $selectorOrWs ) {
|
||||
if ( $selectorOrWs->getName() === 'selector' ) {
|
||||
if ( $prelude->count() ) {
|
||||
$prelude->add( $comma );
|
||||
}
|
||||
$prelude->add( $this->prependSelectors );
|
||||
$prelude->add( $m->getValues() );
|
||||
} elseif ( $m->getName() === 'trailingWS' && $m->getLength() > 0 ) {
|
||||
$prelude->add( $m->getValues() );
|
||||
|
||||
$valueList = new ComponentValueList( $selectorOrWs->getValues() );
|
||||
$hoistMatch = $this->hoistableMatcher ? $this->hoistableMatcher->matchAgainst( $valueList ) : null;
|
||||
if ( $hoistMatch ) {
|
||||
[ $prefix, , $postfix ] = $hoistMatch->getCapturedMatches();
|
||||
$prelude->add( $prefix->getValues() );
|
||||
$prelude->add( $space );
|
||||
$prelude->add( $this->prependSelectors );
|
||||
$prelude->add( $space );
|
||||
$prelude->add( $postfix->getValues() );
|
||||
} else {
|
||||
$prelude->add( $this->prependSelectors );
|
||||
$prelude->add( $space );
|
||||
$prelude->add( $valueList );
|
||||
}
|
||||
} elseif ( $selectorOrWs->getName() === 'trailingWS' && $selectorOrWs->getLength() > 0 ) {
|
||||
$prelude->add( $selectorOrWs->getValues() );
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -116,4 +175,5 @@ class StyleRuleSanitizer extends RuleSanitizer {
|
|||
|
||||
return $ret;
|
||||
}
|
||||
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ use Wikimedia\CSS\Util;
|
|||
|
||||
/**
|
||||
* Sanitizes a CSS stylesheet or rule list
|
||||
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#css-stylesheets
|
||||
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#css-stylesheets
|
||||
*/
|
||||
class StylesheetSanitizer extends Sanitizer {
|
||||
|
||||
|
|
@ -52,7 +52,6 @@ class StylesheetSanitizer extends Sanitizer {
|
|||
$ruleSanitizers = [
|
||||
'style' => new StyleRuleSanitizer( $matcherFactory->cssSelectorList(), $propertySanitizer ),
|
||||
'@font-face' => new FontFaceAtRuleSanitizer( $matcherFactory ),
|
||||
'@font-feature-values' => new FontFeatureValuesAtRuleSanitizer( $matcherFactory ),
|
||||
'@keyframes' => new KeyframesAtRuleSanitizer( $matcherFactory, $propertySanitizer ),
|
||||
'@page' => new PageAtRuleSanitizer( $matcherFactory, $propertySanitizer ),
|
||||
'@media' => new MediaAtRuleSanitizer( $matcherFactory->cssMediaQueryList() ),
|
||||
|
|
@ -66,15 +65,15 @@ class StylesheetSanitizer extends Sanitizer {
|
|||
$ruleSanitizers['@supports']->setRuleSanitizers( $ruleSanitizers );
|
||||
|
||||
// Now we can put together the StylesheetSanitizer
|
||||
$sanitizer = new StylesheetSanitizer( $ruleSanitizers + [
|
||||
return new StylesheetSanitizer( $ruleSanitizers + [
|
||||
// Note there's intentionally no "@charset" sanitizer, as that at-rule
|
||||
// was removed in the Editor's Draft in favor of special handling
|
||||
// in the parser.
|
||||
'@import' => new ImportAtRuleSanitizer( $matcherFactory ),
|
||||
'@import' => new ImportAtRuleSanitizer( $matcherFactory, [
|
||||
'declarationSanitizer' => $propertySanitizer,
|
||||
] ),
|
||||
'@namespace' => new NamespaceAtRuleSanitizer( $matcherFactory ),
|
||||
] );
|
||||
|
||||
return $sanitizer;
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -94,9 +93,11 @@ class StylesheetSanitizer extends Sanitizer {
|
|||
$this->ruleSanitizers = $ruleSanitizers;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function doSanitize( CSSObject $object ) {
|
||||
$isSheet = $object instanceof Stylesheet;
|
||||
if ( $isSheet ) {
|
||||
'@phan-var Stylesheet $object';
|
||||
$object = $object->getRuleList();
|
||||
}
|
||||
if ( !$object instanceof RuleList ) {
|
||||
|
|
|
|||
|
|
@ -6,24 +6,11 @@
|
|||
|
||||
namespace Wikimedia\CSS\Sanitizer;
|
||||
|
||||
use Wikimedia\CSS\Grammar\Alternative;
|
||||
use Wikimedia\CSS\Grammar\AnythingMatcher;
|
||||
use Wikimedia\CSS\Grammar\BlockMatcher;
|
||||
use Wikimedia\CSS\Grammar\CheckedMatcher;
|
||||
use Wikimedia\CSS\Grammar\FunctionMatcher;
|
||||
use Wikimedia\CSS\Grammar\Juxtaposition;
|
||||
use Wikimedia\CSS\Grammar\KeywordMatcher;
|
||||
use Wikimedia\CSS\Grammar\Match;
|
||||
use Wikimedia\CSS\Grammar\Matcher;
|
||||
use Wikimedia\CSS\Grammar\MatcherFactory;
|
||||
use Wikimedia\CSS\Grammar\NothingMatcher;
|
||||
use Wikimedia\CSS\Grammar\Quantifier;
|
||||
use Wikimedia\CSS\Objects\AtRule;
|
||||
use Wikimedia\CSS\Objects\CSSObject;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\Rule;
|
||||
use Wikimedia\CSS\Objects\Token;
|
||||
use Wikimedia\CSS\Parser\Parser;
|
||||
use Wikimedia\CSS\Util;
|
||||
|
||||
/**
|
||||
|
|
@ -41,76 +28,14 @@ class SupportsAtRuleSanitizer extends RuleSanitizer {
|
|||
/**
|
||||
* @param MatcherFactory $matcherFactory
|
||||
* @param array $options Additional options:
|
||||
* strict: (bool) Only accept defined syntax. Default true.
|
||||
* declarationSanitizer: (PropertySanitizer) Check declarations against this Sanitizer.
|
||||
* - strict: (bool) Only accept defined syntax. Default true.
|
||||
* - declarationSanitizer: (PropertySanitizer) Check declarations against this Sanitizer.
|
||||
*/
|
||||
public function __construct( MatcherFactory $matcherFactory, array $options = [] ) {
|
||||
$options += [
|
||||
'strict' => true,
|
||||
];
|
||||
$declarationSanitizer = null;
|
||||
if ( isset( $options['declarationSanitizer'] ) ) {
|
||||
$declarationSanitizer = $options['declarationSanitizer'];
|
||||
if ( !$declarationSanitizer instanceof PropertySanitizer ) {
|
||||
throw new \InvalidArgumentException(
|
||||
'declarationSanitizer must be an instance of ' . PropertySanitizer::class
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
$ws = $matcherFactory->significantWhitespace();
|
||||
$anythingPlus = new AnythingMatcher( [ 'quantifier' => '+' ] );
|
||||
|
||||
if ( $options['strict'] ) {
|
||||
$generalEnclosed = new NothingMatcher();
|
||||
} else {
|
||||
$generalEnclosed = new Alternative( [
|
||||
new FunctionMatcher( null, $anythingPlus ),
|
||||
new BlockMatcher( Token::T_LEFT_PAREN, new Juxtaposition( [
|
||||
$matcherFactory->ident(), $anythingPlus
|
||||
] ) ),
|
||||
] );
|
||||
}
|
||||
|
||||
$supportsConditionBlock = new NothingMatcher(); // temp
|
||||
$supportsConditionInParens = new Alternative( [
|
||||
&$supportsConditionBlock,
|
||||
new BlockMatcher( Token::T_LEFT_PAREN, new CheckedMatcher(
|
||||
$anythingPlus,
|
||||
function ( ComponentValueList $list, Match $match, array $options )
|
||||
use ( $declarationSanitizer )
|
||||
{
|
||||
$cvlist = new ComponentValueList( $match->getValues() );
|
||||
$parser = Parser::newFromTokens( $cvlist->toTokenArray() );
|
||||
$declaration = $parser->parseDeclaration();
|
||||
if ( $parser->getParseErrors() || !$declaration ) {
|
||||
return false;
|
||||
}
|
||||
if ( !$declarationSanitizer ) {
|
||||
return true;
|
||||
}
|
||||
$oldErrors = $declarationSanitizer->sanitizationErrors;
|
||||
$ret = $declarationSanitizer->doSanitize( $declaration );
|
||||
$errors = $declarationSanitizer->getSanitizationErrors();
|
||||
$declarationSanitizer->sanitizationErrors = $oldErrors;
|
||||
return $ret === $declaration && !$errors;
|
||||
}
|
||||
) ),
|
||||
$generalEnclosed,
|
||||
] );
|
||||
$supportsCondition = new Alternative( [
|
||||
new Juxtaposition( [ new KeywordMatcher( 'not' ), $ws, $supportsConditionInParens ] ),
|
||||
new Juxtaposition( [ $supportsConditionInParens, Quantifier::plus( new Juxtaposition( [
|
||||
$ws, new KeywordMatcher( 'and' ), $ws, $supportsConditionInParens
|
||||
] ) ) ] ),
|
||||
new Juxtaposition( [ $supportsConditionInParens, Quantifier::plus( new Juxtaposition( [
|
||||
$ws, new KeywordMatcher( 'or' ), $ws, $supportsConditionInParens
|
||||
] ) ) ] ),
|
||||
$supportsConditionInParens,
|
||||
] );
|
||||
$supportsConditionBlock = new BlockMatcher( Token::T_LEFT_PAREN, $supportsCondition );
|
||||
|
||||
$this->conditionMatcher = $supportsCondition;
|
||||
$this->conditionMatcher = $matcherFactory->cssSupportsCondition(
|
||||
$options['declarationSanitizer'] ?? null,
|
||||
$options['strict'] ?? true
|
||||
);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
@ -130,12 +55,14 @@ class SupportsAtRuleSanitizer extends RuleSanitizer {
|
|||
$this->ruleSanitizers = $ruleSanitizers;
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
public function handlesRule( Rule $rule ) {
|
||||
return $rule instanceof AtRule && !strcasecmp( $rule->getName(), 'supports' );
|
||||
}
|
||||
|
||||
/** @inheritDoc */
|
||||
protected function doSanitize( CSSObject $object ) {
|
||||
if ( !$object instanceof Rule || !$this->handlesRule( $object ) ) {
|
||||
if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
|
||||
$this->sanitizationError( 'expected-at-rule', $object, [ 'supports' ] );
|
||||
return null;
|
||||
}
|
||||
|
|
@ -146,7 +73,7 @@ class SupportsAtRuleSanitizer extends RuleSanitizer {
|
|||
}
|
||||
|
||||
// Test the supports condition
|
||||
if ( !$this->conditionMatcher->match( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
|
||||
if ( !$this->conditionMatcher->matchAgainst( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
|
||||
$cv = Util::findFirstNonWhitespace( $object->getPrelude() );
|
||||
if ( $cv ) {
|
||||
$this->sanitizationError( 'invalid-supports-condition', $cv );
|
||||
|
|
@ -156,7 +83,7 @@ class SupportsAtRuleSanitizer extends RuleSanitizer {
|
|||
return null;
|
||||
}
|
||||
|
||||
$ret = clone( $object );
|
||||
$ret = clone $object;
|
||||
$this->fixPreludeWhitespace( $ret, false );
|
||||
$this->sanitizeRuleBlock( $ret->getBlock(), $this->ruleSanitizers );
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
|
||||
namespace Wikimedia\CSS;
|
||||
|
||||
use InvalidArgumentException;
|
||||
use Wikimedia\CSS\Objects\ComponentValue;
|
||||
use Wikimedia\CSS\Objects\ComponentValueList;
|
||||
use Wikimedia\CSS\Objects\CSSObject;
|
||||
|
|
@ -22,13 +23,13 @@ class Util {
|
|||
* @param array $array
|
||||
* @param string $class
|
||||
* @param string $what Describe the array being checked
|
||||
* @throws \InvalidArgumentException
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public static function assertAllInstanceOf( array $array, $class, $what ) {
|
||||
foreach ( $array as $k => $v ) {
|
||||
if ( !$v instanceof $class ) {
|
||||
$vtype = is_object( $v ) ? get_class( $v ) : gettype( $v );
|
||||
throw new \InvalidArgumentException(
|
||||
throw new InvalidArgumentException(
|
||||
"$what may only contain instances of $class" .
|
||||
" (found $vtype at index $k)"
|
||||
);
|
||||
|
|
@ -37,23 +38,23 @@ class Util {
|
|||
}
|
||||
|
||||
/**
|
||||
* Check that a set of tokens are all of the same type
|
||||
* Check that a set of tokens are all the same type
|
||||
* @param Token[] $array
|
||||
* @param string $type
|
||||
* @param string $what Describe the array being checked
|
||||
* @throws \InvalidArgumentException
|
||||
* @throws InvalidArgumentException
|
||||
*/
|
||||
public static function assertAllTokensOfType( array $array, $type, $what ) {
|
||||
foreach ( $array as $k => $v ) {
|
||||
if ( !$v instanceof Token ) {
|
||||
$vtype = is_object( $v ) ? get_class( $v ) : gettype( $v );
|
||||
throw new \InvalidArgumentException(
|
||||
throw new InvalidArgumentException(
|
||||
"$what may only contain instances of " . Token::class .
|
||||
" (found $vtype at index $k)"
|
||||
);
|
||||
}
|
||||
if ( $v->type() !== $type ) {
|
||||
throw new \InvalidArgumentException(
|
||||
throw new InvalidArgumentException(
|
||||
"$what may only contain \"$type\" tokens" .
|
||||
" (found \"{$v->type()}\" at index $k)"
|
||||
);
|
||||
|
|
@ -68,7 +69,7 @@ class Util {
|
|||
*/
|
||||
public static function findFirstNonWhitespace( $list ) {
|
||||
if ( !$list instanceof TokenList && !$list instanceof ComponentValueList ) {
|
||||
throw new \InvalidArgumentException( 'List must be TokenList or ComponentValueList' );
|
||||
throw new InvalidArgumentException( 'List must be TokenList or ComponentValueList' );
|
||||
}
|
||||
foreach ( $list as $v ) {
|
||||
if ( !$v instanceof Token || $v->type() !== Token::T_WHITESPACE ) {
|
||||
|
|
@ -80,13 +81,19 @@ class Util {
|
|||
|
||||
/**
|
||||
* Turn a CSSObject into a string
|
||||
* @param CSSObject $object
|
||||
* @param array $options Serialziation options:
|
||||
* @param CSSObject|CSSObject[] $object
|
||||
* @param array $options Serialization options:
|
||||
* - minify: (bool) Skip comments and insignificant tokens
|
||||
* @return string
|
||||
*/
|
||||
public static function stringify( CSSObject $object, $options = [] ) {
|
||||
$tokens = $object->toTokenArray();
|
||||
public static function stringify( $object, $options = [] ) {
|
||||
if ( is_array( $object ) ) {
|
||||
$tokens = array_reduce( $object, static function ( array $carry, CSSObject $item ) {
|
||||
return array_merge( $carry, $item->toTokenArray() );
|
||||
}, [] );
|
||||
} else {
|
||||
$tokens = $object->toTokenArray();
|
||||
}
|
||||
if ( !$tokens ) {
|
||||
return '';
|
||||
}
|
||||
|
|
@ -97,23 +104,30 @@ class Util {
|
|||
for ( $i = 1; $i < $e; $i++ ) {
|
||||
$t = $tokens[$i];
|
||||
if ( $t->type() === Token::T_WHITESPACE && !$t->significant() &&
|
||||
Token::separate( $tokens[$i-1], $tokens[$i+1] )
|
||||
Token::separate( $tokens[$i - 1], $tokens[$i + 1] )
|
||||
) {
|
||||
$tokens[$i] = $t->copyWithSignificance( true );
|
||||
}
|
||||
}
|
||||
|
||||
// Filter!
|
||||
$tokens = array_filter( $tokens, function ( $t ) {
|
||||
$tokens = array_filter( $tokens, static function ( $t ) {
|
||||
return $t->significant();
|
||||
} );
|
||||
}
|
||||
|
||||
$prev = reset( $tokens );
|
||||
$ret = (string)$prev;
|
||||
$urangeHack = 0;
|
||||
while ( ( $token = next( $tokens ) ) !== false ) {
|
||||
if ( Token::separate( $prev, $token ) ) {
|
||||
// Per https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#serialization
|
||||
// Avoid serializing tokens that are part of a <urange> with extraneous comments
|
||||
// by checking for a hack-flag in the type.
|
||||
// @see Wikimedia\CSS\Grammar\UrangeMatcher
|
||||
// @phan-suppress-next-line PhanAccessMethodInternal
|
||||
$urangeHack = max( $urangeHack, $prev->urangeHack() );
|
||||
|
||||
if ( --$urangeHack <= 0 && Token::separate( $prev, $token ) ) {
|
||||
// Per https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#serialization
|
||||
$ret .= '/**/';
|
||||
}
|
||||
$ret .= (string)$token;
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user