Upgrade Wikimedia CSS parser (#2126)

Fixes #2119
This commit is contained in:
Starmind 2023-06-17 02:33:53 +08:00 committed by GitHub
parent f0d9f53ded
commit 2ef97f83f1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
59 changed files with 1798 additions and 1612 deletions

View File

@ -11,7 +11,7 @@ use Wikimedia\CSS\Util;
/**
* Matcher that matches one out of a set of Matchers ("|" combiner).
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#comb-one
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#comb-one
*/
class Alternative extends Matcher {
/** @var Matcher[] */
@ -25,6 +25,7 @@ class Alternative extends Matcher {
$this->matchers = $matchers;
}
/** @inheritDoc */
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
$used = [];
foreach ( $this->matchers as $matcher ) {

View File

@ -6,6 +6,8 @@
namespace Wikimedia\CSS\Grammar;
use InvalidArgumentException;
use UnexpectedValueException;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\CSSFunction;
use Wikimedia\CSS\Objects\SimpleBlock;
@ -15,7 +17,7 @@ use Wikimedia\CSS\Objects\Token;
* Matcher that matches anything except bad strings, bad urls, and unmatched
* left-paren, left-brace, or left-bracket.
* @warning Be very careful using this!
* @see https://drafts.csswg.org/css-syntax/#any-value for where this roughly comes from.
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#any-value
*/
class AnythingMatcher extends Matcher {
@ -42,9 +44,9 @@ class AnythingMatcher extends Matcher {
*/
public function __construct( array $options = [] ) {
$this->toplevel = !empty( $options['toplevel'] );
$this->quantifier = isset( $options['quantifier'] ) ? $options['quantifier'] : '';
$this->quantifier = $options['quantifier'] ?? '';
if ( !in_array( $this->quantifier, [ '', '+', '*' ], true ) ) {
throw new \InvalidArgumentException( 'Invalid quantifier' );
throw new InvalidArgumentException( 'Invalid quantifier' );
}
$recurse = !$this->toplevel && $this->quantifier === '*'
@ -55,12 +57,13 @@ class AnythingMatcher extends Matcher {
}
}
/** @inheritDoc */
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
$origStart = $start;
$lastMatch = $this->quantifier === '*' ? $this->makeMatch( $values, $start, $start ) : null;
do {
$newMatch = null;
$cv = isset( $values[$start] ) ? $values[$start] : null;
$cv = $values[$start] ?? null;
if ( $cv instanceof Token ) {
switch ( $cv->type() ) {
case Token::T_BAD_STRING:
@ -92,7 +95,7 @@ class AnythingMatcher extends Matcher {
// If we encounter whitespace, assume it's significant.
$newMatch = $this->makeMatch(
$values, $origStart, $this->next( $values, $start, $options ),
new Match( $values, $start, 1, 'significantWhitespace' ),
new GrammarMatch( $values, $start, 1, 'significantWhitespace' ),
[ [ $lastMatch ] ]
);
break;
@ -103,7 +106,7 @@ class AnythingMatcher extends Matcher {
case Token::T_LEFT_BRACKET:
// Should never happen
// @codeCoverageIgnoreStart
throw new \UnexpectedValueException( "How did a \"{$cv->type()}\" token get here?" );
throw new UnexpectedValueException( "How did a \"{$cv->type()}\" token get here?" );
// @codeCoverageIgnoreEnd
default:

View File

@ -6,9 +6,9 @@
namespace Wikimedia\CSS\Grammar;
use InvalidArgumentException;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\SimpleBlock;
use Wikimedia\CSS\Objects\Token;
/**
* Matcher that matches a SimpleBlock
@ -34,7 +34,7 @@ class BlockMatcher extends Matcher {
*/
public function __construct( $blockType, Matcher $matcher ) {
if ( SimpleBlock::matchingDelimiter( $blockType ) === null ) {
throw new \InvalidArgumentException(
throw new InvalidArgumentException(
'A block is delimited by either {}, [], or ().'
);
}
@ -42,8 +42,9 @@ class BlockMatcher extends Matcher {
$this->matcher = $matcher;
}
/** @inheritDoc */
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
$cv = isset( $values[$start] ) ? $values[$start] : null;
$cv = $values[$start] ?? null;
if ( $cv instanceof SimpleBlock && $cv->getStartTokenType() === $this->blockType ) {
// To successfully match, our sub-Matcher needs to match the whole
// content of the block.

View File

@ -13,7 +13,7 @@ use Wikimedia\CSS\Objects\ComponentValueList;
*/
class CheckedMatcher extends Matcher {
/** @var Matcher */
private $matcher = null;
private $matcher;
/** @var callable */
protected $check;
@ -21,13 +21,14 @@ class CheckedMatcher extends Matcher {
/**
* @param Matcher $matcher Base matcher
* @param callable $check Function to check the match is really valid.
* Prototype is bool func( ComponentValueList $values, Match $match, array $options )
* Prototype is bool func( ComponentValueList $values, GrammarMatch $match, array $options )
*/
public function __construct( Matcher $matcher, callable $check ) {
	// Both values are consumed later by generateMatches(): the wrapped
	// matcher produces candidates and the callback filters them.
	$this->check = $check;
	$this->matcher = $matcher;
}
/** @inheritDoc */
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
foreach ( $this->matcher->generateMatches( $values, $start, $options ) as $match ) {
if ( call_user_func( $this->check, $values, $match, $options ) ) {

View File

@ -16,7 +16,7 @@ use Wikimedia\CSS\Objects\Token;
* other types (case-sensitively) too. For the more common case-insensitive
* identifier matching, use KeywordMatcher.
*
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#component-types
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#component-types
*/
class DelimMatcher extends Matcher {
/** @var string One of the Token::T_* constants */
@ -39,8 +39,9 @@ class DelimMatcher extends Matcher {
$this->type = $options['type'];
}
/** @inheritDoc */
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
$cv = isset( $values[$start] ) ? $values[$start] : null;
$cv = $values[$start] ?? null;
if ( $cv instanceof Token && $cv->type() === $this->type &&
in_array( $cv->value(), $this->values, true )
) {

View File

@ -6,9 +6,10 @@
namespace Wikimedia\CSS\Grammar;
use Closure;
use InvalidArgumentException;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\CSSFunction;
use Wikimedia\CSS\Objects\Token;
/**
* Matcher that matches a CSSFunction
@ -29,25 +30,26 @@ class FunctionMatcher extends Matcher {
protected $matcher;
/**
* @param string|callable|null $name Function name, case-insensitive, or a
* @param string|Closure|null $name Function name, case-insensitive, or a
* function to check the name.
* @param Matcher $matcher Matcher for the contents of the function
*/
public function __construct( $name, Matcher $matcher ) {
if ( is_string( $name ) ) {
$this->nameCheck = function ( $s ) use ( $name ) {
$this->nameCheck = static function ( $s ) use ( $name ) {
return !strcasecmp( $s, $name );
};
} elseif ( is_callable( $name ) || $name === null ) {
$this->nameCheck = $name;
} else {
throw new \InvalidArgumentException( '$name must be a string, callable, or null' );
throw new InvalidArgumentException( '$name must be a string, callable, or null' );
}
$this->matcher = $matcher;
}
/** @inheritDoc */
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
$cv = isset( $values[$start] ) ? $values[$start] : null;
$cv = $values[$start] ?? null;
if ( $cv instanceof CSSFunction &&
( !$this->nameCheck || call_user_func( $this->nameCheck, $cv->getName() ) )
) {

View File

@ -8,18 +8,19 @@ namespace Wikimedia\CSS\Grammar;
use Wikimedia\CSS\Objects\ComponentValue;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\CSSFunction;
use Wikimedia\CSS\Objects\SimpleBlock;
use Wikimedia\CSS\Objects\Token;
use Wikimedia\CSS\Util;
/**
* Represent a match from a Matcher.
*/
class Match {
class GrammarMatch {
/** @var int */
protected $start, $length;
protected $start;
/** @var int */
protected $length;
/** @var ComponentValue[] Matched ComponentValues */
protected $values;
@ -27,7 +28,7 @@ class Match {
/** @var string|null */
protected $name = null;
/** @var Match[] Captured submatches */
/** @var GrammarMatch[] Captured submatches */
protected $capturedMatches = [];
/**
@ -35,12 +36,12 @@ class Match {
* @param int $start Starting index of the match.
* @param int $length Number of tokens in the match.
* @param string|null $name Give a name to this match.
* @param Match[] $capturedMatches Captured submatches of this match.
* @param GrammarMatch[] $capturedMatches Captured submatches of this match.
*/
public function __construct(
ComponentValueList $list, $start, $length, $name = null, array $capturedMatches = []
) {
Util::assertAllInstanceOf( $capturedMatches, Match::class, '$capturedMatches' );
Util::assertAllInstanceOf( $capturedMatches, self::class, '$capturedMatches' );
$this->values = $list->slice( $start, $length );
$this->start = $start;
@ -95,21 +96,21 @@ class Match {
* This returns the matches from capturing submatchers (see
* Matcher::capture()) that matched during the matching of the top-level
* matcher that returned this match. If capturing submatchers were nested,
* the Match objects returned here will themselves have captured submatches to
* return.
* the GrammarMatch objects returned here will themselves have captured sub-
* matches to return.
*
* To borrow PCRE regular expression syntax, if the "pattern" described by
* the Matchers resembled `www(?<A>xxx(?<B>yyy)xxx)(?<C>zzz)*` then the
* top-level Match's getCapturedMatches() would return a Match named "A"
* (containing the "xxxyyyxxx" bit) and zero or more matches named "C" (for
* each "zzz"), and that "A" Match's getCapturedMatches() would return a Match
* named "B" (containing just the "yyy").
* top-level GrammarMatch's getCapturedMatches() would return a GrammarMatch
* named "A" (containing the "xxxyyyxxx" bit) and zero or more matches named
* "C" (for each "zzz"), and that "A" GrammarMatch's getCapturedMatches()
* would return a GrammarMatch named "B" (containing just the "yyy").
*
* Note that the start and end positions reported by captured matches may be
* relative to a containing SimpleBlock or CSSFunction's value rather than
* to the ComponentValueList passed to the top-level Matcher.
*
* @return Match[]
* @return GrammarMatch[]
*/
public function getCapturedMatches() {
return $this->capturedMatches;
@ -124,7 +125,7 @@ class Match {
foreach ( $this->capturedMatches as $m ) {
$data[] = $m->getUniqueId();
}
return md5( join( "\n", $data ) );
return md5( implode( "\n", $data ) );
}
/**
@ -143,4 +144,8 @@ class Match {
$m->fixWhitespace( $old, $new );
}
}
/**
 * Render this match as a string.
 *
 * @return string Result of Util::stringify() applied to the matched values.
 */
public function __toString() {
	$matched = $this->getValues();
	return Util::stringify( $matched );
}
}

View File

@ -6,14 +6,15 @@
namespace Wikimedia\CSS\Grammar;
use Iterator;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\Token;
use Wikimedia\CSS\Util;
/**
* Matcher that groups other matchers (juxtaposition)
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#component-combinators
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#comb-comma
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#component-combinators
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#comb-comma
*/
class Juxtaposition extends Matcher {
/** @var Matcher[] */
@ -32,25 +33,25 @@ class Juxtaposition extends Matcher {
$this->commas = (bool)$commas;
}
/** @inheritDoc */
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
$used = [];
// Match each of our matchers in turn, pushing each one onto a stack as
// we process it and popping a match once its exhausted.
// we process it and popping a match once it's exhausted.
$stack = [
[
new Match( $values, $start, 0 ),
new GrammarMatch( $values, $start, 0 ),
$start,
$this->matchers[0]->generateMatches( $values, $start, $options ),
false
]
];
do {
/** @var $lastMatch Match */
/** @var $lastEnd int */
/** @var $iter \Iterator<Match> */
/** @var $iter Iterator<GrammarMatch> */
/** @var $needEmpty bool */
list( $lastMatch, $lastEnd, $iter, $needEmpty ) = $stack[count( $stack ) - 1];
[ , $lastEnd, $iter, $needEmpty ] = $stack[count( $stack ) - 1];
// If the top of the stack has no more matches, pop it and loop.
if ( !$iter->valid() ) {
@ -72,30 +73,29 @@ class Juxtaposition extends Matcher {
$thisEnd = $nextFrom = $match->getNext();
// Dealing with commas is a bit tricky. There are three cases:
// 1. If the current match is empty, don't look for a following
// comma now and reset $thisEnd to $lastEnd.
// 2. If there is a comma following, update $nextFrom to be after
// the comma.
// 3. If there's no comma following, every subsequent Matcher must
// be empty in order for the group as a whole to match, so set
// the flag.
// 1. If the current match is empty, don't look for a following
// comma now and reset $thisEnd to $lastEnd.
// 2. If there is a comma following, update $nextFrom to be after
// the comma.
// 3. If there's no comma following, every subsequent Matcher must
// be empty in order for the group as a whole to match, so set
// the flag.
// Unlike '#', this doesn't specify skipping whitespace around the
// commas if the production isn't already skipping whitespace.
if ( $this->commas ) {
if ( $match->getLength() === 0 ) {
$thisEnd = $lastEnd;
} elseif ( isset( $values[$nextFrom] ) && $values[$nextFrom] instanceof Token &&
// @phan-suppress-next-line PhanNonClassMethodCall False positive
$values[$nextFrom]->type() === Token::T_COMMA
) {
$nextFrom = $this->next( $values, $nextFrom, $options );
} else {
if ( isset( $values[$nextFrom] ) && $values[$nextFrom] instanceof Token &&
$values[$nextFrom]->type() === Token::T_COMMA
) {
$nextFrom = $this->next( $values, $nextFrom, $options );
} else {
$needEmpty = true;
}
$needEmpty = true;
}
}
// If we ran out of Matchers, yield the final position. Otherwise
// If we ran out of Matchers, yield the final position. Otherwise,
// push the next matcher onto the stack.
if ( count( $stack ) >= count( $this->matchers ) ) {
$newMatch = $this->makeMatch( $values, $start, $thisEnd, $match, $stack );

View File

@ -16,7 +16,7 @@ use Wikimedia\CSS\Objects\Token;
* other types (case-insensitively) too. For delimiter (or case-sensitive)
* matching, use DelimMatcher.
*
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#component-types
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#component-types
*/
class KeywordMatcher extends Matcher {
/** @var string One of the Token::T_* constants */
@ -39,8 +39,9 @@ class KeywordMatcher extends Matcher {
$this->type = $options['type'];
}
/** @inheritDoc */
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
$cv = isset( $values[$start] ) ? $values[$start] : null;
$cv = $values[$start] ?? null;
if ( $cv instanceof Token && $cv->type() === $this->type &&
isset( $this->values[strtolower( $cv->value() )] )
) {

View File

@ -6,10 +6,11 @@
namespace Wikimedia\CSS\Grammar;
use Iterator;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\Token;
use Wikimedia\CSS\Objects\SimpleBlock;
use Wikimedia\CSS\Objects\CSSFunction;
use Wikimedia\CSS\Objects\SimpleBlock;
use Wikimedia\CSS\Objects\Token;
/**
* Base class for grammar matchers.
@ -20,16 +21,16 @@ use Wikimedia\CSS\Objects\CSSFunction;
* object that will determine whether a ComponentValueList actually matches
* this grammar.
*
* [SYN3]: https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/
* [VAL3]: https://www.w3.org/TR/2016/CR-css-values-3-20160929/
* [SYN3]: https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/
* [VAL3]: https://www.w3.org/TR/2019/CR-css-values-3-20190606/
*/
abstract class Matcher {
/** @var string|null Name to set on Match objects */
/** @var string|null Name to set on GrammarMatch objects */
protected $captureName = null;
/**
* @var array Default options for self::match()
* @var array Default options for self::matchAgainst()
* - skip-whitespace: (bool) Allow whitespace in between any two tokens
* - nonterminal: (bool) Don't require that the whole of $values be matched
* - mark-significance: (bool) On a successful match, replace T_WHITESPACE
@ -43,49 +44,29 @@ abstract class Matcher {
/**
* Create an instance.
* @param mixed ... See static::__construct()
* @param mixed ...$args See static::__construct()
* @return static
*/
public static function create() {
// @todo Once we drop support for PHP 5.5, just do this:
// public static function create( ...$args ) {
// return new static( ...$args );
// }
$args = func_get_args();
switch ( count( $args ) ) {
case 0:
return new static();
case 1:
return new static( $args[0] );
case 2:
return new static( $args[0], $args[1] );
case 3:
return new static( $args[0], $args[1], $args[2] );
case 4:
return new static( $args[0], $args[1], $args[2], $args[3] );
default:
// Slow, but all the existing Matchers have a max of 4 args.
$rc = new \ReflectionClass( static::class );
return $rc->newInstanceArgs( $args );
}
public static function create( ...$args ) {
// @phan-suppress-next-line PhanParamTooManyUnpack,PhanTypeInstantiateAbstractStatic
return new static( ...$args );
}
/**
* Return a copy of this matcher that will capture its matches
*
* A "capturing" Matcher will produce Matches that return a value from the
* Match::getName() method. The Match::getCapturedMatches() method may be
* used to retrieve them from the top-level Match.
* A "capturing" Matcher will produce GrammarMatches that return a value from
* the GrammarMatch::getName() method. The GrammarMatch::getCapturedMatches()
* method may be used to retrieve them from the top-level GrammarMatch.
*
* The concept is similar to capturing groups in PCRE and other regex
* languages.
*
* @param string|null $captureName Name to apply to captured Match objects
* @param string|null $captureName Name to apply to captured GrammarMatch objects
* @return static
*/
public function capture( $captureName ) {
$ret = clone( $this );
$ret = clone $this;
$ret->captureName = $captureName;
return $ret;
}
@ -94,14 +75,14 @@ abstract class Matcher {
* Match against a list of ComponentValues
* @param ComponentValueList $values
* @param array $options Matching options, see self::$defaultOptions
* @return Match|null
* @return GrammarMatch|null
*/
public function match( ComponentValueList $values, array $options = [] ) {
public function matchAgainst( ComponentValueList $values, array $options = [] ) {
$options += $this->getDefaultOptions();
$start = $this->next( $values, -1, $options );
$l = count( $values );
foreach ( $this->generateMatches( $values, $start, $options ) as $match ) {
if ( $match->getNext() === $l || $options['nonterminal'] ) {
if ( $options['nonterminal'] || $match->getNext() === $l ) {
if ( $options['mark-significance'] ) {
$significantWS = self::collectSignificantWhitespace( $match );
self::markSignificantWhitespace( $values, $match, $significantWS, $match->getNext() );
@ -114,11 +95,11 @@ abstract class Matcher {
/**
* Collect any 'significantWhitespace' matches
* @param Match $match
* @param Token[]|null &$ret
* @param GrammarMatch $match
* @param Token[] &$ret
* @return Token[]
*/
private static function collectSignificantWhitespace( Match $match, &$ret = [] ) {
private static function collectSignificantWhitespace( GrammarMatch $match, &$ret = [] ) {
if ( $match->getName() === 'significantWhitespace' ) {
$ret = array_merge( $ret, $match->getValues() );
}
@ -131,7 +112,7 @@ abstract class Matcher {
/**
* Mark whitespace as significant or not
* @param ComponentValueList $list
* @param Match $match
* @param GrammarMatch $match
* @param Token[] $significantWS
* @param int $end
*/
@ -141,8 +122,9 @@ abstract class Matcher {
if ( $cv instanceof Token && $cv->type() === Token::T_WHITESPACE ) {
$significant = in_array( $cv, $significantWS, true );
if ( $significant !== $cv->significant() ) {
$list[$i] = $cv->copyWithSignificance( $significant );
$match->fixWhitespace( $cv, $list[$i] );
$newCv = $cv->copyWithSignificance( $significant );
$match->fixWhitespace( $cv, $newCv );
$list[$i] = $newCv;
}
} elseif ( $cv instanceof CSSFunction || $cv instanceof SimpleBlock ) {
self::markSignificantWhitespace(
@ -186,27 +168,28 @@ abstract class Matcher {
do {
$i++;
} while ( $skipWS && $i < $l &&
// @phan-suppress-next-line PhanNonClassMethodCall False positive
$values[$i] instanceof Token && $values[$i]->type() === Token::T_WHITESPACE
);
return $i;
}
/**
* Create a Match
* Create a GrammarMatch
* @param ComponentValueList $list
* @param int $start
* @param int $end First position after the match
* @param Match|null $submatch Submatch, for capturing. If $submatch itself
* named it will be kept as a capture in the returned Match, otherwise its
* captured matches (if any) as returned by getCapturedMatches() will be
* kept as captures in the returned Match.
* @param GrammarMatch|null $submatch Sub-match, for capturing. If $submatch
* is itself named, it will be kept as a capture in the returned GrammarMatch;
* otherwise its captured matches (if any) as returned by getCapturedMatches()
* will be kept as captures in the returned GrammarMatch.
* @param array $stack Stack from which to fetch more submatches for
* capturing (see $submatch). The stack is expected to be an array of
* arrays, with the first element of each subarray being a Match.
* @return Match
* arrays, with the first element of each subarray being a GrammarMatch.
* @return GrammarMatch
*/
protected function makeMatch(
ComponentValueList $list, $start, $end, Match $submatch = null, array $stack = []
ComponentValueList $list, $start, $end, GrammarMatch $submatch = null, array $stack = []
) {
$matches = array_column( $stack, 0 );
$matches[] = $submatch;
@ -214,7 +197,7 @@ abstract class Matcher {
$keptMatches = [];
while ( $matches ) {
$m = array_shift( $matches );
if ( !$m instanceof Match ) {
if ( !$m instanceof GrammarMatch ) {
// skip it, probably null
} elseif ( $m->getName() !== null ) {
$keptMatches[] = $m;
@ -223,7 +206,7 @@ abstract class Matcher {
}
}
return new Match( $list, $start, $end - $start, $this->captureName, $keptMatches );
return new GrammarMatch( $list, $start, $end - $start, $this->captureName, $keptMatches );
}
/**
@ -231,18 +214,18 @@ abstract class Matcher {
*
* The job of a Matcher is to determine all the ways its particular grammar
* fragment can consume ComponentValues starting at a particular location
* in the ComponentValueList, represented by returning Match objects. For
* example, a matcher implementing `IDENT*` at a starting position where
* in the ComponentValueList, represented by returning GrammarMatch objects.
* For example, a matcher implementing `IDENT*` at a starting position where
* there are three IDENT tokens in a row would be able to match 0, 1, 2, or
* all 3 of those IDENT tokens, and therefore should return an iterator
* over that set of Match objects.
* over that set of GrammarMatch objects.
*
* Some matchers take other matchers as input, for example `IDENT*` is
* probably going to be implemented as a matcher for `*` that repeatedly
* applies a matcher for `IDENT`. The `*` matcher would call the `IDENT`
* matcher's generateMatches() method directly.
*
* Most Matchers implement this method as a generator so as to not build up
* Most Matchers implement this method as a generator to not build up
* the full set of results when it's reasonably likely the caller is going
* to terminate early.
*
@ -250,8 +233,8 @@ abstract class Matcher {
* @param int $start Starting position in $values
* @param array $options See self::$defaultOptions.
* Always use the options passed in, don't use $this->defaultOptions yourself.
* @return \Iterator<Match> Iterates over the set of Match objects
* defining all the ways this matcher can match.
* @return Iterator<GrammarMatch> Iterates over the set of GrammarMatch
* objects defining all the ways this matcher can match.
*/
abstract protected function generateMatches( ComponentValueList $values, $start, array $options );
}

View File

@ -6,7 +6,10 @@
namespace Wikimedia\CSS\Grammar;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\Token;
use Wikimedia\CSS\Parser\Parser;
use Wikimedia\CSS\Sanitizer\PropertySanitizer;
/**
* Factory for predefined Grammar matchers
@ -16,12 +19,14 @@ class MatcherFactory {
/** @var MatcherFactory|null */
private static $instance = null;
/** @var Matcher[] Cache of constructed matchers */
/** @var (Matcher|Matcher[])[] Cache of constructed matchers */
protected $cache = [];
/** @var string[] length units */
protected static $lengthUnits = [ 'em', 'ex', 'ch', 'rem', 'vw', 'vh',
'vmin', 'vmax', 'cm', 'mm', 'Q', 'in', 'pc', 'pt', 'px' ];
protected static $lengthUnits = [
'em', 'ex', 'ch', 'rem', 'vw', 'vh', 'vmin', 'vmax',
'cm', 'mm', 'Q', 'in', 'pc', 'pt', 'px'
];
/** @var string[] angle units */
protected static $angleUnits = [ 'deg', 'grad', 'rad', 'turn' ];
@ -87,9 +92,32 @@ class MatcherFactory {
return $this->cache[__METHOD__];
}
/**
 * Build a Matcher for a <custom-ident>.
 *
 * The returned TokenMatcher targets T_IDENT tokens and is given a callback
 * that returns true only when the token's lowercased value is absent from
 * the combined list of reserved keywords and caller-supplied exclusions.
 *
 * Note this doesn't implement the semantic restriction about assigning
 * meaning to various idents in a complex value, as CSS Sanitizer doesn't
 * deal with semantics on that level.
 *
 * @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#identifier-value
 * @param string[] $exclude Additional values to exclude, all-lowercase.
 * @return Matcher
 */
public function customIdent( array $exclude = [] ) {
	$reserved = [
		// https://www.w3.org/TR/2019/CR-css-values-3-20190606/#common-keywords
		'initial', 'inherit', 'unset', 'default',
		// https://www.w3.org/TR/2018/CR-css-cascade-4-20180828/#all-shorthand
		'revert',
	];
	$forbidden = array_merge( $reserved, $exclude );

	// The comparison is done on the lowercased token value, which is why
	// $exclude is documented as all-lowercase.
	$isAllowed = static function ( Token $t ) use ( $forbidden ) {
		$ident = strtolower( $t->value() );
		return !in_array( $ident, $forbidden, true );
	};

	return new TokenMatcher( Token::T_IDENT, $isAllowed );
}
/**
* Matcher for a string
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#strings
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#strings
* @warning If the string will be used as a URL, use self::urlstring() instead.
* @return Matcher
*/
@ -112,7 +140,7 @@ class MatcherFactory {
/**
* Matcher for a URL
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#urls
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#urls
* @param string $type Type of resource referenced, e.g. "image" or "audio".
* Not used here, but might be used by a subclass to validate the URL more strictly.
* @return Matcher
@ -126,24 +154,28 @@ class MatcherFactory {
/**
* CSS-wide value keywords
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#common-keywords
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#common-keywords
* @return Matcher
*/
public function cssWideKeywords() {
if ( !isset( $this->cache[__METHOD__] ) ) {
$this->cache[__METHOD__] = new KeywordMatcher( [ 'initial', 'inherit', 'unset' ] );
$this->cache[__METHOD__] = new KeywordMatcher( [
// https://www.w3.org/TR/2019/CR-css-values-3-20190606/#common-keywords
'initial', 'inherit', 'unset',
// added by https://www.w3.org/TR/2018/CR-css-cascade-4-20180828/#all-shorthand
'revert'
] );
}
return $this->cache[__METHOD__];
}
/**
* Add calc() support to a basic type matcher
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#calc-notation
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#calc-notation
* @param Matcher $typeMatcher Matcher for the type
* @param string $type Type being matched
* @return Matcher
* @return Matcher[]
*/
public function calc( Matcher $typeMatcher, $type ) {
protected function calcInternal( Matcher $typeMatcher, $type ) {
if ( $type === 'integer' ) {
$num = $this->rawInteger();
} else {
@ -164,13 +196,23 @@ class MatcherFactory {
&$calcValue,
Quantifier::star( new Juxtaposition( [ $ows, new DelimMatcher( '*' ), $ows, &$calcValue ] ) )
] );
} else {
} elseif ( $typeMatcher === $this->rawNumber() ) {
$calcProduct = new Juxtaposition( [
&$calcValue,
Quantifier::star( new Alternative( [
new Juxtaposition( [ $ows, new DelimMatcher( '*' ), $ows, &$calcValue ] ),
new Juxtaposition( [ $ows, new DelimMatcher( '/' ), $ows, $this->rawNumber() ] ),
] ) ),
Quantifier::star(
new Juxtaposition( [ $ows, new DelimMatcher( [ '*', '/' ] ), $ows, &$calcValue ] )
),
] );
} else {
$calcNumValue = $this->calcInternal( $this->rawNumber(), 'number' )[1];
$calcProduct = new Juxtaposition( [
&$calcValue,
Quantifier::star(
new Alternative( [
new Juxtaposition( [ $ows, new DelimMatcher( '*' ), $ows, &$calcValue ] ),
new Juxtaposition( [ $ows, new DelimMatcher( '/' ), $ows, $calcNumValue, ] ),
] )
),
] );
}
@ -200,17 +242,31 @@ class MatcherFactory {
] );
}
return new Alternative( [ $typeMatcher, $calcFunc ] );
return [
new Alternative( [ $typeMatcher, $calcFunc ] ),
$calcValue,
];
}
/**
 * Add calc() support to a basic type matcher.
 *
 * Thin public wrapper: delegates to calcInternal() and hands back only the
 * first element of the array it returns.
 *
 * @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#calc-notation
 * @param Matcher $typeMatcher Matcher for the type
 * @param string $type Type being matched
 * @return Matcher
 */
public function calc( Matcher $typeMatcher, $type ) {
	[ $withCalc ] = $this->calcInternal( $typeMatcher, $type );
	return $withCalc;
}
/**
* Matcher for an integer value, without calc()
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#integers
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#integers
* @return Matcher
*/
protected function rawInteger() {
if ( !isset( $this->cache[__METHOD__] ) ) {
$this->cache[__METHOD__] = new TokenMatcher( Token::T_NUMBER, function ( Token $t ) {
$this->cache[__METHOD__] = new TokenMatcher( Token::T_NUMBER, static function ( Token $t ) {
// The spec says it must match /^[+-]?\d+$/, but the tokenizer
// should have marked any other number token as a 'number'
// anyway so let's not bother checking.
@ -222,7 +278,7 @@ class MatcherFactory {
/**
* Matcher for an integer value
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#integers
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#integers
* @return Matcher
*/
public function integer() {
@ -234,7 +290,7 @@ class MatcherFactory {
/**
* Matcher for a real number, without calc()
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#numbers
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#numbers
* @return Matcher
*/
public function rawNumber() {
@ -246,7 +302,7 @@ class MatcherFactory {
/**
* Matcher for a real number
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#numbers
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#numbers
* @return Matcher
*/
public function number() {
@ -258,7 +314,7 @@ class MatcherFactory {
/**
* Matcher for a percentage value, without calc()
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#percentages
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#percentages
* @return Matcher
*/
public function rawPercentage() {
@ -270,7 +326,7 @@ class MatcherFactory {
/**
* Matcher for a percentage value
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#percentages
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#percentages
* @return Matcher
*/
public function percentage() {
@ -282,7 +338,7 @@ class MatcherFactory {
/**
* Matcher for a length-percentage value
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#typedef-length-percentage
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#typedef-length-percentage
* @return Matcher
*/
public function lengthPercentage() {
@ -297,7 +353,7 @@ class MatcherFactory {
/**
* Matcher for a frequency-percentage value
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#typedef-frequency-percentage
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#typedef-frequency-percentage
* @return Matcher
*/
public function frequencyPercentage() {
@ -311,8 +367,8 @@ class MatcherFactory {
}
/**
* Matcher for a angle-percentage value
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#typedef-angle-percentage
* Matcher for an angle-percentage value
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#typedef-angle-percentage
* @return Matcher
*/
public function anglePercentage() {
@ -327,7 +383,7 @@ class MatcherFactory {
/**
* Matcher for a time-percentage value
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#typedef-time-percentage
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#typedef-time-percentage
* @return Matcher
*/
public function timePercentage() {
@ -342,7 +398,7 @@ class MatcherFactory {
/**
* Matcher for a number-percentage value
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#typedef-number-percentage
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#typedef-number-percentage
* @return Matcher
*/
public function numberPercentage() {
@ -357,7 +413,7 @@ class MatcherFactory {
/**
* Matcher for a dimension value
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#dimensions
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#dimensions
* @return Matcher
*/
public function dimension() {
@ -371,9 +427,9 @@ class MatcherFactory {
* Matches the number 0
* @return Matcher
*/
protected function zero() {
public function zero() {
if ( !isset( $this->cache[__METHOD__] ) ) {
$this->cache[__METHOD__] = new TokenMatcher( Token::T_NUMBER, function ( Token $t ) {
$this->cache[__METHOD__] = new TokenMatcher( Token::T_NUMBER, static function ( Token $t ) {
return $t->value() === 0 || $t->value() === 0.0;
} );
}
@ -382,16 +438,16 @@ class MatcherFactory {
/**
* Matcher for a length value, without calc()
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#lengths
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#lengths
* @return Matcher
*/
protected function rawLength() {
if ( !isset( $this->cache[__METHOD__] ) ) {
$unitsRe = '/^(' . join( '|', self::$lengthUnits ) . ')$/i';
$unitsRe = '/^(' . implode( '|', self::$lengthUnits ) . ')$/i';
$this->cache[__METHOD__] = new Alternative( [
$this->zero(),
new TokenMatcher( Token::T_DIMENSION, function ( Token $t ) use ( $unitsRe ) {
new TokenMatcher( Token::T_DIMENSION, static function ( Token $t ) use ( $unitsRe ) {
return preg_match( $unitsRe, $t->unit() );
} ),
] );
@ -401,7 +457,7 @@ class MatcherFactory {
/**
* Matcher for a length value
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#lengths
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#lengths
* @return Matcher
*/
public function length() {
@ -413,26 +469,25 @@ class MatcherFactory {
/**
* Matcher for an angle value, without calc()
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#angles
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#angles
* @return Matcher
*/
protected function rawAngle() {
if ( !isset( $this->cache[__METHOD__] ) ) {
$unitsRe = '/^(' . join( '|', self::$angleUnits ) . ')$/i';
$unitsRe = '/^(' . implode( '|', self::$angleUnits ) . ')$/i';
$this->cache[__METHOD__] = new Alternative( [
$this->zero(),
new TokenMatcher( Token::T_DIMENSION, function ( Token $t ) use ( $unitsRe ) {
$this->cache[__METHOD__] = new TokenMatcher( Token::T_DIMENSION,
static function ( Token $t ) use ( $unitsRe ) {
return preg_match( $unitsRe, $t->unit() );
} ),
] );
}
);
}
return $this->cache[__METHOD__];
}
/**
* Matcher for an angle value
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#angles
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#angles
* @return Matcher
*/
public function angle() {
@ -444,15 +499,15 @@ class MatcherFactory {
/**
* Matcher for a duration (time) value, without calc()
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#time
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#time
* @return Matcher
*/
protected function rawTime() {
if ( !isset( $this->cache[__METHOD__] ) ) {
$unitsRe = '/^(' . join( '|', self::$timeUnits ) . ')$/i';
$unitsRe = '/^(' . implode( '|', self::$timeUnits ) . ')$/i';
$this->cache[__METHOD__] = new TokenMatcher( Token::T_DIMENSION,
function ( Token $t ) use ( $unitsRe ) {
static function ( Token $t ) use ( $unitsRe ) {
return preg_match( $unitsRe, $t->unit() );
}
);
@ -462,7 +517,7 @@ class MatcherFactory {
/**
* Matcher for a duration (time) value
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#time
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#time
* @return Matcher
*/
public function time() {
@ -474,15 +529,15 @@ class MatcherFactory {
/**
* Matcher for a frequency value, without calc()
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#frequency
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#frequency
* @return Matcher
*/
protected function rawFrequency() {
if ( !isset( $this->cache[__METHOD__] ) ) {
$unitsRe = '/^(' . join( '|', self::$frequencyUnits ) . ')$/i';
$unitsRe = '/^(' . implode( '|', self::$frequencyUnits ) . ')$/i';
$this->cache[__METHOD__] = new TokenMatcher( Token::T_DIMENSION,
function ( Token $t ) use ( $unitsRe ) {
static function ( Token $t ) use ( $unitsRe ) {
return preg_match( $unitsRe, $t->unit() );
}
);
@ -492,7 +547,7 @@ class MatcherFactory {
/**
* Matcher for a frequency value
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#frequency
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#frequency
* @return Matcher
*/
public function frequency() {
@ -504,12 +559,12 @@ class MatcherFactory {
/**
* Matcher for a resolution value
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#resolution
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#resolution
* @return Matcher
*/
public function resolution() {
if ( !isset( $this->cache[__METHOD__] ) ) {
$this->cache[__METHOD__] = new TokenMatcher( Token::T_DIMENSION, function ( Token $t ) {
$this->cache[__METHOD__] = new TokenMatcher( Token::T_DIMENSION, static function ( Token $t ) {
return preg_match( '/^(dpi|dpcm|dppx)$/i', $t->unit() );
} );
}
@ -543,7 +598,7 @@ class MatcherFactory {
/**
* Matcher for a color value
* @see https://www.w3.org/TR/2011/REC-css3-color-20110607/#colorunits
* @see https://www.w3.org/TR/2018/REC-css-color-3-20180619/#colorunits
* @return Matcher
*/
public function color() {
@ -592,7 +647,7 @@ class MatcherFactory {
// Other keywords. Intentionally omitting the deprecated system colors.
'transparent', 'currentColor',
] ),
new TokenMatcher( Token::T_HASH, function ( Token $t ) {
new TokenMatcher( Token::T_HASH, static function ( Token $t ) {
return preg_match( '/^([0-9a-f]{3}|[0-9a-f]{6})$/i', $t->value() );
} ),
], $this->colorFuncs() ) );
@ -602,31 +657,33 @@ class MatcherFactory {
/**
* Matcher for an image value
* @see https://www.w3.org/TR/2012/CR-css3-images-20120417/#image-values
* @see https://www.w3.org/TR/2019/CR-css-images-3-20191010/#image-values
* @return Matcher
*/
public function image() {
if ( !isset( $this->cache[__METHOD__] ) ) {
// https://www.w3.org/TR/2012/CR-css3-images-20120417/#image-list-type
// Note the undefined <element-reference> production has been dropped from the Editor's Draft.
$imageDecl = new Alternative( [
$this->url( 'image' ),
$this->urlstring( 'image' ),
] );
// https://www.w3.org/TR/2012/CR-css3-images-20120417/#gradients
// https://www.w3.org/TR/2019/CR-css-images-3-20191010/#gradients
$c = $this->comma();
$colorStops = Quantifier::hash( new Juxtaposition( [
$colorStop = UnorderedGroup::allOf( [
$this->color(),
// Not really <length-percentage>, but grammatically the same
Quantifier::optional( $this->lengthPercentage() ),
] ), 2, INF );
] );
$colorStopList = new Juxtaposition( [
$colorStop,
Quantifier::hash( new Juxtaposition( [
Quantifier::optional( $this->lengthPercentage() ),
$colorStop
], true ) ),
], true );
$atPosition = new Juxtaposition( [ new KeywordMatcher( 'at' ), $this->position() ] );
$linearGradient = new Juxtaposition( [
Quantifier::optional( new Juxtaposition( [
new Alternative( [
$this->angle(),
new Alternative( [
$this->zero(),
$this->angle(),
] ),
new Juxtaposition( [ new KeywordMatcher( 'to' ), UnorderedGroup::someOf( [
new KeywordMatcher( [ 'left', 'right' ] ),
new KeywordMatcher( [ 'top', 'bottom' ] ),
@ -634,7 +691,7 @@ class MatcherFactory {
] ),
$c
] ) ),
$colorStops,
$colorStopList,
] );
$radialGradient = new Juxtaposition( [
Quantifier::optional( new Juxtaposition( [
@ -644,13 +701,12 @@ class MatcherFactory {
UnorderedGroup::someOf( [ new KeywordMatcher( 'circle' ), $this->length() ] ),
UnorderedGroup::someOf( [
new KeywordMatcher( 'ellipse' ),
// Not really <length-percentage>, but grammatically the same
Quantifier::count( $this->lengthPercentage(), 2, 2 )
] ),
UnorderedGroup::someOf( [
new KeywordMatcher( [ 'circle', 'ellipse' ] ),
new KeywordMatcher( [
'closest-side', 'farthest-side', 'closest-corner', 'farthest-corner'
'closest-corner', 'closest-side', 'farthest-corner', 'farthest-side',
] ),
] ),
] ),
@ -660,16 +716,12 @@ class MatcherFactory {
] ),
$c
] ) ),
$colorStops,
$colorStopList,
] );
// Putting it all together
$this->cache[__METHOD__] = new Alternative( [
$this->url( 'image' ),
new FunctionMatcher( 'image', new Juxtaposition( [
Quantifier::star( new Juxtaposition( [ $imageDecl, $c ] ) ),
new Alternative( [ $imageDecl, $this->color() ] ),
] ) ),
new FunctionMatcher( 'linear-gradient', $linearGradient ),
new FunctionMatcher( 'radial-gradient', $radialGradient ),
new FunctionMatcher( 'repeating-linear-gradient', $linearGradient ),
@ -681,10 +733,41 @@ class MatcherFactory {
/**
* Matcher for a position value
* @see https://www.w3.org/TR/2014/CR-css3-background-20140909/#ltpositiongt
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#typedef-position
* @return Matcher
*/
public function position() {
    // The matcher is built once and reused from the cache on later calls.
    if ( isset( $this->cache[__METHOD__] ) ) {
        return $this->cache[__METHOD__];
    }

    $lengthPercentage = $this->lengthPercentage();
    $centerKeyword = new KeywordMatcher( 'center' );
    $horizontalKeyword = new KeywordMatcher( [ 'left', 'right' ] );
    $verticalKeyword = new KeywordMatcher( [ 'top', 'bottom' ] );

    // Keyword-only form: a horizontal-or-center keyword grouped (via someOf)
    // with a vertical-or-center keyword.
    $keywordForm = UnorderedGroup::someOf( [
        new Alternative( [ $centerKeyword, $horizontalKeyword ] ),
        new Alternative( [ $centerKeyword, $verticalKeyword ] ),
    ] );

    // Ordered form: a horizontal component, optionally followed by a
    // vertical component; either may be a keyword or a <length-percentage>.
    $orderedForm = new Juxtaposition( [
        new Alternative( [ $centerKeyword, $horizontalKeyword, $lengthPercentage ] ),
        Quantifier::optional(
            new Alternative( [ $centerKeyword, $verticalKeyword, $lengthPercentage ] )
        ),
    ] );

    // Edge-offset form: both "<left|right> <length-percentage>" and
    // "<top|bottom> <length-percentage>", grouped via allOf.
    $edgeOffsetForm = UnorderedGroup::allOf( [
        new Juxtaposition( [ $horizontalKeyword, $lengthPercentage ] ),
        new Juxtaposition( [ $verticalKeyword, $lengthPercentage ] ),
    ] );

    $this->cache[__METHOD__] = new Alternative( [
        $keywordForm,
        $orderedForm,
        $edgeOffsetForm,
    ] );

    return $this->cache[__METHOD__];
}
/**
* Matcher for a bg-position value
* @see https://www.w3.org/TR/2017/CR-css-backgrounds-3-20171017/#typedef-bg-position
* @return Matcher
*/
public function bgPosition() {
if ( !isset( $this->cache[__METHOD__] ) ) {
$lp = $this->lengthPercentage();
$olp = Quantifier::optional( $lp );
@ -709,7 +792,7 @@ class MatcherFactory {
/**
* Matcher for a CSS media query
* @see https://www.w3.org/TR/2016/WD-mediaqueries-4-20160706/#mq-syntax
* @see https://www.w3.org/TR/2017/CR-mediaqueries-4-20170905/#mq-syntax
* @param bool $strict Only allow defined query types
* @return Matcher
*/
@ -736,10 +819,10 @@ class MatcherFactory {
];
$mfName = new KeywordMatcher( array_merge(
$rangeFeatures,
array_map( function ( $f ) {
array_map( static function ( $f ) {
return "min-$f";
}, $rangeFeatures ),
array_map( function ( $f ) {
array_map( static function ( $f ) {
return "max-$f";
}, $rangeFeatures ),
$discreteFeatures
@ -757,7 +840,7 @@ class MatcherFactory {
}
$posInt = $this->calc(
new TokenMatcher( Token::T_NUMBER, function ( Token $t ) {
new TokenMatcher( Token::T_NUMBER, static function ( Token $t ) {
return $t->typeFlag() === 'integer' && preg_match( '/^\+?\d+$/', $t->representation() );
} ),
'integer'
@ -777,25 +860,38 @@ class MatcherFactory {
new Juxtaposition( [ $posInt, new DelimMatcher( '/' ), $posInt ] ),
] );
$mediaInParens = new NothingMatcher(); // temporary
// temporary
$mediaInParens = new NothingMatcher();
$mediaNot = new Juxtaposition( [ new KeywordMatcher( 'not' ), &$mediaInParens ] );
$mediaAnd = new Juxtaposition( [
&$mediaInParens,
Quantifier::plus( new Juxtaposition( [ new KeywordMatcher( 'and' ), &$mediaInParens ] ) )
$mediaAnd = new Juxtaposition( [ new KeywordMatcher( 'and' ), &$mediaInParens ] );
$mediaOr = new Juxtaposition( [ new KeywordMatcher( 'or' ), &$mediaInParens ] );
$mediaCondition = new Alternative( [
$mediaNot,
new Juxtaposition( [
&$mediaInParens,
new Alternative( [
Quantifier::star( $mediaAnd ),
Quantifier::star( $mediaOr ),
] )
] ),
] );
$mediaOr = new Juxtaposition( [
&$mediaInParens,
Quantifier::plus( new Juxtaposition( [ new KeywordMatcher( 'or' ), &$mediaInParens ] ) )
$mediaConditionWithoutOr = new Alternative( [
$mediaNot,
new Juxtaposition( [ &$mediaInParens, Quantifier::star( $mediaAnd ) ] ),
] );
$mediaCondition = new Alternative( [ $mediaNot, $mediaAnd, $mediaOr, &$mediaInParens ] );
$mediaConditionWithoutOr = new Alternative( [ $mediaNot, $mediaAnd, &$mediaInParens ] );
$mediaFeature = new BlockMatcher( Token::T_LEFT_PAREN, new Alternative( [
new Juxtaposition( [ $mfName, new TokenMatcher( Token::T_COLON ), $mfValue ] ), // <mf-plain>
$mfName, // <mf-boolean>
new Juxtaposition( [ $mfName, $ltgteq, $mfValue ] ), // <mf-range>, 1st alternative
new Juxtaposition( [ $mfValue, $ltgteq, $mfName ] ), // <mf-range>, 2nd alternative
new Juxtaposition( [ $mfValue, $lteq, $mfName, $lteq, $mfValue ] ), // <mf-range>, 3rd alt
new Juxtaposition( [ $mfValue, $gteq, $mfName, $gteq, $mfValue ] ), // <mf-range>, 4th alt
// <mf-plain>
new Juxtaposition( [ $mfName, new TokenMatcher( Token::T_COLON ), $mfValue ] ),
// <mf-boolean>
$mfName,
// <mf-range>, 1st alternative
new Juxtaposition( [ $mfName, $ltgteq, $mfValue ] ),
// <mf-range>, 2nd alternative
new Juxtaposition( [ $mfValue, $ltgteq, $mfName ] ),
// <mf-range>, 3rd alt
new Juxtaposition( [ $mfValue, $lteq, $mfName, $lteq, $mfValue ] ),
// <mf-range>, 4th alt
new Juxtaposition( [ $mfValue, $gteq, $mfName, $gteq, $mfValue ] ),
] ) );
$mediaInParens = new Alternative( [
new BlockMatcher( Token::T_LEFT_PAREN, $mediaCondition ),
@ -821,7 +917,7 @@ class MatcherFactory {
/**
* Matcher for a CSS media query list
* @see https://www.w3.org/TR/2016/WD-mediaqueries-4-20160706/#mq-syntax
* @see https://www.w3.org/TR/2017/CR-mediaqueries-4-20170905/#mq-syntax
* @param bool $strict Only allow defined query types
* @return Matcher
*/
@ -834,15 +930,114 @@ class MatcherFactory {
return $this->cache[$key];
}
/************************************************************************//**
/**
 * Matcher for a "supports-condition"
 *
 * The grammar is recursive (a parenthesised condition may itself contain a
 * condition), so a placeholder matcher is bound by reference into the
 * "in parens" alternative and replaced once the full condition exists.
 *
 * @see https://www.w3.org/TR/2013/CR-css3-conditional-20130404/#supports_condition
 * @param PropertySanitizer|null $declarationSanitizer Check declarations against this Sanitizer
 * @param bool $strict Only accept defined syntax. Default true.
 * @return Matcher
 */
public function cssSupportsCondition(
    ?PropertySanitizer $declarationSanitizer = null, $strict = true
) {
    $ws = $this->significantWhitespace();
    $anythingPlus = new AnythingMatcher( [ 'quantifier' => '+' ] );

    if ( $strict ) {
        // Strict mode: the catch-all "general enclosed" form never matches.
        $generalEnclosed = new NothingMatcher();
    } else {
        // Lenient mode: also accept a function containing anything, or a
        // parenthesised block that starts with an identifier.
        $generalEnclosed = new Alternative( [
            new FunctionMatcher( null, $anythingPlus ),
            new BlockMatcher( Token::T_LEFT_PAREN, new Juxtaposition( [ $this->ident(), $anythingPlus ] ) ),
        ] );
    }

    // Temporary placeholder. It is stored BY REFERENCE in the Alternative
    // below, so the assignment at the end of this method swaps in the real
    // nested-condition matcher.
    $supportsConditionBlock = new NothingMatcher();
    $supportsConditionInParens = new Alternative( [
        &$supportsConditionBlock,
        new BlockMatcher( Token::T_LEFT_PAREN, $this->cssDeclaration( $declarationSanitizer ) ),
        $generalEnclosed,
    ] );

    $supportsCondition = new Alternative( [
        // "not" <in-parens>
        new Juxtaposition( [ new KeywordMatcher( 'not' ), $ws, $supportsConditionInParens ] ),
        // <in-parens> followed by one or more "and" <in-parens>
        new Juxtaposition( [ $supportsConditionInParens, Quantifier::plus( new Juxtaposition( [
            $ws, new KeywordMatcher( 'and' ), $ws, $supportsConditionInParens
        ] ) ) ] ),
        // <in-parens> followed by one or more "or" <in-parens>
        new Juxtaposition( [ $supportsConditionInParens, Quantifier::plus( new Juxtaposition( [
            $ws, new KeywordMatcher( 'or' ), $ws, $supportsConditionInParens
        ] ) ) ] ),
        // A lone <in-parens>
        $supportsConditionInParens,
    ] );

    // Close the recursion: a parenthesised block holding a full condition.
    $supportsConditionBlock = new BlockMatcher( Token::T_LEFT_PAREN, $supportsCondition );

    return $supportsCondition;
}
/**
 * Matcher for a declaration
 *
 * Matches one or more component values, then accepts the match only if those
 * values parse as a declaration without parse errors and, when a sanitizer is
 * supplied, the sanitizer returns the declaration unchanged with no errors.
 *
 * @param PropertySanitizer|null $declarationSanitizer Check declarations against this Sanitizer
 * @return Matcher
 */
public function cssDeclaration( ?PropertySanitizer $declarationSanitizer = null ) {
    $anythingPlus = new AnythingMatcher( [ 'quantifier' => '+' ] );

    return new CheckedMatcher(
        $anythingPlus,
        static function ( ComponentValueList $list, GrammarMatch $match, array $options )
            use ( $declarationSanitizer )
        {
            // Re-parse just the matched values as a standalone declaration.
            $cvlist = new ComponentValueList( $match->getValues() );
            $parser = Parser::newFromTokens( $cvlist->toTokenArray() );
            $declaration = $parser->parseDeclaration();
            if ( !$declaration || $parser->getParseErrors() ) {
                return false;
            }

            // Without a sanitizer, any syntactically valid declaration passes.
            if ( !$declarationSanitizer ) {
                return true;
            }

            // NOTE(review): $reset presumably restores the sanitizer's stashed
            // errors when it is released — confirm against PropertySanitizer.
            // The errors must be read before $reset is unset.
            $reset = $declarationSanitizer->stashSanitizationErrors();
            $ret = $declarationSanitizer->sanitize( $declaration );
            $errors = $declarationSanitizer->getSanitizationErrors();
            unset( $reset );

            // Accept only if the sanitizer returned the same declaration object
            // and reported no errors.
            return $ret === $declaration && !$errors;
        }
    );
}
/**
 * Matcher for a single easing function (CSS Easing Functions Level 1)
 *
 * Accepts one of the easing keywords, a `steps()` function taking an integer
 * and an optional step-position keyword, or a `cubic-bezier()` function
 * taking exactly four comma-separated numbers.
 *
 * @see https://www.w3.org/TR/2019/CR-css-easing-1-20190430/#typedef-easing-function
 * @return Matcher
 */
public function cssSingleEasingFunction() {
    // Reuse the previously built matcher when there is one.
    if ( isset( $this->cache[__METHOD__] ) ) {
        return $this->cache[__METHOD__];
    }

    $easingKeywords = new KeywordMatcher( [
        'ease', 'linear', 'ease-in', 'ease-out', 'ease-in-out', 'step-start', 'step-end'
    ] );

    $stepPosition = new KeywordMatcher( [
        'jump-start', 'jump-end', 'jump-none', 'jump-both', 'start', 'end'
    ] );

    // NOTE(review): the trailing `true` presumably makes the juxtaposed
    // items comma-separated — confirm against Juxtaposition.
    $stepsArguments = new Juxtaposition( [
        $this->integer(),
        Quantifier::optional( $stepPosition ),
    ], true );

    // Exactly four numbers, comma-separated.
    $bezierArguments = Quantifier::hash( $this->number(), 4, 4 );

    $this->cache[__METHOD__] = new Alternative( [
        $easingKeywords,
        new FunctionMatcher( 'steps', $stepsArguments ),
        new FunctionMatcher( 'cubic-bezier', $bezierArguments ),
    ] );

    return $this->cache[__METHOD__];
}
/**
* @name CSS Selectors Level 3
* @{
*
* https://www.w3.org/TR/2011/REC-css3-selectors-20110929/#w3cselgrammar
* https://www.w3.org/TR/2018/REC-selectors-3-20181106/#w3cselgrammar
*/
/**
* List of selectors
* List of selectors (selectors_group)
*
* selector [ COMMA S* selector ]*
*
@ -862,7 +1057,7 @@ class MatcherFactory {
}
/**
* A single selector
* A single selector (selector)
*
* simple_selector_sequence [ combinator simple_selector_sequence ]*
*
@ -886,7 +1081,7 @@ class MatcherFactory {
}
/**
* A CSS combinator
* A CSS combinator (combinator)
*
* PLUS S* | GREATER S* | TILDE S* | S+
*
@ -910,7 +1105,7 @@ class MatcherFactory {
}
/**
* A simple selector sequence
* A simple selector sequence (simple_selector_sequence)
*
* [ type_selector | universal ]
* [ HASH | class | attrib | pseudo | negation ]*
@ -952,7 +1147,7 @@ class MatcherFactory {
}
/**
* A type selector (i.e. a tag name)
* A type selector, i.e. a tag name (type_selector)
*
* [ namespace_prefix ] ? element_name
*
@ -974,7 +1169,7 @@ class MatcherFactory {
}
/**
* A namespace prefix
* A namespace prefix (namespace_prefix)
*
* [ IDENT | '*' ]? '|'
*
@ -1010,7 +1205,7 @@ class MatcherFactory {
}
/**
* The universal selector
* The universal selector (universal)
*
* [ namespace_prefix ]? '*'
*
@ -1036,7 +1231,7 @@ class MatcherFactory {
*/
public function cssID() {
if ( !isset( $this->cache[__METHOD__] ) ) {
$this->cache[__METHOD__] = new TokenMatcher( Token::T_HASH, function ( Token $t ) {
$this->cache[__METHOD__] = new TokenMatcher( Token::T_HASH, static function ( Token $t ) {
return $t->typeFlag() === 'id';
} );
$this->cache[__METHOD__]->setDefaultOptions( [ 'skip-whitespace' => false ] );
@ -1045,7 +1240,7 @@ class MatcherFactory {
}
/**
* A class selector
* A class selector (class)
*
* '.' IDENT
*
@ -1063,7 +1258,7 @@ class MatcherFactory {
}
/**
* An attribute selector
* An attribute selector (attrib)
*
* '[' S* [ namespace_prefix ]? IDENT S*
* [ [ PREFIXMATCH |
@ -1094,14 +1289,12 @@ class MatcherFactory {
] )->capture( 'attribute' ),
$this->optionalWhitespace(),
Quantifier::optional( new Juxtaposition( [
Alternative::create( [
new TokenMatcher( Token::T_PREFIX_MATCH ),
new TokenMatcher( Token::T_SUFFIX_MATCH ),
new TokenMatcher( Token::T_SUBSTRING_MATCH ),
// Sigh. They removed various tokens from CSS Syntax 3, but didn't update the grammar
// in CSS Selectors 3. Wing it with a hint from CSS Selectors 4's <attr-matcher>
( new Juxtaposition( [
Quantifier::optional( new DelimMatcher( [ '^', '$', '*', '~', '|' ] ) ),
new DelimMatcher( [ '=' ] ),
new TokenMatcher( Token::T_INCLUDE_MATCH ),
new TokenMatcher( Token::T_DASH_MATCH ),
] )->capture( 'test' ),
] ) )->capture( 'test' ),
$this->optionalWhitespace(),
Alternative::create( [
$this->ident(),
@ -1117,14 +1310,18 @@ class MatcherFactory {
}
/**
* A pseudo-class or pseudo-element
* A pseudo-class or pseudo-element (pseudo)
*
* ':' ':'? [ IDENT | functional_pseudo ]
*
* Where functional_pseudo is
*
* FUNCTION S* expression ')'
*
* Although this actually only matches the pseudo-selectors defined in the
* following sources:
* - https://www.w3.org/TR/2011/REC-css3-selectors-20110929/#pseudo-classes
* - https://www.w3.org/TR/2016/WD-css-pseudo-4-20160607/
* - https://www.w3.org/TR/2018/REC-selectors-3-20181106/#pseudo-classes
* - https://www.w3.org/TR/2019/WD-css-pseudo-4-20190225/
*
* @return Matcher
*/
@ -1156,7 +1353,7 @@ class MatcherFactory {
$colon,
new KeywordMatcher( [
'first-line', 'first-letter', 'before', 'after', 'selection', 'inactive-selection',
'spelling-error', 'grammar-error', 'placeholder'
'spelling-error', 'grammar-error', 'marker', 'placeholder'
] ),
] ),
] );
@ -1168,44 +1365,43 @@ class MatcherFactory {
/**
* An "AN+B" form
*
* https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#anb
* https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#anb-microsyntax
*
* @return Matcher
*/
public function cssANplusB() {
if ( !isset( $this->cache[__METHOD__] ) ) {
// Quoth the spec:
// > The An+B notation was originally defined using a slightly
// > different tokenizer than the rest of CSS, resulting in a
// > somewhat odd definition when expressed in terms of CSS tokens.
// > The An+B notation was originally defined using a slightly
// > different tokenizer than the rest of CSS, resulting in a
// > somewhat odd definition when expressed in terms of CSS tokens.
// That's a bit of an understatement
$plus = new DelimMatcher( [ '+' ] );
$plusQ = Quantifier::optional( new DelimMatcher( [ '+' ] ) );
$n = new KeywordMatcher( [ 'n' ] );
$dashN = new KeywordMatcher( [ '-n' ] );
$nDash = new KeywordMatcher( [ 'n-' ] );
$plusQN = new Juxtaposition( [ $plusQ, $n ] );
$plusQNDash = new Juxtaposition( [ $plusQ, $nDash ] );
$nDimension = new TokenMatcher( Token::T_DIMENSION, function ( Token $t ) {
$nDimension = new TokenMatcher( Token::T_DIMENSION, static function ( Token $t ) {
return $t->typeFlag() === 'integer' && !strcasecmp( $t->unit(), 'n' );
} );
$nDashDimension = new TokenMatcher( Token::T_DIMENSION, function ( Token $t ) {
$nDashDimension = new TokenMatcher( Token::T_DIMENSION, static function ( Token $t ) {
return $t->typeFlag() === 'integer' && !strcasecmp( $t->unit(), 'n-' );
} );
$nDashDigitDimension = new TokenMatcher( Token::T_DIMENSION, function ( Token $t ) {
$nDashDigitDimension = new TokenMatcher( Token::T_DIMENSION, static function ( Token $t ) {
return $t->typeFlag() === 'integer' && preg_match( '/^n-\d+$/i', $t->unit() );
} );
$nDashDigitIdent = new TokenMatcher( Token::T_IDENT, function ( Token $t ) {
$nDashDigitIdent = new TokenMatcher( Token::T_IDENT, static function ( Token $t ) {
return preg_match( '/^n-\d+$/i', $t->value() );
} );
$dashNDashDigitIdent = new TokenMatcher( Token::T_IDENT, function ( Token $t ) {
$dashNDashDigitIdent = new TokenMatcher( Token::T_IDENT, static function ( Token $t ) {
return preg_match( '/^-n-\d+$/i', $t->value() );
} );
$signedInt = new TokenMatcher( Token::T_NUMBER, function ( Token $t ) {
$signedInt = new TokenMatcher( Token::T_NUMBER, static function ( Token $t ) {
return $t->typeFlag() === 'integer' && preg_match( '/^[+-]/', $t->representation() );
} );
$signlessInt = new TokenMatcher( Token::T_NUMBER, function ( Token $t ) {
$signlessInt = new TokenMatcher( Token::T_NUMBER, static function ( Token $t ) {
return $t->typeFlag() === 'integer' && preg_match( '/^\d/', $t->representation() );
} );
$plusOrMinus = new DelimMatcher( [ '+', '-' ] );
@ -1213,7 +1409,7 @@ class MatcherFactory {
$this->cache[__METHOD__] = new Alternative( [
new KeywordMatcher( [ 'odd', 'even' ] ),
new TokenMatcher( Token::T_NUMBER, function ( Token $t ) {
new TokenMatcher( Token::T_NUMBER, static function ( Token $t ) {
return $t->typeFlag() === 'integer';
} ),
$nDimension,
@ -1238,7 +1434,7 @@ class MatcherFactory {
}
/**
* A negation
* A negation (negation)
*
* ':' not( S* [ type_selector | universal | HASH | class | attrib | pseudo ] S* ')'
*
@ -1272,7 +1468,7 @@ class MatcherFactory {
return $this->cache[__METHOD__];
}
/**@}*/
/** @} */
}

View File

@ -14,8 +14,9 @@ use Wikimedia\CSS\Objects\Token;
*/
class NoWhitespace extends Matcher {
/** @inheritDoc */
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
$cv = isset( $values[$start-1] ) ? $values[$start-1] : null;
$cv = $values[$start - 1] ?? null;
if ( !$cv instanceof Token || $cv->type() !== Token::T_WHITESPACE ) {
yield $this->makeMatch( $values, $start, $start );
}

View File

@ -9,8 +9,8 @@ namespace Wikimedia\CSS\Grammar;
use Wikimedia\CSS\Objects\ComponentValueList;
/**
* Matcher that requires its sub-Matcher has only non-empty matches ("!" multipier)
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#mult-req
* Matcher that requires its sub-Matcher has only non-empty matches ("!" multiplier)
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#mult-req
*/
class NonEmpty extends Matcher {
/** @var Matcher */
@ -23,6 +23,7 @@ class NonEmpty extends Matcher {
$this->matcher = $matcher;
}
/** @inheritDoc */
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
foreach ( $this->matcher->generateMatches( $values, $start, $options ) as $match ) {
if ( $match->getLength() !== 0 ) {

View File

@ -6,14 +6,15 @@
namespace Wikimedia\CSS\Grammar;
use Wikimedia\CSS\Objects\ComponentValue;
use EmptyIterator;
use Wikimedia\CSS\Objects\ComponentValueList;
/**
* Matcher that matches nothing
*/
class NothingMatcher extends Matcher {
/** @inheritDoc */
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
return new \EmptyIterator;
return new EmptyIterator;
}
}

View File

@ -6,20 +6,25 @@
namespace Wikimedia\CSS\Grammar;
use Iterator;
use UnexpectedValueException;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\Token;
/**
* Matcher that matches a sub-Matcher a certain number of times
* ("?", "*", "+", "#", "{A,B}" multipliers)
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#component-multipliers
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#component-multipliers
*/
class Quantifier extends Matcher {
/** @var Matcher */
protected $matcher;
/** @var int */
protected $min, $max;
protected $min;
/** @var int */
protected $max;
/** @var bool Whether matches are comma-separated */
protected $commas;
@ -39,7 +44,7 @@ class Quantifier extends Matcher {
/**
* Implements "?": 0 or 1 matches
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#mult-opt
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#mult-opt
* @param Matcher $matcher
* @return static
*/
@ -49,7 +54,7 @@ class Quantifier extends Matcher {
/**
* Implements "*": 0 or more matches
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#mult-zero-plus
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#mult-zero-plus
* @param Matcher $matcher
* @return static
*/
@ -59,7 +64,7 @@ class Quantifier extends Matcher {
/**
* Implements "+": 1 or more matches
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#mult-one-plus
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#mult-one-plus
* @param Matcher $matcher
* @return static
*/
@ -69,7 +74,7 @@ class Quantifier extends Matcher {
/**
* Implements "{A,B}": Between A and B matches
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#mult-num-range
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#mult-num-range
* @param Matcher $matcher
* @param int|float $min Minimum number of matches
* @param int|float $max Maximum number of matches
@ -81,7 +86,7 @@ class Quantifier extends Matcher {
/**
* Implements "#" and "#{A,B}": Between A and B matches, comma-separated
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#mult-comma
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#mult-comma
* @param Matcher $matcher
* @param int|float $min Minimum number of matches
* @param int|float $max Maximum number of matches
@ -91,17 +96,21 @@ class Quantifier extends Matcher {
return new static( $matcher, $min, $max, true );
}
/** @inheritDoc */
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
$used = [];
// Maintain a stack of matches for backtracking purposes.
$stack = [
[ new Match( $values, $start, 0 ), $this->matcher->generateMatches( $values, $start, $options ) ]
[
new GrammarMatch( $values, $start, 0 ),
$this->matcher->generateMatches( $values, $start, $options )
]
];
do {
/** @var $lastMatch Match */
/** @var $iter \Iterator<Match> */
list( $lastMatch, $iter ) = $stack[count( $stack ) - 1];
/** @var $lastMatch GrammarMatch */
/** @var $iter Iterator<GrammarMatch> */
[ $lastMatch, $iter ] = $stack[count( $stack ) - 1];
// If the top of the stack has no more matches, pop it, maybe
// yield the last matched position, and loop.
@ -126,7 +135,7 @@ class Quantifier extends Matcher {
// Quantifiers don't work well when the quantified thing can be empty.
if ( $match->getLength() === 0 ) {
throw new \UnexpectedValueException( 'Empty match in quantifier!' );
throw new UnexpectedValueException( 'Empty match in quantifier!' );
}
$nextFrom = $match->getNext();
@ -136,17 +145,19 @@ class Quantifier extends Matcher {
$canBeMore = count( $stack ) < $this->max;
// Commas are slightly tricky:
// 1. If there is a following comma, start the next Matcher after it.
// 2. If not, there can't be any more Matchers following.
// 1. If there is a following comma, start the next Matcher after it.
// 2. If not, there can't be any more Matchers following.
// And in either case optional whitespace is always allowed.
if ( $this->commas ) {
$n = $nextFrom;
if ( isset( $values[$n] ) && $values[$n] instanceof Token &&
// @phan-suppress-next-line PhanNonClassMethodCall False positive
$values[$n]->type() === Token::T_WHITESPACE
) {
$n = $this->next( $values, $n, [ 'skip-whitespace' => true ] + $options );
}
if ( isset( $values[$n] ) && $values[$n] instanceof Token &&
// @phan-suppress-next-line PhanNonClassMethodCall False positive
$values[$n]->type() === Token::T_COMMA
) {
$nextFrom = $this->next( $values, $n, [ 'skip-whitespace' => true ] + $options );
@ -156,7 +167,7 @@ class Quantifier extends Matcher {
}
// If there can be more matches, push another one onto the stack
// and try it. Otherwise yield and continue with the current match.
// and try it. Otherwise, yield and continue with the current match.
if ( $canBeMore ) {
$stack[] = [ $match, $this->matcher->generateMatches( $values, $nextFrom, $options ) ];
} else {

View File

@ -11,7 +11,7 @@ use Wikimedia\CSS\Objects\Token;
/**
* Matcher that matches a token of a particular type
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#component-types
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#component-types
*/
class TokenMatcher extends Matcher {
/** @var string One of the Token::T_* constants */
@ -30,8 +30,9 @@ class TokenMatcher extends Matcher {
$this->callback = $callback;
}
/** @inheritDoc */
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
$cv = isset( $values[$start] ) ? $values[$start] : null;
$cv = $values[$start] ?? null;
if ( $cv instanceof Token && $cv->type() === $this->type &&
( !$this->callback || call_user_func( $this->callback, $cv ) )
) {

View File

@ -6,12 +6,15 @@
namespace Wikimedia\CSS\Grammar;
use ArrayIterator;
use EmptyIterator;
use Iterator;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Util;
/**
* Matcher that groups other matchers without ordering ("&&" and "||" combiners)
* @see https://www.w3.org/TR/2016/CR-css-values-3-20160929/#component-combinators
* @see https://www.w3.org/TR/2019/CR-css-values-3-20190606/#component-combinators
*/
class UnorderedGroup extends Matcher {
/** @var Matcher[] */
@ -48,6 +51,7 @@ class UnorderedGroup extends Matcher {
return new static( $matchers, false );
}
/** @inheritDoc */
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
$used = [];
@ -55,20 +59,20 @@ class UnorderedGroup extends Matcher {
// of remaining matchers.
$stack = [
[
new Match( $values, $start, 0 ),
new GrammarMatch( $values, $start, 0 ),
$this->matchers,
new \ArrayIterator( $this->matchers ),
new ArrayIterator( $this->matchers ),
null,
new \EmptyIterator
new EmptyIterator
]
];
do {
/** @var $lastMatch Match */
/** @var $lastMatch GrammarMatch */
/** @var $matchers Matcher[] */
/** @var $matcherIter \Iterator<Matcher> */
/** @var $matcherIter Iterator<Matcher> */
/** @var $curMatcher Matcher|null */
/** @var $iter \Iterator<Match> */
list( $lastMatch, $matchers, $matcherIter, $curMatcher, $iter ) = $stack[count( $stack ) - 1];
/** @var $iter Iterator<GrammarMatch> */
[ $lastMatch, $matchers, $matcherIter, $curMatcher, $iter ] = $stack[count( $stack ) - 1];
// If the top of the stack has more matches, process the next one.
if ( $iter->valid() ) {
@ -76,9 +80,9 @@ class UnorderedGroup extends Matcher {
$iter->next();
// If we have unused matchers to try after this one, do so.
// Otherwise yield and continue with the current one.
// Otherwise, yield and continue with the current one.
if ( $matchers ) {
$stack[] = [ $match, $matchers, new \ArrayIterator( $matchers ), null, new \EmptyIterator ];
$stack[] = [ $match, $matchers, new ArrayIterator( $matchers ), null, new EmptyIterator ];
} else {
$newMatch = $this->makeMatch( $values, $start, $match->getNext(), $match, $stack );
$mid = $newMatch->getUniqueID();
@ -91,7 +95,7 @@ class UnorderedGroup extends Matcher {
}
// We ran out of matches for the current top of the stack. Pop it,
// and put $curMatcher back into $matchers so it can be tried again
// and put $curMatcher back into $matchers, so it can be tried again
// at a later position.
array_pop( $stack );
if ( $curMatcher ) {
@ -109,14 +113,12 @@ class UnorderedGroup extends Matcher {
unset( $matchers[$matcherIter->key()] );
$iter = $curMatcher->generateMatches( $values, $fromPos, $options );
$stack[] = [ $lastMatch, $matchers, $matcherIter, $curMatcher, $iter ];
} else {
if ( $stack && !$this->all ) {
$newMatch = $this->makeMatch( $values, $start, $fromPos, $lastMatch, $stack );
$mid = $newMatch->getUniqueID();
if ( !isset( $used[$mid] ) ) {
$used[$mid] = 1;
yield $newMatch;
}
} elseif ( $stack && !$this->all ) {
$newMatch = $this->makeMatch( $values, $start, $fromPos, $lastMatch, $stack );
$mid = $newMatch->getUniqueID();
if ( !isset( $used[$mid] ) ) {
$used[$mid] = 1;
yield $newMatch;
}
}
} while ( $stack );

View File

@ -0,0 +1,98 @@
<?php
/**
* @file
* @license https://opensource.org/licenses/Apache-2.0 Apache-2.0
*/
namespace Wikimedia\CSS\Grammar;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\Token;
/**
 * Matcher for the special "<urange>" notation
 *
 * When a capture name is set on this matcher, each match it yields carries two
 * submatches named "start" and "end". Each wraps a single integer T_NUMBER
 * token holding the first and last codepoint of the range, respectively.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#urange
 */
class UrangeMatcher extends Matcher {
	/** @var Matcher Deliberately over-permissive token-level matcher */
	private $matcher;

	public function __construct() {
		$uKeyword = new KeywordMatcher( [ 'u' ] );
		$plusSign = new DelimMatcher( [ '+' ] );
		$questionMark = new DelimMatcher( [ '?' ] );

		$identToken = new TokenMatcher( Token::T_IDENT );
		$numberToken = new TokenMatcher( Token::T_NUMBER );
		$dimensionToken = new TokenMatcher( Token::T_DIMENSION );

		$optionalWildcards = Quantifier::count( $questionMark, 0, 6 );
		$requiredWildcards = Quantifier::count( $questionMark, 1, 6 );

		// These token sequences accept far more than a real <urange>;
		// generateMatches() re-checks the serialized text and discards
		// everything that is not actually valid.
		$alternatives = [
			new Juxtaposition( [ $uKeyword, $plusSign, $identToken, $optionalWildcards ] ),
			new Juxtaposition( [ $uKeyword, $numberToken, $dimensionToken ] ),
			new Juxtaposition( [ $uKeyword, $numberToken, $numberToken ] ),
			new Juxtaposition( [ $uKeyword, $dimensionToken, $optionalWildcards ] ),
			new Juxtaposition( [ $uKeyword, $numberToken, $optionalWildcards ] ),
			new Juxtaposition( [ $uKeyword, $plusSign, $requiredWildcards ] ),
		];
		$this->matcher = new Alternative( $alternatives );
	}

	/** @inheritDoc */
	protected function generateMatches( ComponentValueList $values, $start, array $options ) {
		foreach ( $this->matcher->generateMatches( $values, $start, $options ) as $candidate ) {
			// A <urange> is really defined by how its tokens serialize, so
			// validate the string form: trim surrounding whitespace, then
			// drop the empty comments that stringification interpolates.
			$text = str_replace( '/**/', '', trim( $candidate->__toString(), "\n\t " ) );

			$range = $this->parseUrangeText( $text );
			if ( $range === null ) {
				continue;
			}

			[ $first, $last ] = $range;
			if ( $first < 0 || $first > $last || $last > 0x10ffff ) {
				// Reversed range, or beyond the last Unicode codepoint.
				continue;
			}

			$length = $candidate->getNext() - $start;

			// The "start"/"end" submatches are only built when capturing.
			$submatches = $this->captureName === null ? [] : [
				$this->makeCodepointSubmatch( $first, 'start' ),
				$this->makeCodepointSubmatch( $last, 'end' ),
			];

			// Flag the leading 'U' T_IDENT as the start of a <urange> so that
			// serialization can later avoid inserting extraneous comments.
			// @see Wikimedia\CSS\Util::stringify()
			// @phan-suppress-next-line PhanNonClassMethodCall False positive
			$values[$start]->urangeHack( $length );
			yield new GrammarMatch( $values, $start, $length, $this->captureName, $submatches );
		}
	}

	/**
	 * Extract the codepoint bounds from the textual form of a candidate
	 *
	 * Two forms are recognized: an explicit "u+XXXX-YYYY" range, and a
	 * "u+XXXX" value of at most six characters with optional trailing "?"
	 * wildcards, where each "?" stands for 0 (lower bound) or f (upper bound).
	 *
	 * @param string $text Candidate text, already trimmed and comment-free
	 * @return int[]|null Two-element array [ start, end ], or null if the text
	 *  has neither recognized form
	 */
	private function parseUrangeText( $text ) {
		$found = [];
		if ( preg_match( '/^u\+([0-9a-f]{1,6})-([0-9a-f]{1,6})$/iD', $text, $found ) ) {
			return [ intval( $found[1], 16 ), intval( $found[2], 16 ) ];
		}

		// "u+" plus between one and six characters.
		$length = strlen( $text );
		if ( $length > 2 && $length <= 8 &&
			preg_match( '/^u\+([0-9a-f]*\?*)$/iD', $text, $found )
		) {
			return [
				intval( str_replace( '?', '0', $found[1] ), 16 ),
				intval( str_replace( '?', 'f', $found[1] ), 16 ),
			];
		}

		return null;
	}

	/**
	 * Wrap a codepoint in a named single-token submatch
	 *
	 * @param int $codepoint Codepoint to store in an integer T_NUMBER token
	 * @param string $name Capture name for the submatch
	 * @return GrammarMatch
	 */
	private function makeCodepointSubmatch( $codepoint, $name ) {
		$token = new Token( Token::T_NUMBER, [ 'value' => $codepoint, 'typeFlag' => 'integer' ] );
		return new GrammarMatch(
			new ComponentValueList( $token->toComponentValueArray() ),
			0,
			1,
			$name,
			[]
		);
	}
}

View File

@ -6,12 +6,13 @@
namespace Wikimedia\CSS\Grammar;
use InvalidArgumentException;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\CSSFunction;
use Wikimedia\CSS\Objects\Token;
/**
* Matcher that matches a CSSFunction for a url or a T_URL token
* Matcher that matches a CSSFunction for a URL or a T_URL token
*/
class UrlMatcher extends FunctionMatcher {
/** @var callable|null */
@ -28,7 +29,7 @@ class UrlMatcher extends FunctionMatcher {
if ( isset( $options['modifierMatcher'] ) ) {
$modifierMatcher = $options['modifierMatcher'];
if ( !$modifierMatcher instanceof Matcher ) {
throw new \InvalidArgumentException( 'modifierMatcher must be a Matcher' );
throw new InvalidArgumentException( 'modifierMatcher must be a Matcher' );
}
} else {
$modifierMatcher = new NothingMatcher;
@ -54,13 +55,14 @@ class UrlMatcher extends FunctionMatcher {
] );
}
/** @inheritDoc */
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
// First, is it a URL token?
$cv = isset( $values[$start] ) ? $values[$start] : null;
$cv = $values[$start] ?? null;
if ( $cv instanceof Token && $cv->type() === Token::T_URL ) {
$url = $cv->value();
if ( !$this->urlCheck || call_user_func( $this->urlCheck, $url, [] ) ) {
$match = new Match( $values, $start, 1, 'url' );
$match = new GrammarMatch( $values, $start, 1, 'url' );
yield $this->makeMatch( $values, $start, $this->next( $values, $start, $options ), $match );
}
return;
@ -73,12 +75,12 @@ class UrlMatcher extends FunctionMatcher {
$modifiers = [];
foreach ( $match->getCapturedMatches() as $submatch ) {
$cvs = $submatch->getValues();
if ( $submatch->getName() === 'url' ) {
if ( $cvs[0] instanceof Token && $submatch->getName() === 'url' ) {
$url = $cvs[0]->value();
} elseif ( $submatch->getName() === 'modifier' ) {
if ( $cvs[0] instanceof CSSFunction ) {
$modifiers[] = $cvs[0];
} elseif ( $cvs[0]->type() === Token::T_IDENT ) {
} elseif ( $cvs[0] instanceof Token && $cvs[0]->type() === Token::T_IDENT ) {
$modifiers[] = $cvs[0];
}
}

View File

@ -26,9 +26,11 @@ class WhitespaceMatcher extends Matcher {
$this->significant = !empty( $options['significant'] );
}
/** @inheritDoc */
protected function generateMatches( ComponentValueList $values, $start, array $options ) {
$end = $start;
while ( isset( $values[$end] ) &&
// @phan-suppress-next-line PhanNonClassMethodCall False positive
$values[$end] instanceof Token && $values[$end]->type() === Token::T_WHITESPACE
) {
$end++;
@ -46,6 +48,7 @@ class WhitespaceMatcher extends Matcher {
if ( $end === $start ) {
$start--;
if ( !$options['skip-whitespace'] || !isset( $values[$start] ) ||
// @phan-suppress-next-line PhanNonClassMethodCall False positive
!$values[$start] instanceof Token || $values[$start]->type() !== Token::T_WHITESPACE
) {
return;
@ -54,7 +57,7 @@ class WhitespaceMatcher extends Matcher {
// Return the match. Include a 'significantWhitespace' capture.
yield $this->makeMatch( $values, $start, $end,
new Match( $values, $start, 1, 'significantWhitespace' )
new GrammarMatch( $values, $start, 1, 'significantWhitespace' )
);
}
}

View File

@ -6,6 +6,7 @@
namespace Wikimedia\CSS\Objects;
use InvalidArgumentException;
use Wikimedia\CSS\Util;
/**
@ -27,7 +28,7 @@ class AtRule extends Rule implements DeclarationOrAtRule {
*/
public function __construct( Token $token ) {
if ( $token->type() !== Token::T_AT_KEYWORD ) {
throw new \InvalidArgumentException(
throw new InvalidArgumentException(
"At rule must begin with an at-keyword token, got {$token->type()}"
);
}
@ -38,9 +39,9 @@ class AtRule extends Rule implements DeclarationOrAtRule {
}
public function __clone() {
$this->prelude = clone( $this->prelude );
$this->prelude = clone $this->prelude;
if ( $this->block ) {
$this->block = clone( $this->block );
$this->block = clone $this->block;
}
}
@ -83,13 +84,14 @@ class AtRule extends Rule implements DeclarationOrAtRule {
*/
public function setBlock( SimpleBlock $block = null ) {
if ( $block->getStartTokenType() !== Token::T_LEFT_BRACE ) {
throw new \InvalidArgumentException( 'At-rule block must be delimited by {}' );
throw new InvalidArgumentException( 'At-rule block must be delimited by {}' );
}
$this->block = $block;
}
/**
* @param string $function Function to call, toTokenArray() or toComponentValueArray()
* @return Token[]|ComponentValue[]
*/
private function toTokenOrCVArray( $function ) {
$ret = [];
@ -97,7 +99,7 @@ class AtRule extends Rule implements DeclarationOrAtRule {
$ret[] = new Token(
Token::T_AT_KEYWORD, [ 'value' => $this->name, 'position' => [ $this->line, $this->pos ] ]
);
// Manually looping and appending turns out to be noticably faster than array_merge.
// Manually looping and appending turns out to be noticeably faster than array_merge.
foreach ( $this->prelude->$function() as $v ) {
$ret[] = $v;
}
@ -112,10 +114,12 @@ class AtRule extends Rule implements DeclarationOrAtRule {
return $ret;
}
/** @inheritDoc */
public function toTokenArray() {
return $this->toTokenOrCVArray( __FUNCTION__ );
}
/** @inheritDoc */
public function toComponentValueArray() {
return $this->toTokenOrCVArray( __FUNCTION__ );
}

View File

@ -6,6 +6,7 @@
namespace Wikimedia\CSS\Objects;
use InvalidArgumentException;
use Wikimedia\CSS\Util;
/**
@ -24,18 +25,18 @@ class CSSFunction extends ComponentValue {
*/
public function __construct( Token $token ) {
if ( $token->type() !== Token::T_FUNCTION ) {
throw new \InvalidArgumentException(
throw new InvalidArgumentException(
"CSS function must begin with a function token, got {$token->type()}"
);
}
list( $this->line, $this->pos ) = $token->getPosition();
[ $this->line, $this->pos ] = $token->getPosition();
$this->name = $token->value();
$this->value = new ComponentValueList();
}
public function __clone() {
$this->value = clone( $this->value );
$this->value = clone $this->value;
}
/**
@ -48,7 +49,7 @@ class CSSFunction extends ComponentValue {
}
/**
* Return the functions's name
* Return the function's name
* @return string
*/
public function getName() {
@ -74,7 +75,7 @@ class CSSFunction extends ComponentValue {
Token::T_FUNCTION,
[ 'value' => $this->name, 'position' => [ $this->line, $this->pos ] ]
);
// Manually looping and appending turns out to be noticably faster than array_merge.
// Manually looping and appending turns out to be noticeably faster than array_merge.
foreach ( $this->value->toTokenArray() as $v ) {
$ret[] = $v;
}

View File

@ -6,12 +6,17 @@
namespace Wikimedia\CSS\Objects;
use ArrayAccess;
use Countable;
use InvalidArgumentException;
use OutOfBoundsException;
use SeekableIterator;
use Wikimedia\CSS\Util;
/**
* Represent a list of CSS objects
*/
class CSSObjectList implements \Countable, \SeekableIterator, \ArrayAccess, CSSObject {
class CSSObjectList implements Countable, SeekableIterator, ArrayAccess, CSSObject {
/** @var string The specific class of object contained */
protected static $objectType;
@ -41,7 +46,7 @@ class CSSObjectList implements \Countable, \SeekableIterator, \ArrayAccess, CSSO
/**
* Insert one or more objects into the list
* @param CSSObject|CSSObject[]|CSSObjectList $objects An object to add, or an array of objects.
* @param int $index Insert the objects at this index. If omitted, the
* @param int|null $index Insert the objects at this index. If omitted, the
* objects are added at the end.
*/
public function add( $objects, $index = null ) {
@ -53,7 +58,7 @@ class CSSObjectList implements \Countable, \SeekableIterator, \ArrayAccess, CSSO
static::testObjects( $objects );
} else {
if ( !$objects instanceof static::$objectType ) {
throw new \InvalidArgumentException(
throw new InvalidArgumentException(
static::class . ' may only contain instances of ' . static::$objectType . '.'
);
}
@ -64,7 +69,7 @@ class CSSObjectList implements \Countable, \SeekableIterator, \ArrayAccess, CSSO
if ( $index === null ) {
$index = count( $this->objects );
} elseif ( $index < 0 || $index > count( $this->objects ) ) {
throw new \OutOfBoundsException( 'Index is out of range.' );
throw new OutOfBoundsException( 'Index is out of range.' );
}
array_splice( $this->objects, $index, 0, $objects );
@ -80,7 +85,7 @@ class CSSObjectList implements \Countable, \SeekableIterator, \ArrayAccess, CSSO
*/
public function remove( $index ) {
if ( $index < 0 || $index >= count( $this->objects ) ) {
throw new \OutOfBoundsException( 'Index is out of range.' );
throw new OutOfBoundsException( 'Index is out of range.' );
}
$ret = $this->objects[$index];
array_splice( $this->objects, $index, 1 );
@ -111,88 +116,101 @@ class CSSObjectList implements \Countable, \SeekableIterator, \ArrayAccess, CSSO
$this->offset = 0;
}
// \Countable interface
// Countable interface
public function count() {
/** @inheritDoc */
public function count(): int {
return count( $this->objects );
}
// \SeekableIterator interface
// SeekableIterator interface
public function seek( $offset ) {
/** @inheritDoc */
public function seek( int $offset ): void {
if ( $offset < 0 || $offset >= count( $this->objects ) ) {
throw new \OutOfBoundsException( 'Offset is out of range.' );
throw new OutOfBoundsException( 'Offset is out of range.' );
}
$this->offset = $offset;
}
/** @inheritDoc */
#[\ReturnTypeWillChange]
public function current() {
return isset( $this->objects[$this->offset] ) ? $this->objects[$this->offset] : null;
return $this->objects[$this->offset] ?? null;
}
public function key() {
/** @inheritDoc */
public function key(): int {
return $this->offset;
}
public function next() {
/** @inheritDoc */
public function next(): void {
$this->offset++;
}
public function rewind() {
/** @inheritDoc */
public function rewind(): void {
$this->offset = 0;
}
public function valid() {
/** @inheritDoc */
public function valid(): bool {
return isset( $this->objects[$this->offset] );
}
// \ArrayAccess interface
// ArrayAccess interface
public function offsetExists( $offset ) {
/** @inheritDoc */
public function offsetExists( $offset ): bool {
return isset( $this->objects[$offset] );
}
public function offsetGet( $offset ) {
/** @inheritDoc */
public function offsetGet( $offset ): CSSObject {
if ( !is_numeric( $offset ) || (float)(int)$offset !== (float)$offset ) {
throw new \InvalidArgumentException( 'Offset must be an integer.' );
throw new InvalidArgumentException( 'Offset must be an integer.' );
}
if ( $offset < 0 || $offset > count( $this->objects ) ) {
throw new \OutOfBoundsException( 'Offset is out of range.' );
throw new OutOfBoundsException( 'Offset is out of range.' );
}
return $this->objects[$offset];
}
public function offsetSet( $offset, $value ) {
/** @inheritDoc */
public function offsetSet( $offset, $value ): void {
if ( !$value instanceof static::$objectType ) {
throw new \InvalidArgumentException(
throw new InvalidArgumentException(
static::class . ' may only contain instances of ' . static::$objectType . '.'
);
}
static::testObjects( [ $value ] );
if ( !is_numeric( $offset ) || (float)(int)$offset !== (float)$offset ) {
throw new \InvalidArgumentException( 'Offset must be an integer.' );
throw new InvalidArgumentException( 'Offset must be an integer.' );
}
if ( $offset < 0 || $offset > count( $this->objects ) ) {
throw new \OutOfBoundsException( 'Offset is out of range.' );
throw new OutOfBoundsException( 'Offset is out of range.' );
}
$this->objects[$offset] = $value;
}
public function offsetUnset( $offset ) {
/** @inheritDoc */
public function offsetUnset( $offset ): void {
if ( isset( $this->objects[$offset] ) && $offset !== count( $this->objects ) - 1 ) {
throw new \OutOfBoundsException( 'Cannot leave holes in the list.' );
throw new OutOfBoundsException( 'Cannot leave holes in the list.' );
}
unset( $this->objects[$offset] );
}
// CSSObject interface
/** @inheritDoc */
public function getPosition() {
$ret = null;
foreach ( $this->objects as $obj ) {
$pos = $obj->getPosition();
if ( $pos[0] >= 0 && (
!$ret || $pos[0] < $ret[0] || $pos[0] === $ret[0] && $pos[1] < $ret[1]
!$ret || $pos[0] < $ret[0] || ( $pos[0] === $ret[0] && $pos[1] < $ret[1] )
) ) {
$ret = $pos;
}
@ -212,27 +230,31 @@ class CSSObjectList implements \Countable, \SeekableIterator, \ArrayAccess, CSSO
/**
* @param string $function Function to call, toTokenArray() or toComponentValueArray()
* @return Token[]|ComponentValue[]
*/
private function toTokenOrCVArray( $function ) {
$ret = [];
$l = count( $this->objects );
for ( $i = 0; $i < $l; $i++ ) {
// Manually looping and appending turns out to be noticably faster than array_merge.
foreach ( $this->objects[$i]->$function() as $v ) {
foreach ( $this->objects as $i => $iValue ) {
// Manually looping and appending turns out to be noticeably faster than array_merge.
foreach ( $iValue->$function() as $v ) {
$ret[] = $v;
}
$sep = $this->getSeparator( $this->objects[$i], $i + 1 < $l ? $this->objects[$i + 1] : null );
$sep = $this->getSeparator( $iValue, $i + 1 < $l ? $this->objects[$i + 1] : null );
foreach ( $sep as $v ) {
$ret[] = $v;
}
}
return $ret;
}
/** @inheritDoc */
public function toTokenArray() {
return $this->toTokenOrCVArray( __FUNCTION__ );
}
/** @inheritDoc */
public function toComponentValueArray() {
return $this->toTokenOrCVArray( __FUNCTION__ );
}

View File

@ -11,8 +11,11 @@ namespace Wikimedia\CSS\Objects;
*/
abstract class ComponentValue implements CSSObject {
/** @var int Line and position in the input where this component value starts */
protected $line = -1, $pos = -1;
/** @var int Line in the input where this component value starts */
protected $line = -1;
/** @var int Position in the input where this component value starts */
protected $pos = -1;
/**
* Get the position of this ComponentValue in the input stream
@ -22,6 +25,7 @@ abstract class ComponentValue implements CSSObject {
return [ $this->line, $this->pos ];
}
/** @inheritDoc */
public function toComponentValueArray() {
return [ $this ];
}

View File

@ -6,12 +6,18 @@
namespace Wikimedia\CSS\Objects;
use InvalidArgumentException;
/**
* Represent a list of CSS component values
*/
class ComponentValueList extends CSSObjectList {
/**
* @var string
*/
protected static $objectType = ComponentValue::class;
/** @inheritDoc */
protected static function testObjects( array $objects ) {
foreach ( $objects as $object ) {
$type = $object instanceof Token ? $object->type() : 'n/a';
@ -20,15 +26,16 @@ class ComponentValueList extends CSSObjectList {
case Token::T_LEFT_BRACKET:
case Token::T_LEFT_PAREN:
case Token::T_LEFT_BRACE:
throw new \InvalidArgumentException(
throw new InvalidArgumentException(
static::class . " may not contain tokens of type \"$type\"."
);
}
}
}
// Much simpler
/** @inheritDoc */
public function toComponentValueArray() {
// Much simpler
return $this->objects;
}
}

View File

@ -6,6 +6,7 @@
namespace Wikimedia\CSS\Objects;
use InvalidArgumentException;
use Wikimedia\CSS\Util;
/**
@ -13,8 +14,11 @@ use Wikimedia\CSS\Util;
*/
class Declaration implements DeclarationOrAtRule {
/** @var int Line and position in the input where this declaration starts */
protected $line = -1, $pos = -1;
/** @var int Line in the input where this declaration starts */
protected $line = -1;
/** @var int Position in the input where this declaration starts */
protected $pos = -1;
/** @var string */
protected $name;
@ -30,18 +34,18 @@ class Declaration implements DeclarationOrAtRule {
*/
public function __construct( Token $token ) {
if ( $token->type() !== Token::T_IDENT ) {
throw new \InvalidArgumentException(
throw new InvalidArgumentException(
"Declaration must begin with an ident token, got {$token->type()}"
);
}
list( $this->line, $this->pos ) = $token->getPosition();
[ $this->line, $this->pos ] = $token->getPosition();
$this->name = $token->value();
$this->value = new ComponentValueList();
}
public function __clone() {
$this->value = clone( $this->value );
$this->value = clone $this->value;
}
/**
@ -86,6 +90,7 @@ class Declaration implements DeclarationOrAtRule {
/**
* @param string $function Function to call, toTokenArray() or toComponentValueArray()
* @return Token[]|ComponentValue[]
*/
private function toTokenOrCVArray( $function ) {
$ret = [];
@ -95,7 +100,7 @@ class Declaration implements DeclarationOrAtRule {
[ 'value' => $this->name, 'position' => [ $this->line, $this->pos ] ]
);
$ret[] = $v = new Token( Token::T_COLON );
// Manually looping and appending turns out to be noticably faster than array_merge.
// Manually looping and appending turns out to be noticeably faster than array_merge.
foreach ( $this->value->$function() as $v ) {
$ret[] = $v;
}
@ -109,10 +114,12 @@ class Declaration implements DeclarationOrAtRule {
return $ret;
}
/** @inheritDoc */
public function toTokenArray() {
return $this->toTokenOrCVArray( __FUNCTION__ );
}
/** @inheritDoc */
public function toComponentValueArray() {
return $this->toTokenOrCVArray( __FUNCTION__ );
}

View File

@ -10,16 +10,20 @@ namespace Wikimedia\CSS\Objects;
* Represent a list of declarations
*/
class DeclarationList extends CSSObjectList {
/**
* @var string
*/
protected static $objectType = Declaration::class;
/** @inheritDoc */
protected function getSeparator( CSSObject $left, CSSObject $right = null ) {
if ( $right ) {
return [
new Token( Token::T_SEMICOLON ),
new Token( Token::T_WHITESPACE, [ 'significant' => false ] ),
];
} else {
return [ new Token( Token::T_SEMICOLON, [ 'significant' => false ] ) ];
}
return [ new Token( Token::T_SEMICOLON, [ 'significant' => false ] ) ];
}
}

View File

@ -10,8 +10,12 @@ namespace Wikimedia\CSS\Objects;
* Represent a list of CSS declarations and at-rules
*/
class DeclarationOrAtRuleList extends CSSObjectList {
/**
* @var string
*/
protected static $objectType = DeclarationOrAtRule::class;
/** @inheritDoc */
protected function getSeparator( CSSObject $left, CSSObject $right = null ) {
$ret = [];
if ( $left instanceof Declaration ) {

View File

@ -6,6 +6,7 @@
namespace Wikimedia\CSS\Objects;
use InvalidArgumentException;
use Wikimedia\CSS\Util;
/**
@ -19,6 +20,7 @@ class QualifiedRule extends Rule {
/** @var SimpleBlock */
protected $block;
/** @inheritDoc */
public function __construct( Token $token = null ) {
parent::__construct( $token ?: new Token( Token::T_EOF ) );
$this->prelude = new ComponentValueList();
@ -26,8 +28,8 @@ class QualifiedRule extends Rule {
}
public function __clone() {
$this->prelude = clone( $this->prelude );
$this->block = clone( $this->block );
$this->prelude = clone $this->prelude;
$this->block = clone $this->block;
}
/**
@ -48,22 +50,23 @@ class QualifiedRule extends Rule {
/**
* Set the block
* @param SimpleBlock $block
* @param SimpleBlock|null $block
*/
public function setBlock( SimpleBlock $block = null ) {
if ( $block->getStartTokenType() !== Token::T_LEFT_BRACE ) {
throw new \InvalidArgumentException( 'Qualified rule block must be delimited by {}' );
throw new InvalidArgumentException( 'Qualified rule block must be delimited by {}' );
}
$this->block = $block;
}
/**
* @param string $function Function to call, toTokenArray() or toComponentValueArray()
* @return Token[]|ComponentValue[]
*/
private function toTokenOrCVArray( $function ) {
$ret = [];
// Manually looping and appending turns out to be noticably faster than array_merge.
// Manually looping and appending turns out to be noticeably faster than array_merge.
foreach ( $this->prelude->$function() as $v ) {
$ret[] = $v;
}
@ -73,10 +76,12 @@ class QualifiedRule extends Rule {
return $ret;
}
/** @inheritDoc */
public function toTokenArray() {
return $this->toTokenOrCVArray( __FUNCTION__ );
}
/** @inheritDoc */
public function toComponentValueArray() {
return $this->toTokenOrCVArray( __FUNCTION__ );
}

View File

@ -6,21 +6,22 @@
namespace Wikimedia\CSS\Objects;
use Wikimedia\CSS\Util;
/**
* Represent an abstract CSS rule
*/
abstract class Rule implements CSSObject {
/** @var int Line and position in the input where this rule starts */
protected $line = -1, $pos = -1;
/** @var int Line in the input where this rule starts */
protected $line = -1;
/** @var int Position in the input where this rule starts */
protected $pos = -1;
/**
* @param Token $token Token starting the rule
*/
public function __construct( Token $token ) {
list( $this->line, $this->pos ) = $token->getPosition();
[ $this->line, $this->pos ] = $token->getPosition();
}
/**

View File

@ -10,8 +10,12 @@ namespace Wikimedia\CSS\Objects;
* Represent a list of CSS rules
*/
class RuleList extends CSSObjectList {
/**
* @var string
*/
protected static $objectType = Rule::class;
/** @inheritDoc */
protected function getSeparator( CSSObject $left, CSSObject $right = null ) {
return $right ? [ new Token( Token::T_WHITESPACE, [ 'significant' => false ] ) ] : [];
}

View File

@ -6,6 +6,7 @@
namespace Wikimedia\CSS\Objects;
use InvalidArgumentException;
use Wikimedia\CSS\Util;
/**
@ -14,7 +15,10 @@ use Wikimedia\CSS\Util;
class SimpleBlock extends ComponentValue {
/** @var string */
protected $startTokenType, $endTokenType;
protected $startTokenType;
/** @var string */
protected $endTokenType;
/** @var ComponentValueList */
protected $value;
@ -25,18 +29,18 @@ class SimpleBlock extends ComponentValue {
public function __construct( Token $token ) {
$this->endTokenType = static::matchingDelimiter( $token->type() );
if ( $this->endTokenType === null ) {
throw new \InvalidArgumentException(
throw new InvalidArgumentException(
'A SimpleBlock is delimited by either {}, [], or ().'
);
}
list( $this->line, $this->pos ) = $token->getPosition();
[ $this->line, $this->pos ] = $token->getPosition();
$this->startTokenType = $token->type();
$this->value = new ComponentValueList();
}
public function __clone() {
$this->value = clone( $this->value );
$this->value = clone $this->value;
}
/**
@ -51,7 +55,7 @@ class SimpleBlock extends ComponentValue {
/**
* Return the ending delimiter for a starting delimiter
* @param string Token::T_* constant
* @param string $delim Token::T_* constant
* @return string|null Matching Token::T_* constant, if any
*/
public static function matchingDelimiter( $delim ) {
@ -91,12 +95,13 @@ class SimpleBlock extends ComponentValue {
return $this->value;
}
/** @inheritDoc */
public function toTokenArray() {
$ret = [
new Token( $this->startTokenType, [ 'position' => [ $this->line, $this->pos ] ] ),
];
// Manually looping and appending turns out to be noticably faster than array_merge.
// Manually looping and appending turns out to be noticeably faster than array_merge.
$tokens = $this->value->toTokenArray();
if ( $tokens && $this->startTokenType === Token::T_LEFT_BRACE ) {
if ( $tokens[0]->type() !== Token::T_WHITESPACE ) {

View File

@ -7,7 +7,6 @@
namespace Wikimedia\CSS\Objects;
use Wikimedia\CSS\Util;
use Wikimedia\CSS\Sanitizer\Sanitizer;
/**
* Represent a stylesheet
@ -23,14 +22,14 @@ class Stylesheet implements CSSObject {
protected $ruleList;
/**
* @param RuleList $rules
* @param RuleList|null $rules
*/
public function __construct( RuleList $rules = null ) {
$this->ruleList = $rules ?: new RuleList();
}
public function __clone() {
$this->ruleList = clone( $this->ruleList );
$this->ruleList = clone $this->ruleList;
}
/**
@ -40,15 +39,18 @@ class Stylesheet implements CSSObject {
return $this->ruleList;
}
/** @inheritDoc */
public function getPosition() {
// Stylesheets don't really have a position
return [ 0, 0 ];
}
/** @inheritDoc */
public function toTokenArray() {
return $this->ruleList->toTokenArray();
}
/** @inheritDoc */
public function toComponentValueArray() {
return $this->ruleList->toComponentValueArray();
}

View File

@ -6,42 +6,38 @@
namespace Wikimedia\CSS\Objects;
use InvalidArgumentException;
use UnexpectedValueException;
/**
* Represent a CSS token
*/
class Token extends ComponentValue {
const T_IDENT = "ident";
const T_FUNCTION = "function";
const T_AT_KEYWORD = "at-keyword";
const T_HASH = "hash";
const T_STRING = "string";
const T_BAD_STRING = "bad-string";
const T_URL = "url";
const T_BAD_URL = "bad-url";
const T_DELIM = "delim";
const T_NUMBER = "number";
const T_PERCENTAGE = "percentage";
const T_DIMENSION = "dimension";
const T_UNICODE_RANGE = "unicode-range";
const T_INCLUDE_MATCH = "include-match";
const T_DASH_MATCH = "dash-match";
const T_PREFIX_MATCH = "prefix-match";
const T_SUFFIX_MATCH = "suffix-match";
const T_SUBSTRING_MATCH = "substring-match";
const T_COLUMN = "column";
const T_WHITESPACE = "whitespace";
const T_CDO = "CDO";
const T_CDC = "CDC";
const T_COLON = "colon";
const T_SEMICOLON = "semicolon";
const T_COMMA = "comma";
const T_LEFT_BRACKET = "[";
const T_RIGHT_BRACKET = "]";
const T_LEFT_PAREN = "(";
const T_RIGHT_PAREN = ")";
const T_LEFT_BRACE = "{";
const T_RIGHT_BRACE = "}";
const T_EOF = "EOF";
public const T_IDENT = "ident";
public const T_FUNCTION = "function";
public const T_AT_KEYWORD = "at-keyword";
public const T_HASH = "hash";
public const T_STRING = "string";
public const T_BAD_STRING = "bad-string";
public const T_URL = "url";
public const T_BAD_URL = "bad-url";
public const T_DELIM = "delim";
public const T_NUMBER = "number";
public const T_PERCENTAGE = "percentage";
public const T_DIMENSION = "dimension";
public const T_WHITESPACE = "whitespace";
public const T_CDO = "CDO";
public const T_CDC = "CDC";
public const T_COLON = "colon";
public const T_SEMICOLON = "semicolon";
public const T_COMMA = "comma";
public const T_LEFT_BRACKET = "[";
public const T_RIGHT_BRACKET = "]";
public const T_LEFT_PAREN = "(";
public const T_RIGHT_PAREN = ")";
public const T_LEFT_BRACE = "{";
public const T_RIGHT_BRACE = "}";
public const T_EOF = "EOF";
/** @var string One of the T_* constants */
protected $type;
@ -58,12 +54,12 @@ class Token extends ComponentValue {
/** @var string Unit for dimension tokens */
protected $unit = '';
/** @var int Start and end for unicode-range tokens */
protected $start = 0, $end = 0;
/** @var bool Whether this token is considered "significant" */
protected $significant = true;
/** @var int See ::urangeHack() */
private $urangeHack = 0;
/**
* @param string $type One of the T_* constants
* @param string|array $value Value of the token, or an array with the
@ -78,8 +74,6 @@ class Token extends ComponentValue {
* - representation: (string) String representation of the value for
* T_NUMBER, T_PERCENTAGE, and T_DIMENSION.
* - unit: (string) Unit for T_DIMENSION.
* - start: (int) Start code point for T_UNICODE_RANGE.
* - end: (int) End code point for T_UNICODE_RANGE.
* - significant: (bool) Whether the token is considered "significant"
*/
public function __construct( $type, $value = [] ) {
@ -89,11 +83,11 @@ class Token extends ComponentValue {
if ( isset( $value['position'] ) ) {
if ( !is_array( $value['position'] ) || count( $value['position'] ) !== 2 ) {
throw new \InvalidArgumentException( 'Position must be an array of two integers' );
throw new InvalidArgumentException( 'Position must be an array of two integers' );
}
list( $this->line, $this->pos ) = $value['position'];
[ $this->line, $this->pos ] = $value['position'];
if ( !is_int( $this->line ) || !is_int( $this->pos ) ) {
throw new \InvalidArgumentException( 'Position must be an array of two integers' );
throw new InvalidArgumentException( 'Position must be an array of two integers' );
}
}
if ( isset( $value['significant'] ) ) {
@ -108,20 +102,20 @@ class Token extends ComponentValue {
case self::T_STRING:
case self::T_URL:
if ( !isset( $value['value'] ) ) {
throw new \InvalidArgumentException( "Token type $this->type requires a value" );
throw new InvalidArgumentException( "Token type $this->type requires a value" );
}
$this->value = (string)$value['value'];
break;
case self::T_HASH:
if ( !isset( $value['value'] ) ) {
throw new \InvalidArgumentException( "Token type $this->type requires a value" );
throw new InvalidArgumentException( "Token type $this->type requires a value" );
}
if ( !isset( $value['typeFlag'] ) ) {
throw new \InvalidArgumentException( "Token type $this->type requires a typeFlag" );
throw new InvalidArgumentException( "Token type $this->type requires a typeFlag" );
}
if ( !in_array( $value['typeFlag'], [ 'id', 'unrestricted' ], true ) ) {
throw new \InvalidArgumentException( "Invalid type flag for Token type $this->type" );
throw new InvalidArgumentException( "Invalid type flag for Token type $this->type" );
}
$this->value = (string)$value['value'];
$this->typeFlag = $value['typeFlag'];
@ -129,11 +123,11 @@ class Token extends ComponentValue {
case self::T_DELIM:
if ( !isset( $value['value'] ) ) {
throw new \InvalidArgumentException( "Token type $this->type requires a value" );
throw new InvalidArgumentException( "Token type $this->type requires a value" );
}
$this->value = (string)$value['value'];
if ( mb_strlen( $this->value, 'UTF-8' ) !== 1 ) {
throw new \InvalidArgumentException(
throw new InvalidArgumentException(
"Value for Token type $this->type must be a single character"
);
}
@ -145,32 +139,32 @@ class Token extends ComponentValue {
if ( !isset( $value['value'] ) ||
!is_numeric( $value['value'] ) || !is_finite( $value['value'] )
) {
throw new \InvalidArgumentException( "Token type $this->type requires a numeric value" );
throw new InvalidArgumentException( "Token type $this->type requires a numeric value" );
}
if ( !isset( $value['typeFlag'] ) ) {
throw new \InvalidArgumentException( "Token type $this->type requires a typeFlag" );
throw new InvalidArgumentException( "Token type $this->type requires a typeFlag" );
}
$this->typeFlag = $value['typeFlag'];
if ( $this->typeFlag === 'integer' ) {
$this->value = (int)$value['value'];
if ( (float)$this->value !== (float)$value['value'] ) {
throw new \InvalidArgumentException(
throw new InvalidArgumentException(
"typeFlag is 'integer', but value supplied is not an integer"
);
}
} elseif ( $this->typeFlag === 'number' ) {
$this->value = (float)$value['value'];
} else {
throw new \InvalidArgumentException( "Invalid type flag for Token type $this->type" );
throw new InvalidArgumentException( "Invalid type flag for Token type $this->type" );
}
if ( isset( $value['representation'] ) ) {
if ( !is_numeric( $value['representation'] ) ) {
throw new \InvalidArgumentException( 'Representation must be numeric' );
throw new InvalidArgumentException( 'Representation must be numeric' );
}
$this->representation = $value['representation'];
if ( (float)$this->representation !== (float)$this->value ) {
throw new \InvalidArgumentException(
throw new InvalidArgumentException(
"Representation \"$this->representation\" does not match value \"$this->value\""
);
}
@ -178,36 +172,14 @@ class Token extends ComponentValue {
if ( $type === self::T_DIMENSION ) {
if ( !isset( $value['unit'] ) ) {
throw new \InvalidArgumentException( "Token type $this->type requires a unit" );
throw new InvalidArgumentException( "Token type $this->type requires a unit" );
}
$this->unit = $value['unit'];
}
break;
case self::T_UNICODE_RANGE:
if ( !isset( $value['start'] ) || !is_int( $value['start'] ) ) {
throw new \InvalidArgumentException(
"Token type $this->type requires a starting code point as an integer"
);
}
$this->start = $value['start'];
if ( !isset( $value['end'] ) ) {
$this->end = $this->start;
} elseif ( !is_int( $value['end'] ) ) {
throw new \InvalidArgumentException( 'Ending code point must be an integer' );
} else {
$this->end = $value['end'];
}
break;
case self::T_BAD_STRING:
case self::T_BAD_URL:
case self::T_INCLUDE_MATCH:
case self::T_DASH_MATCH:
case self::T_PREFIX_MATCH:
case self::T_SUFFIX_MATCH:
case self::T_SUBSTRING_MATCH:
case self::T_COLUMN:
case self::T_WHITESPACE:
case self::T_CDO:
case self::T_CDC:
@ -228,13 +200,13 @@ class Token extends ComponentValue {
if ( isset( $value['typeFlag'] ) && $value['typeFlag'] !== '' ) {
$this->typeFlag = $value['typeFlag'];
if ( $this->typeFlag !== 'recursion-depth-exceeded' ) {
throw new \InvalidArgumentException( "Invalid type flag for Token type $this->type" );
throw new InvalidArgumentException( "Invalid type flag for Token type $this->type" );
}
}
break;
default:
throw new \InvalidArgumentException( "Unknown token type \"$this->type\"." );
throw new InvalidArgumentException( "Unknown token type \"$this->type\"." );
}
}
@ -278,14 +250,6 @@ class Token extends ComponentValue {
return $this->unit;
}
/**
* Get the unicode range for this T_UNICODE_RANGE token
* @return array [ int $start, int $end ]
*/
public function range() {
return [ $this->start, $this->end ];
}
/**
* Whether this token is considered "significant"
*
@ -309,22 +273,24 @@ class Token extends ComponentValue {
if ( $significant === $this->significant ) {
return $this;
}
$ret = clone( $this );
$ret = clone $this;
$ret->significant = $significant;
return $ret;
}
/**
* @inheritDoc
*
* A lone token is returned as a one-element array containing itself.
*/
public function toTokenArray() {
return [ $this ];
}
/** @inheritDoc */
public function toComponentValueArray() {
switch ( $this->type ) {
case self::T_FUNCTION:
case self::T_LEFT_BRACKET:
case self::T_LEFT_PAREN:
case self::T_LEFT_BRACE:
throw new \UnexpectedValueException(
throw new UnexpectedValueException(
"Token type \"$this->type\" is not valid in a ComponentValueList."
);
@ -341,20 +307,54 @@ class Token extends ComponentValue {
private static function escapeIdent( $s ) {
return preg_replace_callback(
'/
[^a-zA-Z0-9_\-\x{80}-\x{10ffff}] # Characters that are never allowed
| (?:^|(?<=^-))[0-9] # Digits are not allowed at the start of an identifier
| (?<=^-)- # Two dashes are not allowed at the start of an identifier
[^a-zA-Z0-9_\-\x{80}-\x{10ffff}] # Characters that are never allowed
| (?:^|(?<=^-))[0-9] # Digits are not allowed at the start of an identifier
| [\p{Z}\p{Cc}\p{Cf}\p{Co}\p{Cs}] # To be safe, control characters and whitespace
/ux',
function ( $m ) {
if ( $m[0] === "\n" || ctype_xdigit( $m[0] ) ) {
return sprintf( '\\%x ', ord( $m[0] ) );
}
return '\\' . $m[0];
},
[ __CLASS__, 'escapePregCallback' ],
$s
);
}
/**
 * Escape a string's content so it can be placed between double quotes
 *
 * What gets escaped, and why:
 * - The double quote, because it is the string delimiter.
 * - The backslash, because it is the escape character.
 * - Newline (\n), because a raw newline is not valid inside a string.
 * - Carriage return (\r), form feed (\f) and U+0000, because CSS's input
 *   conversion rules would otherwise change them.
 * - Other controls, format/private-use/surrogate code points and any
 *   separator other than U+0020: not required, but safer.
 * - Angle brackets, to make the result safer to embed in HTML.
 *
 * The replacement text for each match comes from self::escapePregCallback().
 *
 * @param string $s Raw string value
 * @return string Escaped value, without surrounding quotes
 */
private static function escapeString( $s ) {
	$needsEscape = '/[^ \P{Z}]|[\p{Cc}\p{Cf}\p{Co}\p{Cs}"\x5c<>]/u';
	$escaper = [ __CLASS__, 'escapePregCallback' ];

	return preg_replace_callback( $needsEscape, $escaper, $s );
}
/**
 * Callback for the escaping functions' preg_replace_callback() calls
 *
 * Produces a numeric escape (backslash, hex code point, then a space that
 * terminates the escape) for characters that must not, or should not,
 * follow a backslash literally:
 * - newlines, carriage returns, form feeds and other controls;
 * - hex digits, which would otherwise be read as a numeric escape;
 * - format, private-use and surrogate code points, and separators other
 *   than U+0020, because it is saner to do so;
 * - angle brackets, to make the output safer to embed in HTML.
 * Every other character is escaped by simply prefixing a backslash.
 *
 * @param array $m Matches; only $m[0], the matched text, is used
 * @return string Escaped replacement for $m[0]
 */
private static function escapePregCallback( $m ) {
	$char = $m[0];
	if ( preg_match( '/[^ \P{Z}]|[\p{Cc}\p{Cf}\p{Co}\p{Cs}0-9a-fA-F<>]/u', $char ) ) {
		// The pattern above runs in UTF-8 mode, so decode the code point as
		// UTF-8 explicitly rather than relying on mb_internal_encoding().
		return sprintf( '\\%x ', mb_ord( $char, 'UTF-8' ) );
	}
	return '\\' . $char;
}
public function __toString() {
switch ( $this->type ) {
case self::T_IDENT:
@ -369,29 +369,26 @@ class Token extends ComponentValue {
case self::T_HASH:
if ( $this->typeFlag === 'id' ) {
return '#' . self::escapeIdent( $this->value );
} else {
return '#' . preg_replace_callback( '/[^a-zA-Z0-9_\-\x{80}-\x{10ffff}]/u', function ( $m ) {
return $m[0] === "\n" ? '\\a ' : '\\' . $m[0];
}, $this->value );
}
return '#' . preg_replace_callback(
'/
[^a-zA-Z0-9_\-\x{80}-\x{10ffff}] # Characters that are never allowed
| [\p{Z}\p{Cc}\p{Cf}\p{Co}\p{Cs}] # To be safe, control characters and whitespace
/ux',
[ __CLASS__, 'escapePregCallback' ],
$this->value
);
case self::T_STRING:
// We could try to decide whether single or double quote is
// better, but it doesn't seem worth the effort.
return '"' . strtr( $this->value, [
'"' => '\\"',
'\\' => '\\\\',
"\n" => '\\a ',
] ) . '"';
return '"' . self::escapeString( $this->value ) . '"';
case self::T_URL:
// We could try to decide whether single or double quote is
// better, but it doesn't seem worth the effort.
return 'url("' . strtr( $this->value, [
'"' => '\\"',
'\\' => '\\\\',
"\n" => '\\a ',
] ) . '")';
return 'url("' . self::escapeString( $this->value ) . '")';
case self::T_BAD_STRING:
// It's supposed to round trip, so...
@ -435,41 +432,6 @@ class Token extends ComponentValue {
return $number . $unit;
case self::T_UNICODE_RANGE:
if ( $this->start === 0 && $this->end === 0xffffff ) {
return 'U+??????';
}
$fmt = 'U+%x';
for ( $b = 0; $b < 24; $b += 4, $fmt .= '?' ) {
$mask = ( 1 << $b ) - 1;
if (
( $this->start & $mask ) === 0 &&
( $this->end & $mask ) === $mask &&
( $this->start & ~$mask ) === ( $this->end & ~$mask )
) {
return sprintf( $fmt, $this->start >> $b );
}
}
return sprintf( 'U+%x-%x', $this->start, $this->end );
case self::T_INCLUDE_MATCH:
return '~=';
case self::T_DASH_MATCH:
return '|=';
case self::T_PREFIX_MATCH:
return '^=';
case self::T_SUFFIX_MATCH:
return '$=';
case self::T_SUBSTRING_MATCH:
return '*=';
case self::T_COLUMN:
return '||';
case self::T_WHITESPACE:
return ' ';
@ -500,13 +462,13 @@ class Token extends ComponentValue {
return '';
default:
throw new \UnexpectedValueException( "Unknown token type \"$this->type\"." );
throw new UnexpectedValueException( "Unknown token type \"$this->type\"." );
}
}
/**
* Indicate whether the two tokens need to be separated
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#serialization
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#serialization
* @param Token $firstToken
* @param Token $secondToken
* @return bool
@ -516,53 +478,69 @@ class Token extends ComponentValue {
static $sepTable = [
self::T_IDENT => [
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
self::T_PERCENTAGE, self::T_DIMENSION, self::T_UNICODE_RANGE, self::T_CDC, self::T_LEFT_PAREN
self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC, self::T_LEFT_PAREN,
// Internet Explorer is buggy in some contexts (T191134)
self::T_HASH,
],
self::T_AT_KEYWORD => [
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
self::T_PERCENTAGE, self::T_DIMENSION, self::T_UNICODE_RANGE, self::T_CDC
self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC,
],
self::T_HASH => [
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
self::T_PERCENTAGE, self::T_DIMENSION, self::T_UNICODE_RANGE, self::T_CDC
self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC,
// Internet Explorer is buggy in some contexts (T191134)
self::T_HASH,
],
self::T_DIMENSION => [
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
self::T_PERCENTAGE, self::T_DIMENSION, self::T_UNICODE_RANGE, self::T_CDC
self::T_PERCENTAGE, self::T_DIMENSION, self::T_CDC,
// Internet Explorer is buggy in some contexts (T191134)
self::T_HASH,
],
'#' => [
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
self::T_PERCENTAGE, self::T_DIMENSION, self::T_UNICODE_RANGE
self::T_PERCENTAGE, self::T_DIMENSION,
],
'-' => [
// Add '-' here from Editor's Draft, to go with the draft's
// adding of tokens beginning with "--" that we also picked up.
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_NUMBER,
self::T_PERCENTAGE, self::T_DIMENSION, self::T_UNICODE_RANGE
self::T_PERCENTAGE, self::T_DIMENSION,
],
self::T_NUMBER => [
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, self::T_NUMBER,
self::T_PERCENTAGE, self::T_DIMENSION, self::T_UNICODE_RANGE
self::T_PERCENTAGE, self::T_DIMENSION, '%',
// Internet Explorer is buggy in some contexts
self::T_HASH,
],
'@' => [
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-', self::T_UNICODE_RANGE
],
self::T_UNICODE_RANGE => [
self::T_IDENT, self::T_FUNCTION, self::T_NUMBER, self::T_PERCENTAGE, self::T_DIMENSION, '?'
self::T_IDENT, self::T_FUNCTION, self::T_URL, self::T_BAD_URL, '-',
],
'.' => [ self::T_NUMBER, self::T_PERCENTAGE, self::T_DIMENSION ],
'+' => [ self::T_NUMBER, self::T_PERCENTAGE, self::T_DIMENSION ],
'$' => [ '=' ],
'*' => [ '=' ],
'^' => [ '=' ],
'~' => [ '=' ],
'|' => [ '=', '|' ],
'/' => [ '*' ],
];
$t1 = $firstToken->type === Token::T_DELIM ? $firstToken->value : $firstToken->type;
$t2 = $secondToken->type === Token::T_DELIM ? $secondToken->value : $secondToken->type;
$t1 = $firstToken->type === self::T_DELIM ? $firstToken->value : $firstToken->type;
$t2 = $secondToken->type === self::T_DELIM ? $secondToken->value : $secondToken->type;
return isset( $sepTable[$t1] ) && in_array( $t2, $sepTable[$t1], true );
}
/**
 * Read, and optionally raise, the <urange> marker on this token
 *
 * The marker flags the 'U' T_IDENT that begins a <urange>, so that it can
 * later be serialized without extraneous comments.
 *
 * @internal
 * @see \Wikimedia\CSS\Util::stringify()
 * @see \Wikimedia\CSS\Grammar\UrangeMatcher
 * @param int|null $hack Value to record, or null to only read. The stored
 *  value never decreases: the larger of the stored and supplied values
 *  is kept.
 * @return int The value that was stored before this call
 */
public function urangeHack( $hack = null ) {
	$previous = $this->urangeHack;
	if ( $hack === null ) {
		return $previous;
	}
	$this->urangeHack = max( (int)$this->urangeHack, $hack );
	return $previous;
}
}

View File

@ -6,25 +6,35 @@
namespace Wikimedia\CSS\Objects;
use UnexpectedValueException;
use Wikimedia\CSS\Parser\Parser;
/**
* Represent a list of CSS tokens
*/
class TokenList extends CSSObjectList {
/**
* @var string
*/
protected static $objectType = Token::class;
// We can greatly simplify this, assuming no separator
/** @var Token[] The objects contained */
protected $objects;
/**
* @inheritDoc
*
* Returns the stored objects array unchanged.
*/
public function toTokenArray() {
// We can greatly simplify this, assuming no separator
return $this->objects;
}
// This one, though, is complicated
/** @inheritDoc */
public function toComponentValueArray() {
// This one, though, is complicated
$parser = Parser::newFromTokens( $this->objects );
$ret = $parser->parseComponentValueList();
if ( $parser->getParseErrors() ) {
$ex = new \UnexpectedValueException( 'TokenList cannot be converted to a ComponentValueList' );
$ex = new UnexpectedValueException( 'TokenList cannot be converted to a ComponentValueList' );
// @phan-suppress-next-line PhanUndeclaredProperty
$ex->parseErrors = $parser->getParseErrors();
throw $ex;
}

View File

@ -11,7 +11,7 @@ namespace Wikimedia\CSS\Parser;
*/
interface DataSource {
const EOF = '';
public const EOF = '';
/**
* Read a character from the data source.

View File

@ -6,21 +6,28 @@
namespace Wikimedia\CSS\Parser;
use InvalidArgumentException;
use UnexpectedValueException;
use UtfNormal\Constants;
use UtfNormal\Utils;
use Wikimedia\CSS\Objects\Token;
/**
* Parse CSS into tokens
*
* This implements the tokenizer from the CSS Syntax Module Level 3 candidate recommendation.
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/
*/
class DataSourceTokenizer implements Tokenizer {
/** @var DataSource */
protected $source;
/** @var int position in the input */
protected $line = 1, $pos = 0;
/** @var int line in the input */
protected $line = 1;
/** @var int position in the line in the input */
protected $pos = 0;
/** @var string|null|object The most recently consumed character */
protected $currentCharacter = null;
@ -42,7 +49,7 @@ class DataSourceTokenizer implements Tokenizer {
/**
* Read a character from the data source
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#input-preprocessing
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#input-preprocessing
* @return string One UTF-8 character, or empty string on EOF
*/
protected function nextChar() {
@ -50,17 +57,19 @@ class DataSourceTokenizer implements Tokenizer {
// Perform transformations per the spec
// Any U+0000 becomes U+FFFD
if ( $char === "\0" ) {
return \UtfNormal\Constants::UTF8_REPLACEMENT;
// Any U+0000 or surrogate code point becomes U+FFFD
if ( $char === "\0" || ( $char >= "\u{D800}" && $char <= "\u{DFFF}" ) ) {
return Constants::UTF8_REPLACEMENT;
}
// Any U+000D, U+000C, or pair of U+000D + U+000A becomes U+000A
if ( $char === "\f" ) { // U+000C
if ( $char === "\f" ) {
// U+000C
return "\n";
}
if ( $char === "\r" ) { // Either U+000D + U+000A or a lone U+000D
if ( $char === "\r" ) {
// Either U+000D + U+000A or a lone U+000D
$char2 = $this->source->readCharacter();
if ( $char2 !== "\n" ) {
$this->source->putBackCharacter( $char2 );
@ -90,13 +99,13 @@ class DataSourceTokenizer implements Tokenizer {
/**
* Reconsume the next character
*
* In more normal terms, this pushes a character back onto the data source
* In more normal terms, this pushes a character back onto the data source,
* so it will be read again for the next call to self::consumeCharacter().
*/
protected function reconsumeCharacter() {
// @codeCoverageIgnoreStart
if ( !is_string( $this->currentCharacter ) ) {
throw new \UnexpectedValueException( "[$this->line:$this->pos] Can't reconsume" );
throw new UnexpectedValueException( "[$this->line:$this->pos] Can't reconsume" );
}
// @codeCoverageIgnoreEnd
@ -128,10 +137,12 @@ class DataSourceTokenizer implements Tokenizer {
return $ret;
}
/**
* @inheritDoc
*
* Returns the current contents of $this->parseErrors.
*/
public function getParseErrors() {
return $this->parseErrors;
}
/**
* @inheritDoc
*
* Resets $this->parseErrors to an empty array.
*/
public function clearParseErrors() {
$this->parseErrors = [];
}
@ -162,10 +173,13 @@ class DataSourceTokenizer implements Tokenizer {
/**
* Read a token from the data source
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-token
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-token
* @return Token
* @suppress PhanPluginDuplicateAdjacentStatement,PhanPluginDuplicateSwitchCaseLooseEquality
*/
public function consumeToken() {
// We "consume comments" inline below, see `case '/'`.
$this->consumeCharacter();
$pos = [ 'position' => [ $this->line, $this->pos ] ];
@ -185,7 +199,7 @@ class DataSourceTokenizer implements Tokenizer {
return $this->consumeStringToken( $this->currentCharacter, $pos );
case '#':
list( $next, $next2, $next3 ) = $this->lookAhead();
[ $next, $next2, $next3 ] = $this->lookAhead();
if ( self::isNameCharacter( $this->nextCharacter ) ||
self::isValidEscape( $next, $next2 )
) {
@ -197,31 +211,15 @@ class DataSourceTokenizer implements Tokenizer {
return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );
case '$':
if ( $this->nextCharacter === '=' ) {
$this->consumeCharacter();
return new Token( Token::T_SUFFIX_MATCH, $pos );
}
return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );
case '(':
return new Token( Token::T_LEFT_PAREN, $pos );
case ')':
return new Token( Token::T_RIGHT_PAREN, $pos );
case '*':
if ( $this->nextCharacter === '=' ) {
$this->consumeCharacter();
return new Token( Token::T_SUBSTRING_MATCH, $pos );
}
return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );
case '+':
case '.':
list( $next, $next2, $next3 ) = $this->lookAhead();
[ $next, $next2, ] = $this->lookAhead();
if ( self::wouldStartNumber( $this->currentCharacter, $next, $next2 ) ) {
$this->reconsumeCharacter();
return $this->consumeNumericToken( $pos );
@ -233,7 +231,7 @@ class DataSourceTokenizer implements Tokenizer {
return new Token( Token::T_COMMA, $pos );
case '-':
list( $next, $next2, $next3 ) = $this->lookAhead();
[ $next, $next2, ] = $this->lookAhead();
if ( self::wouldStartNumber( $this->currentCharacter, $next, $next2 ) ) {
$this->reconsumeCharacter();
return $this->consumeNumericToken( $pos );
@ -257,15 +255,16 @@ class DataSourceTokenizer implements Tokenizer {
$this->consumeCharacter();
$this->consumeCharacter();
while ( $this->currentCharacter !== DataSource::EOF &&
// @phan-suppress-next-line PhanSuspiciousValueComparisonInLoop
!( $this->currentCharacter === '*' && $this->nextCharacter === '/' )
) {
$this->consumeCharacter();
}
if ( $this->currentCharacter === DataSource::EOF ) {
// Parse error from the editor's draft as of 2017-01-06
$this->parseError( 'unclosed-comment', $pos );
}
$this->consumeCharacter();
// @phan-suppress-next-line PhanPossiblyInfiniteRecursionSameParams
return $this->consumeToken();
}
@ -278,7 +277,7 @@ class DataSourceTokenizer implements Tokenizer {
return new Token( Token::T_SEMICOLON, $pos );
case '<':
list( $next, $next2, $next3 ) = $this->lookAhead();
[ $next, $next2, $next3 ] = $this->lookAhead();
if ( $next === '!' && $next2 === '-' && $next3 === '-' ) {
$this->consumeCharacter();
$this->consumeCharacter();
@ -289,7 +288,7 @@ class DataSourceTokenizer implements Tokenizer {
return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );
case '@':
list( $next, $next2, $next3 ) = $this->lookAhead();
[ $next, $next2, $next3 ] = $this->lookAhead();
if ( self::wouldStartIdentifier( $next, $next2, $next3 ) ) {
return new Token( Token::T_AT_KEYWORD, $pos + [ 'value' => $this->consumeName() ] );
}
@ -311,14 +310,6 @@ class DataSourceTokenizer implements Tokenizer {
case ']':
return new Token( Token::T_RIGHT_BRACKET, $pos );
case '^':
if ( $this->nextCharacter === '=' ) {
$this->consumeCharacter();
return new Token( Token::T_PREFIX_MATCH, $pos );
}
return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );
case '{':
return new Token( Token::T_LEFT_BRACE, $pos );
@ -338,40 +329,6 @@ class DataSourceTokenizer implements Tokenizer {
$this->reconsumeCharacter();
return $this->consumeNumericToken( $pos );
case 'u':
case 'U':
if ( $this->nextCharacter === '+' ) {
list( $next, $next2 ) = $this->lookAhead();
if ( self::isHexDigit( $next2 ) || $next2 === '?' ) {
$this->consumeCharacter();
return $this->consumeUnicodeRangeToken( $pos );
}
}
$this->reconsumeCharacter();
return $this->consumeIdentLikeToken( $pos );
case '|':
if ( $this->nextCharacter === '=' ) {
$this->consumeCharacter();
return new Token( Token::T_DASH_MATCH, $pos );
}
if ( $this->nextCharacter === '|' ) {
$this->consumeCharacter();
return new Token( Token::T_COLUMN, $pos );
}
return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );
case '~':
if ( $this->nextCharacter === '=' ) {
$this->consumeCharacter();
return new Token( Token::T_INCLUDE_MATCH, $pos );
}
return new Token( Token::T_DELIM, $pos + [ 'value' => $this->currentCharacter ] );
case DataSource::EOF:
return new Token( Token::T_EOF, $pos );
@ -387,14 +344,14 @@ class DataSourceTokenizer implements Tokenizer {
/**
* Consume a numeric token
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-numeric-token
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-numeric-token
* @param array $data Data for the new token (typically contains just 'position')
* @return Token
*/
protected function consumeNumericToken( array $data ) {
list( $data['representation'], $data['value'], $data['typeFlag'] ) = $this->consumeNumber();
[ $data['representation'], $data['value'], $data['typeFlag'] ] = $this->consumeNumber();
list( $next, $next2, $next3 ) = $this->lookAhead();
[ $next, $next2, $next3 ] = $this->lookAhead();
if ( self::wouldStartIdentifier( $next, $next2, $next3 ) ) {
return new Token( Token::T_DIMENSION, $data + [ 'unit' => $this->consumeName() ] );
} elseif ( $this->nextCharacter === '%' ) {
@ -407,10 +364,7 @@ class DataSourceTokenizer implements Tokenizer {
/**
* Consume an ident-like token
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-an-ident-like-token
* @note Per the draft as of January 2017, quoted URLs are parsed as
* functions named 'url'. This is needed in order to implement the `<url>`
* type in the [Values specification](https://www.w3.org/TR/2016/CR-css-values-3-20160929/#urls).
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-ident-like-token
* @param array $data Data for the new token (typically contains just 'position')
* @return Token
*/
@ -422,14 +376,14 @@ class DataSourceTokenizer implements Tokenizer {
if ( !strcasecmp( $name, 'url' ) ) {
while ( true ) {
list( $next, $next2 ) = $this->lookAhead();
[ $next, $next2 ] = $this->lookAhead();
if ( !self::isWhitespace( $next ) || !self::isWhitespace( $next2 ) ) {
break;
}
$this->consumeCharacter();
}
if ( $next !== '"' && $next !== '\'' &&
!( self::isWhitespace( $next ) && ( $next2 === '"' || $next2=== '\'' ) )
!( self::isWhitespace( $next ) && ( $next2 === '"' || $next2 === '\'' ) )
) {
return $this->consumeUrlToken( $data );
}
@ -446,7 +400,7 @@ class DataSourceTokenizer implements Tokenizer {
*
* This assumes the leading quote or apostrophe has already been consumed.
*
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-string-token
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-string-token
* @param string $endChar Ending character of the string
* @param array $data Data for the new token (typically contains just 'position')
* @return Token
@ -458,7 +412,6 @@ class DataSourceTokenizer implements Tokenizer {
$this->consumeCharacter();
switch ( $this->currentCharacter ) {
case DataSource::EOF:
// Parse error from the editor's draft as of 2017-01-06
$this->parseError( 'unclosed-string', $data );
break 2;
@ -473,8 +426,6 @@ class DataSourceTokenizer implements Tokenizer {
case '\\':
if ( $this->nextCharacter === DataSource::EOF ) {
// Do nothing
// Parse error from the editor's draft as of 2017-01-06
$this->parseError( 'bad-escape' );
} elseif ( $this->nextCharacter === "\n" ) {
// Consume it
$this->consumeCharacter();
@ -482,7 +433,7 @@ class DataSourceTokenizer implements Tokenizer {
$data['value'] .= $this->consumeEscape();
} else {
// @codeCoverageIgnoreStart
throw new \UnexpectedValueException( "[$this->line:$this->pos] Unexpected state" );
throw new UnexpectedValueException( "[$this->line:$this->pos] Unexpected state" );
// @codeCoverageIgnoreEnd
}
break;
@ -493,6 +444,7 @@ class DataSourceTokenizer implements Tokenizer {
}
}
// @phan-suppress-next-line PhanPluginUnreachableCode Reached by break 2
return new Token( Token::T_STRING, $data );
}
@ -501,8 +453,7 @@ class DataSourceTokenizer implements Tokenizer {
*
* This assumes the leading "url(" has already been consumed.
*
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-url-token
* @note Per the draft as of January 2017, this does not handle quoted URL tokens.
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-url-token
* @param array $data Data for the new token (typically contains just 'position')
* @return Token
*/
@ -516,29 +467,23 @@ class DataSourceTokenizer implements Tokenizer {
}
// 3.
if ( $this->nextCharacter === DataSource::EOF ) {
// Parse error from the editor's draft as of 2017-01-06
$this->parseError( 'unclosed-url', $data );
return new Token( Token::T_URL, $data );
}
// 4. (removed in draft, this was formerly the parsing for a quoted URL token)
// 5. (renumbered as 4 in the draft)
while ( true ) {
$this->consumeCharacter();
switch ( $this->currentCharacter ) {
case DataSource::EOF:
// Parse error from the editor's draft as of 2017-01-06
$this->parseError( 'unclosed-url', $data );
break 2;
// @codeCoverageIgnoreStart
case ')':
// @codeCoverageIgnoreEnd
break 2;
// @codeCoverageIgnoreStart
case "\n":
case "\t":
case ' ':
// @codeCoverageIgnoreEnd
while ( self::isWhitespace( $this->nextCharacter ) ) {
$this->consumeCharacter();
}
@ -546,7 +491,6 @@ class DataSourceTokenizer implements Tokenizer {
$this->consumeCharacter();
break 2;
} elseif ( $this->nextCharacter === DataSource::EOF ) {
// Parse error from the editor's draft as of 2017-01-06
$this->consumeCharacter();
$this->parseError( 'unclosed-url', $data );
break 2;
@ -554,16 +498,19 @@ class DataSourceTokenizer implements Tokenizer {
$this->consumeBadUrlRemnants();
return new Token( Token::T_BAD_URL, [ 'value' => '' ] + $data );
}
break;
// @codeCoverageIgnoreStart
case '"':
case '\'':
case '(':
// @codeCoverageIgnoreEnd
$this->parseError( 'bad-character-in-url' );
$this->consumeBadUrlRemnants();
return new Token( Token::T_BAD_URL, [ 'value' => '' ] + $data );
// @codeCoverageIgnoreStart
case '\\':
// @codeCoverageIgnoreEnd
if ( self::isValidEscape( $this->currentCharacter, $this->nextCharacter ) ) {
$data['value'] .= $this->consumeEscape();
} else {
@ -585,12 +532,13 @@ class DataSourceTokenizer implements Tokenizer {
}
}
// @phan-suppress-next-line PhanPluginUnreachableCode Reached by break 2
return new Token( Token::T_URL, $data );
}
/**
* Clean up after finding an error in a URL
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-the-remnants-of-a-bad-url
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-remnants-of-bad-url
*/
protected function consumeBadUrlRemnants() {
while ( true ) {
@ -604,61 +552,9 @@ class DataSourceTokenizer implements Tokenizer {
}
}
/**
* Consume a unicode-range token
*
* This assumes the initial "u" has been consumed (currentCharacter is the '+'),
* and the next codepoint is verified to be a hex digit or "?".
*
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-unicode-range-token
* @param array $data Data for the new token (typically contains just 'position')
* @return Token
*/
protected function consumeUnicodeRangeToken( array $data ) {
// 1.
$v = '';
while ( strlen( $v ) < 6 && self::isHexDigit( $this->nextCharacter ) ) {
$this->consumeCharacter();
$v .= $this->currentCharacter;
}
$anyQ = false;
while ( strlen( $v ) < 6 && $this->nextCharacter === '?' ) {
$anyQ = true;
$this->consumeCharacter();
$v .= $this->currentCharacter;
}
if ( $anyQ ) {
return new Token( Token::T_UNICODE_RANGE, $data + [
'start' => intval( str_replace( '?', '0', $v ), 16 ),
'end' => intval( str_replace( '?', 'F', $v ), 16 ),
] );
}
$data['start'] = intval( $v, 16 );
// 2.
list( $next, $next2 ) = $this->lookAhead();
if ( $next === '-' && self::isHexDigit( $next2 ) ) {
$this->consumeCharacter();
$v = '';
while ( strlen( $v ) < 6 && self::isHexDigit( $this->nextCharacter ) ) {
$this->consumeCharacter();
$v .= $this->currentCharacter;
}
$data['end'] = intval( $v, 16 );
} else {
// 3.
$data['end'] = $data['start'];
}
// 4.
return new Token( Token::T_UNICODE_RANGE, $data );
}
/**
* Indicate if a character is whitespace
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#whitespace
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#whitespace
* @param string $char A single UTF-8 character
* @return bool
*/
@ -668,7 +564,7 @@ class DataSourceTokenizer implements Tokenizer {
/**
* Indicate if a character is a name-start code point
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#name-start-code-point
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#name-start-code-point
* @param string $char A single UTF-8 character
* @return bool
*/
@ -676,14 +572,14 @@ class DataSourceTokenizer implements Tokenizer {
// Every non-ASCII character is a name start character, so we can just
// check the first byte.
$char = ord( $char );
return $char >= 0x41 && $char <= 0x5a ||
$char >= 0x61 && $char <= 0x7a ||
return ( $char >= 0x41 && $char <= 0x5a ) ||
( $char >= 0x61 && $char <= 0x7a ) ||
$char >= 0x80 || $char === 0x5f;
}
/**
* Indicate if a character is a name code point
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#name-code-point
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#name-code-point
* @param string $char A single UTF-8 character
* @return bool
*/
@ -691,15 +587,15 @@ class DataSourceTokenizer implements Tokenizer {
// Every non-ASCII character is a name character, so we can just check
// the first byte.
$char = ord( $char );
return $char >= 0x41 && $char <= 0x5a ||
$char >= 0x61 && $char <= 0x7a ||
$char >= 0x30 && $char <= 0x39 ||
return ( $char >= 0x41 && $char <= 0x5a ) ||
( $char >= 0x61 && $char <= 0x7a ) ||
( $char >= 0x30 && $char <= 0x39 ) ||
$char >= 0x80 || $char === 0x5f || $char === 0x2d;
}
/**
* Indicate if a character is non-printable
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#non-printable-code-point
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#non-printable-code-point
* @param string $char A single UTF-8 character
* @return bool
*/
@ -707,15 +603,15 @@ class DataSourceTokenizer implements Tokenizer {
// No non-ASCII character is non-printable, so we can just check the
// first byte.
$char = ord( $char );
return $char >= 0x00 && $char <= 0x08 ||
return ( $char >= 0x00 && $char <= 0x08 ) ||
$char === 0x0b ||
$char >= 0x0e && $char <= 0x1f ||
( $char >= 0x0e && $char <= 0x1f ) ||
$char === 0x7f;
}
/**
* Indicate if a character is a digit
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#digit
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#digit
* @param string $char A single UTF-8 character
* @return bool
*/
@ -728,7 +624,7 @@ class DataSourceTokenizer implements Tokenizer {
/**
* Indicate if a character is a hex digit
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#hex-digit
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#hex-digit
* @param string $char A single UTF-8 character
* @return bool
*/
@ -736,14 +632,14 @@ class DataSourceTokenizer implements Tokenizer {
// No non-ASCII character is a hex digit, so we can just check the
// first byte.
$char = ord( $char );
return $char >= 0x30 && $char <= 0x39 ||
$char >= 0x41 && $char <= 0x46 ||
$char >= 0x61 && $char <= 0x66;
return ( $char >= 0x30 && $char <= 0x39 ) ||
( $char >= 0x41 && $char <= 0x46 ) ||
( $char >= 0x61 && $char <= 0x66 );
}
/**
* Determine if two characters constitute a valid escape
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#starts-with-a-valid-escape
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#starts-with-a-valid-escape
* @param string $char1
* @param string $char2
* @return bool
@ -754,7 +650,7 @@ class DataSourceTokenizer implements Tokenizer {
/**
* Determine if three characters would start an identifier
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#would-start-an-identifier
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#would-start-an-identifier
* @param string $char1
* @param string $char2
* @param string $char3
@ -762,7 +658,6 @@ class DataSourceTokenizer implements Tokenizer {
*/
protected static function wouldStartIdentifier( $char1, $char2, $char3 ) {
if ( $char1 === '-' ) {
// Added the possibility for an identifier beginning with "--" per the draft.
return self::isNameStartCharacter( $char2 ) || $char2 === '-' ||
self::isValidEscape( $char2, $char3 );
} elseif ( self::isNameStartCharacter( $char1 ) ) {
@ -776,7 +671,7 @@ class DataSourceTokenizer implements Tokenizer {
/**
* Determine if three characters would start a number
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#starts-with-a-number
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#starts-with-a-number
* @param string $char1
* @param string $char2
* @param string $char3
@ -785,7 +680,7 @@ class DataSourceTokenizer implements Tokenizer {
protected static function wouldStartNumber( $char1, $char2, $char3 ) {
if ( $char1 === '+' || $char1 === '-' ) {
return self::isDigit( $char2 ) ||
$char2 === '.' && self::isDigit( $char3 );
( $char2 === '.' && self::isDigit( $char3 ) );
} elseif ( $char1 === '.' ) {
return self::isDigit( $char2 );
// @codeCoverageIgnoreStart
@ -801,7 +696,7 @@ class DataSourceTokenizer implements Tokenizer {
*
* This assumes the leading backslash is consumed.
*
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-an-escaped-code-point
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-escaped-code-point
* @return string Escaped character
*/
protected function consumeEscape() {
@ -809,12 +704,6 @@ class DataSourceTokenizer implements Tokenizer {
$this->consumeCharacter();
// @codeCoverageIgnoreStart
if ( $this->currentCharacter === "\n" ) {
throw new \UnexpectedValueException( "[$this->line:$this->pos] Unexpected newline" );
}
// @codeCoverageIgnoreEnd
// 1-6 hexits, plus one optional whitespace character
if ( self::isHexDigit( $this->currentCharacter ) ) {
$num = $this->currentCharacter;
@ -827,16 +716,15 @@ class DataSourceTokenizer implements Tokenizer {
}
$num = intval( $num, 16 );
if ( $num === 0 || $num >= 0xd800 && $num <= 0xdfff || $num > 0x10ffff ) {
return \UtfNormal\Constants::UTF8_REPLACEMENT;
if ( $num === 0 || ( $num >= 0xd800 && $num <= 0xdfff ) || $num > 0x10ffff ) {
return Constants::UTF8_REPLACEMENT;
}
return \UtfNormal\Utils::codepointToUtf8( $num );
return Utils::codepointToUtf8( $num );
}
if ( $this->currentCharacter === DataSource::EOF ) {
// Parse error from the editor's draft as of 2017-01-06
$this->parseError( 'bad-escape', $position );
return \UtfNormal\Constants::UTF8_REPLACEMENT;
return Constants::UTF8_REPLACEMENT;
}
return $this->currentCharacter;
@ -849,7 +737,7 @@ class DataSourceTokenizer implements Tokenizer {
* self::wouldStartIdentifier() or the like before calling the method if
* necessary.
*
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-name
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-name
* @return string Name
*/
protected function consumeName() {
@ -863,13 +751,13 @@ class DataSourceTokenizer implements Tokenizer {
} elseif ( self::isValidEscape( $this->currentCharacter, $this->nextCharacter ) ) {
$name .= $this->consumeEscape();
} else {
$this->reconsumeCharacter(); // Doesn't say to, but breaks otherwise
return $name;
$this->reconsumeCharacter();
break;
}
}
// @codeCoverageIgnoreStart
return $name;
}
// @codeCoverageIgnoreEnd
/**
* Consume a number
@ -877,8 +765,9 @@ class DataSourceTokenizer implements Tokenizer {
* Note this does not do validation on the input stream. Call
* self::wouldStartNumber() before calling the method if necessary.
*
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-number
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-number
* @return array [ string $value, int|float $number, string $type ('integer' or 'number') ]
* @suppress PhanPluginDuplicateAdjacentStatement
*/
protected function consumeNumber() {
// 1.
@ -899,7 +788,7 @@ class DataSourceTokenizer implements Tokenizer {
// 4.
if ( $this->nextCharacter === '.' ) {
list( $next, $next2, $next3 ) = $this->lookAhead();
[ $next, $next2, ] = $this->lookAhead();
if ( self::isDigit( $next2 ) ) {
// 4.1.
$this->consumeCharacter();
@ -918,7 +807,7 @@ class DataSourceTokenizer implements Tokenizer {
// 5.
if ( $this->nextCharacter === 'e' || $this->nextCharacter === 'E' ) {
list( $next, $next2, $next3 ) = $this->lookAhead();
[ $next, $next2, $next3 ] = $this->lookAhead();
$ok = false;
if ( ( $next2 === '+' || $next2 === '-' ) && self::isDigit( $next3 ) ) {
$ok = true;
@ -948,7 +837,7 @@ class DataSourceTokenizer implements Tokenizer {
}
// 6. We assume PHP's casting follows the same rules as
// https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#convert-a-string-to-a-number
// https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#convert-string-to-number
$value = $type === 'integer' ? (int)$repr : (float)$repr;
// 7.

View File

@ -6,9 +6,15 @@
namespace Wikimedia\CSS\Parser;
use RuntimeException;
use UtfNormal\Constants;
use UtfNormal\Utils;
use Wikimedia\AtEase\AtEase;
/**
* Character set conversion for CSS
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#input-byte-stream
*
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#input-byte-stream
*/
class Encoder {
@ -96,9 +102,12 @@ class Encoder {
'iso_8859-8' => 'ISO-8859-8',
'iso_8859-8:1988' => 'ISO-8859-8',
'visual' => 'ISO-8859-8',
'csiso88598i' => 'ISO-8859-8', // ISO-8859-8-I?
'iso-8859-8-i' => 'ISO-8859-8', // ISO-8859-8-I?
'logical' => 'ISO-8859-8', // ISO-8859-8-I?
// ISO-8859-8-I?
'csiso88598i' => 'ISO-8859-8',
// ISO-8859-8-I?
'iso-8859-8-i' => 'ISO-8859-8',
// ISO-8859-8-I?
'logical' => 'ISO-8859-8',
'csisolatin6' => 'ISO-8859-10',
'iso-8859-10' => 'ISO-8859-10',
'iso-ir-157' => 'ISO-8859-10',
@ -188,15 +197,24 @@ class Encoder {
'x-cp1258' => 'Windows-1258',
'x-mac-cyrillic' => 'mac-cyrillic',
'x-mac-ukrainian' => 'mac-cyrillic',
'chinese' => 'GB18030', // GBK
'csgb2312' => 'GB18030', // GBK
'csiso58gb231280' => 'GB18030', // GBK
'gb2312' => 'GB18030', // GBK
'gb_2312' => 'GB18030', // GBK
'gb_2312-80' => 'GB18030', // GBK
'gbk' => 'GB18030', // GBK
'iso-ir-58' => 'GB18030', // GBK
'x-gbk' => 'GB18030', // GBK
// GBK
'chinese' => 'GB18030',
// GBK
'csgb2312' => 'GB18030',
// GBK
'csiso58gb231280' => 'GB18030',
// GBK
'gb2312' => 'GB18030',
// GBK
'gb_2312' => 'GB18030',
// GBK
'gb_2312-80' => 'GB18030',
// GBK
'gbk' => 'GB18030',
// GBK
'iso-ir-58' => 'GB18030',
// GBK
'x-gbk' => 'GB18030',
'gb18030' => 'GB18030',
'big5' => 'BIG-5',
'big5-hkscs' => 'BIG-5',
@ -231,6 +249,7 @@ class Encoder {
'iso-2022-cn' => 'replacement',
'iso-2022-cn-ext' => 'replacement',
'iso-2022-kr' => 'replacement',
'replacement' => 'replacement',
'utf-16be' => 'UTF-16BE',
'utf-16' => 'UTF-16LE',
'utf-16le' => 'UTF-16LE',
@ -247,7 +266,7 @@ class Encoder {
*/
public static function convert( $text, $encodings = [] ) {
// First, check for a BOM and honor that if it's present.
if ( substr( $text, 0, 3 ) === "\xef\xbb\xbf" ) {
if ( strpos( $text, "\xef\xbb\xbf" ) === 0 ) {
// UTF-8 with BOM (convert it anyway in case the BOM is a lie)
return self::doConvert( 'UTF-8', substr( $text, 3 ) );
}
@ -300,13 +319,13 @@ class Encoder {
protected static function doConvert( $encoding, $text ) {
// Pseudo-encoding that just outputs one replacement character
if ( $encoding === 'replacement' ) {
return \UtfNormal\Constants::UTF8_REPLACEMENT;
return Constants::UTF8_REPLACEMENT;
}
// Pseudo-encoding that shifts non-ASCII bytes to the BMP private use area
if ( $encoding === 'x-user-defined' ) {
return preg_replace_callback( '/[\x80-\xff]/', function ( $m ) {
return \UtfNormal\Utils::codepointToUtf8( 0xf700 + ord( $m[0] ) );
return preg_replace_callback( '/[\x80-\xff]/', static function ( $m ) {
return Utils::codepointToUtf8( 0xf700 + ord( $m[0] ) );
}, $text );
}
@ -315,15 +334,15 @@ class Encoder {
// some encodings mbstring doesn't support.
if ( in_array( $encoding, mb_list_encodings(), true ) ) {
$old = mb_substitute_character();
mb_substitute_character( \UtfNormal\Constants::UNICODE_REPLACEMENT );
mb_substitute_character( Constants::UNICODE_REPLACEMENT );
$text = mb_convert_encoding( $text, 'UTF-8', $encoding );
mb_substitute_character( $old );
return $text;
}
$ret = \MediaWiki\quietCall( 'iconv', $encoding, 'UTF-8', $text );
$ret = AtEase::quietCall( 'iconv', $encoding, 'UTF-8', $text );
if ( $ret === false ) {
throw new \RuntimeException( "Cannot convert '$text' from $encoding" );
throw new RuntimeException( "Cannot convert '$text' from $encoding" );
}
return $ret;
}

View File

@ -7,29 +7,28 @@
namespace Wikimedia\CSS\Parser;
use Wikimedia\CSS\Objects\AtRule;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\ComponentValue;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\CSSFunction;
use Wikimedia\CSS\Objects\Declaration;
use Wikimedia\CSS\Objects\DeclarationList;
use Wikimedia\CSS\Objects\DeclarationOrAtRuleList;
use Wikimedia\CSS\Objects\Declaration;
use Wikimedia\CSS\Objects\QualifiedRule;
use Wikimedia\CSS\Objects\Rule;
use Wikimedia\CSS\Objects\RuleList;
use Wikimedia\CSS\Objects\SimpleBlock;
use Wikimedia\CSS\Objects\Stylesheet;
use Wikimedia\CSS\Objects\Token;
use Wikimedia\CSS\Sanitizer\Sanitizer;
// Note: While reading the code below, you might find that my calls to
// consumeToken() don't match what the spec says and I don't ever "reconsume" a
// consumeToken() don't match what the spec says, and I don't ever "reconsume" a
// token. It turns out that the spec is overcomplicated and confused with
// respect to the "current input token" and the "next input token". It turns
// out things are pretty simple: every "consume an X" is called with the
// current input token being the first token of X, and returns with the current
// input token being the last token of X (or EOF if X ends at EOF).
// Also of note is that, since our Tokenizer can only return a stream of tokens
// Also, of note is that, since our Tokenizer can only return a stream of tokens
// rather than a stream of component values, the consume functions here only
// consider tokens. ComponentValueList::toTokenArray() may be used to convert a
// list of component values to a list of tokens if necessary.
@ -38,15 +37,19 @@ use Wikimedia\CSS\Sanitizer\Sanitizer;
* Parse CSS into a structure for further processing.
*
* This implements the CSS Syntax Module Level 3 candidate recommendation.
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/
*
* The usual entry points are:
* - Parser::parseStylesheet() to parse a stylesheet or the contents of a <style> tag.
* - Parser::parseDeclarationList() to parse an inline style attribute
*/
class Parser {
/** Maximum depth of nested ComponentValues */
const CV_DEPTH_LIMIT = 100; // Arbitrary number that seems like it should be enough
/**
* Maximum depth of nested ComponentValues
*
* Arbitrary number that seems like it should be enough
*/
private const CV_DEPTH_LIMIT = 100;
/** @var Tokenizer */
protected $tokenizer;
@ -151,57 +154,47 @@ class Parser {
* @param array $data Extra data about the error.
*/
protected function parseError( $tag, Token $token, array $data = [] ) {
list( $line, $pos ) = $token->getPosition();
[ $line, $pos ] = $token->getPosition();
$this->parseErrors[] = array_merge( [ $tag, $line, $pos ], $data );
}
/**
* Parse a stylesheet
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-stylesheet
* @note Per the Editor's Draft, if the first rule is an at-rule named
* "charset" it will be silently dropped. If you're not using the provided
* Sanitizer classes to further sanitize the CSS, you'll want to manually
* filter out any other such rules before stringifying the stylesheet
* and/or prepend `@charset "utf-8";` after stringifying it.
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-stylesheet
* @return Stylesheet
*/
public function parseStylesheet() {
$this->consumeToken(); // Move to the first token
// Move to the first token
$this->consumeToken();
$list = $this->consumeRuleList( true );
// Drop @charset per the Editor's Draft
if ( isset( $list[0] ) && $list[0] instanceof AtRule &&
!strcasecmp( $list[0]->getName(), 'charset' )
) {
$list->remove( 0 );
$list->rewind();
}
return new Stylesheet( $list );
}
/**
* Parse a list of rules
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-list-of-rules
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-list-of-rules
* @return RuleList
*/
public function parseRuleList() {
$this->consumeToken(); // Move to the first token
// Move to the first token
$this->consumeToken();
return $this->consumeRuleList( false );
}
/**
* Parse a rule
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-rule
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-rule
* @return Rule|null
*/
public function parseRule() {
// 1. and 2.
// 1.
$this->consumeTokenAndWhitespace();
// 3.
// 2.
if ( $this->currentToken->type() === Token::T_EOF ) {
$this->parseError( 'unexpected-eof', $this->currentToken ); // "return a syntax error"?
// "return a syntax error"?
$this->parseError( 'unexpected-eof', $this->currentToken );
return null;
}
@ -214,39 +207,39 @@ class Parser {
}
}
// 4.
// 3.
$this->consumeTokenAndWhitespace();
// 5.
// 4.
if ( $this->currentToken->type() === Token::T_EOF ) {
return $rule;
} else {
$this->parseError( 'expected-eof', $this->currentToken ); // "return a syntax error"?
return null;
}
// "return a syntax error"?
$this->parseError( 'expected-eof', $this->currentToken );
return null;
}
/**
* Parse a declaration
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-declaration
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-declaration
* @return Declaration|null
*/
public function parseDeclaration() {
// 1. and 2.
// 1.
$this->consumeTokenAndWhitespace();
// 3.
// 2.
if ( $this->currentToken->type() !== Token::T_IDENT ) {
$this->parseError( 'expected-ident', $this->currentToken ); // "return a syntax error"?
// "return a syntax error"?
$this->parseError( 'expected-ident', $this->currentToken );
return null;
}
// 4.
$declaration = $this->consumeDeclaration();
// 3.
// Declarations always run to EOF, no need to check.
return $declaration;
return $this->consumeDeclaration();
}
/**
@ -256,63 +249,66 @@ class Parser {
* @return DeclarationList
*/
public function parseDeclarationList() {
$this->consumeToken(); // Move to the first token
// Move to the first token
$this->consumeToken();
return $this->consumeDeclarationOrAtRuleList( false );
}
/**
* Parse a list of declarations and at-rules
* @note This is the entry point the standard calls "parse a list of declarations"
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-list-of-declarations
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-list-of-declarations
* @return DeclarationOrAtRuleList
*/
public function parseDeclarationOrAtRuleList() {
$this->consumeToken(); // Move to the first token
// Move to the first token
$this->consumeToken();
return $this->consumeDeclarationOrAtRuleList();
}
/**
* Parse a (non-whitespace) component value
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-component-value
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-component-value
* @return ComponentValue|null
*/
public function parseComponentValue() {
// 1. and 2.
// 1.
$this->consumeTokenAndWhitespace();
// 2.
if ( $this->currentToken->type() === Token::T_EOF ) {
// "return a syntax error"?
$this->parseError( 'unexpected-eof', $this->currentToken );
return null;
}
// 3.
if ( $this->currentToken->type() === Token::T_EOF ) {
$this->parseError( 'unexpected-eof', $this->currentToken ); // "return a syntax error"?
return null;
}
$value = $this->consumeComponentValue();
// 4.
$value = $this->consumeComponentValue();
// The spec says to return a syntax error if nothing is returned, but
// that can never happen and the Editor's Draft removed that language.
// 5.
$this->consumeTokenAndWhitespace();
// 6.
// 5.
if ( $this->currentToken->type() === Token::T_EOF ) {
return $value;
} else {
$this->parseError( 'expected-eof', $this->currentToken ); // "return a syntax error"?
return null;
}
// "return a syntax error"?
$this->parseError( 'expected-eof', $this->currentToken );
return null;
}
/**
* Parse a list of component values
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#parse-a-list-of-component-values
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-list-of-component-values
* @return ComponentValueList
*/
public function parseComponentValueList() {
$list = new ComponentValueList();
while ( true ) {
$this->consumeToken(); // Move to the first/next token
// Move to the first/next token
$this->consumeToken();
$value = $this->consumeComponentValue();
if ( $value instanceof Token && $value->type() === Token::T_EOF ) {
break;
@ -323,14 +319,42 @@ class Parser {
return $list;
}
/**
 * Parse a comma-separated list of component values
 *
 * Component values are collected into one ComponentValueList per comma-delimited
 * group. A top-level comma token closes the current group and opens the next one;
 * the EOF token closes the last group. Neither separator token is stored.
 *
 * @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#parse-comma-separated-list-of-component-values
 * @return ComponentValueList[]
 */
public function parseCommaSeparatedComponentValueList() {
	$groups = [];
	do {
		$group = new ComponentValueList();
		while ( true ) {
			// Advance to the first/next token before consuming a component value
			$this->consumeToken();
			$component = $this->consumeComponentValue();

			// Only bare tokens can end a group; blocks and functions are always content.
			$endsGroup = $component instanceof Token &&
				( $component->type() === Token::T_EOF || $component->type() === Token::T_COMMA );
			if ( $endsGroup ) {
				break;
			}

			$group->add( $component );
		}
		$groups[] = $group;

		// The inner loop only exits on an EOF or comma token; a comma means another group follows.
		$sawComma = $component->type() === Token::T_COMMA;
	} while ( $sawComma );

	return $groups;
}
/**
* Consume a list of rules
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-list-of-rules
* @param boolean $topLevel Determines the behavior when CDO and CDC tokens are encountered
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-list-of-rules
* @param bool $topLevel Determines the behavior when CDO and CDC tokens are encountered
* @return RuleList
*/
protected function consumeRuleList( $topLevel ) {
// @phan-suppress-previous-line PhanPluginNeverReturnMethod
$list = new RuleList();
// @phan-suppress-next-line PhanInfiniteLoop
while ( true ) {
$rule = false;
switch ( $this->currentToken->type() ) {
@ -342,11 +366,10 @@ class Parser {
case Token::T_CDO:
case Token::T_CDC:
if ( $topLevel ) {
// Do nothing
} else {
if ( !$topLevel ) {
$rule = $this->consumeQualifiedRule();
}
// Else, do nothing
break;
case Token::T_AT_KEYWORD:
@ -364,18 +387,21 @@ class Parser {
$this->consumeToken();
}
// @phan-suppress-next-line PhanPluginUnreachableCode Reached by break 2
return $list;
}
/**
* Consume a list of declarations and at-rules
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-list-of-declarations
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-list-of-declarations
* @param bool $allowAtRules Whether to allow at-rules. This flag is not in
* the spec, and is used to implement the non-spec self::parseDeclarationList().
* the spec and is used to implement the non-spec self::parseDeclarationList().
* @return DeclarationOrAtRuleList|DeclarationList
*/
protected function consumeDeclarationOrAtRuleList( $allowAtRules = true ) {
// @phan-suppress-previous-line PhanPluginNeverReturnMethod
$list = $allowAtRules ? new DeclarationOrAtRuleList() : new DeclarationList();
// @phan-suppress-next-line PhanInfiniteLoop
while ( true ) {
$declaration = false;
switch ( $this->currentToken->type() ) {
@ -400,7 +426,6 @@ class Parser {
break;
case Token::T_IDENT:
// The draft changes this to ComponentValue instead of Token, which makes more sense.
$cvs = [];
do {
$cvs[] = $this->consumeComponentValue();
@ -411,7 +436,8 @@ class Parser {
);
$tokens = ( new ComponentValueList( $cvs ) )->toTokenArray();
$parser = static::newFromTokens( $tokens, $this->currentToken );
$parser->consumeToken(); // Load that first token
// Load that first token
$parser->consumeToken();
$declaration = $parser->consumeDeclaration();
// Propagate any errors
$this->parseErrors = array_merge( $this->parseErrors, $parser->parseErrors );
@ -436,32 +462,32 @@ class Parser {
$this->consumeToken();
}
// @phan-suppress-next-line PhanPluginUnreachableCode Reached by break 2
return $list;
}
/**
* Consume a declaration
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-declaration
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-declaration
* @return Declaration|null
*/
protected function consumeDeclaration() {
$declaration = new Declaration( $this->currentToken );
// 2.
// 1.
$this->consumeTokenAndWhitespace();
// 3.
// 2. and 3.
if ( $this->currentToken->type() !== Token::T_COLON ) {
$this->parseError( 'expected-colon', $this->currentToken );
return null;
}
$this->consumeToken();
$this->consumeTokenAndWhitespace();
// 4.
$value = $declaration->getValue();
$l1 = $l2 = -1;
while ( $this->currentToken->type() !== Token::T_EOF ) {
// The draft changes this to ComponentValue instead of Token, which makes more sense.
$value->add( $this->consumeComponentValue() );
if ( $this->currentToken->type() !== Token::T_WHITESPACE ) {
$l1 = $l2;
@ -470,48 +496,62 @@ class Parser {
$this->consumeToken();
}
// 5.
// 5. and part of 6.
// @phan-suppress-next-line PhanSuspiciousValueComparison False positive about $l1 is -1
$v1 = $l1 >= 0 ? $value[$l1] : null;
$v2 = $l2 >= 0 ? $value[$l2] : null;
if ( $v1 instanceof Token && $v1->type() === Token::T_DELIM && $v1->value() === '!' &&
$v2 instanceof Token && $v2->type() === Token::T_IDENT &&
if ( $v1 instanceof Token &&
$v1->type() === Token::T_DELIM &&
$v1->value() === '!' &&
$v2 instanceof Token &&
$v2->type() === Token::T_IDENT &&
!strcasecmp( $v2->value(), 'important' )
) {
// Technically it doesn't say to remove any whitespace within/after
// the "!important" too, but it makes sense to do so.
// This removes the "!" and "important" (5), and also any whitespace between/after (6)
while ( isset( $value[$l1] ) ) {
$value->remove( $l1 );
}
$declaration->setImportant( true );
}
// 6.
// Rest of 6.
$i = $value->count();
// @phan-suppress-next-line PhanNonClassMethodCall False positive
while ( --$i >= 0 && $value[$i] instanceof Token && $value[$i]->type() === Token::T_WHITESPACE ) {
$value->remove( $i );
}
// 7.
return $declaration;
}
/**
* Consume an at-rule
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-an-at-rule
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-at-rule
* @return AtRule
* @suppress PhanPluginNeverReturnMethod due to break 2;
*/
protected function consumeAtRule() {
$rule = new AtRule( $this->currentToken );
$this->consumeToken();
// @phan-suppress-next-line PhanInfiniteLoop
while ( true ) {
switch ( $this->currentToken->type() ) {
case Token::T_SEMICOLON:
return $rule;
break 2;
case Token::T_EOF:
// Parse error from the editor's draft as of 2017-01-11
if ( $this->currentToken->typeFlag() !== 'recursion-depth-exceeded' ) {
$this->parseError( 'unexpected-eof-in-rule', $this->currentToken );
}
return $rule;
break 2;
case Token::T_LEFT_BRACE:
$rule->setBlock( $this->consumeSimpleBlock( true ) );
return $rule;
$rule->setBlock( $this->consumeSimpleBlock() );
break 2;
// Spec has "simple block with an associated token of <{-token>" here, but that isn't possible
// because it's not a Token.
default:
$rule->getPrelude()->add( $this->consumeComponentValue() );
@ -519,13 +559,14 @@ class Parser {
}
$this->consumeToken();
}
// @codeCoverageIgnoreStart
// @phan-suppress-next-line PhanPluginUnreachableCode False positive due to break 2;
return $rule;
}
// @codeCoverageIgnoreEnd
/**
* Consume a qualified rule
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-qualified-rule
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-qualified-rule
* @return QualifiedRule|null
*/
protected function consumeQualifiedRule() {
@ -539,8 +580,11 @@ class Parser {
return null;
case Token::T_LEFT_BRACE:
$rule->setBlock( $this->consumeSimpleBlock( true ) );
return $rule;
$rule->setBlock( $this->consumeSimpleBlock() );
break 2;
// Spec has "simple block with an associated token of <{-token>" here, but that isn't possible
// because it's not a Token.
default:
$rule->getPrelude()->add( $this->consumeComponentValue() );
@ -548,13 +592,14 @@ class Parser {
}
$this->consumeToken();
}
// @codeCoverageIgnoreStart
// @phan-suppress-next-line PhanPluginUnreachableCode False positive due to break 2;
return $rule;
}
// @codeCoverageIgnoreEnd
/**
* Consume a component value
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-component-value
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-component-value
* @return ComponentValue
*/
protected function consumeComponentValue() {
@ -562,7 +607,7 @@ class Parser {
$this->parseError( 'recursion-depth-exceeded', $this->currentToken );
// There's no way to safely recover from this without more recursion.
// So just eat the rest of the input, then return a
// specially-flagged EOF so we can avoid 100 "unexpected EOF"
// specially-flagged EOF, so we can avoid 100 "unexpected EOF"
// errors.
$position = $this->currentToken->getPosition();
while ( $this->currentToken->type() !== Token::T_EOF ) {
@ -591,29 +636,31 @@ class Parser {
}
$this->cvDepth--;
// @phan-suppress-next-line PhanTypeMismatchReturnNullable $ret always set
return $ret;
}
/**
* Consume a simple block
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-simple-block
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-simple-block
* @return SimpleBlock
* @suppress PhanPluginNeverReturnMethod due to break 2;
*/
protected function consumeSimpleBlock() {
$block = new SimpleBlock( $this->currentToken );
$endTokenType = $block->getEndTokenType();
$this->consumeToken();
// @phan-suppress-next-line PhanInfiniteLoop
while ( true ) {
switch ( $this->currentToken->type() ) {
case Token::T_EOF:
// Parse error from the editor's draft as of 2017-01-12
if ( $this->currentToken->typeFlag() !== 'recursion-depth-exceeded' ) {
$this->parseError( 'unexpected-eof-in-block', $this->currentToken );
}
return $block;
break 2;
case $endTokenType:
return $block;
break 2;
default:
$block->getValue()->add( $this->consumeComponentValue() );
@ -621,30 +668,32 @@ class Parser {
}
$this->consumeToken();
}
// @codeCoverageIgnoreStart
// @phan-suppress-next-line PhanPluginUnreachableCode False positive due to break 2;
return $block;
}
// @codeCoverageIgnoreEnd
/**
* Consume a function
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#consume-a-function
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#consume-function
* @return CSSFunction
* @suppress PhanPluginNeverReturnMethod due to break 2;
*/
protected function consumeFunction() {
$function = new CSSFunction( $this->currentToken );
$this->consumeToken();
// @phan-suppress-next-line PhanInfiniteLoop
while ( true ) {
switch ( $this->currentToken->type() ) {
case Token::T_EOF:
// Parse error from the editor's draft as of 2017-01-12
if ( $this->currentToken->typeFlag() !== 'recursion-depth-exceeded' ) {
$this->parseError( 'unexpected-eof-in-function', $this->currentToken );
}
return $function;
break 2;
case Token::T_RIGHT_PAREN:
return $function;
break 2;
default:
$function->getValue()->add( $this->consumeComponentValue() );
@ -652,7 +701,10 @@ class Parser {
}
$this->consumeToken();
}
// @codeCoverageIgnoreStart
// @phan-suppress-next-line PhanPluginUnreachableCode False positive due to break 2;
return $function;
}
// @codeCoverageIgnoreEnd
}

View File

@ -6,6 +6,9 @@
namespace Wikimedia\CSS\Parser;
use InvalidArgumentException;
use UnexpectedValueException;
/**
* Read data for the CSS parser
*/
@ -15,7 +18,10 @@ class StringDataSource implements DataSource {
protected $string;
/** @var int */
protected $len = 0, $pos = 0;
protected $len = 0;
/** @var int */
protected $pos = 0;
/** @var string[] */
protected $putBack = [];
@ -24,28 +30,15 @@ class StringDataSource implements DataSource {
* @param string $string Input string. Must be valid UTF-8 with no BOM.
*/
public function __construct( $string ) {
static $newPHP;
$this->string = (string)$string;
$this->len = strlen( $this->string );
// HHVM 3.4 and older come with an outdated version of libmbfl that
// incorrectly allows values above U+10FFFF, so we have to check
// for them separately. (This issue also exists in PHP 5.3 and
// older, which are no longer supported.)
// @codeCoverageIgnoreStart
if ( $newPHP === null ) {
$newPHP = !mb_check_encoding( "\xf4\x90\x80\x80", 'UTF-8' );
}
// @codeCoverageIgnoreEnd
if ( !mb_check_encoding( $this->string, 'UTF-8' ) ||
!$newPHP && preg_match( "/\xf4[\x90-\xbf]|[\xf5-\xff]/S", $this->string ) !== 0
) {
throw new \InvalidArgumentException( '$string is not valid UTF-8' );
if ( !mb_check_encoding( $this->string, 'UTF-8' ) ) {
throw new InvalidArgumentException( '$string is not valid UTF-8' );
}
}
/** @inheritDoc */
public function readCharacter() {
if ( $this->putBack ) {
return array_pop( $this->putBack );
@ -61,7 +54,7 @@ class StringDataSource implements DataSource {
$c = $this->string[$p];
$cc = ord( $this->string[$p] );
if ( $cc <= 0x7f ) {
$this->pos += 1;
$this->pos++;
return $c;
} elseif ( ( $cc & 0xe0 ) === 0xc0 ) {
$this->pos += 2;
@ -76,13 +69,14 @@ class StringDataSource implements DataSource {
// WTF? Should never get here because it should have failed
// validation in the constructor.
// @codeCoverageIgnoreStart
throw new \UnexpectedValueException(
throw new UnexpectedValueException(
sprintf( 'Unexpected byte %02X in string at position %d.', $cc, $this->pos )
);
// @codeCoverageIgnoreEnd
}
}
/** @inheritDoc */
public function putBackCharacter( $char ) {
if ( $char !== self::EOF ) {
$this->putBack[] = $char;

View File

@ -6,9 +6,10 @@
namespace Wikimedia\CSS\Parser;
use Wikimedia\CSS\Util;
use InvalidArgumentException;
use Wikimedia\CSS\Objects\Token;
use Wikimedia\CSS\Objects\TokenList;
use Wikimedia\CSS\Util;
/**
* Tokenizer that just returns a predefined list of tokens
@ -32,7 +33,7 @@ class TokenListTokenizer implements Tokenizer {
Util::assertAllInstanceOf( $tokens, Token::class, '$tokens' );
$this->tokens = $tokens;
} else {
throw new \InvalidArgumentException( '$tokens must be a TokenList or an array of tokens' );
throw new InvalidArgumentException( '$tokens must be a TokenList or an array of tokens' );
}
if ( $eof && $eof->type() === Token::T_EOF ) {
@ -46,13 +47,16 @@ class TokenListTokenizer implements Tokenizer {
}
}
/**
* @inheritDoc
*
* This tokenizer hands out tokens from a predefined list and never records
* parse errors, so the returned list is always empty.
*/
public function getParseErrors() {
return [];
}
/**
* @inheritDoc
*
* Intentionally a no-op: getParseErrors() in this class always returns an
* empty array, so there is nothing to clear.
*/
public function clearParseErrors() {
}
/**
 * @inheritDoc
 *
 * Removes and returns the first token still in the list. Once the list is
 * exhausted, the stored EOF token is returned on every call.
 */
public function consumeToken() {
	// array_shift() yields null for an empty array, which falls through
	// to the EOF token below.
	$next = array_shift( $this->tokens );
	if ( $next ) {
		return $next;
	}
	return $this->eof;
}

View File

@ -14,6 +14,7 @@ use Wikimedia\CSS\Grammar\MatcherFactory;
use Wikimedia\CSS\Grammar\Quantifier;
use Wikimedia\CSS\Grammar\TokenMatcher;
use Wikimedia\CSS\Grammar\UnorderedGroup;
use Wikimedia\CSS\Grammar\UrangeMatcher;
use Wikimedia\CSS\Objects\AtRule;
use Wikimedia\CSS\Objects\CSSObject;
use Wikimedia\CSS\Objects\Rule;
@ -22,7 +23,7 @@ use Wikimedia\CSS\Util;
/**
* Sanitizes a CSS \@font-face rule
* @see https://www.w3.org/TR/2013/CR-css-fonts-3-20131003/#font-resources
* @see https://www.w3.org/TR/2018/REC-css-fonts-3-20180920/#font-resources
*/
class FontFaceAtRuleSanitizer extends RuleSanitizer {
@ -52,13 +53,7 @@ class FontFaceAtRuleSanitizer extends RuleSanitizer {
new KeywordMatcher( [ 'normal', 'bold' ] ), $matchData['numWeight']
] ),
'font-stretch' => $matchData['font-stretch'],
'unicode-range' => Quantifier::hash(
new TokenMatcher( Token::T_UNICODE_RANGE, function ( Token $t ) {
list( $start, $end ) = $t->range();
return $start <= $end && $end <= 0x10ffff;
} )
),
'font-variant' => $matchData['font-variant'],
'unicode-range' => Quantifier::hash( new UrangeMatcher() ),
'font-feature-settings' => $matchData['font-feature-settings'],
] );
}
@ -69,14 +64,12 @@ class FontFaceAtRuleSanitizer extends RuleSanitizer {
* @return array
*/
public static function fontMatchData( MatcherFactory $matcherFactory ) {
$featureValueName = $matcherFactory->ident();
$featureValueNameHash = Quantifier::hash( $featureValueName );
$ret = [
'familyName' => new Alternative( [
$matcherFactory->string(),
Quantifier::plus( $matcherFactory->ident() ),
] ),
'numWeight' => new TokenMatcher( Token::T_NUMBER, function ( Token $t ) {
'numWeight' => new TokenMatcher( Token::T_NUMBER, static function ( Token $t ) {
return $t->typeFlag() === 'integer' && preg_match( '/^[1-9]00$/', $t->representation() );
} ),
'font-style' => new KeywordMatcher( [ 'normal', 'italic', 'oblique' ] ),
@ -87,7 +80,7 @@ class FontFaceAtRuleSanitizer extends RuleSanitizer {
'font-feature-settings' => new Alternative( [
new KeywordMatcher( 'normal' ),
Quantifier::hash( new Juxtaposition( [
new TokenMatcher( Token::T_STRING, function ( Token $t ) {
new TokenMatcher( Token::T_STRING, static function ( Token $t ) {
return preg_match( '/^[\x20-\x7e]{4}$/', $t->value() );
} ),
Quantifier::optional( new Alternative( [
@ -102,15 +95,6 @@ class FontFaceAtRuleSanitizer extends RuleSanitizer {
new KeywordMatcher( [ 'historical-ligatures', 'no-historical-ligatures' ] ),
new KeywordMatcher( [ 'contextual', 'no-contextual' ] )
],
'alt' => [
new FunctionMatcher( 'stylistic', $featureValueName ),
new KeywordMatcher( 'historical-forms' ),
new FunctionMatcher( 'styleset', $featureValueNameHash ),
new FunctionMatcher( 'character-variant', $featureValueNameHash ),
new FunctionMatcher( 'swash', $featureValueName ),
new FunctionMatcher( 'ornaments', $featureValueName ),
new FunctionMatcher( 'annotation', $featureValueName ),
],
'capsKeywords' => [
'small-caps', 'all-small-caps', 'petite-caps', 'all-petite-caps', 'unicase', 'titling-caps'
],
@ -125,27 +109,32 @@ class FontFaceAtRuleSanitizer extends RuleSanitizer {
new KeywordMatcher( [ 'jis78', 'jis83', 'jis90', 'jis04', 'simplified', 'traditional' ] ),
new KeywordMatcher( [ 'full-width', 'proportional-width' ] ),
new KeywordMatcher( 'ruby' ),
]
],
'positionKeywords' => [
'sub', 'super',
],
];
$ret['font-variant'] = new Alternative( [
new KeywordMatcher( [ 'normal', 'none' ] ),
UnorderedGroup::someOf( array_merge(
$ret['ligatures'],
$ret['alt'],
[ new KeywordMatcher( $ret['capsKeywords'] ) ],
$ret['numeric'],
$ret['eastAsian']
$ret['eastAsian'],
[ new KeywordMatcher( $ret['positionKeywords'] ) ]
) )
] );
return $ret;
}
/**
 * @inheritDoc
 *
 * Accepts only at-rules whose name is "font-face", compared
 * case-insensitively.
 */
public function handlesRule( Rule $rule ) {
	if ( !$rule instanceof AtRule ) {
		return false;
	}
	return strcasecmp( $rule->getName(), 'font-face' ) === 0;
}
/** @inheritDoc */
protected function doSanitize( CSSObject $object ) {
if ( !$object instanceof Rule || !$this->handlesRule( $object ) ) {
if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
$this->sanitizationError( 'expected-at-rule', $object, [ 'font-face' ] );
return null;
}
@ -161,7 +150,7 @@ class FontFaceAtRuleSanitizer extends RuleSanitizer {
return null;
}
$ret = clone( $object );
$ret = clone $object;
$this->fixPreludeWhitespace( $ret, false );
$this->sanitizeDeclarationBlock( $ret->getBlock(), $this->propertySanitizer );

View File

@ -1,83 +0,0 @@
<?php
/**
* @file
* @license https://opensource.org/licenses/Apache-2.0 Apache-2.0
*/
namespace Wikimedia\CSS\Sanitizer;
use Wikimedia\CSS\Grammar\Matcher;
use Wikimedia\CSS\Objects\AtRule;
use Wikimedia\CSS\Objects\CSSObject;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\DeclarationList;
use Wikimedia\CSS\Objects\Rule;
use Wikimedia\CSS\Parser\Parser;
use Wikimedia\CSS\Util;
/**
 * Sanitizes a feature-value at-rule inside a CSS \@font-feature-values rule
 *
 * Each instance handles a single at-rule name and checks the value of every
 * declaration in that rule's block against one Matcher.
 * @see https://www.w3.org/TR/2013/CR-css-fonts-3-20131003/#at-font-feature-values-rule
 */
class FontFeatureValueAtRuleSanitizer extends RuleSanitizer {

	/** @var string Name of the at-rule this instance handles (compared case-insensitively) */
	protected $name;

	/** @var Matcher Matcher that every declaration value in the block must satisfy */
	protected $valueMatcher;

	/**
	 * @param string $name At-rule name, compared against the rule's getName()
	 * @param Matcher $valueMatcher Matcher applied to each declaration's value
	 */
	public function __construct( $name, Matcher $valueMatcher ) {
		$this->name = $name;
		$this->valueMatcher = $valueMatcher;
	}

	/** @inheritDoc */
	public function handlesRule( Rule $rule ) {
		return $rule instanceof AtRule && !strcasecmp( $rule->getName(), $this->name );
	}

	/** @inheritDoc */
	protected function doSanitize( CSSObject $object ) {
		// handlesRule() only accepts AtRule instances, so testing for AtRule
		// here rejects exactly the same objects as testing for Rule did, while
		// making the type the rest of the method relies on explicit.
		if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
			$this->sanitizationError( 'expected-at-rule', $object, [ $this->name ] );
			return null;
		}

		if ( $object->getBlock() === null ) {
			$this->sanitizationError( 'at-rule-block-required', $object, [ $this->name ] );
			return null;
		}

		// No non-whitespace prelude allowed
		if ( Util::findFirstNonWhitespace( $object->getPrelude() ) ) {
			$this->sanitizationError( 'invalid-font-feature-value', $object, [ $this->name ] );
			return null;
		}

		$ret = clone $object;
		$this->fixPreludeWhitespace( $ret, false );

		// Parse the block's contents into a list of declarations, sanitize it,
		// and put it back into the block.
		$blockContents = $ret->getBlock()->getValue();
		$parser = Parser::newFromTokens( $blockContents->toTokenArray() );
		$oldDeclarations = $parser->parseDeclarationList();
		// Errors from re-parsing the block are reported alongside the
		// sanitization errors.
		$this->sanitizationErrors = array_merge( $this->sanitizationErrors, $parser->getParseErrors() );
		$newDeclarations = new DeclarationList();
		foreach ( $oldDeclarations as $declaration ) {
			if ( $this->valueMatcher->match( $declaration->getValue(), [ 'mark-significance' => true ] ) ) {
				$newDeclarations->add( $declaration );
			} else {
				// Declarations whose value does not match are dropped
				$this->sanitizationError( 'invalid-font-feature-value-declaration', $declaration,
					[ $this->name ] );
			}
		}
		$blockContents->clear();
		$blockContents->add( $newDeclarations->toComponentValueArray() );

		return $ret;
	}
}

View File

@ -1,84 +0,0 @@
<?php
/**
* @file
* @license https://opensource.org/licenses/Apache-2.0 Apache-2.0
*/
namespace Wikimedia\CSS\Sanitizer;
use Wikimedia\CSS\Grammar\Alternative;
use Wikimedia\CSS\Grammar\Matcher;
use Wikimedia\CSS\Grammar\MatcherFactory;
use Wikimedia\CSS\Grammar\Quantifier;
use Wikimedia\CSS\Objects\AtRule;
use Wikimedia\CSS\Objects\CSSObject;
use Wikimedia\CSS\Objects\Rule;
use Wikimedia\CSS\Util;
/**
 * Sanitizes a CSS \@font-feature-values rule
 *
 * The prelude must match a comma-separated font family list, and the block
 * is sanitized with one FontFeatureValueAtRuleSanitizer per supported
 * feature-value at-rule name.
 * @see https://www.w3.org/TR/2013/CR-css-fonts-3-20131003/#at-font-feature-values-rule
 */
class FontFeatureValuesAtRuleSanitizer extends RuleSanitizer {

	/** @var Matcher Matcher for the font family list in the prelude */
	protected $fontListMatcher;

	/** @var FontFeatureValueAtRuleSanitizer[] Sanitizers for the at-rules allowed in the block */
	protected $ruleSanitizers;

	/**
	 * @param MatcherFactory $matcherFactory
	 */
	public function __construct( MatcherFactory $matcherFactory ) {
		// Each list entry is either a string or a run of one or more identifiers
		$this->fontListMatcher = Quantifier::hash( new Alternative( [
			$matcherFactory->string(),
			Quantifier::plus( $matcherFactory->ident() ),
		] ) );

		// Value matchers built from rawNumber(): a single one, a count of
		// 1 to 2, and one or more.
		$n = $matcherFactory->rawNumber();
		$n2 = Quantifier::count( $n, 1, 2 );
		$nPlus = Quantifier::plus( $n );
		$this->ruleSanitizers = [
			new FontFeatureValueAtRuleSanitizer( 'stylistic', $n ),
			new FontFeatureValueAtRuleSanitizer( 'styleset', $nPlus ),
			new FontFeatureValueAtRuleSanitizer( 'character-variant', $n2 ),
			new FontFeatureValueAtRuleSanitizer( 'swash', $n ),
			new FontFeatureValueAtRuleSanitizer( 'ornaments', $n ),
			new FontFeatureValueAtRuleSanitizer( 'annotation', $n ),
		];
	}

	/** @inheritDoc */
	public function handlesRule( Rule $rule ) {
		return $rule instanceof AtRule && !strcasecmp( $rule->getName(), 'font-feature-values' );
	}

	/** @inheritDoc */
	protected function doSanitize( CSSObject $object ) {
		// handlesRule() only accepts AtRule instances, so testing for AtRule
		// here rejects exactly the same objects as testing for Rule did.
		if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
			$this->sanitizationError( 'expected-at-rule', $object, [ 'font-feature-values' ] );
			return null;
		}

		if ( $object->getBlock() === null ) {
			$this->sanitizationError( 'at-rule-block-required', $object, [ 'font-feature-values' ] );
			return null;
		}

		// Test the font family list
		if ( !$this->fontListMatcher->match( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
			// Report "invalid" when the prelude has content, "missing" when
			// it is empty or whitespace only.
			$cv = Util::findFirstNonWhitespace( $object->getPrelude() );
			if ( $cv ) {
				$this->sanitizationError( 'invalid-font-feature-values-font-list', $cv );
			} else {
				$this->sanitizationError( 'missing-font-feature-values-font-list', $object );
			}
			return null;
		}

		$ret = clone $object;
		$this->fixPreludeWhitespace( $ret, false );
		$this->sanitizeRuleBlock( $ret->getBlock(), $this->ruleSanitizers );

		return $ret;
	}
}

View File

@ -7,6 +7,7 @@
namespace Wikimedia\CSS\Sanitizer;
use Wikimedia\CSS\Grammar\Alternative;
use Wikimedia\CSS\Grammar\FunctionMatcher;
use Wikimedia\CSS\Grammar\Juxtaposition;
use Wikimedia\CSS\Grammar\Matcher;
use Wikimedia\CSS\Grammar\MatcherFactory;
@ -18,7 +19,7 @@ use Wikimedia\CSS\Util;
/**
* Sanitizes a CSS \@import rule
* @see https://www.w3.org/TR/2016/CR-css-cascade-3-20160519/#at-import
* @see https://www.w3.org/TR/2018/CR-css-cascade-4-20180828/#at-import
*/
class ImportAtRuleSanitizer extends RuleSanitizer {
@ -27,27 +28,41 @@ class ImportAtRuleSanitizer extends RuleSanitizer {
/**
* @param MatcherFactory $matcherFactory
* @param array $options Additional options:
* - strict: (bool) Only accept defined syntax in supports(). Default true.
* - declarationSanitizer: (PropertySanitizer) Check supports() declarations against this
* Sanitizer.
*/
public function __construct( MatcherFactory $matcherFactory ) {
public function __construct( MatcherFactory $matcherFactory, array $options = [] ) {
$declarationSanitizer = $options['declarationSanitizer'] ?? null;
$strict = $options['strict'] ?? true;
$this->matcher = new Juxtaposition( [
new Alternative( [
$matcherFactory->url( 'css' ),
$matcherFactory->urlstring( 'css' ),
] ),
Quantifier::optional( new FunctionMatcher( 'supports', new Alternative( [
$matcherFactory->cssSupportsCondition( $declarationSanitizer, $strict ),
$matcherFactory->cssDeclaration( $declarationSanitizer ),
] ) ) ),
$matcherFactory->cssMediaQueryList(),
] );
}
/**
* @inheritDoc
*
* Always returns the fixed index -1000.
* NOTE(review): presumably this low value makes \@import rules sort ahead of
* all other rules; confirm against the code that consumes getIndex().
*/
public function getIndex() {
return -1000;
}
/**
 * @inheritDoc
 *
 * Accepts only at-rules whose name is "import", compared
 * case-insensitively.
 */
public function handlesRule( Rule $rule ) {
	if ( !$rule instanceof AtRule ) {
		return false;
	}
	return strcasecmp( $rule->getName(), 'import' ) === 0;
}
/** @inheritDoc */
protected function doSanitize( CSSObject $object ) {
if ( !$object instanceof Rule || !$this->handlesRule( $object ) ) {
if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
$this->sanitizationError( 'expected-at-rule', $object, [ 'import' ] );
return null;
}
@ -56,7 +71,7 @@ class ImportAtRuleSanitizer extends RuleSanitizer {
$this->sanitizationError( 'at-rule-block-not-allowed', $object->getBlock(), [ 'import' ] );
return null;
}
if ( !$this->matcher->match( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
if ( !$this->matcher->matchAgainst( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
$cv = Util::findFirstNonWhitespace( $object->getPrelude() );
if ( $cv ) {
$this->sanitizationError( 'invalid-import-value', $cv );
@ -65,8 +80,6 @@ class ImportAtRuleSanitizer extends RuleSanitizer {
}
return null;
}
$object = $this->fixPreludeWhitespace( $object, true );
return $object;
return $this->fixPreludeWhitespace( $object, true );
}
}

View File

@ -18,12 +18,12 @@ use Wikimedia\CSS\Util;
/**
* Sanitizes a CSS \@keyframes rule
* @see https://www.w3.org/TR/2013/WD-css3-animations-20130219/#keyframes
* @see https://www.w3.org/TR/2018/WD-css-animations-1-20181011/#keyframes
*/
class KeyframesAtRuleSanitizer extends RuleSanitizer {
/** @var Matcher */
protected $identMatcher;
protected $nameMatcher;
/** @var Sanitizer */
protected $ruleSanitizer;
@ -35,7 +35,10 @@ class KeyframesAtRuleSanitizer extends RuleSanitizer {
public function __construct(
MatcherFactory $matcherFactory, PropertySanitizer $propertySanitizer
) {
$this->identMatcher = $matcherFactory->ident();
$this->nameMatcher = new Alternative( [
$matcherFactory->customIdent( [ 'none' ] ),
$matcherFactory->string(),
] );
$this->ruleSanitizer = new StyleRuleSanitizer(
Quantifier::hash( new Alternative( [
new KeywordMatcher( [ 'from', 'to' ] ), $matcherFactory->rawPercentage()
@ -44,12 +47,14 @@ class KeyframesAtRuleSanitizer extends RuleSanitizer {
);
}
/**
 * @inheritDoc
 *
 * Accepts only at-rules whose name is "keyframes", compared
 * case-insensitively.
 */
public function handlesRule( Rule $rule ) {
	if ( !$rule instanceof AtRule ) {
		return false;
	}
	return strcasecmp( $rule->getName(), 'keyframes' ) === 0;
}
/** @inheritDoc */
protected function doSanitize( CSSObject $object ) {
if ( !$object instanceof Rule || !$this->handlesRule( $object ) ) {
if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
$this->sanitizationError( 'expected-at-rule', $object, [ 'keyframes' ] );
return null;
}
@ -60,7 +65,7 @@ class KeyframesAtRuleSanitizer extends RuleSanitizer {
}
// Test the keyframe name
if ( !$this->identMatcher->match( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
if ( !$this->nameMatcher->matchAgainst( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
$cv = Util::findFirstNonWhitespace( $object->getPrelude() );
if ( $cv ) {
$this->sanitizationError( 'invalid-keyframe-name', $cv );
@ -70,7 +75,7 @@ class KeyframesAtRuleSanitizer extends RuleSanitizer {
return null;
}
$ret = clone( $object );
$ret = clone $object;
$this->fixPreludeWhitespace( $ret, false );
$this->sanitizeRuleBlock( $ret->getBlock(), [ $this->ruleSanitizer ] );

View File

@ -13,7 +13,7 @@ use Wikimedia\CSS\Util;
/**
* Sanitizes the margin at-rules inside a CSS \@page rule
* @see https://www.w3.org/TR/2013/WD-css3-page-20130314/
* @see https://www.w3.org/TR/2018/WD-css-page-3-20181018/
*/
class MarginAtRuleSanitizer extends RuleSanitizer {
@ -34,13 +34,15 @@ class MarginAtRuleSanitizer extends RuleSanitizer {
$this->propertySanitizer = $propertySanitizer;
}
/**
 * @inheritDoc
 *
 * Accepts only at-rules whose lowercased name appears in
 * self::$marginRuleNames.
 */
public function handlesRule( Rule $rule ) {
	if ( !$rule instanceof AtRule ) {
		return false;
	}
	$lowerName = strtolower( $rule->getName() );
	return in_array( $lowerName, self::$marginRuleNames, true );
}
/** @inheritDoc */
protected function doSanitize( CSSObject $object ) {
if ( !$object instanceof Rule || !$this->handlesRule( $object ) ) {
if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
$this->sanitizationError( 'expected-page-margin-at-rule', $object );
return null;
}
@ -56,7 +58,7 @@ class MarginAtRuleSanitizer extends RuleSanitizer {
return null;
}
$ret = clone( $object );
$ret = clone $object;
$this->fixPreludeWhitespace( $ret, false );
$this->sanitizeDeclarationBlock( $ret->getBlock(), $this->propertySanitizer );

View File

@ -7,7 +7,6 @@
namespace Wikimedia\CSS\Sanitizer;
use Wikimedia\CSS\Grammar\Matcher;
use Wikimedia\CSS\Grammar\MatcherFactory;
use Wikimedia\CSS\Objects\AtRule;
use Wikimedia\CSS\Objects\CSSObject;
use Wikimedia\CSS\Objects\Rule;
@ -50,12 +49,14 @@ class MediaAtRuleSanitizer extends RuleSanitizer {
$this->ruleSanitizers = $ruleSanitizers;
}
/**
 * @inheritDoc
 *
 * Accepts only at-rules whose name is "media", compared
 * case-insensitively.
 */
public function handlesRule( Rule $rule ) {
	if ( !$rule instanceof AtRule ) {
		return false;
	}
	return strcasecmp( $rule->getName(), 'media' ) === 0;
}
/** @inheritDoc */
protected function doSanitize( CSSObject $object ) {
if ( !$object instanceof Rule || !$this->handlesRule( $object ) ) {
if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
$this->sanitizationError( 'expected-at-rule', $object, [ 'media' ] );
return null;
}
@ -66,7 +67,7 @@ class MediaAtRuleSanitizer extends RuleSanitizer {
}
// Test the media query
$match = $this->mediaQueryListMatcher->match(
$match = $this->mediaQueryListMatcher->matchAgainst(
$object->getPrelude(), [ 'mark-significance' => true ]
);
if ( !$match ) {
@ -75,7 +76,7 @@ class MediaAtRuleSanitizer extends RuleSanitizer {
return null;
}
$ret = clone( $object );
$ret = clone $object;
$this->fixPreludeWhitespace( $ret, false );
$this->sanitizeRuleBlock( $ret->getBlock(), $this->ruleSanitizers );

View File

@ -38,16 +38,19 @@ class NamespaceAtRuleSanitizer extends RuleSanitizer {
] );
}
/**
* @inheritDoc
*
* Always returns the fixed index -900.
* NOTE(review): presumably this places \@namespace rules early in the
* stylesheet ordering; confirm against the code that consumes getIndex().
*/
public function getIndex() {
return -900;
}
/**
 * @inheritDoc
 *
 * Accepts only at-rules whose name is "namespace", compared
 * case-insensitively.
 */
public function handlesRule( Rule $rule ) {
	if ( !$rule instanceof AtRule ) {
		return false;
	}
	return strcasecmp( $rule->getName(), 'namespace' ) === 0;
}
/** @inheritDoc */
protected function doSanitize( CSSObject $object ) {
if ( !$object instanceof Rule || !$this->handlesRule( $object ) ) {
if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
$this->sanitizationError( 'expected-at-rule', $object, [ 'namespace' ] );
return null;
}
@ -56,7 +59,7 @@ class NamespaceAtRuleSanitizer extends RuleSanitizer {
$this->sanitizationError( 'at-rule-block-not-allowed', $object->getBlock(), [ 'namespace' ] );
return null;
}
if ( !$this->matcher->match( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
if ( !$this->matcher->matchAgainst( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
$cv = Util::findFirstNonWhitespace( $object->getPrelude() );
if ( $cv ) {
$this->sanitizationError( 'invalid-namespace-value', $cv );
@ -65,8 +68,6 @@ class NamespaceAtRuleSanitizer extends RuleSanitizer {
}
return null;
}
$object = $this->fixPreludeWhitespace( $object, true );
return $object;
return $this->fixPreludeWhitespace( $object, true );
}
}

View File

@ -25,7 +25,7 @@ use Wikimedia\CSS\Util;
/**
* Sanitizes a CSS \@page rule
* @see https://www.w3.org/TR/2013/WD-css3-page-20130314/
* @see https://www.w3.org/TR/2018/WD-css-page-3-20181018/
*/
class PageAtRuleSanitizer extends RuleSanitizer {
@ -63,26 +63,43 @@ class PageAtRuleSanitizer extends RuleSanitizer {
] );
$this->pageSelectorMatcher->setDefaultOptions( [ 'skip-whitespace' => false ] );
// Clone the $propertySanitizer and inject the special "size" property
$this->propertySanitizer = clone( $propertySanitizer );
$this->propertySanitizer->addKnownProperties( [ 'size' => new Alternative( [
Quantifier::count( $matcherFactory->length(), 1, 2 ),
new KeywordMatcher( 'auto' ),
UnorderedGroup::someOf( [
new KeywordMatcher( [ 'A5', 'A4', 'A3', 'B5', 'B4', 'letter', 'legal', 'ledger' ] ),
new KeywordMatcher( [ 'portrait', 'landscape' ] ),
// Clone the $propertySanitizer and inject the special properties
$this->propertySanitizer = clone $propertySanitizer;
$this->propertySanitizer->addKnownProperties( [
'size' => new Alternative( [
Quantifier::count( $matcherFactory->length(), 1, 2 ),
new KeywordMatcher( 'auto' ),
UnorderedGroup::someOf( [
new KeywordMatcher( [
'A5', 'A4', 'A3', 'B5', 'B4', 'JIS-B5', 'JIS-B4', 'letter', 'legal', 'ledger',
] ),
new KeywordMatcher( [ 'portrait', 'landscape' ] ),
] ),
] ),
] ) ] );
'marks' => new Alternative( [
new KeywordMatcher( 'none' ),
UnorderedGroup::someOf( [
new KeywordMatcher( 'crop' ),
new KeywordMatcher( 'cross' ),
] ),
] ),
'bleed' => new Alternative( [
new KeywordMatcher( 'auto' ),
$matcherFactory->length(),
] ),
] );
$this->ruleSanitizer = new MarginAtRuleSanitizer( $propertySanitizer );
}
/**
 * @inheritDoc
 *
 * Accepts only at-rules whose name is "page", compared
 * case-insensitively.
 */
public function handlesRule( Rule $rule ) {
	if ( !$rule instanceof AtRule ) {
		return false;
	}
	return strcasecmp( $rule->getName(), 'page' ) === 0;
}
/** @inheritDoc */
protected function doSanitize( CSSObject $object ) {
if ( !$object instanceof Rule || !$this->handlesRule( $object ) ) {
if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
$this->sanitizationError( 'expected-at-rule', $object, [ 'page' ] );
return null;
}
@ -93,7 +110,7 @@ class PageAtRuleSanitizer extends RuleSanitizer {
}
// Test the page selector
$match = $this->pageSelectorMatcher->match(
$match = $this->pageSelectorMatcher->matchAgainst(
$object->getPrelude(), [ 'mark-significance' => true ]
);
if ( !$match ) {
@ -102,7 +119,7 @@ class PageAtRuleSanitizer extends RuleSanitizer {
return null;
}
$ret = clone( $object );
$ret = clone $object;
$this->fixPreludeWhitespace( $ret, false );
// Parse the block's contents into a list of declarations and at-rules,

View File

@ -10,7 +10,6 @@ use InvalidArgumentException;
use Wikimedia\CSS\Grammar\Matcher;
use Wikimedia\CSS\Grammar\NothingMatcher;
use Wikimedia\CSS\Objects\CSSObject;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\Declaration;
use Wikimedia\CSS\Util;
@ -28,7 +27,7 @@ class PropertySanitizer extends Sanitizer {
/**
* @param Matcher[] $properties Array mapping declaration names (lowercase)
* to Matchers for the values
* @param Matcher $cssWideKeywordsMatcher Matcher for keywords that should
* @param Matcher|null $cssWideKeywordsMatcher Matcher for keywords that should
* be recognized for all known properties.
*/
public function __construct( array $properties = [], Matcher $cssWideKeywordsMatcher = null ) {
@ -63,20 +62,20 @@ class PropertySanitizer extends Sanitizer {
/**
* Merge a list of matchers into the list of known properties
* @param Matcher[] $properties Array mapping declaration names (lowercase)
* @param Matcher[] $props Array mapping declaration names (lowercase)
* to Matchers for the values
* @throws InvalidArgumentException if some property is already defined
*/
public function addKnownProperties( $props ) {
$dups = [];
foreach ( $props as $k => $v ) {
if ( isset( $this->knownProperties[$k] ) && $props[$k] !== $this->knownProperties[$k] ) {
if ( isset( $this->knownProperties[$k] ) && $v !== $this->knownProperties[$k] ) {
$dups[] = $k;
}
}
if ( $dups ) {
throw new InvalidArgumentException(
'Duplicate definitions for properties: ' . join( ' ', $dups )
'Duplicate definitions for properties: ' . implode( ' ', $dups )
);
}
$this->setKnownProperties( $this->knownProperties + $props );
@ -98,6 +97,7 @@ class PropertySanitizer extends Sanitizer {
$this->cssWideKeywords = $matcher;
}
/** @inheritDoc */
protected function doSanitize( CSSObject $object ) {
if ( !$object instanceof Declaration ) {
$this->sanitizationError( 'expected-declaration', $object );
@ -112,8 +112,8 @@ class PropertySanitizer extends Sanitizer {
}
$list = $object->getValue();
if ( !$knownProperties[$name]->match( $list, [ 'mark-significance' => true ] ) &&
!$this->getCssWideKeywordsMatcher()->match( $list, [ 'mark-significance' => true ] )
if ( !$knownProperties[$name]->matchAgainst( $list, [ 'mark-significance' => true ] ) &&
!$this->getCssWideKeywordsMatcher()->matchAgainst( $list, [ 'mark-significance' => true ] )
) {
$cv = Util::findFirstNonWhitespace( $list );
if ( $cv ) {

View File

@ -6,9 +6,9 @@
namespace Wikimedia\CSS\Sanitizer;
use Wikimedia\CSS\Objects\Rule;
use Wikimedia\CSS\Objects\AtRule;
use Wikimedia\CSS\Objects\CSSFunction;
use Wikimedia\CSS\Objects\Rule;
use Wikimedia\CSS\Objects\SimpleBlock;
use Wikimedia\CSS\Objects\Token;
use Wikimedia\CSS\Parser\Parser;
@ -88,14 +88,17 @@ abstract class RuleSanitizer extends Sanitizer {
}
$significant = $cv instanceof CSSFunction ||
$cv instanceof Token &&
Token::separate( new Token( Token::T_AT_KEYWORD, $rule->getName() ), $cv );
( $cv instanceof Token &&
Token::separate( new Token( Token::T_AT_KEYWORD, $rule->getName() ), $cv )
);
// @phan-suppress-next-line PhanNonClassMethodCall False positive
if ( $prelude[0] instanceof Token && $prelude[0]->type() === Token::T_WHITESPACE ) {
// @phan-suppress-next-line PhanNonClassMethodCall False positive
$prelude[0] = $prelude[0]->copyWithSignificance( $significant );
} elseif ( $significant ) {
if ( $cloneIfNecessary ) {
$rule = clone( $rule );
$rule = clone $rule;
$prelude = $rule->getPrelude();
}
$prelude->add( new Token( Token::T_WHITESPACE ), 0 );

View File

@ -9,6 +9,7 @@ namespace Wikimedia\CSS\Sanitizer;
use Wikimedia\CSS\Objects\CSSObject;
use Wikimedia\CSS\Objects\CSSObjectList;
use Wikimedia\CSS\Objects\RuleList;
use Wikimedia\ScopedCallback;
/**
* Base class for CSS sanitizers
@ -26,6 +27,22 @@ abstract class Sanitizer {
return $this->sanitizationErrors;
}
/**
 * Set the current sanitization errors aside and start with an empty list
 *
 * The errors recorded so far are put back, replacing whatever has been
 * collected in the meantime, when the returned ScopedCallback goes out of
 * scope or is consumed.
 *
 * @return ScopedCallback
 */
public function stashSanitizationErrors() {
	$saved = $this->sanitizationErrors;
	// Build the restore hook before clearing, so the list is only emptied
	// once the hook exists.
	$restore = new ScopedCallback( function () use ( $saved ) {
		$this->sanitizationErrors = $saved;
	} );
	$this->sanitizationErrors = [];
	return $restore;
}
/**
* Clear sanitization errors
*/
@ -40,7 +57,7 @@ abstract class Sanitizer {
* @param array $data Extra data about the error.
*/
protected function sanitizationError( $tag, CSSObject $object, array $data = [] ) {
list( $line, $pos ) = $object->getPosition();
[ $line, $pos ] = $object->getPosition();
$this->sanitizationErrors[] = array_merge( [ $tag, $line, $pos ], $data );
}
@ -99,7 +116,7 @@ abstract class Sanitizer {
if ( $sanitizer->handlesRule( $rule ) ) {
$indexes = $sanitizer->getIndex();
if ( is_array( $indexes ) ) {
list( $testIndex, $setIndex ) = $indexes;
[ $testIndex, $setIndex ] = $indexes;
} else {
$testIndex = $setIndex = $indexes;
}

View File

@ -6,9 +6,9 @@
namespace Wikimedia\CSS\Sanitizer;
use Wikimedia\CSS\Grammar\MatcherFactory;
use Wikimedia\CSS\Objects\CSSObject;
use Wikimedia\CSS\Objects\DeclarationList;
use Wikimedia\CSS\Grammar\MatcherFactory;
use Wikimedia\CSS\Parser\Parser;
/**
@ -43,11 +43,10 @@ class StyleAttributeSanitizer extends Sanitizer {
$propertySanitizer = new StylePropertySanitizer( $matcherFactory );
// StyleAttributeSanitizer brings it all together
$sanitizer = new StyleAttributeSanitizer( $propertySanitizer );
return $sanitizer;
return new StyleAttributeSanitizer( $propertySanitizer );
}
/** @inheritDoc */
protected function doSanitize( CSSObject $object ) {
if ( !$object instanceof DeclarationList ) {
$this->sanitizationError( 'expected-declaration-list', $object );
@ -65,6 +64,7 @@ class StyleAttributeSanitizer extends Sanitizer {
$parser = Parser::newFromString( $string );
$declarations = $parser->parseDeclarationList();
$this->sanitizationErrors = array_merge( $this->sanitizationErrors, $parser->getParseErrors() );
// @phan-suppress-next-line PhanTypeMismatchReturnSuperType
return $this->sanitizeList( $this->propertySanitizer, $declarations );
}
}

View File

@ -6,11 +6,14 @@
namespace Wikimedia\CSS\Sanitizer;
use InvalidArgumentException;
use Wikimedia\CSS\Grammar\Juxtaposition;
use Wikimedia\CSS\Grammar\Matcher;
use Wikimedia\CSS\Grammar\MatcherFactory;
use Wikimedia\CSS\Objects\CSSObject;
use Wikimedia\CSS\Grammar\Quantifier;
use Wikimedia\CSS\Objects\ComponentValue;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\CSSObject;
use Wikimedia\CSS\Objects\QualifiedRule;
use Wikimedia\CSS\Objects\Rule;
use Wikimedia\CSS\Objects\Token;
@ -18,7 +21,7 @@ use Wikimedia\CSS\Util;
/**
* Sanitizes a CSS style rule
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#style-rules
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#style-rules
*/
class StyleRuleSanitizer extends RuleSanitizer {
@ -28,6 +31,9 @@ class StyleRuleSanitizer extends RuleSanitizer {
/** @var ComponentValue[] */
protected $prependSelectors;
/** @var Matcher|null */
protected $hoistableMatcher;
/** @var PropertySanitizer */
protected $propertySanitizer;
@ -37,21 +43,43 @@ class StyleRuleSanitizer extends RuleSanitizer {
* @param PropertySanitizer $propertySanitizer Sanitizer to test property declarations.
* Probably an instance of StylePropertySanitizer.
* @param array $options Additional options
* - prependSelectors: (ComponentValue[]) Prepend this to all selectors.
* Include trailing whitespace if necessary. Note $selectorMatcher must
* capture each selector with the name 'selector'.
* - prependSelectors: (ComponentValue[]) Prepend this (and a whitespace) to all selectors.
* Note: $selectorMatcher must capture each selector with the name 'selector'.
* - hoistableComponentMatcher: (Matcher) Component groups (simple selector sequences,
* in CSS3 Selectors terminology) matched by this will be hoisted before the prepended
* selector sequence. (To be more precise: the hoisted part is the longest prefix of
* the selector that only contains matching simple selector sequences and descendant
* combinators, and is not followed by a non-descendant combinator.)
* This can be used to allow filtering by top-level conditional classes/IDs emitted by
* some framework (e.g. html.no-js) while still jailing selectors into some subsection
* of the content. For example, if prependSelectors is equivalent to '#content' and
* hoistableComponentMatcher to [html|body]<simple selector>* will turn
* 'html.no-js body.ltr div.list' into 'html.no-js body.ltr #content div.list'.
* Note: $selectorMatcher must capture each simple selector group with the name 'simple'
* and the combinators with 'combinator'.
*/
public function __construct(
Matcher $selectorMatcher, PropertySanitizer $propertySanitizer, array $options = []
) {
$options += [
'prependSelectors' => [],
'hoistableComponentMatcher' => null,
];
Util::assertAllInstanceOf(
$options['prependSelectors'], ComponentValue::class, 'prependSelectors'
);
if ( $options['hoistableComponentMatcher'] !== null &&
!$options['hoistableComponentMatcher'] instanceof Matcher
) {
throw new InvalidArgumentException( 'hoistableComponentMatcher must be a Matcher' );
}
$matcherFactory = MatcherFactory::singleton();
// Add optional whitespace around the selector-matcher, because
// selector-matchers don't usually have it.
if ( !$selectorMatcher->getDefaultOptions()['skip-whitespace'] ) {
$ows = MatcherFactory::singleton()->optionalWhitespace();
$ows = $matcherFactory->optionalWhitespace();
$this->selectorMatcher = new Juxtaposition( [
$ows,
$selectorMatcher,
@ -64,12 +92,29 @@ class StyleRuleSanitizer extends RuleSanitizer {
$this->propertySanitizer = $propertySanitizer;
$this->prependSelectors = $options['prependSelectors'];
if ( $options['hoistableComponentMatcher'] ) {
$hoistablePrefixMatcher = new Juxtaposition( [
$options['hoistableComponentMatcher'],
Quantifier::star( new Juxtaposition( [
$matcherFactory->significantWhitespace(),
$options['hoistableComponentMatcher'],
] ) )
] );
$this->hoistableMatcher = new Juxtaposition( [
$hoistablePrefixMatcher->capture( 'prefix' ),
$matcherFactory->significantWhitespace()->capture( 'ws' ),
$matcherFactory->cssSelector()->capture( 'postfix' ),
] );
$this->hoistableMatcher->setDefaultOptions( [ 'skip-whitespace' => false ] );
}
}
/**
 * @inheritDoc
 *
 * Only qualified rules are handled by this sanitizer.
 */
public function handlesRule( Rule $rule ) {
	if ( $rule instanceof QualifiedRule ) {
		return true;
	}

	return false;
}
/** @inheritDoc */
protected function doSanitize( CSSObject $object ) {
if ( !$object instanceof QualifiedRule ) {
$this->sanitizationError( 'expected-qualified-rule', $object );
@ -77,7 +122,7 @@ class StyleRuleSanitizer extends RuleSanitizer {
}
// Test that the prelude is a valid selector list
$match = $this->selectorMatcher->match( $object->getPrelude(), [ 'mark-significance' => true ] );
$match = $this->selectorMatcher->matchAgainst( $object->getPrelude(), [ 'mark-significance' => true ] );
if ( !$match ) {
$cv = Util::findFirstNonWhitespace( $object->getPrelude() );
if ( $cv ) {
@ -88,26 +133,40 @@ class StyleRuleSanitizer extends RuleSanitizer {
return null;
}
$ret = clone( $object );
$ret = clone $object;
// If necessary, munge the selector list
if ( $this->prependSelectors ) {
$prelude = $ret->getPrelude();
$comma = [
new Token( Token::T_COMMA ),
new Token( Token::T_WHITESPACE, [ 'significant' => false ] )
];
$oldPrelude = $object->getPrelude();
$space = [
new Token( Token::T_WHITESPACE, [ 'significant' => true ] )
];
$prelude->clear();
foreach ( $match->getCapturedMatches() as $m ) {
if ( $m->getName() === 'selector' ) {
foreach ( $match->getCapturedMatches() as $selectorOrWs ) {
if ( $selectorOrWs->getName() === 'selector' ) {
if ( $prelude->count() ) {
$prelude->add( $comma );
}
$prelude->add( $this->prependSelectors );
$prelude->add( $m->getValues() );
} elseif ( $m->getName() === 'trailingWS' && $m->getLength() > 0 ) {
$prelude->add( $m->getValues() );
$valueList = new ComponentValueList( $selectorOrWs->getValues() );
$hoistMatch = $this->hoistableMatcher ? $this->hoistableMatcher->matchAgainst( $valueList ) : null;
if ( $hoistMatch ) {
[ $prefix, , $postfix ] = $hoistMatch->getCapturedMatches();
$prelude->add( $prefix->getValues() );
$prelude->add( $space );
$prelude->add( $this->prependSelectors );
$prelude->add( $space );
$prelude->add( $postfix->getValues() );
} else {
$prelude->add( $this->prependSelectors );
$prelude->add( $space );
$prelude->add( $valueList );
}
} elseif ( $selectorOrWs->getName() === 'trailingWS' && $selectorOrWs->getLength() > 0 ) {
$prelude->add( $selectorOrWs->getValues() );
}
}
}
@ -116,4 +175,5 @@ class StyleRuleSanitizer extends RuleSanitizer {
return $ret;
}
}

View File

@ -14,7 +14,7 @@ use Wikimedia\CSS\Util;
/**
* Sanitizes a CSS stylesheet or rule list
* @see https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#css-stylesheets
* @see https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#css-stylesheets
*/
class StylesheetSanitizer extends Sanitizer {
@ -52,7 +52,6 @@ class StylesheetSanitizer extends Sanitizer {
$ruleSanitizers = [
'style' => new StyleRuleSanitizer( $matcherFactory->cssSelectorList(), $propertySanitizer ),
'@font-face' => new FontFaceAtRuleSanitizer( $matcherFactory ),
'@font-feature-values' => new FontFeatureValuesAtRuleSanitizer( $matcherFactory ),
'@keyframes' => new KeyframesAtRuleSanitizer( $matcherFactory, $propertySanitizer ),
'@page' => new PageAtRuleSanitizer( $matcherFactory, $propertySanitizer ),
'@media' => new MediaAtRuleSanitizer( $matcherFactory->cssMediaQueryList() ),
@ -66,15 +65,15 @@ class StylesheetSanitizer extends Sanitizer {
$ruleSanitizers['@supports']->setRuleSanitizers( $ruleSanitizers );
// Now we can put together the StylesheetSanitizer
$sanitizer = new StylesheetSanitizer( $ruleSanitizers + [
return new StylesheetSanitizer( $ruleSanitizers + [
// Note there's intentionally no "@charset" sanitizer, as that at-rule
// was removed in the Editor's Draft in favor of special handling
// in the parser.
'@import' => new ImportAtRuleSanitizer( $matcherFactory ),
'@import' => new ImportAtRuleSanitizer( $matcherFactory, [
'declarationSanitizer' => $propertySanitizer,
] ),
'@namespace' => new NamespaceAtRuleSanitizer( $matcherFactory ),
] );
return $sanitizer;
}
/**
@ -94,9 +93,11 @@ class StylesheetSanitizer extends Sanitizer {
$this->ruleSanitizers = $ruleSanitizers;
}
/** @inheritDoc */
protected function doSanitize( CSSObject $object ) {
$isSheet = $object instanceof Stylesheet;
if ( $isSheet ) {
'@phan-var Stylesheet $object';
$object = $object->getRuleList();
}
if ( !$object instanceof RuleList ) {

View File

@ -6,24 +6,11 @@
namespace Wikimedia\CSS\Sanitizer;
use Wikimedia\CSS\Grammar\Alternative;
use Wikimedia\CSS\Grammar\AnythingMatcher;
use Wikimedia\CSS\Grammar\BlockMatcher;
use Wikimedia\CSS\Grammar\CheckedMatcher;
use Wikimedia\CSS\Grammar\FunctionMatcher;
use Wikimedia\CSS\Grammar\Juxtaposition;
use Wikimedia\CSS\Grammar\KeywordMatcher;
use Wikimedia\CSS\Grammar\Match;
use Wikimedia\CSS\Grammar\Matcher;
use Wikimedia\CSS\Grammar\MatcherFactory;
use Wikimedia\CSS\Grammar\NothingMatcher;
use Wikimedia\CSS\Grammar\Quantifier;
use Wikimedia\CSS\Objects\AtRule;
use Wikimedia\CSS\Objects\CSSObject;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\Rule;
use Wikimedia\CSS\Objects\Token;
use Wikimedia\CSS\Parser\Parser;
use Wikimedia\CSS\Util;
/**
@ -41,76 +28,14 @@ class SupportsAtRuleSanitizer extends RuleSanitizer {
/**
* @param MatcherFactory $matcherFactory
* @param array $options Additional options:
* strict: (bool) Only accept defined syntax. Default true.
* declarationSanitizer: (PropertySanitizer) Check declarations against this Sanitizer.
* - strict: (bool) Only accept defined syntax. Default true.
* - declarationSanitizer: (PropertySanitizer) Check declarations against this Sanitizer.
*/
public function __construct( MatcherFactory $matcherFactory, array $options = [] ) {
$options += [
'strict' => true,
];
$declarationSanitizer = null;
if ( isset( $options['declarationSanitizer'] ) ) {
$declarationSanitizer = $options['declarationSanitizer'];
if ( !$declarationSanitizer instanceof PropertySanitizer ) {
throw new \InvalidArgumentException(
'declarationSanitizer must be an instance of ' . PropertySanitizer::class
);
}
}
$ws = $matcherFactory->significantWhitespace();
$anythingPlus = new AnythingMatcher( [ 'quantifier' => '+' ] );
if ( $options['strict'] ) {
$generalEnclosed = new NothingMatcher();
} else {
$generalEnclosed = new Alternative( [
new FunctionMatcher( null, $anythingPlus ),
new BlockMatcher( Token::T_LEFT_PAREN, new Juxtaposition( [
$matcherFactory->ident(), $anythingPlus
] ) ),
] );
}
$supportsConditionBlock = new NothingMatcher(); // temp
$supportsConditionInParens = new Alternative( [
&$supportsConditionBlock,
new BlockMatcher( Token::T_LEFT_PAREN, new CheckedMatcher(
$anythingPlus,
function ( ComponentValueList $list, Match $match, array $options )
use ( $declarationSanitizer )
{
$cvlist = new ComponentValueList( $match->getValues() );
$parser = Parser::newFromTokens( $cvlist->toTokenArray() );
$declaration = $parser->parseDeclaration();
if ( $parser->getParseErrors() || !$declaration ) {
return false;
}
if ( !$declarationSanitizer ) {
return true;
}
$oldErrors = $declarationSanitizer->sanitizationErrors;
$ret = $declarationSanitizer->doSanitize( $declaration );
$errors = $declarationSanitizer->getSanitizationErrors();
$declarationSanitizer->sanitizationErrors = $oldErrors;
return $ret === $declaration && !$errors;
}
) ),
$generalEnclosed,
] );
$supportsCondition = new Alternative( [
new Juxtaposition( [ new KeywordMatcher( 'not' ), $ws, $supportsConditionInParens ] ),
new Juxtaposition( [ $supportsConditionInParens, Quantifier::plus( new Juxtaposition( [
$ws, new KeywordMatcher( 'and' ), $ws, $supportsConditionInParens
] ) ) ] ),
new Juxtaposition( [ $supportsConditionInParens, Quantifier::plus( new Juxtaposition( [
$ws, new KeywordMatcher( 'or' ), $ws, $supportsConditionInParens
] ) ) ] ),
$supportsConditionInParens,
] );
$supportsConditionBlock = new BlockMatcher( Token::T_LEFT_PAREN, $supportsCondition );
$this->conditionMatcher = $supportsCondition;
$this->conditionMatcher = $matcherFactory->cssSupportsCondition(
$options['declarationSanitizer'] ?? null,
$options['strict'] ?? true
);
}
/**
@ -130,12 +55,14 @@ class SupportsAtRuleSanitizer extends RuleSanitizer {
$this->ruleSanitizers = $ruleSanitizers;
}
/**
 * @inheritDoc
 *
 * Only at-rules whose name is "supports" (compared case-insensitively) are handled.
 */
public function handlesRule( Rule $rule ) {
	if ( !$rule instanceof AtRule ) {
		return false;
	}

	// strcasecmp() yields 0 exactly when the two names match ignoring case.
	return strcasecmp( $rule->getName(), 'supports' ) === 0;
}
/** @inheritDoc */
protected function doSanitize( CSSObject $object ) {
if ( !$object instanceof Rule || !$this->handlesRule( $object ) ) {
if ( !$object instanceof AtRule || !$this->handlesRule( $object ) ) {
$this->sanitizationError( 'expected-at-rule', $object, [ 'supports' ] );
return null;
}
@ -146,7 +73,7 @@ class SupportsAtRuleSanitizer extends RuleSanitizer {
}
// Test the supports condition
if ( !$this->conditionMatcher->match( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
if ( !$this->conditionMatcher->matchAgainst( $object->getPrelude(), [ 'mark-significance' => true ] ) ) {
$cv = Util::findFirstNonWhitespace( $object->getPrelude() );
if ( $cv ) {
$this->sanitizationError( 'invalid-supports-condition', $cv );
@ -156,7 +83,7 @@ class SupportsAtRuleSanitizer extends RuleSanitizer {
return null;
}
$ret = clone( $object );
$ret = clone $object;
$this->fixPreludeWhitespace( $ret, false );
$this->sanitizeRuleBlock( $ret->getBlock(), $this->ruleSanitizers );

View File

@ -6,6 +6,7 @@
namespace Wikimedia\CSS;
use InvalidArgumentException;
use Wikimedia\CSS\Objects\ComponentValue;
use Wikimedia\CSS\Objects\ComponentValueList;
use Wikimedia\CSS\Objects\CSSObject;
@ -22,13 +23,13 @@ class Util {
* @param array $array
* @param string $class
* @param string $what Describe the array being checked
* @throws \InvalidArgumentException
* @throws InvalidArgumentException
*/
public static function assertAllInstanceOf( array $array, $class, $what ) {
foreach ( $array as $k => $v ) {
if ( !$v instanceof $class ) {
$vtype = is_object( $v ) ? get_class( $v ) : gettype( $v );
throw new \InvalidArgumentException(
throw new InvalidArgumentException(
"$what may only contain instances of $class" .
" (found $vtype at index $k)"
);
@ -37,23 +38,23 @@ class Util {
}
/**
* Check that a set of tokens are all of the same type
* Check that a set of tokens are all the same type
* @param Token[] $array
* @param string $type
* @param string $what Describe the array being checked
* @throws \InvalidArgumentException
* @throws InvalidArgumentException
*/
public static function assertAllTokensOfType( array $array, $type, $what ) {
foreach ( $array as $k => $v ) {
if ( !$v instanceof Token ) {
$vtype = is_object( $v ) ? get_class( $v ) : gettype( $v );
throw new \InvalidArgumentException(
throw new InvalidArgumentException(
"$what may only contain instances of " . Token::class .
" (found $vtype at index $k)"
);
}
if ( $v->type() !== $type ) {
throw new \InvalidArgumentException(
throw new InvalidArgumentException(
"$what may only contain \"$type\" tokens" .
" (found \"{$v->type()}\" at index $k)"
);
@ -68,7 +69,7 @@ class Util {
*/
public static function findFirstNonWhitespace( $list ) {
if ( !$list instanceof TokenList && !$list instanceof ComponentValueList ) {
throw new \InvalidArgumentException( 'List must be TokenList or ComponentValueList' );
throw new InvalidArgumentException( 'List must be TokenList or ComponentValueList' );
}
foreach ( $list as $v ) {
if ( !$v instanceof Token || $v->type() !== Token::T_WHITESPACE ) {
@ -80,13 +81,19 @@ class Util {
/**
* Turn a CSSObject into a string
* @param CSSObject $object
* @param array $options Serialziation options:
* @param CSSObject|CSSObject[] $object
* @param array $options Serialization options:
* - minify: (bool) Skip comments and insignificant tokens
* @return string
*/
public static function stringify( CSSObject $object, $options = [] ) {
$tokens = $object->toTokenArray();
public static function stringify( $object, $options = [] ) {
if ( is_array( $object ) ) {
$tokens = array_reduce( $object, static function ( array $carry, CSSObject $item ) {
return array_merge( $carry, $item->toTokenArray() );
}, [] );
} else {
$tokens = $object->toTokenArray();
}
if ( !$tokens ) {
return '';
}
@ -97,23 +104,30 @@ class Util {
for ( $i = 1; $i < $e; $i++ ) {
$t = $tokens[$i];
if ( $t->type() === Token::T_WHITESPACE && !$t->significant() &&
Token::separate( $tokens[$i-1], $tokens[$i+1] )
Token::separate( $tokens[$i - 1], $tokens[$i + 1] )
) {
$tokens[$i] = $t->copyWithSignificance( true );
}
}
// Filter!
$tokens = array_filter( $tokens, function ( $t ) {
$tokens = array_filter( $tokens, static function ( $t ) {
return $t->significant();
} );
}
$prev = reset( $tokens );
$ret = (string)$prev;
$urangeHack = 0;
while ( ( $token = next( $tokens ) ) !== false ) {
if ( Token::separate( $prev, $token ) ) {
// Per https://www.w3.org/TR/2014/CR-css-syntax-3-20140220/#serialization
// Avoid serializing tokens that are part of a <urange> with extraneous comments
// by checking for a hack-flag in the type.
// @see Wikimedia\CSS\Grammar\UrangeMatcher
// @phan-suppress-next-line PhanAccessMethodInternal
$urangeHack = max( $urangeHack, $prev->urangeHack() );
if ( --$urangeHack <= 0 && Token::separate( $prev, $token ) ) {
// Per https://www.w3.org/TR/2019/CR-css-syntax-3-20190716/#serialization
$ret .= '/**/';
}
$ret .= (string)$token;