Skip to content

Commit

Permalink
Use RemexHtml to properly remove thumbnails
Browse files Browse the repository at this point in the history
  • Loading branch information
edwardspec committed Dec 30, 2024
1 parent 8247ebe commit 1828250
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 11 deletions.
32 changes: 24 additions & 8 deletions includes/HtmlSanitizer.php
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,9 @@ public function startDocument( $fragmentNamespace, $fragmentName ) {

/** @inheritDoc */
public function element( SerializerNode $parent, SerializerNode $node, $contents ) {
$typeof = $node->attrs['typeof'] ?? null;
$classes = explode( ' ', $node->attrs['class'] ?? '' );

switch ( $node->name ) {
// Remove everything outside the <body> tag.
case 'head':
Expand All @@ -55,20 +58,33 @@ public function element( SerializerNode $parent, SerializerNode $node, $contents
case 'body':
return $contents;

case 'img':
// Remove the image tags: in 99,9% of cases they are too wide
// to be included into the calendar.
// Not needed in MediaWiki 1.40+ (already removed with the <span> below).
return '';
case 'a':
// MediaWiki 1.39 only: remove the links around non-thumbnail images.
if ( in_array( 'image', $classes ) ) {
return '';
}
break;

case 'div':
// MediaWiki 1.39 only: remove wrapper around the thumbnail.
if ( in_array( 'thumb', $classes ) ) {
return '';
}
break;

case 'span':
if ( ( $node->attrs['typeof'] ?? '' ) === 'mw:File' ) {
// Wrapper around the image.
// MediaWiki 1.40+: remove the wrappers around non-thumbnail images.
if ( $typeof === 'mw:File' ) {
return '';
}
break;

// TODO: properly remove <div class="thumb"> with all contents (currently hidden by CSS).
case 'figure':
// MediaWiki 1.40+: remove wrapper around the thumbnail.
if ( $typeof === 'mw:File/Thumb' ) {
return '';
}
break;

case 'p':
// Remove trailing newline inside <p> tags.
Expand Down
19 changes: 16 additions & 3 deletions tests/phpunit/EventCalendarTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -774,10 +774,12 @@ public function testSnippetForCompressedRevision() {

/**
* Verify that unwanted parts of HTML (such as images) are removed from the snippet.
* @dataProvider dataProviderSnippetSanitizer
* @param bool $isThumb True to embed the file as a thumbnail ([[File:...|thumb]]), false to embed it as a plain inline image.
*/
public function testSnippetSanitizer() {
$filename = 'Testimage.png';
$pageText = "Expected snippet [[File:$filename]]";
public function testSnippetSanitizer( $isThumb ) {
$filename = 'Testimage' . ( $isThumb ? '1' : '2' ) . '.png';
$pageText = 'Expected snippet [[File:' . $filename . ( $isThumb ? '|thumb' : '' ) . ']]';
$expectedSnippet = '<p>Expected snippet</p>';

// Upload a test file, so that the [[File:]] syntax renders an actual image (or thumbnail) rather than a redlink.
Expand All @@ -800,6 +802,17 @@ public function testSnippetSanitizer() {
$this->assertSame( $expectedSnippet, $actualData[0]['title'] );
}

/**
 * Provides datasets for testSnippetSanitizer().
 *
 * Declared static because the provider needs no instance state, and newer PHPUnit
 * versions expect data providers to be static (older versions accept it as well).
 *
 * @return array[] Map of dataset name => [ bool $isThumb ], where $isThumb is
 *  false for a plain image and true for a thumbnail.
 */
public static function dataProviderSnippetSanitizer() {
	return [
		'image (not a thumbnail)' => [ false ],
		'thumbnail' => [ true ],
	];
}

/**
* Verify that parameters like height=300 and aspectratio=1.5 are provided to JavaScript library.
* @dataProvider dataProviderOptionalAttributes
Expand Down

0 comments on commit 1828250

Please sign in to comment.