recentchanges: Back out QueryRateEstimator
Per my production testing, it doesn't work well enough to be usable
right now. It adds overhead which isn't justified.
Bug: T403798
Change-Id: I4e2fff3bffddb400b7f90eeaaa6bf536421d4571
diff --git a/autoload.php b/autoload.php
index 9d17de5..291b05a 100644
--- a/autoload.php
+++ b/autoload.php
@@ -2427,7 +2427,6 @@
'MediaWiki\\RecentChanges\\ChangesListQuery\\NamedCondition' => __DIR__ . '/includes/recentchanges/ChangesListQuery/NamedCondition.php',
'MediaWiki\\RecentChanges\\ChangesListQuery\\NamedConditionHelper' => __DIR__ . '/includes/recentchanges/ChangesListQuery/NamedConditionHelper.php',
'MediaWiki\\RecentChanges\\ChangesListQuery\\QueryBackend' => __DIR__ . '/includes/recentchanges/ChangesListQuery/QueryBackend.php',
- 'MediaWiki\\RecentChanges\\ChangesListQuery\\QueryRateEstimator' => __DIR__ . '/includes/recentchanges/ChangesListQuery/QueryRateEstimator.php',
'MediaWiki\\RecentChanges\\ChangesListQuery\\RevisionTypeCondition' => __DIR__ . '/includes/recentchanges/ChangesListQuery/RevisionTypeCondition.php',
'MediaWiki\\RecentChanges\\ChangesListQuery\\SeenCondition' => __DIR__ . '/includes/recentchanges/ChangesListQuery/SeenCondition.php',
'MediaWiki\\RecentChanges\\ChangesListQuery\\SubpageOfCondition' => __DIR__ . '/includes/recentchanges/ChangesListQuery/SubpageOfCondition.php',
diff --git a/includes/ServiceWiring.php b/includes/ServiceWiring.php
index 6fb3e1f..b42957b 100644
--- a/includes/ServiceWiring.php
+++ b/includes/ServiceWiring.php
@@ -544,7 +544,6 @@
$services->getUserFactory(),
$services->getLinkTargetLookup(),
$services->getChangeTagsStore(),
- $services->getObjectCacheFactory(),
$services->getStatsFactory(),
LoggerFactory::getInstance( 'ChangesListQuery' ),
$services->getConnectionProvider(),
diff --git a/includes/recentchanges/ChangesListQuery/ChangesListQuery.php b/includes/recentchanges/ChangesListQuery/ChangesListQuery.php
index c7d741a7..5015fcb 100644
--- a/includes/recentchanges/ChangesListQuery/ChangesListQuery.php
+++ b/includes/recentchanges/ChangesListQuery/ChangesListQuery.php
@@ -20,13 +20,11 @@
use MediaWiki\User\UserFactory;
use MediaWiki\User\UserIdentity;
use MediaWiki\Watchlist\WatchedItemStoreInterface;
-use ObjectCacheFactory;
use Psr\Log\LoggerInterface;
use stdClass;
use Wikimedia\Rdbms\IExpression;
use Wikimedia\Rdbms\IReadableDatabase;
use Wikimedia\Rdbms\IResultWrapper;
-use Wikimedia\Rdbms\Platform\ISQLPlatform;
use Wikimedia\Rdbms\RawSQLExpression;
use Wikimedia\Rdbms\SelectQueryBuilder;
use Wikimedia\Stats\StatsFactory;
@@ -139,7 +137,6 @@
private UserFactory $userFactory,
private LinkTargetLookup $linkTargetLookup,
private ChangeTagsStore $changeTagsStore,
- private ObjectCacheFactory $objectCacheFactory,
private StatsFactory $statsFactory,
private LoggerInterface $logger,
private IReadableDatabase $db,
@@ -1051,30 +1048,23 @@
* @param stdClass[] &$rows
*/
private function doPartitionQuery( SelectQueryBuilder $sqb, &$rows ) {
- $queryHash = $this->getCondsHash( $sqb );
$now = ConvertibleTimestamp::time();
$minTime = (int)ConvertibleTimestamp::convert( TS_UNIX,
$this->minTimestamp ?? $now - $this->rcMaxAge );
$limit = $this->limit ?? 10_000;
- $rateStore = $this->newRateEstimator( $queryHash );
- $countsByBucket = [];
- $bucketPeriod = $rateStore->getBucketPeriod();
- $rate = $rateStore->fetchRate( $minTime, $now );
$rcSize = $this->rcStats->getIdDelta();
- $this->logger->debug( 'Beginning partition request with ' .
- 'rate={rate}, density={density}, period={period}',
+ $this->logger->debug( 'Beginning partition request with density={density}, period={period}',
[
'period' => $now - $minTime,
'limit' => $limit,
- 'rate' => $rate,
'density' => $this->density,
'rcSize' => $rcSize,
]
);
$partitioner = new TimestampRangePartitioner( $minTime, $now, $limit,
- $rate, $this->density, $rcSize, $this->rcMaxAge );
+ null, $this->density, $rcSize, $this->rcMaxAge );
do {
[ $min, $max, $limit ] = $partitioner->getNextPartition();
@@ -1089,23 +1079,17 @@
}
$partitionQuery->limit( $limit );
- $time = null;
+ $row = null;
$res = $partitionQuery->fetchResultSet();
foreach ( $res as $row ) {
$rows[] = $row;
- $time = (int)ConvertibleTimestamp::convert( TS_UNIX, $row->rc_timestamp );
- $bucket = (int)( $time / $bucketPeriod );
- $countsByBucket[$bucket] = ( $countsByBucket[$bucket] ?? 0 ) + 1;
}
- $partitioner->notifyResult( $time, $res->numRows() );
+ $partitioner->notifyResult(
+ $row ? (int)ConvertibleTimestamp::convert( TS_UNIX, $row->rc_timestamp ) : null,
+ $res->numRows()
+ );
} while ( !$partitioner->isDone() );
- $rateStore->storeCounts(
- $countsByBucket,
- $partitioner->getMinFoundTime() ?? $minTime,
- $now
- );
-
$m = $partitioner->getMetrics();
$this->logger->debug( 'Finished partition request: ' .
'got {actualRows} rows in {queryCount} queries, period={actualPeriod}',
@@ -1123,34 +1107,6 @@
}
/**
- * Get a string hash describing the query conditions
- *
- * @param SelectQueryBuilder $sqb
- * @return string
- */
- private function getCondsHash( SelectQueryBuilder $sqb ): string {
- $info = $sqb->getQueryInfo();
- $blob = $this->db->makeList( $info['conds'], ISQLPlatform::LIST_AND );
- foreach ( $info['join_conds'] as $join ) {
- if ( is_string( $join[1] ) ) {
- $joinCond = $join[1];
- } else {
- $joinCond = $this->db->makeList( $join[1], ISQLPlatform::LIST_AND );
- }
- $blob .= ' AND ' . $joinCond;
- }
- return md5( $blob );
- }
-
- private function newRateEstimator( string $key ): QueryRateEstimator {
- return new QueryRateEstimator(
- $this->objectCacheFactory->getLocalClusterInstance(),
- $this->rcMaxAge,
- $key
- );
- }
-
- /**
* This is called from ChangesListResult (via a closure) to evaluate the
* highlights.
*
diff --git a/includes/recentchanges/ChangesListQuery/ChangesListQueryFactory.php b/includes/recentchanges/ChangesListQuery/ChangesListQueryFactory.php
index e81c6fa..d973bb0 100644
--- a/includes/recentchanges/ChangesListQuery/ChangesListQueryFactory.php
+++ b/includes/recentchanges/ChangesListQuery/ChangesListQueryFactory.php
@@ -9,7 +9,6 @@
use MediaWiki\User\TempUser\TempUserConfig;
use MediaWiki\User\UserFactory;
use MediaWiki\Watchlist\WatchedItemStoreInterface;
-use ObjectCacheFactory;
use Psr\Log\LoggerInterface;
use Wikimedia\Rdbms\IConnectionProvider;
use Wikimedia\Stats\StatsFactory;
@@ -28,7 +27,6 @@
private UserFactory $userFactory,
private LinkTargetLookup $linkTargetLookup,
private ChangeTagsStore $changeTagsStore,
- private ObjectCacheFactory $objectCacheFactory,
private StatsFactory $statsFactory,
private LoggerInterface $logger,
private IConnectionProvider $connectionProvider,
@@ -49,7 +47,6 @@
$this->userFactory,
$this->linkTargetLookup,
$this->changeTagsStore,
- $this->objectCacheFactory,
$this->statsFactory,
$this->logger,
$this->connectionProvider->getReplicaDatabase(),
diff --git a/includes/recentchanges/ChangesListQuery/QueryRateEstimator.php b/includes/recentchanges/ChangesListQuery/QueryRateEstimator.php
deleted file mode 100644
index 02ad6fe..0000000
--- a/includes/recentchanges/ChangesListQuery/QueryRateEstimator.php
+++ /dev/null
@@ -1,117 +0,0 @@
-<?php
-
-namespace MediaWiki\RecentChanges\ChangesListQuery;
-
-use Wikimedia\ObjectCache\BagOStuff;
-
-/**
- * Estimate the query rate, that is, the number of rows per second of timestamp
- * interval likely to be returned by a query, by storing the number of results
- * returned by previous queries with identical conditions.
- *
- * Counts are stored by timestamp bucket, and it's assumed that rows will be
- * added to a bucket but not deleted, so we update by taking the maximum of
- * the existing and new value.
- *
- * An object instance relates to a single fetch and update for a single query.
- *
- * All timestamps are UNIX time.
- *
- * @since 1.45
- */
-class QueryRateEstimator {
- /** @var array The previously fetched results indexed by cache key */
- private $results = [];
-
- /**
- * @param BagOStuff $cache
- * @param int|float $maxAge The maximum age of rows in the table, in seconds
- * @param string $key The query identifier
- */
- public function __construct(
- private BagOStuff $cache,
- private int|float $maxAge,
- private string $key,
- ) {
- }
-
- /**
- * Get the period covered by a bucket in seconds.
- *
- * @return int
- */
- public function getBucketPeriod(): int {
- // Use coarse buckets, otherwise the cure may be worse than the disease.
- // We don't want it to take longer to fetch the metrics than to run the
- // query.
- return (int)( $this->maxAge / 30 );
- }
-
- /**
- * Get the best estimate of the query rate, or null if it is unknown.
- *
- * @param int $startTime The start of the planned timestamp range
- * @param int $endTime The end of the planned timestamp range
- * @return float|int|null
- */
- public function fetchRate( int $startTime, int $endTime ) {
- $cache = $this->cache;
- $period = $this->getBucketPeriod();
- $startBucket = (int)( $startTime / $period );
- $endBucket = (int)( $endTime / $period );
- $keys = [];
- for ( $b = $startBucket; $b <= $endBucket; $b++ ) {
- $keys[$b] = $cache->makeKey( 'ChangesListQueryRate', $this->key, $b );
- }
- $this->results = $cache->getMulti( $keys );
- if ( !$this->results ) {
- return null;
- }
-
- $totalPeriod = 0;
- $totalCount = 0;
- foreach ( $keys as $b => $key ) {
- if ( !isset( $this->results[$key] ) ) {
- // No information about this bucket, don't include in rate
- continue;
- }
- if ( $b === $endBucket ) {
- // Don't round up the end time, since it's presumed to be now,
- // so the rate is zero after it
- $totalPeriod += $endTime - $b * $period;
- } else {
- $totalPeriod += $period;
- }
- $totalCount += $this->results[$key];
- }
- return $totalCount / $totalPeriod;
- }
-
- /**
- * Store the observed row counts after the query completes.
- *
- * @param int[] $counts The number of rows in each timestamp bucket. The
- * key is the current UNIX time divided by the bucket period, rounded
- * down to an integer.
- * @param int $startTime The start of the observed timestamp range
- * @param int $endTime The end of the observed timestamp range
- */
- public function storeCounts( array $counts, int $startTime, int $endTime ) {
- $cache = $this->cache;
- $period = $this->getBucketPeriod();
- $startBucket = (int)( $startTime / $period );
- $endBucket = (int)( $endTime / $period );
- $batch = [];
- for ( $b = $startBucket; $b <= $endBucket; $b++ ) {
- $bucketKey = $cache->makeKey( 'ChangesListQueryRate', $this->key, $b );
- $prevResult = $this->results[$bucketKey] ?? null;
- $newResult = $counts[$b] ?? 0;
- if ( $prevResult === null || $prevResult < $newResult ) {
- $batch[$bucketKey] = $newResult;
- }
- }
- if ( $batch ) {
- $cache->setMulti( $batch, $this->maxAge );
- }
- }
-}
diff --git a/tests/phpunit/includes/recentchanges/ChangesListQuery/ChangesListQueryTest.php b/tests/phpunit/includes/recentchanges/ChangesListQuery/ChangesListQueryTest.php
index c71156a..bfed352 100644
--- a/tests/phpunit/includes/recentchanges/ChangesListQuery/ChangesListQueryTest.php
+++ b/tests/phpunit/includes/recentchanges/ChangesListQuery/ChangesListQueryTest.php
@@ -230,7 +230,6 @@
$services->getUserFactory(),
$services->getLinkTargetLookup(),
$services->getChangeTagsStore(),
- $services->getObjectCacheFactory(),
$services->getStatsFactory(),
$options['logger'] ?? LoggerFactory::getInstance( 'ChangesListQuery' ),
$services->getConnectionProvider(),
diff --git a/tests/phpunit/includes/recentchanges/ChangesListQuery/QueryRateEstimatorTest.php b/tests/phpunit/includes/recentchanges/ChangesListQuery/QueryRateEstimatorTest.php
deleted file mode 100644
index e10492a..0000000
--- a/tests/phpunit/includes/recentchanges/ChangesListQuery/QueryRateEstimatorTest.php
+++ /dev/null
@@ -1,83 +0,0 @@
-<?php
-
-namespace MediaWiki\Tests\RecentChanges\ChangesListQuery;
-
-use MediaWiki\RecentChanges\ChangesListQuery\QueryRateEstimator;
-use Wikimedia\ObjectCache\HashBagOStuff;
-
-/**
- * @covers \MediaWiki\RecentChanges\ChangesListQuery\QueryRateEstimator
- */
-class QueryRateEstimatorTest extends \MediaWikiIntegrationTestCase {
- /** @var float|int */
- private $now = 3.5 * 86_400;
-
- /**
- * @return array{QueryRateEstimator, HashBagOStuff}
- */
- private function createQueryRateEstimator() {
- $cache = new HashBagOStuff;
- $cache->setMockTime( $this->now );
- $qre = new QueryRateEstimator(
- $cache,
- 30 * 86_400,
- 'test'
- );
- return [ $qre, $cache ];
- }
-
- /**
- * @param HashBagOStuff $cache
- * @param int[] $buckets
- * @return string[]
- */
- private function makeKeys( $cache, $buckets ) {
- $res = [];
- foreach ( $buckets as $b ) {
- $res[] = $cache->makeKey( 'ChangesListQueryRate', 'test', $b );
- }
- return $res;
- }
-
- public function testFetchRateStoreRate() {
- [ $qre, $cache ] = $this->createQueryRateEstimator();
- $start = 0;
- $day = 86_400;
- $end = $this->now;
- $bucketPeriod = $qre->getBucketPeriod();
- // Just an assumption of the test, it's not an interface requirement
- $this->assertSame( $day, $bucketPeriod );
-
- // Empty state
- $rate = $qre->fetchRate( $start, $end );
- $this->assertNull( $rate );
-
- // Observe some results
- $qre->storeCounts( [ 2 => 100, 3 => 50 ], 2 * $day, $end );
-
- // It should have stored results for buckets 2 and 3
- $keys = $this->makeKeys( $cache, [ 2, 3 ] );
- $res = $cache->getMulti( $keys );
- $expected = [ $keys[0] => 100, $keys[1] => 50 ];
- $this->assertSame( $expected, $res );
-
- // Now we have a non-null rate estimate
- $rate = $qre->fetchRate( $start, $end );
- // There's 150 over 1.5 days that we know of
- $this->assertEqualsWithDelta( 150 / 1.5 / $day, $rate, 1e-9 );
-
- // We found some rows from day 1 now
- $qre->storeCounts( [ 1 => 100, 2 => 100, 3 => 50 ], 2 * $day, $end );
- // Now there's 250 over 2.5 days
- $rate = $qre->fetchRate( $start, $end );
- $this->assertEqualsWithDelta( 250 / 2.5 / $day, $rate, 1e-9 );
-
- // Some more changes happened
- $this->now += 0.25 * $day;
- $end = $this->now;
- $qre->storeCounts( [ 2 => 0, 3 => 75 ], 2 * $day, $end );
- // Counts only go up, they don't go down, so now we have 275 over 2.75 days
- $rate = $qre->fetchRate( $start, $end );
- $this->assertEqualsWithDelta( 275 / 2.75 / $day, $rate, 1e-9 );
- }
-}