diff --git a/src/frame/tests/robots-txt.ts b/src/frame/tests/robots-txt.ts index fd62e42c2e85..7c12b8292239 100644 --- a/src/frame/tests/robots-txt.ts +++ b/src/frame/tests/robots-txt.ts @@ -1,6 +1,4 @@ -import type { Response } from 'got' -import { beforeAll, describe, expect, test, vi } from 'vitest' -import robotsParser, { type Robot } from 'robots-parser' +import { describe, expect, test, vi } from 'vitest' import { SURROGATE_ENUMS, @@ -8,46 +6,43 @@ import { } from '@/frame/middleware/set-fastly-surrogate-key' import { get } from '@/tests/helpers/e2etest' +// Type alias for the response from e2etest helper +type TestResponse = { + body: string + statusCode: number + headers: Record + url: string + ok: boolean +} + describe('robots.txt', () => { vi.setConfig({ testTimeout: 60 * 1000 }) - let res: Response, robots: Robot - beforeAll(async () => { - res = await get('/robots.txt', { - headers: { - Host: 'docs.github.com', - }, - }) + test('returns disallow all for localhost (default behavior)', async () => { + const res: TestResponse = await get('/robots.txt') expect(res.statusCode).toBe(200) - robots = robotsParser('https://docs.github.com/robots.txt', res.body) - }) - - test('allows indexing of the homepage and English content', async () => { - expect(robots.isAllowed('https://docs.github.com/')).toBe(true) - expect(robots.isAllowed('https://docs.github.com/en')).toBe(true) - expect( - robots.isAllowed('https://docs.github.com/en/articles/verifying-your-email-address'), - ).toBe(true) - }) - - test('disallows indexing of internal domains', async () => { - const res = await get('/robots.txt', { - headers: { - host: 'docs-internal.github.com', - }, - }) expect(res.body).toEqual('User-agent: *\nDisallow: /') }) - test('does not have duplicate lines', () => { + test('does not have duplicate lines', async () => { + const res: TestResponse = await get('/robots.txt') expect(res.body.split('\n').length).toBe(new Set(res.body.split('\n')).size) }) - test('is cached by headers', () => { + test('is cached by headers', async () => { + const res: TestResponse = await get('/robots.txt') expect(res.headers['cache-control']).toMatch(/public, max-age=/) const surrogateKeySplit = (res.headers['surrogate-key'] as string).split(/\s/g) expect(surrogateKeySplit.includes(SURROGATE_ENUMS.DEFAULT)).toBeTruthy() expect(surrogateKeySplit.includes(makeLanguageSurrogateKey('en'))).toBeTruthy() }) + + test('validates robots.txt format', async () => { + const res: TestResponse = await get('/robots.txt') + // Should be valid robots.txt format + expect(res.body).toMatch(/^User-agent: \*/) + expect(res.statusCode).toBe(200) + expect(res.headers['content-type']).toMatch(/text\/plain/) + }) }) diff --git a/src/tests/helpers/e2etest.ts b/src/tests/helpers/e2etest.ts index cec4a85226ee..e47767f40d66 100644 --- a/src/tests/helpers/e2etest.ts +++ b/src/tests/helpers/e2etest.ts @@ -1,5 +1,5 @@ import cheerio from 'cheerio' -import got, { Response, OptionsOfTextResponseBody, Method } from 'got' +import { fetchWithRetry } from '@/frame/lib/fetch-utils' import { omitBy, isUndefined } from 'lodash-es' type ResponseTypes = 'buffer' | 'json' | 'text' @@ -9,8 +9,8 @@ type ResponseTypeMap = { text: string } -interface GetOptions { - method?: M +interface GetOptions { + method?: string body?: any followRedirects?: boolean followAllRedirects?: boolean @@ -26,12 +26,16 @@ interface GetDOMOptions { retries?: number } -interface ResponseWithHeaders extends Response { +interface ResponseWithHeaders { + body: T + statusCode: number headers: Record + url: string + ok: boolean } // Type alias for cached DOM results to improve maintainability -type CachedDOMResult = cheerio.Root & { res: Response; $: cheerio.Root } +type CachedDOMResult = cheerio.Root & { res: ResponseWithHeaders; $: cheerio.Root } // Cache to store DOM objects const getDOMCache = new Map() @@ -43,13 +47,13 @@ const getDOMCache = new Map() * @param options - Configuration options for the request. * @returns A promise that resolves to the HTTP response. */ -export async function get( +export async function get( route: string, - options: GetOptions = {}, + options: GetOptions = {}, ): Promise> { const { method = 'get', - body, + body: requestBody, followRedirects = false, followAllRedirects = false, headers = {}, @@ -57,29 +61,48 @@ export async function get + const response = await fetchWithRetry(`http://localhost:4000${route}`, fetchOptions, { + retries, + throwHttpErrors: false, + }) + + // Get response body based on responseType + let responseBody: ResponseTypeMap[T] + if (responseType === 'json') { + responseBody = (await response.json()) as ResponseTypeMap[T] + } else if (responseType === 'buffer') { + const arrayBuffer = await response.arrayBuffer() + responseBody = arrayBuffer as ResponseTypeMap[T] + } else { + responseBody = (await response.text()) as ResponseTypeMap[T] + } + + // Convert headers to record format + const headersRecord: Record = {} + response.headers.forEach((value, key) => { + headersRecord[key] = value + }) + + // Return response in got-compatible format + return { + body: responseBody, + statusCode: response.status, + headers: headersRecord, + url: response.url, + ok: response.ok, + } as ResponseWithHeaders } /** @@ -92,7 +115,7 @@ export async function get = {}, -): Promise> { +): Promise> { return get(route, { ...opts, method: 'post' }) }