/**
 * src/lib/rss-transform.ts
 *
 * Starting around episode 223, the RSS feed changed from HTML to plain text.
 * These helpers convert the plain-text show notes into HTML so that both feed
 * formats can be rendered the same way.
 *
 * NOTE(review): the HTML string literals of this module were lost when the
 * patch text was extracted. The `<p>`, `<ul>`/`<li>` wrappers and the entity
 * names are reconstructed from the accompanying unit tests; the tag used for
 * section headers, the apostrophe entity and the anchor markup for link
 * lines are assumptions — confirm against the older HTML feed items.
 */

/** A chapter marker such as "(00:00) - Intro" or "(01:02:03) - Outro". */
const TIMESTAMP_LINE = /^\(\d{2}:\d{2}(?::\d{2})?\)\s*-/;

/** A line that consists solely of bold markdown, e.g. "**Links**". */
const BOLD_HEADER_LINE = /^\*\*(.+?)\*\*$/;

/**
 * Any line containing an http(s) URL, e.g. "CodeRabbit: https://...".
 * The earlier pattern had an optional `:?\s*` prefix; on an unanchored
 * pattern that prefix never changes the result, so it is dropped here.
 * NOTE(review): a prose sentence that merely mentions a URL is also treated
 * as a link list item — confirm that this is intended.
 */
const LINK_LINE = /https?:\/\//;

/** Splits a line into alternating [text, url, text, url, ...] parts. */
const URL_SPLITTER = /(https?:\/\/\S+)/;

/** Characters that must not appear raw in HTML text or attribute values. */
const HTML_ENTITIES: Readonly<Record<string, string>> = {
  '&': '&amp;',
  '<': '&lt;',
  '>': '&gt;',
  '"': '&quot;',
  "'": '&#039;',
};

type LineKind = 'timestamp' | 'header' | 'link' | 'paragraph';

interface LineGroup {
  kind: LineKind;
  lines: string[];
}

/**
 * Decides how a single trimmed, non-empty line is rendered.
 * Precedence is timestamp, then bold header, then link, then paragraph.
 */
function classifyLine(line: string): LineKind {
  if (TIMESTAMP_LINE.test(line)) return 'timestamp';
  if (BOLD_HEADER_LINE.test(line)) return 'header';
  if (LINK_LINE.test(line)) return 'link';
  return 'paragraph';
}

/**
 * Groups consecutive timestamp lines and consecutive link lines so each run
 * becomes one list. Every line is classified on its own, so a timestamp or
 * header line that happens to contain a URL is never swallowed by a link
 * list that precedes it. Headers and paragraphs always stand alone.
 */
function groupLines(lines: readonly string[]): LineGroup[] {
  const groups: LineGroup[] = [];
  for (const line of lines) {
    const kind = classifyLine(line);
    const previous = groups[groups.length - 1];
    const isListKind = kind === 'timestamp' || kind === 'link';
    if (isListKind && previous !== undefined && previous.kind === kind) {
      previous.lines.push(line);
    } else {
      groups.push({ kind, lines: [line] });
    }
  }
  return groups;
}

/** Wraps already-escaped item markup in an unordered list. */
function renderList(items: readonly string[]): string {
  return `<ul>${items.map((item) => `<li>${item}</li>`).join('')}</ul>`;
}

/**
 * Escapes a link line and turns every URL in it into an anchor. Both the
 * visible text and the `href` value are escaped, so feed content cannot
 * break out of the attribute.
 */
function linkifyLine(line: string): string {
  return line
    .split(URL_SPLITTER)
    .map((part, index) => {
      const safe = escapeHtml(part);
      // With one capturing group, odd indices are the captured URLs.
      return index % 2 === 1 ? `<a href="${safe}">${safe}</a>` : safe;
    })
    .join('');
}

/** Renders one group of lines to its HTML fragment. */
function renderGroup(group: LineGroup): string {
  switch (group.kind) {
    case 'timestamp':
      return renderList(group.lines.map((line) => escapeHtml(line)));
    case 'link':
      return renderList(group.lines.map((line) => linkifyLine(line)));
    case 'header':
      // NOTE(review): header tag is an assumption, see the file comment.
      return group.lines
        .map((line) => {
          const title = line.replace(BOLD_HEADER_LINE, '$1');
          return `<p><strong>${escapeHtml(title)}</strong></p>`;
        })
        .join('\n');
    case 'paragraph':
      return group.lines.map((line) => `<p>${escapeHtml(line)}</p>`).join('\n');
  }
}

/**
 * Converts plain-text show notes to HTML.
 *
 * Lines are trimmed and blank lines are dropped. Runs of timestamp lines and
 * runs of link lines each become one `<ul>`; a `**bold**` line becomes a
 * section header; every other line becomes its own paragraph. All text taken
 * from the feed is HTML-escaped.
 *
 * @param text Plain-text episode description.
 * @returns HTML fragments joined by newlines; an empty string when `text`
 *   has no non-blank lines.
 */
export function transformPlainTextToHtml(text: string): string {
  const lines = text
    .split('\n')
    .map((line) => line.trim())
    .filter(Boolean);
  return groupLines(lines)
    .map((group) => renderGroup(group))
    .join('\n');
}

/**
 * Replaces `&`, `<`, `>`, `"` and `'` with HTML entities in a single pass,
 * so the `&` of an entity that was just produced is never escaped again.
 *
 * @param str Untrusted text.
 * @returns Text that is safe to place in HTML content or a quoted attribute.
 */
export function escapeHtml(str: string): string {
  return str.replace(/[&<>"']/g, (ch) => HTML_ENTITIES[ch] ?? ch);
}

/**
 * Heuristic: reports whether `text` contains something shaped like an HTML
 * tag, i.e. a `<` immediately followed by a letter with a `>` somewhere
 * after it. "5 < 10 and 10 > 5" is not treated as HTML.
 *
 * Call site (src/lib/rss.ts, getAllEpisodes): an episode's `content` is the
 * feed description unchanged when this returns true, and
 * transformPlainTextToHtml(description) otherwise.
 */
export function looksLikeHtml(text: string): boolean {
  return /<[a-z][\s\S]*>/i.test(text.trim());
}
description + : transformPlainTextToHtml(description), description: truncate(htmlToText(description), 260), duration: itunes_duration, episodeImage: itunes_image?.href, @@ -121,4 +129,4 @@ export async function getAllEpisodes() { episodesCache = episodes; return episodes; -} +} \ No newline at end of file diff --git a/tests/unit/rss.test.ts b/tests/unit/rss.test.ts new file mode 100644 index 0000000..e78b58a --- /dev/null +++ b/tests/unit/rss.test.ts @@ -0,0 +1,82 @@ +import { describe, expect, it } from 'vitest'; +import { + escapeHtml, + looksLikeHtml, + transformPlainTextToHtml +} from '../../src/lib/rss-transform'; + +describe('RSS Transformation Functions', () => { + describe('escapeHtml', () => { + it('escapes multiple special characters', () => { + expect(escapeHtml('')).toBe( + '<script>alert("XSS & stuff")</script>' + ); + }); + + it('returns unchanged string without special characters', () => { + expect(escapeHtml('Hello World')).toBe('Hello World'); + }); + }); + + describe('looksLikeHtml', () => { + it('detects HTML tags vs plain text', () => { + expect(looksLikeHtml('

Hello

')).toBe(true); + expect(looksLikeHtml('Just plain text')).toBe(false); + expect(looksLikeHtml('5 < 10 and 10 > 5')).toBe(false); + }); + }); + + describe('transformPlainTextToHtml', () => { + it('handles mixed content with lists, headers, and paragraphs', () => { + const input = `**Episode Summary** + +This episode covers many topics & details. + +**Timestamps** + +(00:00) - Introduction +(05:30) - Main discussion +(00:00:15) - With seconds + +**Links** + +GitHub: https://github.com/example +https://example.com +Company & Co: https://example.com?foo=bar&baz=qux + +Thanks for listening!`; + + const output = transformPlainTextToHtml(input); + + expect(output).toContain('

Episode Summary

'); + expect(output).toContain('

This episode covers many topics & details.

'); + expect(output).toContain('
  • (00:00) - Introduction
  • '); + expect(output).toContain('
  • (00:00:15) - With seconds
  • '); + expect(output.match(/