Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
84 changes: 84 additions & 0 deletions src/lib/rss-transform.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/**
* Starting around episode 223, the RSS feed changed from HTML to plain text.
* This transformer converts the new plain-text format to match the old HTML structure exactly.
*/
export function transformPlainTextToHtml(text: string): string {
const lines = text.split('\n').map(l => l.trim()).filter(Boolean);
const html: string[] = [];
let i = 0;

while (i < lines.length) {
const line = lines[i];

// Check if this is a timestamp line like "(00:00) - Intro"
if (/^\(\d{2}:\d{2}(?::\d{2})?\)\s*-/.test(line)) {
// Start collecting all consecutive timestamp lines into a list
const listItems: string[] = [];
while (i < lines.length && /^\(\d{2}:\d{2}(?::\d{2})?\)\s*-/.test(lines[i])) {
listItems.push(escapeHtml(lines[i]));
i++;
}
html.push('<ul>');
listItems.forEach(item => html.push(`<li>${item}</li>`));
html.push('</ul>');
continue;
}

// Check if this is a section header: a line wholly wrapped in double asterisks, e.g. "**Links**"
if (/^\*\*(.+?)\*\*$/.test(line)) {
const text = line.replace(/^\*\*(.+?)\*\*$/, '$1');
html.push(`<p><strong>${escapeHtml(text)}</strong></p>`);
i++;
continue;
}

// Check if this looks like a link list item (e.g., "CodeRabbit: https://...")
if (/:?\s*https?:\/\//.test(line)) {
// Collect all consecutive link lines into a list
const linkItems: string[] = [];
while (i < lines.length && /:?\s*https?:\/\//.test(lines[i])) {
linkItems.push(lines[i]);
i++;
}
html.push('<ul>');
linkItems.forEach(item => {
// Parse "Label: URL" or just "URL"
const match = item.match(/^(.+?):\s*(https?:\/\/.+)$/);
if (match) {
const label = escapeHtml(match[1].trim());
const url = escapeHtml(match[2].trim());
html.push(`<li>${label}: <a href="${url}">${url}</a></li>`);
} else {
const urlMatch = item.match(/(https?:\/\/.+)/);
if (urlMatch) {
const url = escapeHtml(urlMatch[1].trim());
html.push(`<li><a href="${url}">${url}</a></li>`);
} else {
html.push(`<li>${escapeHtml(item)}</li>`);
}
}
});
html.push('</ul>');
continue;
}
Comment on lines +35 to +63
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

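Inline-URL sentences get coerced into link lists (surrounding text is dropped or folded into the URL).
Any line containing http:// or https:// becomes a list item. For a sentence like “Sponsor: https://x (use code …)”, the trailing text is captured as part of the URL and ends up inside the href; for a line without a “Label:” prefix, such as “Visit https://x today”, the text before the URL is dropped. If that’s not intended, tighten detection to only match lines that are just a URL or “Label: URL”.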

🔧 Safer link-line detection
-    if (/:?\s*https?:\/\//.test(line)) {
+    const linkLineRegex = /^(?:[^:]+:\s*)?https?:\/\/\S+\s*$/;
+    if (linkLineRegex.test(line)) {
       // Collect all consecutive link lines into a list
       const linkItems: string[] = [];
-      while (i < lines.length && /:?\s*https?:\/\//.test(lines[i])) {
+      while (i < lines.length && linkLineRegex.test(lines[i])) {
         linkItems.push(lines[i]);
         i++;
       }
       html.push('<ul>');
       linkItems.forEach(item => {
         // Parse "Label: URL" or just "URL"
-        const match = item.match(/^(.+?):\s*(https?:\/\/.+)$/);
+        const match = item.match(/^(?:([^:]+):\s*)?(https?:\/\/\S+)\s*$/);
         if (match) {
-          const label = escapeHtml(match[1].trim());
-          const url = escapeHtml(match[2].trim());
-          html.push(`<li>${label}: <a href="${url}">${url}</a></li>`);
+          const label = match[1] ? escapeHtml(match[1].trim()) : null;
+          const url = escapeHtml(match[2].trim());
+          html.push(
+            label
+              ? `<li>${label}: <a href="${url}">${url}</a></li>`
+              : `<li><a href="${url}">${url}</a></li>`
+          );
         } else {
           const urlMatch = item.match(/(https?:\/\/.+)/);
           if (urlMatch) {
             const url = escapeHtml(urlMatch[1].trim());
             html.push(`<li><a href="${url}">${url}</a></li>`);
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
// Check if this looks like a link list item (e.g., "CodeRabbit: https://...")
if (/:?\s*https?:\/\//.test(line)) {
// Collect all consecutive link lines into a list
const linkItems: string[] = [];
while (i < lines.length && /:?\s*https?:\/\//.test(lines[i])) {
linkItems.push(lines[i]);
i++;
}
html.push('<ul>');
linkItems.forEach(item => {
// Parse "Label: URL" or just "URL"
const match = item.match(/^(.+?):\s*(https?:\/\/.+)$/);
if (match) {
const label = escapeHtml(match[1].trim());
const url = escapeHtml(match[2].trim());
html.push(`<li>${label}: <a href="${url}">${url}</a></li>`);
} else {
const urlMatch = item.match(/(https?:\/\/.+)/);
if (urlMatch) {
const url = escapeHtml(urlMatch[1].trim());
html.push(`<li><a href="${url}">${url}</a></li>`);
} else {
html.push(`<li>${escapeHtml(item)}</li>`);
}
}
});
html.push('</ul>');
continue;
}
// Check if this looks like a link list item (e.g., "CodeRabbit: https://...")
const linkLineRegex = /^(?:[^:]+:\s*)?https?:\/\/\S+\s*$/;
if (linkLineRegex.test(line)) {
// Collect all consecutive link lines into a list
const linkItems: string[] = [];
while (i < lines.length && linkLineRegex.test(lines[i])) {
linkItems.push(lines[i]);
i++;
}
html.push('<ul>');
linkItems.forEach(item => {
// Parse "Label: URL" or just "URL"
const match = item.match(/^(?:([^:]+):\s*)?(https?:\/\/\S+)\s*$/);
if (match) {
const label = match[1] ? escapeHtml(match[1].trim()) : null;
const url = escapeHtml(match[2].trim());
html.push(
label
? `<li>${label}: <a href="${url}">${url}</a></li>`
: `<li><a href="${url}">${url}</a></li>`
);
} else {
const urlMatch = item.match(/(https?:\/\/.+)/);
if (urlMatch) {
const url = escapeHtml(urlMatch[1].trim());
html.push(`<li><a href="${url}">${url}</a></li>`);
} else {
html.push(`<li>${escapeHtml(item)}</li>`);
}
}
});
html.push('</ul>');
continue;
}
🤖 Prompt for AI Agents
In `@src/lib/rss-transform.ts` around lines 35 - 63, The current link-list
detection treats any line containing "http" as a list item, dropping trailing
text; update the initial test and the while loop in rss-transform.ts to only
treat a line as a link-list entry when the entire line matches either "URL" or
"Label: URL" (e.g., use a stricter regex that anchors the whole line such as
matching optional "Label:" then a single URL with no extra text). Apply the same
anchored regex in both the outer if and the while condition that builds
linkItems, and then keep the existing parsing logic
(item.match(/^(.+?):\s*(https?:\/\/.+)$/) and fallback URL-only match) to render
the <ul> only for those fully-matching lines so inline sentences with trailing
text are left untouched.


// Default: regular paragraph
html.push(`<p>${escapeHtml(line)}</p>`);
i++;
}

return html.join('\n');
}

/**
 * Replaces the five HTML-significant characters in `str` with entity
 * references so the result can be embedded in element content or in a
 * double- or single-quoted attribute value.
 *
 * @param str - Raw text to escape.
 * @returns `str` with `&`, `<`, `>`, `"` and `'` replaced by `&amp;`,
 *   `&lt;`, `&gt;`, `&quot;` and `&#039;` respectively.
 */
export function escapeHtml(str: string): string {
  // Order matters: `&` must be handled first, otherwise the ampersands
  // introduced by the later entities would themselves be escaped again.
  const substitutions: ReadonlyArray<readonly [RegExp, string]> = [
    [/&/g, '&amp;'],
    [/</g, '&lt;'],
    [/>/g, '&gt;'],
    [/\"/g, '&quot;'],
    [/'/g, '&#039;'],
  ];

  return substitutions.reduce(
    (escaped, [pattern, entity]) => escaped.replace(pattern, entity),
    str
  );
}

/**
 * Heuristic check for whether `text` contains HTML markup.
 *
 * The text is considered HTML-like when, after trimming, it contains a `<`
 * immediately followed by an ASCII letter (case-insensitive) and a `>`
 * somewhere later in the string. A bare `<` followed by a space or digit
 * (e.g. `5 < 10`) therefore does not count.
 *
 * @param text - Feed content to inspect.
 * @returns `true` if a tag-like sequence is present, otherwise `false`.
 */
export function looksLikeHtml(text: string): boolean {
  const candidate = text.trim();
  const tagLikePattern = /<[a-z][\s\S]*>/i;
  return tagLikePattern.test(candidate);
}
12 changes: 10 additions & 2 deletions src/lib/rss.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,10 +3,16 @@ import parseFeed from 'rss-to-json';
import { array, number, object, optional, parse, string } from 'valibot';

import { optimizeImage } from './optimize-episode-image';
import {
escapeHtml,
looksLikeHtml,
transformPlainTextToHtml
} from './rss-transform';
import { dasherize } from '../utils/dasherize';
import { truncate } from '../utils/truncate';
import starpodConfig from '../../starpod.config';


export interface Show {
title: string;
description: string;
Expand Down Expand Up @@ -102,7 +108,9 @@ export async function getAllEpisodes() {
return {
id,
title: `${title}`,
content: description,
content: looksLikeHtml(description)
? description
: transformPlainTextToHtml(description),
description: truncate(htmlToText(description), 260),
duration: itunes_duration,
Comment on lines +111 to 115
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Plain-text descriptions still go through htmlToText.
When description is plaintext, running htmlToText can drop literal </> sequences. Consider using raw text for truncation when !looksLikeHtml(description).

✅ Suggested fix
-          return {
+          const isHtml = looksLikeHtml(description);
+          const descriptionText = isHtml ? htmlToText(description) : description;
+          return {
             id,
             title: `${title}`,
-            content: looksLikeHtml(description)
-              ? description
-              : transformPlainTextToHtml(description),
-            description: truncate(htmlToText(description), 260),
+            content: isHtml ? description : transformPlainTextToHtml(description),
+            description: truncate(descriptionText, 260),
🤖 Prompt for AI Agents
In `@src/lib/rss.ts` around lines 111 - 115, The truncation currently always runs
htmlToText(description), which strips literal '<'/'>' in plain text; change the
logic so description is truncated from htmlToText(description) only when
looksLikeHtml(description) is true, otherwise truncate the raw description
string; update the description assignment (alongside content which uses
looksLikeHtml and transformPlainTextToHtml) to use a conditional: if
looksLikeHtml(description) use htmlToText(description) for truncation, else use
description directly, then pass that result into truncate(..., 260).

episodeImage: itunes_image?.href,
Expand All @@ -121,4 +129,4 @@ export async function getAllEpisodes() {

episodesCache = episodes;
return episodes;
}
}
82 changes: 82 additions & 0 deletions tests/unit/rss.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
import { describe, expect, it } from 'vitest';
import {
escapeHtml,
looksLikeHtml,
transformPlainTextToHtml
} from '../../src/lib/rss-transform';

// Unit tests for the helpers exported from src/lib/rss-transform.
describe('RSS Transformation Functions', () => {
// escapeHtml: entity-encoding of HTML-significant characters.
describe('escapeHtml', () => {
it('escapes multiple special characters', () => {
// Covers <, >, " and & in a single input.
expect(escapeHtml('<script>alert("XSS & stuff")</script>')).toBe(
'&lt;script&gt;alert(&quot;XSS &amp; stuff&quot;)&lt;/script&gt;'
);
});

it('returns unchanged string without special characters', () => {
expect(escapeHtml('Hello World')).toBe('Hello World');
});
});

// looksLikeHtml: heuristic used to tell HTML input from plain text.
describe('looksLikeHtml', () => {
it('detects HTML tags vs plain text', () => {
expect(looksLikeHtml('<p>Hello</p>')).toBe(true);
expect(looksLikeHtml('Just plain text')).toBe(false);
// "<" followed by a space must not be mistaken for a tag.
expect(looksLikeHtml('5 < 10 and 10 > 5')).toBe(false);
});
});

// transformPlainTextToHtml: plain-text show notes -> HTML paragraphs and lists.
describe('transformPlainTextToHtml', () => {
it('handles mixed content with lists, headers, and paragraphs', () => {
// Fixture combining every supported construct: bold headers, a paragraph,
// a run of timestamp lines, and a run of link lines (labelled and bare).
const input = `**Episode Summary**

This episode covers many topics & details.

**Timestamps**

(00:00) - Introduction
(05:30) - Main discussion
(00:00:15) - With seconds

**Links**

GitHub: https://github.com/example
https://example.com
Company & Co: https://example.com?foo=bar&baz=qux

Thanks for listening!`;

const output = transformPlainTextToHtml(input);

expect(output).toContain('<p><strong>Episode Summary</strong></p>');
expect(output).toContain('<p>This episode covers many topics &amp; details.</p>');
expect(output).toContain('<li>(00:00) - Introduction</li>');
expect(output).toContain('<li>(00:00:15) - With seconds</li>');
// Exactly two lists are expected: one for the timestamps, one for the links.
expect(output.match(/<ul>/g)).toHaveLength(2);
expect(output).toContain(
'<li>GitHub: <a href="https://github.com/example">https://github.com/example</a></li>'
);
// Ampersands must be escaped in the label, the href and the link text.
expect(output).toContain(
'<li>Company &amp; Co: <a href="https://example.com?foo=bar&amp;baz=qux">https://example.com?foo=bar&amp;baz=qux</a></li>'
);
expect(output).toContain('<p>Thanks for listening!</p>');
});

it('separates non-consecutive timestamp groups', () => {
// Two timestamp runs split by a paragraph should yield two separate lists.
const input = `(00:00) - Intro
(05:00) - Part 1

Some text in between

(10:00) - Part 2
(15:00) - Part 3`;
const output = transformPlainTextToHtml(input);
expect(output.match(/<ul>/g)).toHaveLength(2);
});

it('returns empty string for blank input', () => {
// Both an empty string and whitespace-only input produce no markup.
expect(transformPlainTextToHtml('')).toBe('');
expect(transformPlainTextToHtml(' \n \n ')).toBe('');
});
});
});