// Create a summary from extracted URL content using OpenAI
// TASK: summary
// Run this task with:
// forge task:run openAi:summary --uuid 0c2a10bf-cf34-4c21-a3be-ba31ed96ddad
import { createTask } from '@forgehive/task'
import { Schema } from '@forgehive/schema'
import { OpenAI } from 'openai'
import markdownToTxt from 'markdown-to-txt'
import { z } from "zod";
import { zodResponseFormat } from "openai/helpers/zod";
import { Url } from '@/models'
// Human-readable task description; attached to the task via `summary.setDescription(...)` at the end of this file.
const description = 'Create a summary from extracted URL content using OpenAI'
// Input schema for the task: a single string `uuid`, which the task uses to look up the Url document to summarize.
const schema = new Schema({
uuid: Schema.string()
})
// Define the structured response format for the summary.
// This zod object is passed to `zodResponseFormat(...)` in `generateSummary`, so the
// `.describe(...)` texts below are part of the request sent to OpenAI, not just documentation.
// - `summary`: the markdown summary (paragraphs, "Key Points" and "Why Read This" sections).
// - `readingWorthiness`: a separate, standalone statement of why the content is worth reading.
// NOTE(review): the `summary` description already asks for a "Why Read This" section, which
// overlaps with `readingWorthiness` — confirm that both are intentionally requested.
const ContentSummary = z.object({
summary: z.string().describe("A comprehensive markdown summary that includes: 4 paragraphs of content summary, followed by a 'Key Points' section with 3-5 bullet points, and a 'Why Read This' section explaining the value"),
readingWorthiness: z.string().describe("Why someone should read this content")
})
/**
 * System prompt sent with every summary request.
 * Kept at column 0 so the text sent to the model has no incidental indentation.
 */
const SUMMARY_SYSTEM_PROMPT = `
You are an assistant that analyzes content and creates comprehensive summaries.
Create a comprehensive markdown summary with the following structure:
1. Four paragraphs summarizing the main content
2. A "## Key Points" section with 3-5 bullet points highlighting the most important learnings
3. A "## Why Read This" section explaining the value and benefits of reading the full content
Use proper markdown formatting throughout.
Do not include the title in your summary - focus only on summarizing the main content.
`

/**
 * Side-effecting operations (database and OpenAI access) used by the `summary` task.
 * They are handed to `createTask` so the task body only talks to the outside world through them.
 */
const boundaries = {
  /**
   * Looks up the Url document with the given uuid.
   * @returns whatever `Url.findOne` resolves to for `{ uuid }`.
   */
  findByUuid: async (uuid: string) => {
    return await Url.findOne({ uuid })
  },

  /**
   * Asks OpenAI for a structured summary of `content`.
   *
   * @param model OpenAI model identifier to use for the completion.
   * @param content Text to summarize; sent as the user message.
   * @returns the parsed `ContentSummary` object and the total token usage (if reported).
   * @throws Error when `OPENAI_API_KEY` is not set, or when the response carries no parsed
   *   payload (no choices returned, or the model refused — the refusal text is included).
   */
  generateSummary: async (model: string, content: string): Promise<{ content: z.infer<typeof ContentSummary>; usage: number | undefined }> => {
    const apiKey = process.env.OPENAI_API_KEY
    if (!apiKey) {
      throw new Error('OpenAI API key is not configured. Please set OPENAI_API_KEY in your .env file.')
    }

    const openai = new OpenAI({ apiKey })
    const response = await openai.beta.chat.completions.parse({
      model,
      messages: [
        {
          role: "system",
          content: SUMMARY_SYSTEM_PROMPT
        },
        {
          role: "user",
          content: content,
        }
      ],
      max_tokens: 4000,
      temperature: 0.3,
      response_format: zodResponseFormat(ContentSummary, "content_summary"),
    })

    // `choices` may be empty; guard the access instead of failing with an opaque TypeError.
    const message = response.choices[0]?.message
    const parsed = message?.parsed
    if (!parsed) {
      // With structured outputs a refusal arrives in `refusal` while `parsed` stays null;
      // surface it so the failure can be diagnosed.
      const refusal = message?.refusal
      throw new Error(
        refusal
          ? `Could not generate summary from the content. Model refusal: ${refusal}`
          : 'Could not generate summary from the content.'
      )
    }

    console.log('Usage =>', response.usage?.prompt_tokens, response.usage?.completion_tokens, response.usage?.total_tokens)

    return {
      content: parsed,
      usage: response.usage?.total_tokens
    }
  },

  /**
   * Persists the generated summary on the Url document identified by `uuid` and sets its
   * status to `'hasSummary'`. Uses an upsert, so a document is created if none matches.
   * @returns `true` once the write has completed.
   */
  saveSummary: async (uuid: string, summary: string, readingWorthiness: string, content: string, title: string, url: string, description: string) => {
    // Use findOneAndUpdate with upsert to save or create
    await Url.findOneAndUpdate(
      { uuid },
      {
        url,
        title,
        description,
        content,
        summary,
        readingWorthiness,
        status: 'hasSummary'
      },
      {
        upsert: true,
        new: true
      }
    )
    return true
  },

  /**
   * Flags the Url document identified by `uuid` with status `'hasSummaryError'`.
   * @returns `{ success: true }` once the document has been saved.
   * @throws Error when no document exists for `uuid`.
   */
  markSummaryError: async (uuid: string) => {
    const url = await Url.findOne({ uuid })
    if (!url) {
      throw new Error(`URL not found for uuid: ${uuid}`)
    }
    url.status = 'hasSummaryError'
    await url.save()
    return { success: true }
  }
}
/** Upper bound on the number of plain-text characters sent to the model. */
const MAX_CONTENT_CHARS = 15000

/** OpenAI model used to generate summaries. */
const SUMMARY_MODEL = "gpt-4o-mini-2024-07-18"

/**
 * Task `summary`: loads the Url document for `uuid`, converts its markdown content to
 * whitespace-normalized plain text (capped at `MAX_CONTENT_CHARS`), asks OpenAI for a
 * structured summary, and stores the result on the document.
 *
 * Returns `{ status: 'Ok', summary, readingWorthiness, usage }` on success.
 *
 * Throws when the document or its content is missing (the document is NOT marked as failed
 * in those cases, since the checks run before the guarded section). Any failure after that
 * marks the document via `markSummaryError` and rethrows the original error.
 */
export const summary = createTask(
  schema,
  boundaries,
  async function ({ uuid }, { findByUuid, generateSummary, saveSummary, markSummaryError }) {
    // Get the URL document
    const urlDoc = await findByUuid(uuid)
    if (!urlDoc) {
      throw new Error(`URL not found for uuid: ${uuid}`)
    }
    if (!urlDoc.content) {
      throw new Error('URL document does not contain content to summarize')
    }

    try {
      // Convert markdown to plain text, collapse whitespace and limit length
      let text = markdownToTxt(urlDoc.content)
        .replace(/\s+/g, ' ')
        .trim()
        .slice(0, MAX_CONTENT_CHARS)

      // `slice` counts UTF-16 code units, so the cut can land in the middle of a surrogate
      // pair; drop a dangling high surrogate rather than send malformed text to the API.
      const lastUnit = text.charCodeAt(text.length - 1)
      if (lastUnit >= 0xd800 && lastUnit <= 0xdbff) {
        text = text.slice(0, -1)
      }

      if (!text) {
        throw new Error('No text content found to summarize')
      }
      console.log('->', urlDoc.title || urlDoc.url, text.length)

      // Prepare the content for analysis; title and description are included only when present
      const content = [
        urlDoc.title ? `Title: ${urlDoc.title}` : '',
        urlDoc.description ? `Description: ${urlDoc.description}` : '',
        `Content: ${text}`
      ].filter(Boolean).join('\n\n')

      // Generate summary using OpenAI
      const analysis = await generateSummary(SUMMARY_MODEL, content)
      const summaryData = analysis.content
      console.log('Summary length ->', SUMMARY_MODEL, summaryData.summary.length)

      // Save the summary alongside the original (untruncated) document fields
      await saveSummary(uuid, summaryData.summary, summaryData.readingWorthiness, urlDoc.content, urlDoc.title || '', urlDoc.url, urlDoc.description || '')

      return {
        status: 'Ok',
        summary: summaryData.summary,
        readingWorthiness: summaryData.readingWorthiness,
        usage: analysis.usage
      }
    } catch (error) {
      // Mark as error if something goes wrong. Marking is best-effort: if it fails too,
      // log that failure and still rethrow the original error so the root cause is not masked.
      try {
        await markSummaryError(uuid)
      } catch (markError) {
        console.error('Failed to mark summary error ->', uuid, markError)
      }
      throw error
    }
  }
)

summary.setDescription(description)