openAi:summary

Create a summary from extracted URL content using OpenAI

Published June 30, 2025
Source Code
// TASK: summary
// Run this task with:
// forge task:run openAi:summary --uuid 0c2a10bf-cf34-4c21-a3be-ba31ed96ddad

import { createTask } from '@forgehive/task'
import { Schema } from '@forgehive/schema'
import { OpenAI } from 'openai'
import markdownToTxt from 'markdown-to-txt'
import { z } from "zod";
import { zodResponseFormat } from "openai/helpers/zod";
import { Url } from '@/models'

// Human-readable description of the task; attached to the task at the bottom of this file.
const description = 'Create a summary from extracted URL content using OpenAI'

// Task input: a single `uuid` string, used to look up the Url document to summarize.
const schema = new Schema({
  uuid: Schema.string()
})

// Structured response format requested from the model.
// The `.describe()` texts are passed along with the schema to guide the model's output:
//  - `summary`: the markdown summary body.
//  - `readingWorthiness`: a separate short rationale for reading the original content.
const ContentSummary = z.object({
  summary: z.string().describe("A comprehensive markdown summary that includes: 4 paragraphs of content summary, followed by a 'Key Points' section with 3-5 bullet points, and a 'Why Read This' section explaining the value"),
  readingWorthiness: z.string().describe("Why someone should read this content")
})

/**
 * Side-effecting operations (database access and the OpenAI call) used by the
 * summary task. They are passed to `createTask` and handed to the task function
 * as its second argument.
 */
const boundaries = {
  /**
   * Looks up a single Url document by its uuid.
   *
   * @param uuid - Identifier of the Url document.
   * @returns The result of `Url.findOne({ uuid })`; callers treat a falsy value as "not found".
   */
  findByUuid: async (uuid: string) => {
    return await Url.findOne({ uuid })
  },
  /**
   * Asks OpenAI for a structured summary of `content`.
   *
   * @param model - OpenAI model identifier to use for the completion.
   * @param content - Text sent as the user message.
   * @returns The parsed `ContentSummary` object and the total token usage (if reported).
   * @throws Error when `OPENAI_API_KEY` is not set, or when the response contains no
   *         parsed payload (no choices, a refusal, or unparseable output).
   */
  generateSummary: async (model: string, content: string): Promise<{ content: z.infer<typeof ContentSummary>; usage: number | undefined }> => {
    const apiKey = process.env.OPENAI_API_KEY;

    if (!apiKey) {
      throw new Error('OpenAI API key is not configured. Please set OPENAI_API_KEY in your .env file.');
    }

    const openai = new OpenAI({ apiKey });

    const response = await openai.beta.chat.completions.parse({
      model,
      messages: [
        {
          role: "system",
          content: `
            You are an assistant that analyzes content and creates comprehensive summaries.
            Create a comprehensive markdown summary with the following structure:

            1. Four paragraphs summarizing the main content
            2. A "## Key Points" section with 3-5 bullet points highlighting the most important learnings
            3. A "## Why Read This" section explaining the value and benefits of reading the full content

            Use proper markdown formatting throughout.
            Do not include the title in your summary - focus only on summarizing the main content.
          `
        },
        {
          role: "user",
          content: content,
        }
      ],
      max_tokens: 4000,
      temperature: 0.3,
      response_format: zodResponseFormat(ContentSummary, "content_summary"),
    });

    // Guard the index access: an empty `choices` array must surface as the
    // descriptive error below rather than a TypeError on `undefined.message`.
    const message = response.choices[0]?.message;
    const parsed = message?.parsed;

    if (!parsed) {
      // When the model refuses, `parsed` is null and the reason is in `refusal`;
      // include it so the failure can be diagnosed from the error alone.
      const refusal = message?.refusal;
      throw new Error(
        refusal
          ? `Could not generate summary from the content. Model refusal: ${refusal}`
          : 'Could not generate summary from the content.'
      );
    }

    console.log('Usage =>', response.usage?.prompt_tokens, response.usage?.completion_tokens, response.usage?.total_tokens)

    return {
      content: parsed,
      usage: response.usage?.total_tokens
    };
  },
  /**
   * Stores the generated summary on the Url document identified by `uuid`,
   * creating the document if it does not exist (upsert), and sets its status
   * to `'hasSummary'`.
   *
   * @param uuid - Identifier of the Url document.
   * @param summary - Generated markdown summary.
   * @param readingWorthiness - Generated "why read this" text.
   * @param content - Original extracted content, written back unchanged.
   * @param title - Title to store.
   * @param url - URL to store.
   * @param description - Description to store.
   * @returns `true` once the update has completed.
   */
  saveSummary: async (uuid: string, summary: string, readingWorthiness: string, content: string, title: string, url: string, description: string) => {
    // Use findOneAndUpdate with upsert to save or create
    await Url.findOneAndUpdate(
      { uuid },
      {
        url,
        title,
        description,
        content,
        summary,
        readingWorthiness,
        status: 'hasSummary'
      },
      {
        upsert: true,
        new: true
      }
    )

    return true
  },
  /**
   * Flags the Url document identified by `uuid` as having failed summarization
   * by setting its status to `'hasSummaryError'`.
   *
   * @param uuid - Identifier of the Url document.
   * @returns `{ success: true }` once the document has been saved.
   * @throws Error when no document exists for `uuid`.
   */
  markSummaryError: async (uuid: string) => {
    const url = await Url.findOne({ uuid })

    if (!url) {
      throw new Error(`URL not found for uuid: ${uuid}`)
    }

    url.status = 'hasSummaryError'
    await url.save()

    return { success: true }
  }
}

/**
 * Task `openAi:summary`.
 *
 * Loads the Url document for `uuid`, converts its markdown content to plain
 * text, asks OpenAI for a structured summary, and stores the result on the
 * document. If anything fails after the document has been loaded and found to
 * have content, the document is flagged via `markSummaryError` and the
 * original error is rethrown.
 *
 * Resolves to `{ status: 'Ok', summary, readingWorthiness, usage }`.
 * Throws when the document is missing, has no content, or summarization/saving fails.
 */
export const summary = createTask(
  schema,
  boundaries,
  async function ({ uuid }, { findByUuid, generateSummary, saveSummary, markSummaryError }) {
    // Upper bound on the plain-text characters sent to the model.
    const maxContentChars = 15000;
    const model = "gpt-4o-mini-2024-07-18";

    // Get the URL document
    const urlDoc = await findByUuid(uuid);

    if (!urlDoc) {
      throw new Error(`URL not found for uuid: ${uuid}`);
    }

    if (!urlDoc.content) {
      throw new Error('URL document does not contain content to summarize');
    }

    try {
      // Convert markdown to plain text, collapse whitespace and limit length
      const text = markdownToTxt(urlDoc.content)
        .replace(/\s+/g, ' ')
        .trim()
        .slice(0, maxContentChars)
        // slice() counts UTF-16 code units, so it can cut a surrogate pair in
        // half; drop a dangling high surrogate to keep the prompt well-formed.
        .replace(/[\uD800-\uDBFF]$/, '');

      if (!text) {
        throw new Error('No text content found to summarize');
      }

      console.log('->', urlDoc.title || urlDoc.url, text.length)

      // Prepare the content for analysis; title/description are included only when present
      const content = [
        urlDoc.title ? `Title: ${urlDoc.title}` : '',
        urlDoc.description ? `Description: ${urlDoc.description}` : '',
        `Content: ${text}`
      ].filter(Boolean).join('\n\n');

      // Generate summary using OpenAI
      const analysis = await generateSummary(model, content);

      // Get the structured summary data
      const summaryData = analysis.content;
      const usage = analysis.usage;

      console.log('Summary length ->', model, summaryData.summary.length)

      // Save the summary; the original (untruncated) content is written back unchanged
      await saveSummary(uuid, summaryData.summary, summaryData.readingWorthiness, urlDoc.content, urlDoc.title || '', urlDoc.url, urlDoc.description || '');

      return {
        status: 'Ok',
        summary: summaryData.summary,
        readingWorthiness: summaryData.readingWorthiness,
        usage: usage
      };
    } catch (error) {
      // Flag the document as failed, but never let a failure of the flagging
      // itself replace the original error: callers need the root cause.
      try {
        await markSummaryError(uuid);
      } catch (markError) {
        console.error('Failed to mark summary error for', uuid, markError);
      }
      throw error;
    }
  }
)

summary.setDescription(description)
openAi:summary by Hive - Hive