quality:calculate

Fetches an openAi:summary log from Hive, scores the summary's clarity and learning value with OpenAI, and writes the resulting quality score back to the log.

Published June 30, 2025
Source Code
// TASK: calculate
// Run this task with:
// forge task:run quality:calculate --uuid "4eaa392c-43db-4174-b407-ffe394573cec"

import { createTask } from '@forgehive/task'
import { Schema } from '@forgehive/schema'
import { createHiveLogClient, isApiError } from '@forgehive/hive-sdk'
import { OpenAI } from 'openai'
import { z } from 'zod'
import { zodResponseFormat } from 'openai/helpers/zod'

const description = 'Evaluate the quality of an openAi:summary log and store the score in Hive'
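// Logs are fetched from (and quality written back to) the openAi:summary task, not this task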
const TASK_NAME = 'openAi:summary'

const schema = new Schema({
  uuid: Schema.string()
})

// Define the structured response format for quality evaluation
const QualityEvaluation = z.object({
  score: z.number().min(0).max(1).describe("A numeric score from 0-1 where 1 is excellent and 0 is poor"),
  reason: z.string().describe("A clear explanation of why this score was given"),
  suggestions: z.string().describe("Specific recommendations for how to improve the score")
})

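// Boundaries isolate the task's side effects (Hive log access, OpenAI calls)
// so they can be mocked or replayed when testing the task function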
const boundaries = {
  fetchLog: async (uuid: string) => {
    const client = createHiveLogClient('Personal Knowledge Management System')
    const data = await client.getLog(TASK_NAME, uuid)

    return data
  },
  setQuality: async (uuid: string, quality: { score: number; reason: string; suggestions: string }) => {
    const client = createHiveLogClient('Personal Knowledge Management System')
    const success = await client.setQuality(TASK_NAME, uuid, quality)

    return success
  },
  evaluateSummaryQuality: async (model: string, summary: string): Promise<{ score: number; reason: string; suggestions: string }> => {
    const apiKey = process.env.OPENAI_API_KEY;

    if (!apiKey) {
      throw new Error('OpenAI API key is not configured. Please set OPENAI_API_KEY in your .env file.');
    }

    const openai = new OpenAI({ apiKey });

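    // zodResponseFormat constrains the model's reply to JSON matching the
    // QualityEvaluation schema; the SDK exposes the result on message.parsed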
    const response = await openai.beta.chat.completions.parse({
      model,
      messages: [
        {
          role: "system",
          content: `
            You are an expert evaluator of summary quality. Your task is to evaluate the clarity of the complete summary (including main summary, key points, and reading worthiness) and how well it explains what you can learn from reading the article.

            Rate the summary quality using these specific yardsticks (you may use up to 4 decimal places for precision):
            - 0: Very unclear, no indication of what can be learned
            - 0.2: Poor clarity, vague learning outcomes
            - 0.4: Somewhat clear but learning value is unclear
            - 0.6: Clear summary with good indication of learning value
            - 0.8: Very clear with compelling learning outcomes
            - 1.0: Exceptionally clear with outstanding learning outcomes

            Choose a score based on these yardsticks, considering intermediate values when appropriate (e.g., 0.1, 0.3, 0.5, 0.7, 0.9 or any value up to 4 decimal places).

            Focus specifically on:
            - How clear and understandable is the complete summary?
            - How well do the key points highlight important learnings?
            - How well does it explain what you can learn from the article?
            - How compelling is the case for reading the full content?
            - How specific and actionable are the learning outcomes mentioned?

            Provide your evaluation with:
            1. A numeric score (0-1, up to 4 decimal places)
            2. A clear reason explaining why you gave this score based on clarity and learning value
            3. Specific suggestions for how to improve the clarity, key points, and learning outcomes
          `
        },
        {
          role: "user",
          content: `Please evaluate this summary for clarity of learning value:\n\n${summary}`
        }
      ],
      max_tokens: 1000,
      temperature: 0.5,
      response_format: zodResponseFormat(QualityEvaluation, "quality_evaluation"),
    });

    if (!response.choices[0].message.parsed) {
      throw new Error('Could not evaluate summary quality.');
    }

    const evaluation = response.choices[0].message.parsed;

    // Round the score to at most 4 decimal places
    const score = Number(Number(evaluation.score).toFixed(4));

    return {
      score,
      reason: evaluation.reason,
      suggestions: evaluation.suggestions
    };
  }
}

export const calculate = createTask(
  schema,
  boundaries,
  async function ({ uuid }, { fetchLog, evaluateSummaryQuality, setQuality }) {
    const data = await fetchLog(uuid)

    if (!data) {
      throw new Error(`Failed to fetch log data for ${TASK_NAME} with uuid ${uuid}`)
    }

    if (isApiError(data)) {
      throw new Error(data.error)
    }

    const log = data.logItem
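    // Output shape produced by the openAi:summary task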
    const output = log.output as { summary: string; keyPoints: string[]; readingWorthiness: string }

    // Combine summary with key points for comprehensive evaluation
    const fullSummary = [
      output.summary,
      output.keyPoints?.length ? `\n\nKey Points:\n${output.keyPoints.map(point => `• ${point}`).join('\n')}` : '',
      output.readingWorthiness ? `\n\nWhy Read This:\n${output.readingWorthiness}` : ''
    ].filter(Boolean).join('')

    // Evaluate the complete summary quality
    // const model = "gpt-4o-mini-2024-07-18"
    const model = "gpt-4.1-mini-2025-04-14"
    const evaluation = await evaluateSummaryQuality(model, fullSummary)

    await setQuality(uuid, evaluation)

    return {
      quality: evaluation
    }
  }
)

calculate.setDescription(description)
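
For ad-hoc local runs outside the forge CLI, the exported task can also be invoked from a script. A minimal sketch, assuming the task object returned by createTask exposes a run method that validates input against the schema and injects the boundaries (confirm the exact invocation API in the @forgehive/task docs); OPENAI_API_KEY must be set in the environment:

import { calculate } from './calculate'

const main = async () => {
  // NOTE: `run` is an assumed method name; check @forgehive/task for the actual API.
  const { quality } = await calculate.run({ uuid: '4eaa392c-43db-4174-b407-ffe394573cec' })
  console.log(quality)
}

main().catch(console.error)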