tasks:createDocumentation

Generate comprehensive documentation for a task using fingerprint data and Claude API

Published July 24, 2025
Documentation

Overview

Generates comprehensive documentation for tasks by analyzing their fingerprint data and source code using Claude AI. This task retrieves task metadata from a MongoDB database, fetches source code from AWS S3, and creates structured documentation via Anthropic's Claude API. The generated documentation includes an overview, input/output specifications, implementation details, error handling, and boundary documentation.

Implementation

The main function orchestrates a multi-step documentation generation process:

  1. Task Retrieval: Calls getTaskFromDb to fetch the target task, with its project and quality function data populated, from the MongoDB database
  2. Validation: Verifies that the task has the required fingerprint data and a source code path
  3. Source Code Fetching: Uses getFromS3 to retrieve the task's TypeScript source code from AWS S3 storage
  4. Prompt Generation: Calls createDocumentationPrompt to build a comprehensive AI prompt including fingerprint analysis, input/output schemas, boundaries, and error information
  5. AI Documentation: Invokes generateDocumentation to send the prompt to Anthropic's Claude API and receive structured JSON documentation in response
  6. Metrics Recording: Tracks token usage metrics for input and output tokens using setMetrics
  7. Database Update: Uses updateTaskDocumentation to save the generated documentation back to the task record in the MongoDB database
  8. Response Assembly: Returns a structured response with status, metadata, and documentation preview
Source Code
// TASK: createDocumentation
// Run this task with:
// forge task:run tasks:createDocumentation --uuid "8f50ff1c-a702-49c7-8dd9-4fcbdaa8f703"

import { createTask } from '@forgehive/task'
import { Schema } from '@forgehive/schema'
import { S3Client, GetObjectCommand } from '@aws-sdk/client-s3'
import { Task } from '@/models'
import Anthropic from '@anthropic-ai/sdk'

/**
 * One property entry of a fingerprint schema (task input schema, output type,
 * or boundary contract). The prompt builder reads `type`, `optional`,
 * `default` and `description` from these entries.
 */
interface SchemaProperty {
  type: string
  optional?: boolean
  default?: unknown
  description?: string
  // Nested property map; not read anywhere in the visible code.
  properties?: Record<string, SchemaProperty>
}

/**
 * Shape of a task's return value as recorded in its fingerprint.
 * Every field is optional; the prompt builder prints 'unknown' when `type`
 * is missing and only lists properties / element details when present.
 */
interface OutputType {
  type?: string
  properties?: Record<string, SchemaProperty>
  // Rendered in the prompt as "Array element type" — presumably only set for array outputs.
  elementType?: {
    type: string
    properties?: Record<string, SchemaProperty>
  }
}

/**
 * One argument of a boundary function as recorded in the fingerprint.
 * When `type` is 'object' and `properties` is set, the prompt lists the
 * property names; otherwise it prints `name` (or a positional fallback) and `type`.
 */
interface BoundaryInput {
  type: string
  name?: string
  properties?: Record<string, SchemaProperty>
}

/** An error a boundary may raise; only its message is rendered into the prompt. */
interface BoundaryError {
  message: string
}

/**
 * Fingerprint entry for one boundary (external dependency) of a task:
 * its name, argument list, output shape and known errors.
 */
interface Boundary {
  name: string
  input?: BoundaryInput[]
  output?: {
    type: string
    properties?: Record<string, SchemaProperty>
  }
  errors?: BoundaryError[]
}

/**
 * An error recorded in the task fingerprint. When `location` is present the
 * prompt appends the line number to the error entry.
 */
interface TaskError {
  type: string
  message: string
  location?: {
    line: number
  }
}

/**
 * Fingerprint data consumed by the prompt builder. All sections are optional;
 * missing sections are treated as empty when the prompt is assembled.
 */
interface TaskFingerprint {
  inputSchema?: {
    properties?: Record<string, SchemaProperty>
  }
  outputType?: OutputType
  boundaries?: Boundary[]
  errors?: TaskError[]
}

/**
 * JSON structure the model is asked to return and that is stored on the task.
 * Mirrors the response template embedded at the end of the generated prompt.
 */
interface DocumentationStructure {
  overview: string
  inputOutput: {
    inputParameters: string
    returnValue: string
  }
  taskImplementation: string
  errorHandling: string
  // Keyed by boundary name; the prompt requests null when the task has no boundaries.
  boundaries?: Record<string, string> | null
}

/** Token usage reported for one documentation-generation request. */
interface GenerationMetrics {
  // Input token count taken from the API response's usage data.
  input: number
  // Output token count taken from the API response's usage data.
  output: number
  // Model identifier with '-' replaced by '_' (it is used as a metric-name suffix).
  model: string
}

/** Result of the generateDocumentation boundary: the parsed documentation plus token metrics. */
interface GenerationResult {
  documentation: DocumentationStructure
  metrics: GenerationMetrics
}

// Task identity passed to createTask below.
const name = 'tasks:createDocumentation'
const description = 'Generate comprehensive documentation for a task using fingerprint data and Claude API'

// Configuration is read from the environment once, at module load.
// Any of these may be undefined if the variable is not set.
const region = process.env.AWS_S3_REGION
const bucketName = process.env.AWS_S3_BUCKET
const claudeApiKey = process.env.CLAUDE_API_KEY

// Single S3 client shared by the getFromS3 boundary.
const s3Client = new S3Client({
  region
})

// Task input: the UUID of the task to document.
const schema = new Schema({
  uuid: Schema.string()
})

/**
 * Boundary functions for the createDocumentation task. Each one isolates a
 * single external interaction (database, S3, Claude API) or, in the case of
 * createDocumentationPrompt, the pure prompt-assembly step.
 */
const boundaries = {
  /**
   * Loads a task by UUID with its `project` and `qualityFunction` references populated.
   *
   * @param uuid - UUID of the task to look up.
   * @returns The matching task document.
   * @throws Error when no task with that UUID exists.
   */
  getTaskFromDb: async (uuid: string) => {
    const task = await Task.findOne({ uuid }).populate('project').populate('qualityFunction')
    if (!task) {
      throw new Error(`Task with UUID ${uuid} not found`)
    }
    return task
  },

  /**
   * Downloads an object from the configured S3 bucket and returns its body as a string.
   *
   * @param key - Object key inside the bucket.
   * @returns The object's content decoded to a string.
   * @throws Error when the bucket is not configured or the response has no body.
   */
  getFromS3: async (key: string) => {
    // Fail fast with a clear message, mirroring the CLAUDE_API_KEY guard below,
    // instead of sending a request with an undefined bucket.
    if (!bucketName) {
      throw new Error('AWS_S3_BUCKET environment variable is not set')
    }

    const command = new GetObjectCommand({
      Bucket: bucketName,
      Key: key
    })

    const response = await s3Client.send(command)

    if (!response.Body) {
      throw new Error(`Could not get object from S3: ${bucketName}/${key}`)
    }

    return response.Body.transformToString()
  },

  /**
   * Builds the prompt sent to the model from the task's fingerprint and source code.
   * Pure string assembly: no I/O is performed.
   *
   * @param fingerprint - Fingerprint data; missing sections are treated as empty.
   * @param sourceCode - Source code of the task, embedded verbatim in the prompt.
   * @param taskName - Task name shown in the prompt's first line.
   * @returns The complete prompt, ending with the JSON response template.
   */
  createDocumentationPrompt: async (fingerprint: TaskFingerprint, sourceCode: string, taskName: string): Promise<string> => {
    const inputProperties: Record<string, SchemaProperty> = fingerprint.inputSchema?.properties ?? {}
    const outputType: OutputType = fingerprint.outputType ?? {}
    // Named differently from the enclosing `boundaries` object to avoid shadowing it.
    const taskBoundaries: Boundary[] = fingerprint.boundaries ?? []
    const errors: TaskError[] = fingerprint.errors ?? []

    let prompt = `Generate comprehensive documentation for the ForgeHive task "${taskName}".

## Task Source Code:
\`\`\`typescript
${sourceCode}
\`\`\`

## Task Fingerprint Analysis:

### Input Schema:
The task accepts the following input parameters:
`

    // Document input parameters
    for (const [key, value] of Object.entries(inputProperties)) {
      prompt += `- **${key}** (${value.type}): ${value.optional ? 'Optional' : 'Required'}`
      // Compare against null/undefined instead of truthiness so that falsy
      // defaults such as 0, false or '' are still documented.
      if (value.default != null) {
        prompt += ` - Default: ${value.default}`
      }
      prompt += '\n'
    }

    // Document output type
    prompt += `
### Output Type:
The task returns: ${outputType.type || 'unknown'}`

    if (outputType.properties) {
      prompt += '\nOutput properties:\n'
      for (const [key, value] of Object.entries(outputType.properties)) {
        prompt += `- **${key}** (${value.type}): ${value.description || 'No description'}\n`
      }
    }

    if (outputType.elementType) {
      prompt += `\nArray element type: ${outputType.elementType.type}\n`
      if (outputType.elementType.properties) {
        prompt += 'Element properties:\n'
        for (const [key, value] of Object.entries(outputType.elementType.properties)) {
          prompt += `- **${key}** (${value.type})\n`
        }
      }
    }

    // Document boundaries
    if (taskBoundaries.length > 0) {
      prompt += `
### Boundaries (External Dependencies):
This task uses the following external dependencies:
`
      for (const boundary of taskBoundaries) {
        prompt += `
#### ${boundary.name}
- **Purpose**: External dependency for ${boundary.name}
- **Input**: `
        if (Array.isArray(boundary.input)) {
          // Join with ', ' so multiple arguments do not run together in the prompt.
          prompt += boundary.input
            .map((input: BoundaryInput, index: number) =>
              input.type === 'object' && input.properties
                ? `Object with properties: ${Object.keys(input.properties).join(', ')}`
                : `${input.name || `param${index + 1}`} (${input.type})`
            )
            .join(', ')
        }
        prompt += `
- **Output**: ${boundary.output?.type || 'unknown'}`

        if (boundary.output?.properties) {
          prompt += ' with properties: ' + Object.keys(boundary.output.properties).join(', ')
        }

        if (boundary.errors && boundary.errors.length > 0) {
          prompt += `
- **Potential Errors**:
`
          for (const error of boundary.errors) {
            prompt += `  - ${error.message}\n`
          }
        }
      }
    }

    // Document errors
    if (errors.length > 0) {
      prompt += `
### Potential Errors:
The following errors may occur during task execution:
`
      for (const error of errors) {
        prompt += `- **${error.type}**: ${error.message}`
        if (error.location) {
          prompt += ` (at line ${error.location.line})`
        }
        prompt += '\n'
      }
    }

    // The "boundaries" entry of the response template: one key per boundary, or null when there are none.
    const boundariesTemplate = taskBoundaries.length > 0
      ? `"boundaries": {
    ${taskBoundaries.map((boundary: Boundary) => `"${boundary.name}": "Explain what this boundary does, why it's needed, its input/output contract, and mention if it calls external services (APIs, databases, cloud services, etc.)"`).join(',\n    ')}
  }`
      : '"boundaries": null'

    prompt += `

IMPORTANT: You must respond with ONLY a valid JSON object in the following structure. Do not include any markdown formatting or additional text before or after the JSON:

{
  "overview": "A clear description of what this task does and its main purpose. Do NOT mention 'ForgeHive task' or the task name again since these are already displayed in the UI. If the task uses external services through boundaries, mention which external services are called (e.g., 'This task retrieves stock data from Yahoo Finance API' or 'This task stores data in MongoDB database').",
  "inputOutput": {
    "inputParameters": "Detail each schema parameter with types, validation rules, and examples for parameters: ${Object.keys(inputProperties).join(', ')}",
    "returnValue": "Explain what the task returns, including the structure and meaning of the response."
  },
  "taskImplementation": "Explain the main function logic inside createTask - what business logic it performs, how it orchestrates the boundaries, and the overall flow from input to output. When describing boundary calls, include the external service being used in the format: 'Step Name: Description *from external-service-name*' (e.g., 'Price Fetching: Calls the fetchStockPrice boundary function with the ticker to retrieve current market data *from Yahoo Finance API*').",
  "errorHandling": "Document the potential errors that can occur during execution, including schema validation errors, boundary-specific errors, and main function logic errors.",
  ${boundariesTemplate}
}

Generate comprehensive documentation content for each field. Use markdown formatting within the strings where appropriate for better readability.`

    return prompt
  },

  /**
   * Sends the prompt to the Claude API and parses the reply as documentation JSON.
   *
   * @param prompt - Prompt produced by createDocumentationPrompt.
   * @returns The parsed documentation and the token usage reported by the API.
   * @throws Error when CLAUDE_API_KEY is not set, the reply has no text block,
   *   or the reply text is not a JSON object.
   */
  generateDocumentation: async (prompt: string): Promise<GenerationResult> => {
    if (!claudeApiKey) {
      throw new Error('CLAUDE_API_KEY environment variable is not set')
    }

    // Initialize Anthropic client
    const anthropic = new Anthropic({
      apiKey: claudeApiKey,
    })

    const modelName = 'claude-sonnet-4-20250514'
    const message = await anthropic.messages.create({
      model: modelName,
      max_tokens: 4000,
      messages: [
        {
          role: 'user',
          content: prompt
        }
      ]
    })

    // Log token usage metrics
    const inputTokens = message.usage.input_tokens
    const outputTokens = message.usage.output_tokens
    console.log(`Token usage - Input: ${inputTokens}, Output: ${outputTokens}`)

    // Extract the text content from the response
    const textContent = message.content.find(content => content.type === 'text')
    if (!textContent || textContent.type !== 'text') {
      throw new Error('No text content received from Claude API')
    }

    // Models sometimes wrap the JSON in a Markdown code fence despite the
    // prompt's instructions; unwrap it so an otherwise valid reply still parses.
    const rawText = textContent.text.trim()
    const fenceMatch = /^```(?:json)?\s*([\s\S]*?)\s*```$/i.exec(rawText)
    const jsonText = fenceMatch?.[1] ?? rawText

    // Parse the JSON response
    try {
      const parsed: unknown = JSON.parse(jsonText)
      // JSON.parse also accepts scalars and arrays; only an object can be documentation.
      if (typeof parsed !== 'object' || parsed === null || Array.isArray(parsed)) {
        throw new Error('expected a JSON object')
      }
      return {
        documentation: parsed as DocumentationStructure,
        metrics: {
          input: inputTokens,
          output: outputTokens,
          // Dashes are replaced because the model name becomes part of a metric name.
          model: modelName.replace(/-/g, '_')
        }
      }
    } catch (error) {
      throw new Error(`Failed to parse documentation JSON: ${error}. Raw response: ${textContent.text}`)
    }
  },

  /**
   * Stores the generated documentation on the task and bumps `updatedAt`.
   *
   * @param uuid - UUID of the task to update.
   * @param documentation - Documentation to save.
   * @returns The updated task document.
   * @throws Error when no task with that UUID exists.
   */
  updateTaskDocumentation: async (uuid: string, documentation: DocumentationStructure) => {
    const task = await Task.findOneAndUpdate(
      { uuid },
      { documentation, updatedAt: new Date() },
      { new: true }
    )

    if (!task) {
      throw new Error(`Failed to update documentation for task ${uuid}`)
    }

    return task
  }
}



/**
 * Task that generates documentation for another task and stores it on that task.
 *
 * Flow: load the task by UUID, require fingerprint data and a source code
 * path, fetch the source from S3, build the prompt, call the Claude API,
 * record token-usage metrics, save the documentation, and return a summary.
 *
 * Input: `{ uuid }` — UUID of the task to document.
 * Returns: status, task identifiers, a per-section presence map, a message,
 * and a preview built from the first 200 characters of the overview.
 * Throws when the task has no fingerprint or no source code path, and
 * propagates any error raised by the boundaries.
 */
export const createDocumentation = createTask({
  name,
  description,
  schema,
  boundaries,
  fn: async function ({ uuid }, { getTaskFromDb, getFromS3, createDocumentationPrompt, generateDocumentation, updateTaskDocumentation, setMetrics }) {
    console.log(`Generating documentation for task: ${uuid}`)

    // Get the task from the database
    const task = await getTaskFromDb(uuid)

    if (!task.fingerprint) {
      throw new Error(`Task ${uuid} does not have fingerprint data. Please publish the task with fingerprints first.`)
    }

    console.log(`Found task: ${task.taskName}`)
    console.log(`Fingerprint available: ${!!task.fingerprint}`)

    // Retrieve the source code from S3
    if (!task.sourceCodePath) {
      throw new Error(`Task ${uuid} does not have source code path`)
    }
    const sourceCode = await getFromS3(task.sourceCodePath)
    console.log(`Source code retrieved from S3: ${task.sourceCodePath}`)

    // Create comprehensive prompt using boundary
    console.log('Creating documentation prompt...')
    const prompt = await createDocumentationPrompt(task.fingerprint, sourceCode, task.taskName)

    // Generate documentation using Claude API
    console.log('Calling Claude API to generate documentation...')
    const { documentation, metrics } = await generateDocumentation(prompt)
    console.log(`Documentation generated successfully (${JSON.stringify(documentation).length} characters)`)

    // Set metrics for token usage
    await setMetrics({ type: 'usage', name: `input_tokens_${metrics.model}`, value: metrics.input })
    await setMetrics({ type: 'usage', name: `output_tokens_${metrics.model}`, value: metrics.output })

    // Update the task with generated documentation
    await updateTaskDocumentation(uuid, documentation)
    console.log(`Documentation saved to task ${uuid}`)

    // Build the preview with an explicit check: `a?.b() + '...' || fallback`
    // never reaches the fallback because `+` binds tighter than `||`, so a
    // missing overview used to produce the string "undefined...".
    const overview: unknown = documentation.overview
    const preview = typeof overview === 'string' && overview.length > 0
      ? overview.substring(0, 200) + '...'
      : 'No overview available'

    return {
      status: 'Ok',
      taskUuid: uuid,
      taskName: task.taskName,
      documentationStructure: {
        overview: !!documentation.overview,
        inputOutput: !!documentation.inputOutput,
        taskImplementation: !!documentation.taskImplementation,
        errorHandling: !!documentation.errorHandling,
        boundaries: !!documentation.boundaries
      },
      message: 'Structured documentation generated and saved successfully',
      preview
    }
  }
})