# deepaidetector REST API — OpenAPI 3.1 specification
#
# Update this file in lockstep with the Hono handlers under
# apps/api/src/routes/ and the human-readable reference at /api-docs.
# Machines should consume THIS file (the /api-docs page is a polished
# view of the same endpoints with code samples).
#
# Convention: any breaking change must bump the major version and add
# a deprecated marker on the prior endpoint for a minimum of 6 months.

# Spec dialect: OpenAPI 3.1 (the schemas in this file rely on 3.1 features
# such as `type: 'null'` and array-valued `type`).
openapi: 3.1.0

# API-level metadata rendered by documentation generators and SDK tooling.
info:
  title: Deep AI Detector API
  version: 1.4.0
  # `summary` is a short one-liner. Folded + stripped (`>-`) so the value
  # carries no embedded line break or trailing newline.
  summary: >-
    REST API for AI text detection, deep paragraph scan, plagiarism source
    detection, and account / key management.
  # Long-form Markdown; literal block (`|`) keeps headings and paragraphs.
  description: |
    # Overview

    The Deep AI Detector API exposes the same advanced AI detection model
    that powers the web app — trained on 2 billion+ text samples — plus
    per-paragraph "deep scan", a multi-source plagiarism crawler, and
    account endpoints for usage and API key lifecycle.

    All endpoints accept JSON request bodies and return JSON responses.
    Authentication is a bearer token (`Authorization: Bearer dad_live_*`)
    on every request except `GET /v1/health` and `GET /v1/health/deep`.

    Rate-limit metadata is returned on every successful response in
    `meta.rate_limit` (and in the `X-RateLimit-*` response headers).
    Detection responses include a `pricing_quote` so callers can preview
    the cost of either billing mode before committing to one.

    # Versioning

    The current major version is `v1`. We freeze the public shape of `v1`
    endpoints and provide at least 6 months of overlap when introducing
    a `v2`. Additive changes (new fields on responses, new optional
    request parameters) are NOT breaking and can land in `v1` at any
    time — make sure your client tolerates unknown fields.

    # Pricing modes

    Every metered call returns `pricing_quote` showing both `per_word`
    ($0.0003/word, $0.05 minimum) and `per_detection` ($0.50 flat).
    Select your billing mode per API key in the dashboard; the API
    bills based on the selected mode but always shows the alternative
    so you know which would be cheaper.
  contact:
    name: Deep AI Detector support
    email: support@deepaidetector.com
    url: https://deepaidetector.com/contact
  license:
    name: Proprietary
    url: https://deepaidetector.com/legal/terms
  termsOfService: https://deepaidetector.com/legal/terms
  # Vendor extension (ignored by tools that do not recognise it).
  x-logo:
    url: https://deepaidetector.com/og-default.svg
    altText: deepaidetector

# Base URLs the paths below are resolved against.
servers:
  - description: Production
    url: 'https://api.deepaidetector.com'
  - description: 'Staging (sandbox keys — prefix `dad_test_`)'
    url: 'https://api-staging.deepaidetector.com'

# Default requirement applied to every operation; the health endpoints
# opt out with an operation-level `security: []`.
security:
  - bearerAuth: []

# Tag catalogue. Operations under `paths` reference these names through
# their `tags:` list; each entry carries the Markdown blurb for that group.
tags:
  - name: detection
    description: |
      AI text detection. The main `/v1/detect` runs the full ensemble
      synchronously (~80ms warm). `/v1/detect/deep` runs a per-paragraph
      LoRA-adapter pipeline with selectable strategies (1–10s latency,
      Pro+ only).
    externalDocs:
      description: Algorithm reference
      url: https://deepaidetector.com/api-docs#endpoints
  - name: plagiarism
    description: |
      Multi-source plagiarism / source attribution. Searches the open web
      (SearXNG), 700M+ academic papers (OpenAlex, CrossRef, Semantic
      Scholar, arXiv, CORE), and Wikipedia. Pro+ only.
  - name: usage
    description: Current billing-cycle usage, quota, and per-day breakdown.
  - name: account
    description: |
      Account info, API key lifecycle. Treat created keys like passwords —
      the plaintext is returned exactly once at creation time.
  # NOTE(review): no operation in the visible part of this file uses the
  # `billing` tag — confirm it is referenced further down or drop it.
  - name: billing
    description: Subscription and balance management (most flows live in the dashboard).
  - name: system
    description: Liveness / readiness checks. No authentication required.

paths:
  # ─── Detection ────────────────────────────────────────────────────────────────
  # Synchronous single-document detection. Declares the rate-limit and
  # request-id headers on 200 and the shared error responses from
  # `components.responses`.
  /v1/detect:
    post:
      tags: [detection]
      summary: Single-shot AI detection
      description: |
        Runs the full six-layer ensemble (RoBERTa, Binoculars, GLTR,
        L4 rules, L5 domain, L6 rhythm) and returns an AI-probability
        score, calibration band, layer-by-layer breakdown, and a
        per-passage breakdown (paid tiers only).

        Synchronous: typical warm latency is 80ms. Cold start adds 5–10s.
        Cache hits are free (no rate-limit consumption, no inference cost)
        when an identical text was scanned in the previous 24h on the
        same tier.
      operationId: detect
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/DetectionRequest'
            # Example texts are kept above the documented 80-word minimum
            # (see the InsufficientText response) so they return a verdict
            # when pasted into a client as-is.
            examples:
              shortEssay:
                summary: Short essay (English, balanced strictness)
                value:
                  text: >-
                    Climate change is reshaping the way coastal cities approach
                    long-term planning. Rising sea levels and intensifying storms
                    have moved infrastructure resilience from a niche topic to the
                    top of every mayor's agenda. Budgets that once favored new
                    highways now set aside money for seawalls, pump stations, and
                    restored wetlands. Engineers are redrawing flood maps that had
                    not been updated in decades, while neighborhood groups push for
                    a say in which streets are protected first. None of these
                    choices are cheap, and most of them will outlast the officials
                    who make them.
                  strictness: balanced
              strictMode:
                summary: Marketing copy with strict thresholds
                value:
                  text: >-
                    Unlock the full potential of your team with our cutting-edge
                    platform. Streamline workflows, boost productivity, and drive
                    measurable results across every department. Our intuitive
                    dashboard empowers leaders to make data-driven decisions in
                    real time, while seamless integrations connect the tools you
                    already love. Whether you are a fast-growing startup or an
                    established enterprise, our scalable solution adapts to your
                    unique needs. Join thousands of forward-thinking companies that
                    have transformed the way they work, and discover what
                    effortless collaboration truly feels like today.
                  strictness: strict
                  domain: marketing
      responses:
        '200':
          description: Detection result
          headers:
            X-RateLimit-Limit:
              $ref: '#/components/headers/RateLimitLimit'
            X-RateLimit-Remaining:
              $ref: '#/components/headers/RateLimitRemaining'
            X-RateLimit-Reset:
              $ref: '#/components/headers/RateLimitReset'
            X-Request-Id:
              $ref: '#/components/headers/RequestId'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DetectionResponse'
              # Response examples are illustrative and independent of the
              # request examples above (their pricing quotes imply longer
              # documents).
              examples:
                aiVerdict:
                  summary: Definite AI verdict, paid tier
                  value:
                    document_id: 6a5b7c2d-1f43-4e2c-8d9a-3b5c7e9f0a1b
                    score: 0.873
                    band: definitely_ai
                    ai_pct: 87.3
                    layers: { L1: 0.92, L2: 0.81, L3: 0.85, L4: 0.79, L5: 0.88, L6: 0.83 }
                    fired_rule_count: 14
                    # Slug goes in `detected_domain`; the display label in
                    # `detected_domain_name` (matches the schema examples).
                    detected_domain: marketing
                    detected_domain_name: Marketing copy
                    pricing_quote:
                      per_word: 0.30
                      per_detection: 0.50
                      cheaper: per_word
                      savings_usd: 0.20
                    meta:
                      cache_hit: false
                      latency_ms: 87
                      rate_limit:
                        used: 1
                        limit: 5000
                        remaining: 4999
                        resets_at: 1717948800000
                humanVerdict:
                  summary: Likely-human verdict
                  value:
                    document_id: 8f3c2d1e-9b4a-4c6f-a7d8-2e1b9c5a4f7d
                    score: 0.18
                    band: likely_human
                    ai_pct: 18.0
                    layers: { L1: 0.21, L2: 0.14, L3: 0.19, L4: 0.16, L5: 0.22, L6: 0.13 }
                    fired_rule_count: 2
                    detected_domain: blog
                    pricing_quote:
                      per_word: 0.18
                      per_detection: 0.50
                      cheaper: per_word
                      savings_usd: 0.32
                    # `meta.rate_limit` is documented as present on every
                    # successful response, so the example includes it.
                    meta:
                      cache_hit: false
                      latency_ms: 73
                      rate_limit:
                        used: 2
                        limit: 5000
                        remaining: 4998
                        resets_at: 1717948800000
        '400':
          $ref: '#/components/responses/InvalidInput'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '402':
          $ref: '#/components/responses/UpgradeRequired'
        '413':
          $ref: '#/components/responses/PayloadTooLarge'
        '422':
          $ref: '#/components/responses/InsufficientText'
        '429':
          $ref: '#/components/responses/RateLimited'
        '500':
          $ref: '#/components/responses/InternalError'

  # Paragraph-level deep scan (paid tiers). Slower than `/v1/detect`; the
  # strategy is chosen per request.
  /v1/detect/deep:
    post:
      tags: [detection]
      summary: Per-paragraph deep scan
      description: |
        LoRA-adapter detection with paragraph-level resolution. Selectable
        strategies trade latency for confidence:

        - `paragraph_vote` — split on blank lines, score each paragraph
          independently, weighted mean by word count. Catches localised
          AI markers that single-pass averaging would smooth out.
        - `sliding_window` — overlapping 512-token chunks. Essential for
          documents >2k tokens where single-shot truncates.
        - `mc_dropout_band` — K dropout-enabled forward passes (default
          K=5). Returns calibrated uncertainty (mean ± std).
        - `combo` — paragraph_vote × mc_dropout_band. Most accurate,
          most expensive. Use when "definitely_ai" or "definitely_human"
          must be defensible.

        Paid tiers only (Pro+). Free tier receives `402 upgrade_required`.
      operationId: detectDeep
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/DeepDetectionRequest'
            examples:
              combo:
                summary: Long article, combo strategy
                value:
                  text: 'Coastal cities face a difficult transition as sea levels rise and storms grow more intense. Planners and engineers are revisiting flood defenses, drainage networks, and transit corridors built for a quieter climate. Insurance markets are repricing risk, and homeowners are renegotiating long-held assumptions about what land is buildable. The next decade of infrastructure investment will reshape how dense urban centers cope with water that no longer stays in its channels, and which neighborhoods remain viable for the families that have lived there for generations.'
                  strategy: combo
                  mc_k: 7
      responses:
        '200':
          description: Deep scan result
          # This endpoint is rate-limited (see 429 below) and the API
          # overview promises `X-RateLimit-*` headers on every successful
          # response, so declare them here as `/v1/detect` does.
          headers:
            X-RateLimit-Limit:
              $ref: '#/components/headers/RateLimitLimit'
            X-RateLimit-Remaining:
              $ref: '#/components/headers/RateLimitRemaining'
            X-RateLimit-Reset:
              $ref: '#/components/headers/RateLimitReset'
            X-Request-Id:
              $ref: '#/components/headers/RequestId'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/DeepDetectionResponse'
        '400':
          $ref: '#/components/responses/InvalidInput'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '402':
          $ref: '#/components/responses/UpgradeRequired'
        '429':
          $ref: '#/components/responses/RateLimited'
        # Declared inline; there is no reusable 502 under
        # `components.responses` in the visible part of this file.
        '502':
          description: ML upstream unavailable
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # ─── Plagiarism ──────────────────────────────────────────────────────────────
  # Multi-source plagiarism scan (paid tiers).
  /v1/plagiarism:
    post:
      tags: [plagiarism]
      summary: Multi-source plagiarism scan
      # NOTE(review): the description tells callers to pass `async=true`,
      # but PlagiarismRequest defines no `async` property and sets
      # `additionalProperties: false`, and no query parameter or 202
      # response is declared here. Confirm how async mode is requested and
      # document it in the schema / parameters.
      description: |
        Parallel scan across 7 source layers: SearXNG meta-search (30+
        engines), OpenAlex, CrossRef, Semantic Scholar, arXiv, CORE, and
        Wikipedia. Returns per-passage matched sources with similarity
        scores and a citation-ready URL list.

        Synchronous (5–15s typical). For very long documents pass
        `async=true` and poll `/v1/plagiarism/scans/{id}` for the report.
        Paid tiers only.
      operationId: plagiarism
      requestBody:
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/PlagiarismRequest'
      responses:
        '200':
          description: Plagiarism report
          # This endpoint is rate-limited (see 429 below) and the API
          # overview promises `X-RateLimit-*` headers on every successful
          # response, so declare them here as `/v1/detect` does.
          headers:
            X-RateLimit-Limit:
              $ref: '#/components/headers/RateLimitLimit'
            X-RateLimit-Remaining:
              $ref: '#/components/headers/RateLimitRemaining'
            X-RateLimit-Reset:
              $ref: '#/components/headers/RateLimitReset'
            X-Request-Id:
              $ref: '#/components/headers/RequestId'
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/PlagiarismResponse'
        '400':
          $ref: '#/components/responses/InvalidInput'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '402':
          $ref: '#/components/responses/UpgradeRequired'
        '422':
          $ref: '#/components/responses/InsufficientText'
        '429':
          $ref: '#/components/responses/RateLimited'

  # Scan history for the authenticated user. No query parameters are
  # declared, so the 100-item cap in the description is the only bound.
  /v1/plagiarism/scans:
    get:
      tags: [plagiarism]
      summary: List recent plagiarism scans
      description: Returns up to 100 of the authenticated user's recent plagiarism scans (most recent first).
      operationId: listPlagiarismScans
      responses:
        '200':
          description: List of scan summaries
          content:
            application/json:
              schema:
                # Inline envelope: `{ scans: PlagiarismScanSummary[] }`.
                type: object
                properties:
                  scans:
                    type: array
                    items:
                      $ref: '#/components/schemas/PlagiarismScanSummary'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '402':
          $ref: '#/components/responses/UpgradeRequired'

  # Fetch one stored scan by id (also the polling target named in the
  # `/v1/plagiarism` description).
  /v1/plagiarism/scans/{id}:
    get:
      tags: [plagiarism]
      summary: Retrieve a previous plagiarism scan
      operationId: getPlagiarismScan
      parameters:
        - in: path
          name: id
          required: true
          description: Identifier of the plagiarism scan to retrieve.
          schema:
            type: string
            format: uuid
      responses:
        '200':
          description: Full plagiarism report
          content:
            application/json:
              schema:
                type: object
                properties:
                  id:
                    type: string
                    format: uuid
                  # NOTE(review): PlagiarismResponse itself wraps its
                  # payload in a `report` property, so this resolves to
                  # `report.report` — confirm against the handler.
                  report:
                    $ref: '#/components/schemas/PlagiarismResponse'
        # The global bearerAuth requirement applies here, so an
        # unauthenticated call can fail with 401; 402 mirrors the sibling
        # list endpoint (`/v1/plagiarism/scans`).
        '401':
          $ref: '#/components/responses/Unauthorized'
        '402':
          $ref: '#/components/responses/UpgradeRequired'
        '404':
          $ref: '#/components/responses/NotFound'

  # ─── Usage ───────────────────────────────────────────────────────────────────
  # Read-only usage snapshot; takes no parameters and declares only the
  # 200 and 401 outcomes.
  /v1/usage:
    get:
      tags: [usage]
      summary: Current billing-cycle usage
      description: |
        Returns the authenticated user's detection count, average word
        length, verdict ratios (AI/human/paraphrased), per-day breakdown
        for the last 30 days, current plan, monthly quota, and PAYG
        balance (in cents) when applicable.
      operationId: getUsage
      responses:
        '200':
          description: Usage snapshot
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/UsageResponse'
        '401':
          $ref: '#/components/responses/Unauthorized'

  # ─── Account ─────────────────────────────────────────────────────────────────
  # Read-only account profile; takes no parameters and declares only the
  # 200 and 401 outcomes.
  /v1/account:
    get:
      tags: [account]
      summary: Account info
      description: |
        Returns the authenticated user's basic account: id, email, plan,
        billing mode (per_word or per_detection), retention preference,
        and Stripe customer id when applicable. Use for "what plan am I
        on" prompts in your integration.
      operationId: getAccount
      responses:
        '200':
          description: Account info
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/AccountResponse'
        '401':
          $ref: '#/components/responses/Unauthorized'

  # API key collection: GET lists key metadata, POST mints a new key.
  /v1/account/api-keys:
    get:
      operationId: listApiKeys
      tags:
        - account
      summary: List API keys
      description: |
        Returns the authenticated user's API keys. The full key value is
        NEVER returned after creation — only `last4`, `name`, `created_at`,
        `last_used_at`, and `revoked_at` fields are exposed.
      responses:
        '200':
          description: List of keys
          content:
            application/json:
              schema:
                # Inline envelope: `{ keys: ApiKeySummary[] }`.
                type: object
                properties:
                  keys:
                    type: array
                    items:
                      $ref: '#/components/schemas/ApiKeySummary'
        '401':
          $ref: '#/components/responses/Unauthorized'
    post:
      operationId: createApiKey
      tags:
        - account
      summary: Create an API key
      description: |
        Returns the new plaintext key in `key`. This is the ONLY time the
        full value is returned — store it securely (treat like a password).
        If you lose it, revoke and create a new one.
      requestBody:
        required: true
        content:
          application/json:
            schema:
              # Strict body: `name` is the only accepted property.
              type: object
              additionalProperties: false
              required:
                - name
              properties:
                name:
                  type: string
                  minLength: 1
                  maxLength: 64
                  description: Human-readable label for the key (e.g. "Production server").
            examples:
              prodServer:
                value:
                  name: Production server
      responses:
        '201':
          description: Key created
          content:
            application/json:
              schema:
                type: object
                properties:
                  # The secret itself; `summary` carries the same metadata
                  # shape the list endpoint returns.
                  key:
                    type: string
                    description: The plaintext key — returned exactly once. Store securely.
                    example: dad_live_aB3xK9LqRtY7vM2pQ8nFsW4hJ6cX1zE0
                  summary:
                    $ref: '#/components/schemas/ApiKeySummary'
        '400':
          $ref: '#/components/responses/InvalidInput'
        '401':
          $ref: '#/components/responses/Unauthorized'
        '402':
          $ref: '#/components/responses/UpgradeRequired'

  # Revoke a single key by id. Success is an empty 204.
  /v1/account/api-keys/{id}:
    delete:
      tags: [account]
      summary: Revoke an API key
      description: |
        Immediately revokes the key. Clients still using this key receive
        `401 unauthorized` on their next call. Revocation is permanent —
        to rotate, create a new key first and migrate clients before
        revoking the old one.
      operationId: revokeApiKey
      parameters:
        - in: path
          name: id
          required: true
          description: Identifier of the API key to revoke.
          schema:
            type: string
            format: uuid
      responses:
        '204':
          description: Key revoked
        '401':
          $ref: '#/components/responses/Unauthorized'
        '404':
          $ref: '#/components/responses/NotFound'

  # ─── System ──────────────────────────────────────────────────────────────────
  # Unauthenticated liveness check.
  /v1/health:
    get:
      tags: [system]
      summary: Liveness probe
      description: Returns 200 when the API process is up. Use for uptime monitoring.
      operationId: health
      # Empty requirement list overrides the global bearerAuth default.
      security: []
      responses:
        '200':
          description: Healthy
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HealthResponse'
        # Failure uses the generic error envelope, not HealthResponse.
        '503':
          description: Service unavailable.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/ErrorResponse'

  # Unauthenticated readiness check with per-dependency status.
  /v1/health/deep:
    get:
      tags: [system]
      summary: Readiness probe (deep)
      description: |
        Checks downstream dependencies (D1, KV, Modal ML, Workers AI,
        Stripe, Resend). Returns per-component status. Use for status
        pages and pre-deploy smoke tests.
      operationId: healthDeep
      # Empty requirement list overrides the global bearerAuth default.
      security: []
      responses:
        '200':
          description: All components reporting
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HealthDeepResponse'
        # 503 reuses the 200 schema, so the body shape is the same for
        # healthy and degraded states.
        '503':
          description: At least one critical component is down.
          content:
            application/json:
              schema:
                $ref: '#/components/schemas/HealthDeepResponse'

components:
  # ─── Security ────────────────────────────────────────────────────────────────
  securitySchemes:
    # HTTP bearer scheme referenced by the top-level `security` block.
    # `bearerFormat` is a free-form hint; quoted so the trailing `*` can
    # never be mistaken for YAML alias syntax by a careless edit.
    bearerAuth:
      type: http
      scheme: bearer
      bearerFormat: 'dad_live_*'
      description: |
        Bearer token in the `Authorization` header. Live keys are prefixed
        `dad_live_`; sandbox/test keys are prefixed `dad_test_`. Issue,
        rotate, and revoke from your dashboard.

  # ─── Reusable headers ────────────────────────────────────────────────────────
  # Response headers shared via `$ref` (used by the `/v1/detect` 200
  # response).
  headers:
    RateLimitLimit:
      description: Maximum requests allowed in the current window.
      schema:
        type: integer
      example: 5000
    RateLimitRemaining:
      description: Requests remaining in the current window.
      schema:
        type: integer
      example: 4999
    # Milliseconds, not seconds — hence the int64 format.
    RateLimitReset:
      description: Unix-ms timestamp when the window resets.
      schema:
        type: integer
        format: int64
      example: 1717948800000
    RequestId:
      description: Per-request UUID — include in support tickets.
      schema:
        type: string
        format: uuid

  # ─── Reusable responses ──────────────────────────────────────────────────────
  # Shared error responses. Every body is based on ErrorResponse; each
  # example shows the `error` code that response carries plus a
  # human-readable `message` and the `request_id`.
  responses:
    InvalidInput:
      description: Body or query failed validation.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            error: invalid_input
            message: 'text: String must contain at least 50 character(s)'
            request_id: 0190b1f8-aae5-7e3a-9b1c-7f4ba2c0d3e1
    Unauthorized:
      description: Missing or invalid bearer token.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            error: unauthorized
            message: API key missing or invalid.
            request_id: 0190b1f8-aae5-7e3a-9b1c-7f4ba2c0d3e1
    UpgradeRequired:
      description: Word count exceeds tier maximum, or feature requires a higher plan.
      content:
        application/json:
          schema:
            # ErrorResponse extended with upgrade call-to-action fields.
            allOf:
              - $ref: '#/components/schemas/ErrorResponse'
              - type: object
                properties:
                  upgrade_url:
                    type: string
                    format: uri
                  upgrade_features:
                    type: array
                    items:
                      type: string
          example:
            error: upgrade_required
            message: Free tier supports up to 1,000 words. Upgrade to Plus for 5,000.
            upgrade_url: https://deepaidetector.com/pricing
            upgrade_features:
              - 'Up to 5,000 words/doc'
              - API access
              - Webhooks
            request_id: 0190b1f8-aae5-7e3a-9b1c-7f4ba2c0d3e1
    PayloadTooLarge:
      description: Request body exceeds 200KB.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            error: payload_too_large
            message: 'Request body exceeds 200,000 byte cap.'
            request_id: 0190b1f8-aae5-7e3a-9b1c-7f4ba2c0d3e1
            details:
              max_bytes: 200000
    InsufficientText:
      description: Text is shorter than the minimum word count for a reliable verdict.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            error: insufficient_text
            message: 'Detection needs at least 80 words. You provided 23.'
            request_id: 0190b1f8-aae5-7e3a-9b1c-7f4ba2c0d3e1
            details:
              word_count: 23
              min_words: 80
              recommended_min_words: 150
    RateLimited:
      description: Rate limit exceeded.
      headers:
        Retry-After:
          description: Seconds until the limit resets.
          schema:
            type: integer
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            error: rate_limit_exceeded
            message: 'Daily limit reached: 50 detections.'
            request_id: 0190b1f8-aae5-7e3a-9b1c-7f4ba2c0d3e1
            details:
              used: 50
              limit: 50
              resets_at: 1717948800000
              retry_after_seconds: 28800
    # NotFound / InternalError examples carry a `message` like the other
    # error examples in this section.
    # NOTE(review): the message texts below are illustrative — align them
    # with what the handlers actually emit.
    NotFound:
      description: Resource not found.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            error: not_found
            message: Resource not found.
            request_id: 0190b1f8-aae5-7e3a-9b1c-7f4ba2c0d3e1
    InternalError:
      description: Unexpected server-side failure.
      content:
        application/json:
          schema:
            $ref: '#/components/schemas/ErrorResponse'
          example:
            error: internal_error
            message: Unexpected server-side failure.
            request_id: 0190b1f8-aae5-7e3a-9b1c-7f4ba2c0d3e1

  # ─── Schemas ─────────────────────────────────────────────────────────────────
  schemas:
    # Request envelopes
    # Body of `POST /v1/detect`. Strict: unknown properties are rejected
    # (`additionalProperties: false`); only `text` is required.
    DetectionRequest:
      type: object
      required: [text]
      additionalProperties: false
      properties:
        # minLength / maxLength count characters. The 80-word minimum in
        # the description is a separate rule (the InsufficientText example
        # reports `min_words: 80`).
        text:
          type: string
          minLength: 50
          maxLength: 50000
          description: Text to analyse. Minimum 80 words for a verdict.
        strictness:
          type: string
          enum: [lenient, balanced, strict]
          default: balanced
          description: |
            Threshold preset. `lenient` favours fewer false positives;
            `strict` favours fewer false negatives. `balanced` is the
            default and what the dashboard uses.
        # Free-form string, not an enum: the list in the description ends
        # with "..." and is therefore not exhaustive.
        domain:
          type: string
          description: |
            Optional hint to skip domain auto-detection. One of:
            `academic`, `marketing`, `news`, `blog`, `legal`, `fiction`,
            `technical`, `social`, `email`, `code_comments`, `code`,
            `creative_writing`, ...
        return_per_passage:
          type: boolean
          default: true
          description: When false, omits the `per_passage` array (smaller payload).

    # Body of `POST /v1/detect/deep`. Strict: unknown properties are
    # rejected; `text` and `strategy` are both required.
    DeepDetectionRequest:
      type: object
      required: [text, strategy]
      additionalProperties: false
      properties:
        text:
          type: string
          minLength: 80
          maxLength: 50000
          description: Text to analyse. The `paragraph_vote` strategy splits it on blank lines.
        strategy:
          type: string
          enum: [paragraph_vote, sliding_window, mc_dropout_band, combo]
          description: |
            Scan strategy; trades latency for confidence. See the
            `POST /v1/detect/deep` operation description for what each
            value does.
        mc_k:
          type: integer
          minimum: 3
          maximum: 20
          default: 5
          description: Number of MC-dropout forwards (only used when strategy is `mc_dropout_band` or `combo`).

    # Body of `POST /v1/plagiarism`. Strict: unknown properties are
    # rejected; only `text` is required.
    # NOTE(review): the `/v1/plagiarism` description tells callers to pass
    # `async=true`, but no `async` property exists here and
    # `additionalProperties` is false — confirm where async is passed.
    PlagiarismRequest:
      type: object
      required: [text]
      additionalProperties: false
      properties:
        # Length bounds are in characters.
        text:
          type: string
          minLength: 200
          maxLength: 80000
        similarity_floor:
          type: number
          minimum: 0.4
          maximum: 0.95
          default: 0.6
          description: Minimum similarity (0-1) for a match to be reported.
        use_embeddings:
          type: boolean
          default: true
          description: Use semantic embeddings in addition to lexical match.
        # NOTE(review): undocumented — presumably caps how many n-grams
        # are sampled for source lookup; confirm and add a description.
        max_ngrams:
          type: integer
          minimum: 10
          maximum: 80
          default: 30

    # Response envelopes
    # 200 body of `POST /v1/detect`. No property is marked required, so
    # clients should treat every field as optional.
    DetectionResponse:
      type: object
      properties:
        # ── Headline verdict ──
        document_id:
          type: string
          format: uuid
          description: Stable identifier for this detection. Reference in support tickets.
        score:
          type: number
          minimum: 0
          maximum: 1
          description: Probability the text is AI-generated.
        band:
          type: string
          enum:
            - highly_confident_human
            - definitely_human
            - likely_human
            - paraphrased
            - likely_ai
            - definitely_ai
            - highly_confident_ai
        ai_pct:
          type: number
          description: Convenience field — `score * 100`.

        # ── Per-layer detail ──
        layers:
          type: object
          description: Per-layer scores. Names map to PRD §6.
          properties:
            L1:
              type: number
              description: RoBERTa base + large average
            L2:
              type: number
              description: Binoculars perplexity
            L3:
              type: number
              description: GLTR token-rank histogram
            L4:
              type: number
              description: 'Rule corpus (1201 patterns)'
            L5:
              type: number
              description: '26-domain calibration'
            L6:
              type: number
              description: 'Document rhythm: burstiness, hapax ratio, paragraph uniformity'
        layer_agreement:
          type: number
          description: 0-1, how tightly the six layers agree. Higher = more confident.
        confidence_interval:
          type: object
          properties:
            low:
              type: number
            high:
              type: number

        # ── Language / domain detection ──
        detected_language:
          type: string
          example: en
        detected_language_confidence:
          type: number
        detected_domain:
          type: string
          example: marketing
        detected_domain_name:
          type: string
          example: Marketing copy
        detected_domain_confidence:
          type: number

        # ── Passage-level highlights ──
        per_passage:
          type: array
          description: Per-sentence highlights. Empty array when `per_passage_locked` is true (free tiers).
          items:
            type: object
            properties:
              text:
                type: string
              start:
                type: integer
              end:
                type: integer
              score:
                type: number
              # Plain string here, unlike the enum on the top-level `band`.
              band:
                type: string
              fired_rule_ids:
                type: array
                items:
                  type: string
        per_passage_locked:
          type: boolean
          description: True for free / anonymous tiers. Signal to UI to render the "upgrade for highlights" CTA.
        per_passage_count:
          type: integer

        # ── Rhythm and paraphrase signals ──
        document_rhythm:
          type: object
          additionalProperties: true
          description: L6 rhythm metrics — burstiness, hapax ratio, paragraph CV, etc.
        rhythm_signals:
          type: array
          items:
            type: string
        paraphrase_signature:
          type: object
          properties:
            humanizer_detected:
              type: boolean
            score:
              type: number
            signals:
              type: array
              items:
                type: string
            detected:
              type: array
              items:
                type: string

        # ── Fired rules ──
        fired_rules:
          type: array
          description: Top 50 fired rules — empty if none.
          items:
            type: object
            properties:
              rule_id:
                type: string
              pattern:
                type: string
              category:
                type: string
              sign:
                type: string
                enum:
                  - ai_marker
                  - human_marker
              weight:
                type: number
              count:
                type: integer
              positions:
                type: array
                items:
                  type: integer
              human_replacement:
                type: string
        fired_rule_count:
          type: integer

        # ── Pricing and upsell ──
        pricing_quote:
          $ref: '#/components/schemas/PricingQuote'
        # Either JSON null or an upsell object.
        upgrade_available:
          oneOf:
            - type: 'null'
            - type: object
              properties:
                cta:
                  type: string
                tagline:
                  type: string
                features:
                  type: array
                  items:
                    type: string
                url:
                  type: string
                  format: uri
                price:
                  type: string
                api_quote:
                  $ref: '#/components/schemas/PricingQuote'

        # ── Request metadata ──
        meta:
          type: object
          properties:
            version:
              type: string
            rule_db_version:
              type: string
            latency_ms:
              type: integer
            tier:
              type: string
            word_count:
              type: integer
            tier_max_words:
              type: integer
            pipeline:
              type: string
            cache_hit:
              type: boolean
            insufficient_text:
              type: boolean
            reliability:
              type: object
              properties:
                level:
                  type: string
                  enum:
                    - low
                    - high
                reason:
                  type: string
                recommended_min_words:
                  type: integer
            rate_limit:
              $ref: '#/components/schemas/RateLimitMeta'

    DeepDetectionResponse:
      # Deep-scan result: one aggregate verdict plus per-paragraph and
      # per-window breakdowns. No property is marked required.
      type: object
      properties:
        strategy:
          type: string
          enum:
            - paragraph_vote
            - sliding_window
            - mc_dropout_band
            - combo
        ai_prob:
          description: Aggregator verdict (LoRA + L4 + L6 + L7 + L8 + L9 + L10 ensemble).
          type: number
          minimum: 0
          maximum: 1
        ai_prob_lora_only:
          description: LoRA-only score (diagnostic — not the headline verdict).
          type: number
        ai_prob_std:
          description: Standard deviation across MC-dropout forwards or per-paragraph variance.
          type: number
        confidence_band:
          type: object
          properties:
            low:
              type: number
            high:
              type: number
        band: { type: string }
        per_paragraph:
          type: array
          items:
            type: object
            properties:
              text:
                type: string
              ai_prob:
                type: number
              # The two fields below are nullable: a value or an explicit null.
              ai_prob_std:
                type: [number, 'null']
              word_count:
                type: [integer, 'null']
        per_window:
          type: array
          items:
            type: number
        n_paragraphs: { type: integer }
        n_windows: { type: integer }
        word_count: { type: integer }
        tier: { type: string }
        latency_ms: { type: integer }
        cache_hit: { type: boolean }

    PlagiarismResponse:
      # Plagiarism scan result: the `report` payload plus request-level `meta`.
      # Key casing is mixed — `report` uses camelCase, `meta` uses snake_case.
      type: object
      properties:
        report:
          type: object
          properties:
            overallPercent:
              type: number
            sources:
              type: array
              items:
                $ref: '#/components/schemas/PlagiarismSource'
            # Open-ended map: arbitrary keys, integer values.
            byKind:
              type: object
              additionalProperties:
                type: integer
            diagnostics:
              type: object
              properties:
                wordCount:
                  type: integer
                ngramsChecked:
                  type: integer
                latencyMs:
                  type: integer
            scannerVersion:
              type: string
        meta:
          type: object
          properties:
            cache_hit:
              type: boolean
            tier:
              type: string
            latency_ms:
              type: integer
            scanner_version:
              type: string

    PlagiarismSource:
      # One matched source inside a plagiarism report's `sources` array.
      type: object
      properties:
        # Free-form string; the description lists the values in use.
        source:
          type: string
          description: 'searx | openalex | crossref | semanticscholar | arxiv | core | wikipedia'
        title:
          type: string
        url:
          type: string
          format: uri
        similarity:
          type: number
          minimum: 0
          maximum: 1
        matchedPassage:
          type: string
        # Nullable: a value or an explicit null.
        author:
          type: [string, 'null']
        publishedYear:
          type: [integer, 'null']

    PlagiarismScanSummary:
      # Compact summary of a single plagiarism scan (camelCase keys).
      type: object
      properties:
        id:
          type: string
          format: uuid
        overallPercent:
          type: number
        sourcesCount:
          type: integer
        wordCount:
          type: integer
        createdAt:
          type: string
          format: date-time

    UsageResponse:
      # Usage snapshot: plan, quota, this-month detection count, verdict
      # tallies, a per-day series, and the cycle start/end dates.
      type: object
      properties:
        plan:
          type: string
          enum:
            - free
            - starter
            - pro
            - plus
            - business
            - team
            - enterprise
            - payg
            - api_only
        quota:
          description: Monthly detection allowance. -1 for unlimited.
          type: integer
        detections_this_month: { type: integer }
        balance_cents:
          description: PAYG balance in cents. Null for subscription tiers.
          type: [integer, 'null']
        billing_mode:
          description: Active per-key billing mode.
          type: string
          enum:
            - per_word
            - per_detection
        avg_word_count: { type: number }
        verdicts:
          type: object
          properties:
            ai:
              type: integer
            human:
              type: integer
            paraphrased:
              type: integer
        daily:
          type: array
          items:
            type: object
            properties:
              date:
                type: string
                format: date
              count:
                type: integer
        cycle_start:
          type: string
          format: date
        cycle_end:
          type: string
          format: date

    AccountResponse:
      # Account profile: identity, plan, billing settings, retention flag.
      type: object
      properties:
        id:
          type: string
          format: uuid
        email:
          type: string
          format: email
        # NOTE(review): unconstrained here, while UsageResponse.plan carries
        # an enum — confirm whether both should share the same value set.
        plan:
          type: string
        display_name:
          type: [string, 'null']
        balance_cents:
          type: [integer, 'null']
        billing_mode:
          type: string
          enum: [per_word, per_detection]
        retain_plaintext:
          type: boolean
          description: 'When true, detection plaintext is retained for dashboard history. Default false.'
        created_at:
          type: string
          format: date-time
        stripe_customer_id:
          type: [string, 'null']

    ApiKeySummary:
      # Metadata for one API key. Only the last four characters of the
      # plaintext key are part of this shape.
      type: object
      properties:
        id:
          type: string
          format: uuid
        name:
          type: string
        last4:
          description: Last 4 characters of the plaintext key. Useful for matching keys to your records.
          type: string
          minLength: 4
          maxLength: 4
        created_at:
          type: string
          format: date-time
        # Nullable timestamps: a date-time string or an explicit null.
        last_used_at:
          type: [string, 'null']
          format: date-time
        revoked_at:
          type: [string, 'null']
          format: date-time

    HealthResponse:
      # Shallow health payload: liveness flag plus service identity.
      type: object
      properties:
        ok:
          type: boolean
        service:
          type: string
        version:
          type: string
        env:
          type: string
          enum:
            - development
            - staging
            - production
        # NOTE(review): 64-bit integer timestamp; unit is not stated here.
        ts:
          type: integer
          format: int64

    HealthDeepResponse:
      # Deep health payload: overall flag plus one HealthComponent entry
      # per named dependency under `components`.
      type: object
      properties:
        ok:
          type: boolean
        version:
          type: string
        ts:
          type: integer
          format: int64
        components:
          type: object
          properties:
            d1:
              $ref: '#/components/schemas/HealthComponent'
            kv:
              $ref: '#/components/schemas/HealthComponent'
            modal:
              $ref: '#/components/schemas/HealthComponent'
            workers_ai:
              $ref: '#/components/schemas/HealthComponent'
            stripe:
              $ref: '#/components/schemas/HealthComponent'
            resend:
              $ref: '#/components/schemas/HealthComponent'

    HealthComponent:
      # Status record for a single dependency, referenced by each entry in
      # HealthDeepResponse.components.
      type: object
      properties:
        status:
          type: string
          enum:
            - operational
            - degraded
            - down
            - unknown
        # Nullable: an integer or an explicit null.
        latency_ms:
          type: [integer, 'null']
        checked_at:
          type: integer
          format: int64
        message:
          type: [string, 'null']

    PricingQuote:
      # Side-by-side cost of the two billing modes, plus which is cheaper.
      type: object
      properties:
        per_word:
          type: number
          description: USD billed in per_word mode.
        per_detection:
          type: number
          description: USD billed in per_detection mode.
        cheaper:
          type: string
          enum:
            - per_word
            - per_detection
        savings_usd:
          type: number

    RateLimitMeta:
      # Rate-limit counters, referenced from response `meta.rate_limit`.
      type: object
      properties:
        used:
          type: integer
        limit:
          type: integer
        remaining:
          type: integer
        # NOTE(review): 64-bit integer timestamp; unit is not stated here.
        resets_at:
          type: integer
          format: int64

    ErrorResponse:
      # Error body. Only `error` is required; the other fields are optional.
      type: object
      required:
        - error
      properties:
        error:
          description: Stable machine-readable error code. Switch on this, never on `message`.
          type: string
        message:
          description: Human-readable explanation, safe to surface to end users.
          type: string
        request_id:
          description: Include in support requests so we can find the request in logs.
          type: string
          format: uuid
        # Free-form object: any additional keys are accepted.
        details:
          description: Per-error context (e.g. `word_count`, `tier`, `min_words`).
          type: object
          additionalProperties: true

    WebhookEnvelope:
      # Wrapper object used as the webhook request body. `type` identifies
      # the event; `data` is an open object whose shape depends on it.
      type: object
      required: [id, type, created, data]
      properties:
        id:
          type: string
          format: uuid
          description: Stable event id. Use to deduplicate retries.
        type:
          type: string
          enum:
            - detection.completed
            - detection.deep.completed
            - plagiarism.completed
            - subscription.activated
            - subscription.canceled
            - subscription.past_due
            - balance.low
          description: Event type. Switch on this.
        created:
          type: integer
          format: int64
          description: Unix-ms timestamp at which the event fired.
        api_version:
          type: string
          # OpenAPI 3.1 Schema Objects follow JSON Schema 2020-12, where the
          # `examples` array replaces the deprecated singular `example`.
          examples: ['v1']
        data:
          type: object
          description: |
            Event payload. For `detection.completed` this is the same shape
            as `DetectionResponse`. For other event types see the events
            reference at /api-docs/webhooks.
          # Left open on purpose: the payload shape varies by event type.
          additionalProperties: true

  # ─── Webhooks ────────────────────────────────────────────────────────────────
webhooks:
  # Outbound webhook: the API POSTs a WebhookEnvelope to the subscriber's
  # configured endpoint.
  detection.completed:
    post:
      operationId: webhookDetectionCompleted
      tags: [account]
      summary: Detection completed
      description: |
        Fires after every `/v1/detect` and `/v1/detect/deep` call when the
        endpoint URL is configured in your dashboard (Plus tier and above).
        Body is JSON. Signature is `HMAC-SHA256(body, secret)` in the
        `X-DAD-Signature` header as a hex string.

        Retry policy: 7 attempts with exponential backoff (1m, 5m, 15m,
        1h, 6h, 24h, 72h). Responses with 2xx are considered successful.
        After the 7th failed attempt the event is moved to the dead-letter
        log and you receive an email alert.
      parameters:
        # Machine-readable declaration of the signature header that the
        # description above specifies in prose.
        - name: X-DAD-Signature
          in: header
          required: true
          description: Hex-encoded `HMAC-SHA256(body, secret)` signature of the request body.
          schema:
            type: string
      requestBody:
        # Per the description, every delivery carries a JSON body.
        required: true
        content:
          application/json:
            schema:
              $ref: '#/components/schemas/WebhookEnvelope'
      responses:
        '200':
          description: Receipt acknowledged.
