Text + Logo + Reference Fidelity Pipeline
Run a two-pass SKU pipeline: reference-anchored generation first, then targeted edit retries for text and logo fidelity exceptions. Uses shipped CreativeAI image endpoints only.
Why this closes active buyer objections
Exact text intent in prompt contract
Capture required copy per SKU in structured input fields.
Logo placement guardrails
Reference anchor + logo notes reduce brand drift across variants.
Second-pass repair route
Automatically retry failed fidelity checks through /v1/images/edits.
Audit-friendly QA outputs
Persist JSONL status for pass/fail and replay only failed SKUs.
Prepare a SKU fidelity CSV
Keep exact required copy in a dedicated field. Do not bury required text inside unstructured notes.
sku,reference_image_url,required_text,logo_note,scene_note
SKU-4101,https://cdn.example.com/ref/sku-4101-front.png,"ACME SPORT","Front label must stay centered and legible","White seamless, soft shadow"
SKU-4102,https://cdn.example.com/ref/sku-4102-front.png,"NORTHLINE 500ml","Keep bottle cap icon + wordmark size ratio","Kitchen counter lifestyle"
SKU-4103,https://cdn.example.com/ref/sku-4103-front.png,"ROAMWELL","Keep embossed logo placement on strap","Premium dark editorial lighting"
Run two-pass generation + edit repair
Pass 1 calls /v1/images/generations with reference anchoring. Pass 2 calls /v1/images/edits for SKUs that fail your QA text/logo checks.
import asyncio
import csv
import io
import json
import os
from dataclasses import dataclass
import httpx
# Required credential; a missing env var raises KeyError at import time,
# failing fast before any jobs are read or requests sent.
API_KEY = os.environ["CREATIVEAI_API_KEY"]
# Base URL for the CreativeAI image endpoints.
BASE = "https://api.creativeai.run/v1"
# Input CSV of SKU fidelity jobs (columns parsed in load_rows).
INPUT_CSV = "fidelity-jobs.csv"
# Audit output: one JSON status record per SKU, one per line.
OUTPUT_JSONL = "fidelity-outputs.jsonl"
# Maximum SKUs processed concurrently (enforced via an asyncio.Semaphore).
MAX_CONCURRENCY = 4
# Extra attempts per SKU after the first failure (total attempts = MAX_RETRIES + 1).
MAX_RETRIES = 2
@dataclass
class Row:
    """One SKU fidelity job parsed from the input CSV."""

    sku: str                  # SKU identifier, e.g. "SKU-4101"
    reference_image_url: str  # product photo used as the identity anchor
    required_text: str        # copy that must appear verbatim in the render
    logo_note: str            # logo placement/size constraint fed into prompts
    scene_note: str           # background / scene direction fed into prompts
def load_rows(path: str) -> list[Row]:
    """Parse the fidelity job CSV at *path* into a list of Row records.

    Every field is whitespace-stripped so hand-edited CSVs do not leak
    stray spaces into prompts or URLs.
    """
    with open(path, newline="", encoding="utf-8") as handle:
        return [
            Row(
                sku=record["sku"].strip(),
                reference_image_url=record["reference_image_url"].strip(),
                required_text=record["required_text"].strip(),
                logo_note=record["logo_note"].strip(),
                scene_note=record["scene_note"].strip(),
            )
            for record in csv.DictReader(handle)
        ]
def build_prompt(row: Row) -> str:
    """Compose the pass-1 generation prompt for one SKU job."""
    segments = [
        f"Commercial product photo for SKU {row.sku}.",
        "Use the reference product as identity anchor.",
        f"Required visible text must read exactly: '{row.required_text}'.",
        f"Logo constraint: {row.logo_note}.",
        f"Scene: {row.scene_note}.",
        "Do not invent extra labels, watermarks, or additional text.",
    ]
    return " ".join(segments)
async def generate_variants(client: httpx.AsyncClient, row: Row) -> list[str]:
    """Run pass 1: reference-anchored generation for one SKU.

    Posts to /images/generations with the reference image as anchor and
    returns the candidate URLs present in the response (entries without a
    'url' field are skipped).
    """
    request_body = {
        "model": "gpt-image-1",
        "prompt": build_prompt(row),
        "image_url": row.reference_image_url,
        "size": "1024x1024",
        "quality": "high",
        "n": 2,
    }
    response = await client.post(f"{BASE}/images/generations", json=request_body)
    response.raise_for_status()
    urls: list[str] = []
    for entry in response.json().get("data", []):
        url = entry.get("url")
        if url:
            urls.append(url)
    return urls
def qa_text_match(image_url: str, expected_text: str) -> bool:
    """Hook for the external OCR/text-fidelity check.

    Deliberately returns False until a real validator is wired in, which
    routes every first-pass candidate through the edit-repair path and keeps
    fidelity scoring explicit and auditable.
    """
    # TODO: call your OCR provider on image_url and compare the recognized
    # text against expected_text after normalization.
    return False
async def edit_from_url(client: httpx.AsyncClient, image_url: str, expected_text: str, logo_note: str) -> str | None:
    """Run pass 2: download a candidate image and submit a targeted repair edit.

    Returns the repaired image URL, or None when the edits endpoint returns
    an empty data list.
    """
    source = await client.get(image_url)
    source.raise_for_status()
    repair_prompt = (
        "Correct only text/logo fidelity errors while preserving product geometry, materials, "
        "camera, lighting, and background composition. "
        f"Final visible text must read exactly: '{expected_text}'. "
        f"Logo constraint: {logo_note}."
    )
    upload = {
        "image": ("source.png", source.content, "image/png"),
    }
    form_fields = {
        "model": "gpt-image-1",
        "prompt": repair_prompt,
        "size": "1024x1024",
        "quality": "high",
        "n": "1",
        "response_format": "url",
    }
    response = await client.post(f"{BASE}/images/edits", data=form_fields, files=upload)
    response.raise_for_status()
    entries = response.json().get("data", [])
    if not entries:
        return None
    return entries[0].get("url")
async def process_row(client: httpx.AsyncClient, sem: asyncio.Semaphore, row: Row) -> dict:
    """Drive one SKU through generate -> QA -> optional edit repair.

    Failures are retried with a linear backoff (1.5s, 3.0s, ...); once
    MAX_RETRIES extra attempts are exhausted the SKU is reported with
    status "failed" instead of raising, so one bad SKU never aborts the run.
    """
    async with sem:
        attempt = 0
        while True:
            try:
                urls = await generate_variants(client, row)
                selected = urls[0] if urls else None
                text_ok = selected is not None and qa_text_match(selected, row.required_text)
                fixed_url = None
                # Only route through the edit endpoint when a candidate exists
                # but failed the text/logo QA check.
                if selected and not text_ok:
                    fixed_url = await edit_from_url(client, selected, row.required_text, row.logo_note)
                return {
                    "sku": row.sku,
                    "status": "completed",
                    "first_pass_candidates": urls,
                    "selected_url": selected,
                    "qa_text_pass": text_ok,
                    "second_pass_url": fixed_url,
                    "final_url": fixed_url or selected,
                }
            except Exception as exc:
                if attempt == MAX_RETRIES:
                    return {"sku": row.sku, "status": "failed", "error": str(exc)}
                await asyncio.sleep(1.5 * (attempt + 1))
                attempt += 1
async def main() -> None:
    """Run every SKU job concurrently, persist JSONL results, print a summary."""
    jobs = load_rows(INPUT_CSV)
    limiter = asyncio.Semaphore(MAX_CONCURRENCY)
    auth_headers = {"Authorization": f"Bearer {API_KEY}"}
    async with httpx.AsyncClient(headers=auth_headers, timeout=120.0) as client:
        tasks = [process_row(client, limiter, job) for job in jobs]
        results = await asyncio.gather(*tasks)
    # One JSON object per line so operations can replay failed SKUs later.
    with open(OUTPUT_JSONL, "w", encoding="utf-8") as sink:
        sink.writelines(json.dumps(record) + "\n" for record in results)
    completed = sum(1 for r in results if r["status"] == "completed")
    repaired = sum(1 for r in results if r.get("second_pass_url"))
    failed = len(results) - completed
    print(f"done: completed={completed}, repaired={repaired}, failed={failed}, output={OUTPUT_JSONL}")
if __name__ == "__main__":
    asyncio.run(main())

Persist replay-safe output status
Keep both first-pass and second-pass URLs so operations can approve, reject, or replay with deterministic history.
{"sku":"SKU-4101","status":"completed","first_pass_candidates":["https://.../a.png","https://.../b.png"],"selected_url":"https://.../a.png","qa_text_pass":false,"second_pass_url":"https://.../a-fixed.png","final_url":"https://.../a-fixed.png"}
{"sku":"SKU-4102","status":"completed","first_pass_candidates":["https://.../c.png"],"selected_url":"https://.../c.png","qa_text_pass":true,"second_pass_url":null,"final_url":"https://.../c.png"}
{"sku":"SKU-4103","status":"failed","error":"HTTP 500: {"error":{"code":"server_error"}}"}Notes on current shipped behavior
- Reference generation:
/v1/images/generations supports a single image_url anchor and n=1..4 outputs per request. - Edit retries:
/v1/images/edits accepts multipart image upload and prompt-driven repair; an optional mask upload exists but is currently ignored server-side. - QA layer: OCR/compliance checks are external to this API, so wire your own validator for exact text scoring.
Launch checklist
/v1/images/edits only.

Need adjacent playbooks?