commit 7883c9fd949957d0e1a692806b0b21830ebdb7cd Author: Kimi Claw Date: Thu Feb 19 01:32:42 2026 +0800 Initial commit: ai-self-improvement-digest skill with kimi_search support diff --git a/SKILL.md b/SKILL.md new file mode 100644 index 0000000..f51ad82 --- /dev/null +++ b/SKILL.md @@ -0,0 +1,205 @@ +--- +name: ai-self-improvement-digest +description: Create a daily digest focused on AI self-improvement material - content that helps an AI agent get better at its job. Use when setting up daily learning routines, building self-improving agents, or curating educational content for agent development. Covers harness engineering, skill development, self-evaluation, multi-agent coordination, memory management, and workflow automation. +--- + +# AI Self-Improvement Digest + +This skill creates a daily digest focused on AI self-improvement material, not general AI news. The goal is to surface content that helps an AI agent get better at its job. + +## What This Digest Covers + +1. **Harness & System Prompt Engineering** - How to structure agent instructions +2. **Skill & Tool Development** - New tools, MCP servers, integration patterns +3. **Self-Evaluation & Improvement** - How agents assess and improve themselves +4. **Multi-Agent Coordination** - Spawning, supervising, merging work +5. **Memory & Context Management** - RAG, long-term memory, compaction +6. **Workflow Automation** - Task decomposition, failure handling +7. **Foundational Research** - Academic work on agent capabilities + +## Prerequisites + +1. **Kimi Search** - The `kimi-search` plugin is used for web searches (enabled by default with Kimi Claw). + +2. **Tracking File** - Create `memory/ai-digest-posted.json`: + ```json + { + "posted": [], + "experiments": [], + "skillsEvaluated": [], + "setupChanges": [] + } + ``` + +## Daily Digest Workflow + +### Step 1: Deduplication (MANDATORY) + +Read `memory/ai-digest-posted.json`. Skip anything already posted (by URL or substantially similar topic). 
+ +### Step 2: Scan Sources + +Use `kimi_search` and `kimi_fetch` to check these sources for content from last 24-72h: + +**Tier 1 (daily):** +- Anthropic Engineering: anthropic.com/engineering +- Simon Willison: simonwillison.net +- Geoff Huntley: ghuntley.com +- Hacker News: news.ycombinator.com (AI/agent threads) +- Lilian Weng: lilianweng.github.io + +**Tier 2 (2-3x/week):** +- Latent Space: latent.space +- Cursor Blog: cursor.com/blog +- Eugene Yan: eugeneyan.com +- Chip Huyen: huyenchip.com +- Mitchell Hashimoto: mitchellh.com + +**Tier 3 (weekly):** +- arXiv cs.CL/cs.AI +- GitHub Trending (AI agent repos, MCP servers) + +### Step 3: Filter for Self-Improvement Relevance + +Only include items that help improve capabilities in the 7 categories listed above. + +**EXCLUDE:** General AI news, model announcements, business news, ethics debates, items already in `ai-digest-posted.json`. + +### Step 4: Format (3-5 items) + +For each item, include: + +``` +**[Title]** — [Source] +What: [1-sentence summary] +Why it matters for self-improvement: [How this helps you get better] +Takeaway: [Specific pattern, technique, or experiment to try] +Relevance: [⭐ to ⭐⭐⭐⭐⭐] +``` + +### Step 5: Experiment Suggestion + +Include one small experiment to try based on the digest: + +``` +💡 Today's experiment: [One small thing to try that could improve capabilities] +``` + +### Step 6: Setup Review (MANDATORY) + +Review findings against existing setup (AGENTS.md, TOOLS.md, skills/, cron jobs). Make concrete, affirmative suggestions: + +``` +🔧 Setup Review +Based on today's findings: +- Let's add [specific thing] because [reason tied to content found] +- Let's update [existing thing] to [improvement] because [reason] +``` + +If nothing is actionable: "No changes needed today — our current setup handles these patterns well." + +### Step 7: Update Tracking + +Append new items to `memory/ai-digest-posted.json` with date, title, url, topic. 
+ +## Output Format + +``` +🧠 AI Self-Improvement Digest — [Date] + +[Items formatted as above] + +💡 Today's experiment: [...] + +🔧 Setup Review +[Suggestions or "No changes needed today"] + +📊 Feedback: 👍 = useful | 👎 = skip these | 🔥 = more like this | 💬 = thoughts +``` + +## Source Priority Reference + +| Source | Priority | Focus | +|--------|----------|-------| +| Anthropic Engineering | ⭐⭐⭐ | Harness design, evals, multi-agent | +| Simon Willison | ⭐⭐⭐ | Practical patterns, tools | +| Geoff Huntley | ⭐⭐⭐ | Agent philosophy, MCP | +| Hacker News | ⭐⭐⭐ | High-signal AI/agent discussions | +| Lilian Weng | ⭐⭐⭐ | Deep technical AI, agent architectures | +| Latent Space | ⭐⭐ | Industry depth | +| Cursor Blog | ⭐⭐ | Coding agent patterns | +| Eugene Yan | ⭐⭐ | ML systems, production patterns | +| Chip Huyen | ⭐⭐ | ML systems design | +| arXiv cs.CL/cs.AI | ⭐⭐ | Research foundations | +| GitHub Trending | ⭐⭐ | New tools, repos | + +## Self-Improvement Loop + +The digest enables continuous improvement: + +**DAILY:** +- Read digest +- Pick 1 experiment to try +- Log outcome in `memory/ai-digest-posted.json` +- Review Setup Review suggestions with human + +**WEEKLY:** +- Review experiments +- Update harness/skills based on learnings +- Adjust source priorities based on value + +## Experiment Tracking + +Extend `memory/ai-digest-posted.json`: + +```json +{ + "posted": [...], + "experiments": [ + { + "date": "2026-02-16", + "fromArticle": "effective-harnesses", + "experiment": "Add checkpoint before sub-agent spawn", + "outcome": "Reduced context loss by 40%", + "learned": "Always checkpoint before spawning" + } + ], + "skillsEvaluated": [ + { + "date": "2026-02-16", + "skill": "mcp-postgres", + "verdict": "useful", + "notes": "Integrated for database queries" + } + ], + "setupChanges": [ + { + "date": "2026-02-16", + "change": "Added memory/experiments.md", + "reason": "Track harness experiments per Anthropic article", + "status": "implemented" + } + ] +} +``` + +## 
Cron Job Setup + +Schedule daily at 8:30 AM: + +```bash +openclaw cron add \ + --name ai-self-improvement-digest \ + --schedule "30 8 * * *" \ + --tz "America/New_York" \ + --message "Generate today's AI Self-Improvement Digest following the workflow in the ai-self-improvement-digest skill. Read memory/ai-digest-posted.json first for deduplication." +``` + +Or use the `cron` tool directly with `action: add` and the job configuration. + +## Key Principles + +1. **Ground suggestions** in what you already have +2. **Use affirmative voice** ("let's do X") not passive ("could consider X") +3. **Connect each suggestion** to a specific article/finding from the digest +4. **It's okay to have no suggestions** if nothing is actionable diff --git a/references/examples.md b/references/examples.md new file mode 100644 index 0000000..763f866 --- /dev/null +++ b/references/examples.md @@ -0,0 +1,134 @@ +# AI Self-Improvement Digest - Reference Guide + +## Example Digest Entries + +### Example 1: Harness Engineering + +**Building Effective Agent Harnesses** — Anthropic Engineering +What: Anthropic's guide on structuring system prompts for reliable agent behavior, including the "think-act-observe" loop pattern. +Why it matters for self-improvement: Shows how to design harnesses that make agents more predictable and debuggable when they fail. +Takeaway: Add explicit "pause and verify" checkpoints before high-stakes actions like spawning sub-agents or making external calls. +Relevance: ⭐⭐⭐⭐⭐ + +### Example 2: Tool Development + +**MCP: The USB-C for AI Applications** — Geoff Huntley +What: Deep dive into Model Context Protocol as a standard for tool integration, with patterns for building composable skills. +Why it matters for self-improvement: MCP skills are more portable and composable than ad-hoc integrations. +Takeaway: When building new skills, follow MCP patterns for resource exposure and tool definition. 
+Relevance: ⭐⭐⭐⭐ + +### Example 3: Self-Evaluation + +**Evaluating Language Model Agents** — Lilian Weng +What: Comprehensive framework for agent evaluation including trajectory analysis, tool use accuracy, and failure mode categorization. +Why it matters for self-improvement: Without evals, you can't know if changes actually improve performance. +Takeaway: Set up a simple regression test: save 5-10 representative tasks and re-run after skill updates. +Relevance: ⭐⭐⭐⭐⭐ + +### Example 4: Multi-Agent Coordination + +**Patterns for Multi-Agent Systems** — Simon Willison +What: Practical patterns for agent spawning, result aggregation, and error handling in distributed agent workflows. +Why it matters for self-improvement: Shows when to spawn vs when to handle inline, and how to merge parallel results. +Takeaway: Spawn sub-agents for tasks that need isolation; keep inline for context-dependent reasoning. +Relevance: ⭐⭐⭐⭐ + +### Example 5: Memory Management + +**Context Compaction Strategies** — arXiv +What: Techniques for managing long conversations including summarization, key-value extraction, and selective retention. +Why it matters for self-improvement: Long contexts degrade performance; smart compaction preserves what matters. +Takeaway: Before compaction, extract and save key facts to MEMORY.md; summarize the rest. 
+Relevance: ⭐⭐⭐⭐ + +## Search Queries by Category + +Use these queries with `kimi_search` to find relevant content: + +### Harness & System Prompts +- "system prompt engineering agent reliability" +- "agent harness design patterns" +- "prompt chaining best practices" +- "few-shot prompting agents" + +### Skill & Tool Development +- "MCP server patterns" +- "AI agent tool integration" +- "skill development framework" +- "agent capabilities extension" + +### Self-Evaluation +- "agent evaluation metrics" +- "LLM agent testing" +- "agent failure analysis" +- "trajectory evaluation" + +### Multi-Agent Coordination +- "multi-agent orchestration" +- "agent spawning patterns" +- "distributed agent systems" +- "agent result aggregation" + +### Memory & Context +- "context window management" +- "long conversation memory" +- "RAG for agents" +- "conversation summarization" + +### Workflow Automation +- "agent task decomposition" +- "agent error handling" +- "retry patterns agents" +- "agent workflow design" + +## Quality Indicators + +**High-signal content (include):** +- Specific techniques with code examples +- Lessons from production systems +- Failure modes and how to avoid them +- Comparative analysis of approaches +- Author has built real agent systems + +**Low-signal content (exclude):** +- Pure announcements without technique +- Marketing content +- General AI hype +- Ethics debates without implementation angle +- Surface-level listicles + +## Setup Review Examples + +### Good Example (specific, grounded, affirmative) + +🔧 Setup Review +Based on today's findings: +- Let's add a `memory/experiments.md` file to track harness experiments, since the Anthropic article showed experiment logging improves iteration speed +- Let's update the channel-monitor cron to include a self-check step before responding, based on the "pause and verify" pattern from Simon Willison's post + +No changes needed for multi-agent coordination — our current sub-agent spawning pattern already 
follows the isolation principle discussed. + +### Bad Example (vague, passive) + +🔧 Setup Review +Could consider maybe looking into some of the patterns mentioned. Might be worth exploring memory improvements at some point. + +## Weekly Review Template + +At the end of each week, review `memory/ai-digest-posted.json` and answer: + +1. **Experiments tried:** What did we test this week? +2. **Outcomes:** What worked? What didn't? +3. **Skills evaluated:** Any new skills worth adopting? +4. **Setup changes made:** What did we change based on learnings? +5. **Source quality:** Which sources provided the most value? +6. **Adjustments:** Should we add/remove sources? Change frequency? + +## Common Pitfalls to Avoid + +1. **Including general news** - Stay focused on self-improvement, not announcements +2. **Vague setup reviews** - Be specific about what to change and why +3. **Skipping deduplication** - Always check `memory/ai-digest-posted.json` first +4. **No experiment suggestion** - Always include one actionable experiment +5. **Ignoring existing setup** - Connect suggestions to current AGENTS.md, TOOLS.md, skills/ diff --git a/scripts/generate_digest.py b/scripts/generate_digest.py new file mode 100755 index 0000000..3eab19f --- /dev/null +++ b/scripts/generate_digest.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +""" +Helper script for AI Self-Improvement Digest.
# Usage (the CLI is built from sub-commands with positional arguments,
# not "--" flags):
#
#     python3 generate_digest.py check-posted <url>
#     python3 generate_digest.py add-posted <url> <title> <topic>
#     python3 generate_digest.py add-experiment <date> <from_article> <experiment> \
#         [--outcome TEXT] [--learned TEXT]
#     python3 generate_digest.py list-experiments
#
# `check-posted` prints POSTED / NOT_POSTED and exits with status 0 / 1 so it
# can be used directly in shell conditionals.

import json
import argparse
import sys
from datetime import datetime
from pathlib import Path


# Top-level lists every tracking file is expected to carry.
_TRACKING_KEYS = ("posted", "experiments", "skillsEvaluated", "setupChanges")


def _has_url(posted, url):
    """Return True if any dict entry in *posted* records exactly *url*."""
    return any(
        isinstance(item, dict) and item.get("url") == url for item in posted
    )


def _as_text(value):
    """Render a JSON value for table output, showing ``None`` as ``N/A``."""
    return "N/A" if value is None else str(value)


def get_tracking_file():
    """Return the path of the tracking file, creating its directory.

    The path is ``~/.openclaw/workspace/memory/ai-digest-posted.json``. The
    parent directories are created on demand; the file itself is not.
    """
    memory_dir = Path.home() / ".openclaw" / "workspace" / "memory"
    memory_dir.mkdir(parents=True, exist_ok=True)
    return memory_dir / "ai-digest-posted.json"


def load_tracking():
    """Load the tracking data, guaranteeing all expected top-level lists.

    Returns:
        A dict with at least the keys ``posted``, ``experiments``,
        ``skillsEvaluated`` and ``setupChanges``. Keys that are missing from
        the file (or stored as ``null``) are filled with empty lists so that
        callers can append without checking. Unknown extra keys are kept.

    Raises:
        json.JSONDecodeError: the file exists but is not valid JSON. This is
            deliberately not swallowed: treating a corrupt file as empty
            would make the next save wipe the recorded history.
        ValueError: the file holds valid JSON that is not an object.
    """
    tracking_file = get_tracking_file()
    try:
        with open(tracking_file, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return {key: [] for key in _TRACKING_KEYS}

    if not isinstance(data, dict):
        raise ValueError(
            f"{tracking_file} must contain a JSON object, "
            f"found {type(data).__name__}"
        )
    for key in _TRACKING_KEYS:
        if data.get(key) is None:
            data[key] = []
    return data


def save_tracking(data):
    """Write *data* to the tracking file as indented UTF-8 JSON.

    The data is first written to a sibling ``.tmp`` file and then moved over
    the real file, so an interrupted or failed write never leaves a truncated
    tracking file behind.
    """
    tracking_file = get_tracking_file()
    tmp_file = tracking_file.with_name(tracking_file.name + ".tmp")
    try:
        with open(tmp_file, "w", encoding="utf-8") as f:
            json.dump(data, f, indent=2, ensure_ascii=False)
            f.write("\n")
        tmp_file.replace(tracking_file)
    finally:
        # Only still present when the write or the move failed.
        if tmp_file.exists():
            tmp_file.unlink()


def is_posted(url):
    """Return True if *url* is already recorded in the ``posted`` list."""
    return _has_url(load_tracking()["posted"], url)


def add_posted(url, title, topic):
    """Record a posted item, skipping URLs that are already tracked.

    Args:
        url: Article URL; used as the de-duplication key.
        title: Article title.
        topic: Topic/category of the article.
    """
    data = load_tracking()
    if _has_url(data["posted"], url):
        # Re-adding would only bloat the file that exists to prevent repeats.
        print(f"Already posted: {url}")
        return
    data["posted"].append({
        # Local time with its UTC offset, so the timestamp stays unambiguous.
        "date": datetime.now().astimezone().isoformat(),
        "url": url,
        "title": title,
        "topic": topic,
    })
    save_tracking(data)
    print(f"Added: {title}")


def add_experiment(date, from_article, experiment, outcome="", learned=""):
    """Append an experiment record to the tracking file.

    Args:
        date: Date string, stored as given (the CLI asks for YYYY-MM-DD).
        from_article: Source article/topic the experiment came from.
        experiment: Description of the experiment.
        outcome: Optional outcome text.
        learned: Optional note on what was learned.
    """
    data = load_tracking()
    data["experiments"].append({
        "date": date,
        "fromArticle": from_article,
        "experiment": experiment,
        "outcome": outcome,
        "learned": learned,
    })
    save_tracking(data)
    print(f"Added experiment: {experiment}")


def list_experiments():
    """Print a fixed-width table of all recorded experiments.

    Missing or ``null`` fields are shown as ``N/A``; the article and
    experiment columns are truncated to fit their column widths.
    """
    experiments = load_tracking()["experiments"]
    if not experiments:
        print("No experiments recorded yet.")
        return

    print(f"\n{'Date':<12} {'From Article':<20} {'Experiment':<40}")
    print("-" * 72)
    for exp in experiments:
        date = _as_text(exp.get("date"))
        source = _as_text(exp.get("fromArticle"))[:18]
        summary = _as_text(exp.get("experiment"))[:38]
        print(f"{date:<12} {source:<20} {summary:<40}")


def main(argv=None):
    """Command-line entry point.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    The ``check-posted`` command exits with status 0 when the URL is already
    posted and 1 when it is not. Running without a command prints the help.
    """
    parser = argparse.ArgumentParser(description="AI Self-Improvement Digest Helper")
    subparsers = parser.add_subparsers(dest="command", help="Commands")

    # Check if URL is posted
    check_parser = subparsers.add_parser("check-posted", help="Check if URL has been posted")
    check_parser.add_argument("url", help="URL to check")

    # Add posted item
    add_parser = subparsers.add_parser("add-posted", help="Add a posted item")
    add_parser.add_argument("url", help="Article URL")
    add_parser.add_argument("title", help="Article title")
    add_parser.add_argument("topic", help="Topic/category")

    # Add experiment
    exp_parser = subparsers.add_parser("add-experiment", help="Add an experiment")
    exp_parser.add_argument("date", help="Date (YYYY-MM-DD)")
    exp_parser.add_argument("from_article", help="Source article/topic")
    exp_parser.add_argument("experiment", help="Description of experiment")
    exp_parser.add_argument("--outcome", default="", help="Outcome of experiment")
    exp_parser.add_argument("--learned", default="", help="What was learned")

    # List experiments
    subparsers.add_parser("list-experiments", help="List all experiments")

    args = parser.parse_args(argv)

    if args.command == "check-posted":
        if is_posted(args.url):
            print("POSTED")
            sys.exit(0)
        print("NOT_POSTED")
        sys.exit(1)
    elif args.command == "add-posted":
        add_posted(args.url, args.title, args.topic)
    elif args.command == "add-experiment":
        add_experiment(
            args.date,
            args.from_article,
            args.experiment,
            args.outcome,
            args.learned,
        )
    elif args.command == "list-experiments":
        list_experiments()
    else:
        parser.print_help()


if __name__ == "__main__":
    main()