Agent skill
bio-clinical-databases-tumor-mutational-burden
Calculate tumor mutational burden from panel or WES data with proper normalization and clinical thresholds. Use when assessing immunotherapy eligibility or characterizing tumor immunogenicity.
Install this agent skill to your Project
npx add-skill https://github.com/majiayu000/claude-skill-registry/tree/main/skills/data/tumor-mutational-burden
SKILL.md
Tumor Mutational Burden
TMB Calculation from VCF
from cyvcf2 import VCF
def calculate_tmb(vcf_path, panel_size_mb):
    '''Calculate TMB (mutations per megabase)

    Every record in the VCF for which is_coding_nonsynonymous() returns
    True is counted once; the count is then divided by the capture size.
    No depth, VAF or population-frequency filtering happens here.

    Args:
        vcf_path: Path to somatic VCF
        panel_size_mb: Capture region size in megabases (must be > 0)

    Returns:
        TMB value (mutations/Mb)

    Raises:
        ValueError: If panel_size_mb is not a positive number
    '''
    # Validate before touching the file: a size of 0 would otherwise raise
    # ZeroDivisionError only after the whole VCF was read, and a negative
    # size would silently produce a negative TMB. `not x > 0` also rejects NaN.
    if not panel_size_mb > 0:
        raise ValueError(f'panel_size_mb must be > 0, got {panel_size_mb!r}')

    vcf = VCF(vcf_path)
    try:
        # Count nonsynonymous coding mutations
        # Adjust filters based on VCF annotation format
        mutation_count = sum(
            1 for variant in vcf if is_coding_nonsynonymous(variant)
        )
    finally:
        # Release the underlying file handle even if iteration fails
        vcf.close()

    return mutation_count / panel_size_mb
def is_coding_nonsynonymous(variant):
    '''Report whether any annotated transcript has a protein-altering consequence

    Reads the 'CSQ' entry of variant.INFO, splits it into per-transcript
    entries on ',', and inspects the second '|'-separated field of each
    entry. The variant qualifies as soon as that field contains one of the
    consequence terms below as a substring.

    This is the VEP flavour of the check. Adapt it for other annotators:
    - VEP: CSQ field
    - SnpEff: ANN field
    - Funcotator: FUNCOTATION field

    Returns:
        True if a matching consequence is found, otherwise False (also
        when the CSQ annotation is absent or empty).
    '''
    annotation = variant.INFO.get('CSQ', '')
    if not annotation:
        return False

    # Terms are matched as substrings, so 'frameshift' also matches
    # 'frameshift_variant' and combined values joined with '&'.
    protein_altering = (
        'missense_variant', 'nonsense', 'frameshift',
        'inframe_insertion', 'inframe_deletion', 'stop_gained',
        'stop_lost', 'start_lost',
    )

    # Second pipe-delimited field of each transcript entry ('' when the
    # entry has no '|' at all).
    consequences = (
        entry.partition('|')[2].partition('|')[0]
        for entry in annotation.split(',')
    )
    return any(
        term in consequence
        for consequence in consequences
        for term in protein_altering
    )
Panel-Specific TMB
# Capture-region sizes in megabases, keyed by assay name.
# These are common figures only: look up the capture region size of the
# specific panel you are using before relying on them.
PANEL_SIZES_MB = {
    'FoundationOne CDx': 0.8,
    'MSK-IMPACT': 1.14,
    'TruSight Oncology 500': 1.94,
    'Oncomine Comprehensive': 1.5,
    # Whole exome: approximate coding region
    'WES (exome)': 30.0,
    # Whole genome: approximate
    'WGS': 3000.0,
}
def calculate_tmb_panel(vcf_path, panel_name):
    '''Compute TMB for a VCF using the registered size of a named panel

    Args:
        vcf_path: Path to somatic VCF
        panel_name: A key of PANEL_SIZES_MB

    Returns:
        Whatever calculate_tmb returns for the panel's size (mutations/Mb)

    Raises:
        ValueError: If panel_name is not a key of PANEL_SIZES_MB
    '''
    if panel_name in PANEL_SIZES_MB:
        size_mb = PANEL_SIZES_MB[panel_name]
        return calculate_tmb(vcf_path, size_mb)

    raise ValueError(f'Unknown panel: {panel_name}')
TMB with Variant Filtering
def calculate_tmb_filtered(vcf_path, panel_size_mb, min_vaf=0.05, min_depth=100):
    '''Calculate TMB with quality filters

    Args:
        vcf_path: Path to somatic VCF
        panel_size_mb: Panel size in Mb (must be > 0)
        min_vaf: Minimum variant allele frequency (default 5%)
        min_depth: Minimum read depth (default 100)

    Filters (a variant must pass all of them to be counted):
        - Depth >= min_depth: Ensure reliable variant calls. Depth is
          read from the INFO 'DP' value; a record without it is treated
          as depth 0 and therefore dropped.
        - VAF >= min_vaf: Reduce false positives from sequencing errors.
          VAF comes from get_vaf().
        - Exclude known polymorphisms (INFO 'gnomAD_AF' > 1%). A record
          without that value is kept.
        - Include only coding nonsynonymous (is_coding_nonsynonymous()).

    Returns:
        TMB value (mutations/Mb)

    Raises:
        ValueError: If panel_size_mb is not a positive number
    '''
    # Fail fast: 0 would raise ZeroDivisionError only after the full scan,
    # a negative size would yield a negative TMB. Also rejects NaN.
    if not panel_size_mb > 0:
        raise ValueError(f'panel_size_mb must be > 0, got {panel_size_mb!r}')

    vcf = VCF(vcf_path)
    mutation_count = 0
    try:
        for variant in vcf:
            # Quality filters. `or 0` guards against a present-but-empty
            # value coming back as None, which cannot be compared to a number.
            depth = variant.INFO.get('DP', 0) or 0
            if depth < min_depth:
                continue
            # Checked after depth so the genotype lookup is skipped for
            # records that are already rejected.
            if get_vaf(variant) < min_vaf:
                continue

            # Exclude germline polymorphisms
            gnomad_af = variant.INFO.get('gnomAD_AF', 0) or 0
            if gnomad_af > 0.01:
                continue

            # Count coding nonsynonymous
            if is_coding_nonsynonymous(variant):
                mutation_count += 1
    finally:
        # Release the underlying file handle even if a record fails
        vcf.close()

    return mutation_count / panel_size_mb
def get_vaf(variant):
    '''Extract variant allele frequency from variant

    Uses the per-sample allelic depths from the 'AD' FORMAT field of the
    FIRST sample only: VAF = AD[1] / sum(AD).

    Format depends on caller (e.g., Mutect2, Strelka); this follows the
    Mutect2 convention of an AD field in the genotype.
    NOTE(review): in a tumor/normal VCF the first sample is not
    necessarily the tumor - confirm the sample order for your caller.

    Returns:
        The VAF, or 0 when AD is unavailable, has fewer than two values,
        or sums to zero or less.
    '''
    try:
        ad = variant.format('AD')[0]  # First sample
        total = sum(ad)
        if total > 0:
            return ad[1] / total
    except (KeyError, IndexError, TypeError, ValueError):
        # Best effort: a missing or malformed AD field means "no usable
        # VAF". Only data-shape errors are swallowed, so SystemExit and
        # KeyboardInterrupt still propagate.
        pass
    return 0
Clinical TMB Thresholds
def classify_tmb(tmb_value, threshold='FDA'):
    '''Classify TMB as high or low

    Args:
        tmb_value: TMB in mutations/Mb
        threshold: Either a named cutoff ('FDA', 'ESMO', 'conservative',
            'strict') or a number giving the cutoff directly in mut/Mb

    Clinical thresholds:
        - FDA (pembrolizumab): 10 mut/Mb
        - ESMO: 10 mut/Mb
        - Some studies use 16, 20 mut/Mb for specific cancers
          ('conservative' = 16, 'strict' = 20)
    Note: Panel-specific thresholds may differ

    Returns:
        'TMB-High' when tmb_value >= cutoff, otherwise 'TMB-Low'. An
        unrecognized threshold name falls back to 10 mut/Mb and emits a
        warning.
    '''
    import warnings

    thresholds = {
        'FDA': 10,
        'ESMO': 10,
        'conservative': 16,
        'strict': 20,
    }

    # bool is a subclass of int; never treat True/False as a cutoff
    is_number = isinstance(threshold, (int, float)) and not isinstance(threshold, bool)
    if is_number:
        # Caller supplied the cutoff itself (e.g. a panel-specific value)
        cutoff = threshold
    elif threshold in thresholds:
        cutoff = thresholds[threshold]
    else:
        # Keep the historical fallback so existing callers do not break,
        # but make the substitution visible instead of silent.
        warnings.warn(
            f'Unknown TMB threshold {threshold!r}; falling back to 10 mut/Mb',
            stacklevel=2,
        )
        cutoff = 10

    return 'TMB-High' if tmb_value >= cutoff else 'TMB-Low'
# Example: classify a tumor at 12.5 mut/Mb using the default 'FDA' cutoff
tmb = 12.5
status = classify_tmb(tmb)
# 12.5 >= 10, so this prints: TMB: 12.5 mut/Mb -> TMB-High
print(f'TMB: {tmb} mut/Mb -> {status}')
TMB by Variant Type
def detailed_tmb_analysis(vcf_path, panel_size_mb):
    '''Calculate TMB broken down by variant type

    Each record is labelled by classify_variant_type() and tallied per
    label. TMB is computed from the protein-altering categories only
    (missense, nonsense, frameshift, inframe_indel, splice).

    NOTE(review): classify_variant_type is not defined in the visible part
    of this file - confirm it is available and which labels it returns.

    Args:
        vcf_path: Path to somatic VCF
        panel_size_mb: Capture region size in megabases (must be > 0)

    Returns:
        dict with keys:
            'counts': per-label tallies (labels outside the predefined set
                are kept under their own key)
            'total_nonsynonymous': sum over the TMB categories
            'tmb': total_nonsynonymous / panel_size_mb
            'panel_size_mb': the size that was passed in

    Raises:
        ValueError: If panel_size_mb is not a positive number
    '''
    # Fail fast instead of dividing by zero after the full scan
    if not panel_size_mb > 0:
        raise ValueError(f'panel_size_mb must be > 0, got {panel_size_mb!r}')

    # Categories that contribute to TMB. Summing this explicit list (rather
    # than "everything except synonymous/other") keeps an unexpected label
    # from the classifier from inflating the TMB.
    tmb_categories = ('missense', 'nonsense', 'frameshift', 'inframe_indel', 'splice')

    counts = {
        'missense': 0,
        'nonsense': 0,
        'frameshift': 0,
        'inframe_indel': 0,
        'splice': 0,
        'synonymous': 0,
        'other': 0,
    }

    vcf = VCF(vcf_path)
    try:
        for variant in vcf:
            vtype = classify_variant_type(variant)
            counts[vtype] = counts.get(vtype, 0) + 1
    finally:
        # Release the underlying file handle even if classification fails
        vcf.close()

    # TMB typically excludes synonymous
    nonsynonymous_count = sum(counts[category] for category in tmb_categories)

    return {
        'counts': counts,
        'total_nonsynonymous': nonsynonymous_count,
        'tmb': nonsynonymous_count / panel_size_mb,
        'panel_size_mb': panel_size_mb,
    }
TMB vs MSI Comparison
def tmb_msi_concordance(tmb_value, msi_status):
    '''Compare TMB with MSI status

    MSI-H tumors typically have high TMB (>10-20 mut/Mb)
    TMB-H and MSI-H are correlated but not identical:
    - ~80% MSI-H are TMB-H
    - Many TMB-H are MSS (especially smoking-related)
    Both predict immunotherapy response

    Args:
        tmb_value: TMB in mutations/Mb; >= 10 is treated as TMB-H
        msi_status: 'MSI-H' or 'MSS' (case and surrounding whitespace are
            ignored)

    Returns:
        A label describing the TMB/MSI combination. For a status other
        than MSI-H or MSS the TMB side is still reported correctly and the
        MSI side is flagged as unrecognized, rather than being reported
        as 'TMB-L/MSS'.
    '''
    tmb_high = tmb_value >= 10
    # str() so that None or other non-string values land in the
    # "unrecognized" branch instead of raising
    status = str(msi_status).strip().upper()

    if status == 'MSI-H':
        if tmb_high:
            return 'Concordant TMB-H/MSI-H'
        return 'Discordant MSI-H/TMB-L (uncommon)'

    if status == 'MSS':
        if tmb_high:
            return 'TMB-H/MSS (e.g., smoking-related)'
        return 'TMB-L/MSS'

    # Anything else (e.g. 'MSI-L', None): never claim MSS or TMB-L by default
    tmb_label = 'TMB-H' if tmb_high else 'TMB-L'
    return f'{tmb_label}/MSI status unrecognized ({msi_status})'
Batch TMB Calculation
import pandas as pd
from pathlib import Path
def batch_tmb(vcf_dir, panel_size_mb, output_file):
    '''Calculate TMB for multiple samples

    Runs calculate_tmb_filtered() (with its default filters) and
    classify_tmb() (with its default threshold) on every '*.vcf.gz' file
    directly inside vcf_dir, and writes one CSV row per file.

    Args:
        vcf_dir: Directory containing the '*.vcf.gz' files
        panel_size_mb: Capture region size in megabases, applied to all files
        output_file: Destination path for the CSV

    Returns:
        pandas DataFrame with columns 'sample', 'tmb' (rounded to 2
        decimals) and 'status'; empty, but with those columns, when no
        file matches.
    '''
    suffix = '.vcf.gz'
    columns = ['sample', 'tmb', 'status']

    results = []
    # sorted(): glob order depends on the filesystem; sorting makes the
    # output reproducible between runs.
    for vcf_path in sorted(Path(vcf_dir).glob('*' + suffix)):
        # Strip only the trailing '.vcf.gz'. Replacing '.vcf' anywhere in
        # the name would mangle IDs such as 'run.vcf_v2.vcf.gz'.
        sample_id = vcf_path.name[:-len(suffix)]
        tmb = calculate_tmb_filtered(str(vcf_path), panel_size_mb)
        results.append({
            'sample': sample_id,
            'tmb': round(tmb, 2),
            'status': classify_tmb(tmb),
        })

    # Explicit columns keep the CSV header present even with zero samples
    df = pd.DataFrame(results, columns=columns)
    df.to_csv(output_file, index=False)
    return df
Related Skills
- variant-calling/somatic-variant-calling - Input variants
- variant-calling/clinical-interpretation - ACMG/AMP classification
- variant-calling/variant-annotation - VEP/SnpEff annotation
Recommended Agent Skills
Expand your agent's capabilities with these related and highly-rated skills.
agent-ops-spec
Manage specification documents in .agent/specs/. Use when user provides requirements, acceptance criteria, or feature descriptions that need to be tracked and validated against implementation.
agent-ops-state
Maintain .agent state files. Use at session start, after meaningful steps, and before concluding: read/update constitution/memory/focus/issues/baseline consistently.
agent-ops-spec
Manage specification documents in .agent/specs/. Use when user provides requirements, acceptance criteria, or feature descriptions that need to be tracked and validated against implementation.
agent-ops-testing
Test strategy, execution, and coverage analysis. Use when designing tests, running test suites, or analyzing test results beyond baseline checks.
agent-ops-testing
Test strategy, execution, and coverage analysis. Use when designing tests, running test suites, or analyzing test results beyond baseline checks.
agent-ops-state
Maintain .agent state files. Use at session start, after meaningful steps, and before concluding: read/update constitution/memory/focus/issues/baseline consistently.
Didn't find the tool you were looking for?