POTE/scripts/scrape_alternative_sources.py
ilia 34aebb1c2e PR4: Phase 2 Analytics Foundation
Complete analytics module with returns, benchmarks, and performance metrics.

New Modules:
- src/pote/analytics/returns.py: Return calculator for trades
- src/pote/analytics/benchmarks.py: Benchmark comparison & alpha
- src/pote/analytics/metrics.py: Performance aggregations

Scripts:
- scripts/analyze_official.py: Analyze specific official
- scripts/calculate_all_returns.py: System-wide analysis

Tests:
- tests/test_analytics.py: Full coverage of analytics

Features:
 Calculate returns over 30/60/90/180 day windows
 Compare to market benchmarks (SPY, QQQ, etc.)
 Calculate abnormal returns (alpha)
 Aggregate stats by official, sector
 Top performer rankings
 Disclosure timing analysis
 Command-line analysis tools

~1,210 lines of new code, all tested
2025-12-15 11:33:21 -05:00

133 lines
4.0 KiB
Python
Executable File

#!/usr/bin/env python3
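"""
Import congressional trades from alternative sources.

Candidate sources:
1. Senate Stock Watcher (if available)
2. QuiverQuant (requires API key)
3. Capitol Trades (web scraping - be careful)
4. Manual CSV import

Only option 4 (manual CSV import) is implemented in this script, via the
``template`` and ``import`` commands.
"""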
import csv
import logging
from datetime import datetime
from pathlib import Path
from pote.db import get_session
from pote.ingestion.trade_loader import TradeLoader
# Configure the root logger when this module is loaded so that INFO-level
# progress messages from the functions below are emitted.
logging.basicConfig(level=logging.INFO)
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
def _row_to_transaction(row: dict) -> dict:
    """Convert one ``csv.DictReader`` row into the dict passed to ``TradeLoader``."""
    transaction_date = row["transaction_date"]
    return {
        "representative": row["name"],
        "party": row["party"],
        "house": row["chamber"],  # "House" or "Senate"
        # DictReader yields None for cells missing from a short row and ""
        # for blank cells; normalise both to an empty string.
        "state": row.get("state") or "",
        "district": row.get("district") or "",
        "ticker": row["ticker"],
        # capitalize() upper-cases the first letter and lower-cases the rest,
        # so "buy" -> "Buy" and "purchase" -> "Purchase".
        # NOTE(review): confirm which spellings TradeLoader expects.
        "transaction": row["side"].capitalize(),
        "amount": f"${row['value_min']} - ${row['value_max']}",
        "transaction_date": transaction_date,
        # Fall back to the transaction date when the disclosure date column
        # is absent, blank, or missing from a short row. A plain
        # row.get(key, default) only covers the "column absent" case.
        "disclosure_date": row.get("disclosure_date") or transaction_date,
    }


def import_from_csv(csv_path: str) -> None:
    """
    Import trades from a CSV file and ingest them into the database.

    CSV format (header row required):
    name,party,chamber,state,district,ticker,side,value_min,value_max,transaction_date,disclosure_date

    ``state``, ``district`` and ``disclosure_date`` are optional; a missing or
    blank ``disclosure_date`` falls back to ``transaction_date``. The file is
    read as UTF-8 and a leading byte-order mark is tolerated.

    Args:
        csv_path: Path of the CSV file to read.

    Raises:
        KeyError: If a required column is missing from the file.
        OSError: If the file cannot be opened.
    """
    logger.info(f"Reading trades from {csv_path}")

    # newline="" lets the csv module handle line endings itself (needed for
    # quoted fields containing newlines). "utf-8-sig" strips the BOM that
    # spreadsheet exports often prepend; otherwise the first header would be
    # "\ufeffname" and the row["name"] lookup would fail.
    with open(csv_path, newline="", encoding="utf-8-sig") as f:
        transactions = [_row_to_transaction(row) for row in csv.DictReader(f)]

    logger.info(f"Loaded {len(transactions)} transactions from CSV")

    # Ingest into database
    with next(get_session()) as session:
        loader = TradeLoader(session)
        stats = loader.ingest_transactions(transactions, source="csv_import")
        logger.info(f"✅ Ingested: {stats['officials_created']} officials, "
                    f"{stats['securities_created']} securities, "
                    f"{stats['trades_ingested']} trades")
def create_sample_csv(output_path: str = "trades_template.csv"):
    """Write an example CSV (header plus two sample trades) for manual entry.

    The file at ``output_path`` is created or overwritten. Its columns match
    the ones read by the ``import`` command of this script.
    """
    header = (
        "name",
        "party",
        "chamber",
        "state",
        "district",
        "ticker",
        "side",
        "value_min",
        "value_max",
        "transaction_date",
        "disclosure_date",
    )
    # One tuple per sample trade, in the same order as ``header``.
    sample_rows = (
        (
            "Bernie Sanders", "Independent", "Senate", "VT", "",
            "COIN", "sell", "15001", "50000", "2024-12-01", "2024-12-15",
        ),
        (
            "Alexandria Ocasio-Cortez", "Democrat", "House", "NY", "NY-14",
            "PLTR", "buy", "1001", "15000", "2024-11-15", "2024-12-01",
        ),
    )

    with open(output_path, 'w', newline='') as handle:
        out = csv.writer(handle)
        out.writerow(header)
        out.writerows(sample_rows)

    logger.info(f"✅ Created template CSV: {output_path}")
    logger.info("Edit this file and run: python scripts/scrape_alternative_sources.py import <file>")
def main():
    """Command-line entry point: dispatch on the first argument.

    ``template`` writes the sample CSV; ``import <csv_file>`` loads a CSV into
    the database. Missing or unknown arguments print a message and exit with
    status 1.
    """
    import sys

    args = sys.argv[1:]

    # No command given: show usage and bail out.
    if not args:
        print("Usage:")
        print(" python scripts/scrape_alternative_sources.py template # Create CSV template")
        print(" python scripts/scrape_alternative_sources.py import <csv_file> # Import from CSV")
        sys.exit(1)

    command = args[0]

    if command == "template":
        create_sample_csv()
        return

    if command != "import":
        print(f"Unknown command: {command}")
        sys.exit(1)

    # "import" needs the CSV path as a second argument.
    if len(args) < 2:
        print("Error: Please specify CSV file to import")
        sys.exit(1)

    import_from_csv(args[1])
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()