#!/usr/bin/env python3
"""
Scrape congressional trades from alternative sources.

Options:
1. Senate Stock Watcher (if available)
2. QuiverQuant (requires API key)
3. Capitol Trades (web scraping - be careful)
4. Manual CSV import

Only the manual CSV import (plus a helper that writes a CSV template) is
implemented in this script.
"""

import csv
import logging
import sys
from datetime import datetime
from pathlib import Path

from pote.db import get_session
from pote.ingestion.trade_loader import TradeLoader

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Spellings accepted in the CSV "side" column, mapped to the transaction
# labels this script documents ("Purchase" / "Sale"). The generated template
# uses "buy" / "sell", so both vocabularies have to be understood.
# NOTE(review): assumes TradeLoader expects "Purchase"/"Sale" - confirm.
_SIDE_ALIASES = {
    "buy": "Purchase",
    "purchase": "Purchase",
    "sell": "Sale",
    "sale": "Sale",
}


def _row_to_transaction(row: dict) -> dict:
    """Convert one CSV row into the transaction dict handed to ``TradeLoader``.

    Args:
        row: Mapping of column name to cell text, as produced by
            ``csv.DictReader``.

    Returns:
        A dict with the keys ``representative``, ``party``, ``house``,
        ``state``, ``district``, ``ticker``, ``transaction``, ``amount``,
        ``transaction_date`` and ``disclosure_date``.

    Raises:
        KeyError: If a required column (``name``, ``party``, ``chamber``,
            ``ticker``, ``side``, ``value_min``, ``value_max``,
            ``transaction_date``) is missing from the row.
    """
    side = row["side"].strip()
    transaction_date = row["transaction_date"]
    return {
        "representative": row["name"],
        "party": row["party"],
        "house": row["chamber"],  # "House" or "Senate"
        # `or ""` also covers cells DictReader fills with None on short rows.
        "state": row.get("state") or "",
        "district": row.get("district") or "",
        "ticker": row["ticker"],
        # Known aliases become "Purchase"/"Sale"; anything else is only
        # capitalized, as before.
        "transaction": _SIDE_ALIASES.get(side.lower(), side.capitalize()),
        "amount": f"${row['value_min']} - ${row['value_max']}",
        "transaction_date": transaction_date,
        # Fall back to the transaction date when the disclosure date column
        # is absent *or* left blank.
        "disclosure_date": row.get("disclosure_date") or transaction_date,
    }


def import_from_csv(csv_path: str) -> None:
    """
    Import trades from CSV file.

    CSV format:
    name,party,chamber,state,ticker,side,value_min,value_max,transaction_date,disclosure_date

    ``state``, ``district`` and ``disclosure_date`` are optional; a missing or
    blank ``disclosure_date`` falls back to ``transaction_date``. ``side`` may
    be ``buy``/``purchase`` or ``sell``/``sale`` (case-insensitive).

    Args:
        csv_path: Path of the CSV file to read.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If a row lacks one of the required columns.
    """
    logger.info("Reading trades from %s", csv_path)

    # newline="" is what the csv module requires for correct handling of
    # quoted line breaks; utf-8-sig drops the BOM some spreadsheet tools
    # write, which would otherwise corrupt the first header name.
    with open(csv_path, "r", newline="", encoding="utf-8-sig") as f:
        transactions = [_row_to_transaction(row) for row in csv.DictReader(f)]

    logger.info("Loaded %d transactions from CSV", len(transactions))

    # Ingest into database. get_session() is consumed as an iterator here;
    # the first value it yields is used as the session context manager.
    with next(get_session()) as session:
        loader = TradeLoader(session)
        stats = loader.ingest_transactions(transactions, source="csv_import")

    logger.info(
        "✅ Ingested: %s officials, %s securities, %s trades",
        stats["officials_created"],
        stats["securities_created"],
        stats["trades_ingested"],
    )


def create_sample_csv(output_path: str = "trades_template.csv") -> None:
    """Create a template CSV file for manual entry.

    The file contains a header row and two example trades showing the
    expected columns and value formats.

    Args:
        output_path: Where to write the template; an existing file is
            overwritten.
    """
    template_data = [
        {
            "name": "Bernie Sanders",
            "party": "Independent",
            "chamber": "Senate",
            "state": "VT",
            "district": "",
            "ticker": "COIN",
            "side": "sell",
            "value_min": "15001",
            "value_max": "50000",
            "transaction_date": "2024-12-01",
            "disclosure_date": "2024-12-15",
        },
        {
            "name": "Alexandria Ocasio-Cortez",
            "party": "Democrat",
            "chamber": "House",
            "state": "NY",
            "district": "NY-14",
            "ticker": "PLTR",
            "side": "buy",
            "value_min": "1001",
            "value_max": "15000",
            "transaction_date": "2024-11-15",
            "disclosure_date": "2024-12-01",
        },
    ]

    with open(output_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=list(template_data[0]))
        writer.writeheader()
        writer.writerows(template_data)

    logger.info("✅ Created template CSV: %s", output_path)
    logger.info(
        "Edit this file and run: "
        "python scripts/scrape_alternative_sources.py import %s",
        output_path,
    )


def main() -> None:
    """Main entry point.

    Dispatches on the first command-line argument: ``template`` writes the
    CSV template, ``import <csv_file>`` loads a CSV into the database. Exits
    with status 1 on a missing or unknown command or a missing file argument.
    """
    if len(sys.argv) < 2:
        print("Usage:")
        print("  python scripts/scrape_alternative_sources.py template            # Create CSV template")
        print("  python scripts/scrape_alternative_sources.py import <csv_file>   # Import from CSV")
        sys.exit(1)

    command = sys.argv[1]

    if command == "template":
        create_sample_csv()
    elif command == "import":
        if len(sys.argv) < 3:
            print("Error: Please specify CSV file to import")
            sys.exit(1)
        import_from_csv(sys.argv[2])
    else:
        print(f"Unknown command: {command}")
        sys.exit(1)


if __name__ == "__main__":
    main()