diff --git a/QUICKSTART.md b/QUICKSTART.md new file mode 100644 index 0000000..2636083 --- /dev/null +++ b/QUICKSTART.md @@ -0,0 +1,391 @@ +# POTE Quick Start Guide + +## šŸš€ Your System is Ready! + +**Container IP**: Check with `ip addr show eth0 | grep "inet"` +**Database**: PostgreSQL on port 5432 +**Username**: `poteuser` +**Password**: `changeme123` (āš ļø change in production!) + +--- + +## šŸ“Š How to Use POTE + +### Option 1: Command Line (SSH into container) + +```bash +# SSH to your container +ssh root@YOUR_CONTAINER_IP + +# Switch to poteapp user +su - poteapp + +# Activate Python environment +cd pote && source venv/bin/activate + +# Now you can run any POTE command! +``` + +### Option 2: Remote Database Access + +Connect from any machine with PostgreSQL client: + +```bash +psql -h YOUR_CONTAINER_IP -U poteuser -d pote +# Password: changeme123 +``` + +### Option 3: Python Client (From Anywhere) + +```python +from sqlalchemy import create_engine, text + +# Connect remotely +engine = create_engine("postgresql://poteuser:changeme123@YOUR_CONTAINER_IP:5432/pote") + +with engine.connect() as conn: + result = conn.execute(text("SELECT * FROM officials")) + for row in result: + print(row) +``` + +--- + +## šŸŽÆ Common Tasks + +### 1. Check System Status + +```bash +su - poteapp +cd pote && source venv/bin/activate +~/status.sh # Shows current database stats +``` + +### 2. Ingest Sample Data (Offline) + +```bash +su - poteapp +cd pote && source venv/bin/activate +python scripts/ingest_from_fixtures.py +``` + +**Output**: Ingests 5 sample congressional trades (Nancy Pelosi, etc.) + +### 3. Fetch Live Congressional Trades + +```bash +su - poteapp +cd pote && source venv/bin/activate +python scripts/fetch_congressional_trades.py +``` + +**What it does**: +- Fetches latest trades from House Stock Watcher API +- Deduplicates against existing trades +- Shows summary of what was added + +### 4. 
Enrich Securities (Add Company Info) + +```bash +su - poteapp +cd pote && source venv/bin/activate +python scripts/enrich_securities.py +``` + +**What it does**: +- Fetches company names, sectors, industries from yfinance +- Updates securities table with real company data + +### 5. Fetch Historical Prices + +```bash +su - poteapp +cd pote && source venv/bin/activate +python scripts/fetch_sample_prices.py +``` + +**What it does**: +- Fetches historical price data for securities in database +- Stores daily OHLCV data for analysis + +### 6. Run Database Queries + +```bash +# Connect to database +psql -h localhost -U poteuser -d pote +``` + +**Useful queries**: + +```sql +-- View all officials +SELECT name, chamber, party FROM officials; + +-- View all trades +SELECT o.name, s.ticker, t.side, t.amount_min, t.transaction_date +FROM trades t +JOIN officials o ON t.official_id = o.id +JOIN securities s ON t.security_id = s.id +ORDER BY t.transaction_date DESC; + +-- Top traders +SELECT o.name, COUNT(t.id) as trade_count +FROM officials o +LEFT JOIN trades t ON o.id = t.official_id +GROUP BY o.id, o.name +ORDER BY trade_count DESC; + +-- Trades by ticker +SELECT s.ticker, s.name, COUNT(t.id) as trade_count +FROM securities s +LEFT JOIN trades t ON s.id = t.security_id +GROUP BY s.id, s.ticker, s.name +ORDER BY trade_count DESC; +``` + +--- + +## šŸ“ˆ Example Workflows + +### Workflow 1: Daily Update + +```bash +su - poteapp +cd pote && source venv/bin/activate + +# Fetch new trades +python scripts/fetch_congressional_trades.py + +# Enrich any new securities +python scripts/enrich_securities.py + +# Update prices +python scripts/fetch_sample_prices.py + +# Check status +~/status.sh +``` + +### Workflow 2: Research Query + +```python +# research.py - Save this in ~/pote/ +from sqlalchemy import create_engine, text +from pote.config import settings + +engine = create_engine(settings.DATABASE_URL) + +# Find all NVDA trades +with engine.connect() as conn: + result = 
conn.execute(text(""" + SELECT + o.name, + o.party, + t.side, + t.amount_min, + t.transaction_date, + t.disclosure_date + FROM trades t + JOIN officials o ON t.official_id = o.id + JOIN securities s ON t.security_id = s.id + WHERE s.ticker = 'NVDA' + ORDER BY t.transaction_date DESC + """)) + + for row in result: + print(f"{row.name:20s} | {row.side:8s} | ${row.amount_min:,} | {row.transaction_date}") +``` + +Run it: +```bash +python research.py +``` + +### Workflow 3: Export to CSV + +```python +# export_trades.py +import pandas as pd +from sqlalchemy import create_engine +from pote.config import settings + +engine = create_engine(settings.DATABASE_URL) + +# Export all trades to CSV +query = """ + SELECT + o.name as official_name, + o.party, + o.chamber, + s.ticker, + s.name as company_name, + t.side, + t.amount_min, + t.amount_max, + t.transaction_date, + t.disclosure_date + FROM trades t + JOIN officials o ON t.official_id = o.id + JOIN securities s ON t.security_id = s.id + ORDER BY t.transaction_date DESC +""" + +df = pd.read_sql(query, engine) +df.to_csv('trades_export.csv', index=False) +print(f"Exported {len(df)} trades to trades_export.csv") +``` + +--- + +## šŸ”§ Maintenance + +### Update POTE Code + +```bash +su - poteapp +cd pote +git pull +source venv/bin/activate +pip install -e . 
+``` + +### Backup Database + +```bash +# Create backup +su - poteapp +pg_dump -h localhost -U poteuser pote > ~/backups/pote_$(date +%Y%m%d).sql + +# Restore backup +psql -h localhost -U poteuser -d pote < ~/backups/pote_20250115.sql +``` + +### View Logs + +```bash +# PostgreSQL logs +tail -f /var/log/postgresql/postgresql-15-main.log + +# Application logs (if you create them) +tail -f ~/logs/pote.log +``` + +### Change Database Password + +```bash +# As root +su - postgres +psql << EOF +ALTER USER poteuser WITH PASSWORD 'your_new_secure_password'; +EOF + +# Update .env +su - poteapp +nano ~/pote/.env +# Change DATABASE_URL password +``` + +--- + +## 🌐 Access Methods Summary + +| Method | From Where | Command | +|--------|-----------|---------| +| **SSH + CLI** | Any network client | `ssh root@IP`, then `su - poteapp` | +| **psql** | Any network client | `psql -h IP -U poteuser -d pote` | +| **Python** | Any machine | `sqlalchemy.create_engine("postgresql://...")` | +| **Web UI** | Coming in Phase 3! | `http://IP:8000` (FastAPI + dashboard) | + +--- + +## šŸ“š What Data Do You Have? 
+ +Right now (Phase 1 complete): +- ✅ **Congressional trading data** (from House Stock Watcher) +- ✅ **Security information** (tickers, names, sectors) +- ✅ **Historical prices** (OHLCV data from yfinance) +- ✅ **Official profiles** (name, party, chamber, state) + +Coming next (Phase 2): +- 📊 **Abnormal return calculations** +- 🤖 **Behavioral clustering** +- 🚨 **Research signals** (follow_research, avoid_risk, watch) + +--- + +## 🎓 Learning SQL for POTE + +### Count Records +```sql +SELECT COUNT(*) FROM officials; +SELECT COUNT(*) FROM trades; +SELECT COUNT(*) FROM securities; +``` + +### Filter by Party +```sql +SELECT name, party FROM officials WHERE party = 'Democrat'; +``` + +### Join Tables +```sql +SELECT o.name, s.ticker, t.side +FROM trades t +JOIN officials o ON t.official_id = o.id +JOIN securities s ON t.security_id = s.id +LIMIT 10; +``` + +### Aggregate Stats +```sql +SELECT + o.party, + COUNT(t.id) as trade_count, + AVG(t.amount_min) as avg_amount +FROM trades t +JOIN officials o ON t.official_id = o.id +GROUP BY o.party; +``` + +--- + +## ❓ Troubleshooting + +### Can't connect remotely? +```bash +# Check PostgreSQL is listening +ss -tlnp | grep 5432 +# Should show: 0.0.0.0:5432 + +# Check firewall (if enabled) +ufw status +``` + +### Database connection fails? +```bash +# Test locally first +psql -h localhost -U poteuser -d pote + +# Check credentials in .env +cat ~/pote/.env +``` + +### Python import errors? +```bash +# Reinstall dependencies +su - poteapp +cd pote && source venv/bin/activate +pip install -e . +``` + +--- + +## 🚀 Next Steps + +1. **Populate with real data**: Run `fetch_congressional_trades.py` regularly +2. **Set up cron job** for automatic daily updates +3. **Build analytics** (Phase 2) - abnormal returns, signals +4. **Create dashboard** (Phase 3) - web interface for exploration + +Ready to build Phase 2 analytics? Just ask! 
šŸ“ˆ + diff --git a/README.md b/README.md index 747a605..08e271f 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,11 @@ POTE tracks stock trading activity of government officials (starting with U.S. C ## Quick start +**šŸš€ Already deployed?** See **[QUICKSTART.md](QUICKSTART.md)** for full usage guide! + +**šŸ“¦ Deploying?** See **[PROXMOX_QUICKSTART.md](PROXMOX_QUICKSTART.md)** for Proxmox LXC deployment (recommended). + +### Local Development ```bash # Install git clone @@ -40,7 +45,7 @@ python scripts/ingest_from_fixtures.py python scripts/enrich_securities.py # With internet: -python scripts/fetch_congressional_trades.py --days 30 +python scripts/fetch_congressional_trades.py python scripts/fetch_sample_prices.py # Run tests @@ -50,6 +55,15 @@ make test make lint format ``` +### Production Deployment +```bash +# Proxmox LXC (Recommended - 5 minutes) +bash scripts/proxmox_setup.sh + +# Docker +docker-compose up -d +``` + ## Tech stack - **Language**: Python 3.10+ @@ -62,14 +76,16 @@ make lint format **Getting Started**: - [`README.md`](README.md) – This file +- [`QUICKSTART.md`](QUICKSTART.md) – ⭐ **How to use your deployed POTE instance** - [`STATUS.md`](STATUS.md) – Current project status - [`FREE_TESTING_QUICKSTART.md`](FREE_TESTING_QUICKSTART.md) – Test for $0 - [`OFFLINE_DEMO.md`](OFFLINE_DEMO.md) – Works without internet! 
**Deployment**: -- [`docs/07_deployment.md`](docs/07_deployment.md) – Full deployment guide -- [`docs/08_proxmox_deployment.md`](docs/08_proxmox_deployment.md) – ⭐ Proxmox-specific guide -- [`Dockerfile`](Dockerfile) + [`docker-compose.yml`](docker-compose.yml) +- [`PROXMOX_QUICKSTART.md`](PROXMOX_QUICKSTART.md) – ⭐ **Proxmox quick deployment (5 min)** +- [`docs/07_deployment.md`](docs/07_deployment.md) – Full deployment guide (all platforms) +- [`docs/08_proxmox_deployment.md`](docs/08_proxmox_deployment.md) – Proxmox detailed guide +- [`Dockerfile`](Dockerfile) + [`docker-compose.yml`](docker-compose.yml) – Docker setup **Technical**: - [`docs/00_mvp.md`](docs/00_mvp.md) – MVP roadmap diff --git a/docs/09_data_updates.md b/docs/09_data_updates.md new file mode 100644 index 0000000..42d4d20 --- /dev/null +++ b/docs/09_data_updates.md @@ -0,0 +1,229 @@ +# Data Updates & Maintenance + +## Adding More Representatives + +### Method 1: Manual Entry (Python Script) + +```bash +# Edit the script to add your representatives +nano scripts/add_custom_trades.py + +# Run it +python scripts/add_custom_trades.py +``` + +Example: +```python +add_trade( + session, + official_name="Your Representative", + party="Democrat", # or "Republican", "Independent" + chamber="House", # or "Senate" + state="CA", + ticker="NVDA", + company_name="NVIDIA Corporation", + side="buy", # or "sell" + value_min=15001, + value_max=50000, + transaction_date="2024-12-01", + disclosure_date="2024-12-15", +) +``` + +### Method 2: CSV Import + +```bash +# Create a template +python scripts/scrape_alternative_sources.py template + +# Edit trades_template.csv with your data +nano trades_template.csv + +# Import it +python scripts/scrape_alternative_sources.py import trades_template.csv +``` + +CSV format: +```csv +name,party,chamber,state,district,ticker,side,value_min,value_max,transaction_date,disclosure_date +Bernie Sanders,Independent,Senate,VT,,COIN,sell,15001,50000,2024-12-01,2024-12-15 +``` + +### 
Method 3: Automatic Updates (When API is available) + +```bash +# Fetch latest trades +python scripts/fetch_congressional_trades.py --days 30 +``` + +## Setting Up Automatic Updates + +### Option A: Cron Job (Recommended) + +```bash +# Make script executable +chmod +x ~/pote/scripts/daily_update.sh + +# Add to cron (runs daily at 6 AM) +crontab -e + +# Add this line: +0 6 * * * /home/poteapp/pote/scripts/daily_update.sh + +# Or for testing (runs every hour): +0 * * * * /home/poteapp/pote/scripts/daily_update.sh +``` + +View logs: +```bash +ls -lh ~/logs/daily_update_*.log +tail -f ~/logs/daily_update_$(date +%Y%m%d).log +``` + +### Option B: Systemd Timer + +Create `/etc/systemd/system/pote-update.service`: +```ini +[Unit] +Description=POTE Daily Data Update +After=network.target postgresql.service + +[Service] +Type=oneshot +User=poteapp +WorkingDirectory=/home/poteapp/pote +ExecStart=/home/poteapp/pote/scripts/daily_update.sh +StandardOutput=append:/home/poteapp/logs/pote-update.log +StandardError=append:/home/poteapp/logs/pote-update.log +``` + +Create `/etc/systemd/system/pote-update.timer`: +```ini +[Unit] +Description=Run POTE update daily +Requires=pote-update.service + +[Timer] +# One trigger only: every day at 06:00 (each extra OnCalendar= line adds another run) +OnCalendar=*-*-* 06:00:00 +Persistent=true + +[Install] +WantedBy=timers.target +``` + +Enable it: +```bash +sudo systemctl enable --now pote-update.timer +sudo systemctl status pote-update.timer +``` + +## Manual Update Workflow + +```bash +# 1. Fetch new trades (when API works) +python scripts/fetch_congressional_trades.py + +# 2. Enrich new securities +python scripts/enrich_securities.py + +# 3. Update prices +python scripts/fetch_sample_prices.py + +# 4. 
Check status +~/status.sh +``` + +## Data Sources + +### Currently Working: +- ✅ yfinance (prices, company info) +- ✅ Manual entry +- ✅ CSV import +- ✅ Fixture files (testing) + +### Currently Down: +- ❌ House Stock Watcher API (domain issues) + +### Future Options: +- QuiverQuant (requires $30/month subscription) +- Senate Stock Watcher (check if available) +- Capitol Trades (web scraping) +- Financial Modeling Prep (requires API key) + +## Monitoring Updates + +### Check Recent Activity + +```python +from sqlalchemy import text +from pote.db import engine +from datetime import datetime, timedelta + +with engine.connect() as conn: + # Trades added in last 7 days + week_ago = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d') + result = conn.execute(text(f""" + SELECT o.name, s.ticker, t.side, t.transaction_date + FROM trades t + JOIN officials o ON t.official_id = o.id + JOIN securities s ON t.security_id = s.id + WHERE t.created_at >= '{week_ago}' + ORDER BY t.created_at DESC + """)) + + print("Recent trades:") + for row in result: + print(f" {row.name} {row.side} {row.ticker} on {row.transaction_date}") +``` + +### Database Growth + +```bash +# Track database size over time +psql -h localhost -U poteuser -d pote -c " +SELECT + pg_size_pretty(pg_database_size('pote')) as db_size, + (SELECT COUNT(*) FROM officials) as officials, + (SELECT COUNT(*) FROM trades) as trades, + (SELECT COUNT(*) FROM prices) as prices; +" +``` + +## Backup Before Updates + +```bash +# Backup before major updates +pg_dump -h localhost -U poteuser pote > ~/backups/pote_$(date +%Y%m%d_%H%M%S).sql +``` + +## Troubleshooting + +### API Not Working +- Use manual entry or CSV import +- Check if alternative sources are available +- Wait for House Stock Watcher to come back online + +### Duplicate Trades +The system automatically deduplicates by: +- `source` + `external_id` (for API data) +- Official + Security + Transaction Date (for manual data) + +### Missing Company Info 
+```bash +# Re-enrich all securities +python scripts/enrich_securities.py --force +``` + +### Price Data Gaps +```bash +# Fetch specific date range +python << 'EOF' +from pote.ingestion.prices import PriceLoader +from pote.db import get_session + +loader = PriceLoader(next(get_session())) +loader.fetch_and_store_prices("NVDA", "2024-01-01", "2024-12-31") +EOF +``` + diff --git a/docs/PR4_PLAN.md b/docs/PR4_PLAN.md new file mode 100644 index 0000000..bcbef13 --- /dev/null +++ b/docs/PR4_PLAN.md @@ -0,0 +1,245 @@ +# PR4: Phase 2 - Analytics Foundation + +## Goal +Calculate abnormal returns and performance metrics for congressional trades. + +## What We'll Build + +### 1. Return Calculator (`src/pote/analytics/returns.py`) +```python +class ReturnCalculator: + """Calculate returns for trades over various windows.""" + + def calculate_trade_return( + self, + trade: Trade, + window_days: int = 90 + ) -> dict: + """ + Calculate return for a single trade. + + Returns: + { + 'ticker': 'NVDA', + 'transaction_date': '2024-01-15', + 'window_days': 90, + 'entry_price': 495.00, + 'exit_price': 650.00, + 'return_pct': 31.3, + 'return_abs': 155.00 + } + """ + pass + + def calculate_benchmark_return( + self, + start_date: date, + end_date: date, + benchmark: str = "SPY" # S&P 500 + ) -> float: + """Calculate benchmark return over period.""" + pass + + def calculate_abnormal_return( + self, + trade_return: float, + benchmark_return: float + ) -> float: + """Return - Benchmark = Abnormal Return (alpha).""" + return trade_return - benchmark_return +``` + +### 2. Performance Metrics (`src/pote/analytics/metrics.py`) +```python +class PerformanceMetrics: + """Aggregate performance metrics by official, sector, etc.""" + + def official_performance( + self, + official_id: int, + window_days: int = 90 + ) -> dict: + """ + Aggregate stats for an official. 
+ + Returns: + { + 'name': 'Nancy Pelosi', + 'total_trades': 50, + 'buy_trades': 35, + 'sell_trades': 15, + 'avg_return': 12.5, + 'avg_abnormal_return': 5.2, + 'win_rate': 0.68, + 'total_value': 2500000, + 'best_trade': {'ticker': 'NVDA', 'return': 85.3}, + 'worst_trade': {'ticker': 'META', 'return': -15.2} + } + """ + pass + + def sector_analysis(self, window_days: int = 90) -> list: + """Performance by sector (Tech, Healthcare, etc.).""" + pass + + def timing_analysis(self) -> dict: + """Analyze disclosure lag vs performance.""" + pass +``` + +### 3. Benchmark Comparisons (`src/pote/analytics/benchmarks.py`) +```python +class BenchmarkComparison: + """Compare official performance vs market indices.""" + + BENCHMARKS = { + 'SPY': 'S&P 500', + 'QQQ': 'NASDAQ-100', + 'DIA': 'Dow Jones', + 'IWM': 'Russell 2000' + } + + def compare_to_market( + self, + official_id: int, + benchmark: str = 'SPY', + period_start: date = None + ) -> dict: + """ + Compare official's returns to market. + + Returns: + { + 'official_return': 15.2, + 'benchmark_return': 8.5, + 'alpha': 6.7, + 'sharpe_ratio': 1.35, + 'win_rate_vs_market': 0.72 + } + """ + pass +``` + +### 4. Database Schema Updates + +Add `metrics_performance` table: +```sql +CREATE TABLE metrics_performance ( + id SERIAL PRIMARY KEY, + official_id INTEGER REFERENCES officials(id), + security_id INTEGER REFERENCES securities(id), + trade_id INTEGER REFERENCES trades(id), + + -- Return metrics + window_days INTEGER NOT NULL, + entry_price DECIMAL(15, 2), + exit_price DECIMAL(15, 2), + return_pct DECIMAL(10, 4), + return_abs DECIMAL(15, 2), + + -- Benchmark comparison + benchmark_ticker VARCHAR(10), + benchmark_return_pct DECIMAL(10, 4), + abnormal_return_pct DECIMAL(10, 4), -- alpha + + -- Calculated at + calculated_at TIMESTAMP +); + +CREATE INDEX ON metrics_performance (official_id, window_days); +CREATE INDEX ON metrics_performance (security_id, window_days); +CREATE INDEX ON metrics_performance (trade_id); +``` + +## Implementation Steps + +1. 
**Create analytics module structure** + ``` + src/pote/analytics/ + ├── __init__.py + ├── returns.py # Return calculations + ├── metrics.py # Aggregate metrics + ├── benchmarks.py # Benchmark comparisons + └── utils.py # Helper functions + ``` + +2. **Add database migration** + ```bash + alembic revision -m "add_performance_metrics_table" + ``` + +3. **Implement return calculator** + - Fetch prices from database + - Calculate returns for various windows (30, 60, 90, 180 days) + - Handle edge cases (IPOs, delisting, missing data) + +4. **Implement benchmark comparisons** + - Fetch benchmark data (SPY, QQQ, etc.) + - Calculate abnormal returns + - Statistical significance tests + +5. **Create calculation scripts** + ```bash + scripts/calculate_returns.py # Calculate all returns + scripts/update_metrics.py # Update performance table + scripts/analyze_official.py # Analyze specific official + ``` + +6. **Add tests** + - Unit tests for calculators + - Integration tests with sample data + - Edge case handling + +## Example Usage + +```python +# Calculate returns for all trades +from pote.analytics.returns import ReturnCalculator +from pote.db import get_session + +calculator = ReturnCalculator() + +with next(get_session()) as session: + trades = session.query(Trade).all() + + for trade in trades: + for window in [30, 60, 90]: + result = calculator.calculate_trade_return(trade, window) + print(f"{trade.official.name} {trade.security.ticker}: " + f"{result['return_pct']:.1f}% ({window}d)") +``` + +```python +# Get official performance summary +from pote.analytics.metrics import PerformanceMetrics + +metrics = PerformanceMetrics() +pelosi_stats = metrics.official_performance(official_id=1, window_days=90) + +print(f"Average Return: {pelosi_stats['avg_return']:.1f}%") +print(f"Alpha: {pelosi_stats['avg_abnormal_return']:.1f}%") +print(f"Win Rate: {pelosi_stats['win_rate']:.1%}") +``` + +## Success Criteria + +- ✅ Can calculate returns for any trade 
+ window +- āœ… Can compare to S&P 500 benchmark +- āœ… Can generate official performance summaries +- āœ… All calculations tested and accurate +- āœ… Performance data stored efficiently +- āœ… Documentation complete + +## Timeline + +- Implementation: 2-3 hours +- Testing: 1 hour +- Documentation: 30 minutes +- **Total: ~4 hours** + +## Next Steps After PR4 + +**PR5**: Clustering & Behavioral Analysis +**PR6**: Research Signals (follow_research, avoid_risk, watch) +**PR7**: API & Dashboard + diff --git a/scripts/add_custom_trades.py b/scripts/add_custom_trades.py new file mode 100644 index 0000000..96b1245 --- /dev/null +++ b/scripts/add_custom_trades.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 +""" +Manually add trades for specific representatives. +Useful when you want to track specific officials or add data from other sources. +""" + +import logging +from datetime import datetime, timezone +from decimal import Decimal + +from pote.db import get_session +from pote.db.models import Official, Security, Trade + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def add_trade( + session, + official_name: str, + party: str, + chamber: str, + state: str, + ticker: str, + company_name: str, + side: str, + value_min: float, + value_max: float, + transaction_date: str, # YYYY-MM-DD + disclosure_date: str | None = None, +): + """Add a single trade to the database.""" + + # Get or create official + official = session.query(Official).filter_by(name=official_name).first() + if not official: + official = Official( + name=official_name, + party=party, + chamber=chamber, + state=state, + ) + session.add(official) + session.flush() + logger.info(f"Created official: {official_name}") + + # Get or create security + security = session.query(Security).filter_by(ticker=ticker).first() + if not security: + security = Security(ticker=ticker, name=company_name) + session.add(security) + session.flush() + logger.info(f"Created security: {ticker}") + + # 
Create trade + trade = Trade( + official_id=official.id, + security_id=security.id, + source="manual", + transaction_date=datetime.strptime(transaction_date, "%Y-%m-%d").date(), + filing_date=datetime.strptime(disclosure_date, "%Y-%m-%d").date() if disclosure_date else None, + side=side, + value_min=Decimal(str(value_min)), + value_max=Decimal(str(value_max)), + ) + session.add(trade) + logger.info(f"Added trade: {official_name} {side} {ticker}") + + return trade + + +def main(): + """Example: Add some trades manually.""" + + with next(get_session()) as session: + # Example: Add trades for Elizabeth Warren + logger.info("Adding trades for Elizabeth Warren...") + + add_trade( + session, + official_name="Elizabeth Warren", + party="Democrat", + chamber="Senate", + state="MA", + ticker="AMZN", + company_name="Amazon.com Inc.", + side="sell", + value_min=15001, + value_max=50000, + transaction_date="2024-11-15", + disclosure_date="2024-12-01", + ) + + add_trade( + session, + official_name="Elizabeth Warren", + party="Democrat", + chamber="Senate", + state="MA", + ticker="META", + company_name="Meta Platforms Inc.", + side="sell", + value_min=50001, + value_max=100000, + transaction_date="2024-11-20", + disclosure_date="2024-12-05", + ) + + # Example: Add trades for Mitt Romney + logger.info("Adding trades for Mitt Romney...") + + add_trade( + session, + official_name="Mitt Romney", + party="Republican", + chamber="Senate", + state="UT", + ticker="BRK.B", + company_name="Berkshire Hathaway Inc.", + side="buy", + value_min=100001, + value_max=250000, + transaction_date="2024-10-01", + disclosure_date="2024-10-15", + ) + + session.commit() + logger.info("āœ… All trades added successfully!") + + # Show summary + from sqlalchemy import text + result = session.execute(text(""" + SELECT o.name, COUNT(t.id) as trade_count + FROM officials o + LEFT JOIN trades t ON o.id = t.official_id + GROUP BY o.name + ORDER BY trade_count DESC + """)) + + print("\n=== Officials Summary 
===") + for row in result: + print(f" {row[0]:25s} - {row[1]} trades") + + +if __name__ == "__main__": + main() + diff --git a/scripts/daily_update.sh b/scripts/daily_update.sh new file mode 100644 index 0000000..d600b6b --- /dev/null +++ b/scripts/daily_update.sh @@ -0,0 +1,76 @@ +#!/bin/bash +# Daily update script for POTE +# Run this via cron to automatically fetch new data + +set -e + +# Configuration +POTE_DIR="/home/poteapp/pote" +LOG_DIR="/home/poteapp/logs" +LOG_FILE="$LOG_DIR/daily_update_$(date +%Y%m%d).log" + +# Ensure log directory exists +mkdir -p "$LOG_DIR" + +echo "=== POTE Daily Update: $(date) ===" | tee -a "$LOG_FILE" + +cd "$POTE_DIR" +source venv/bin/activate + +# 1. Fetch new congressional trades (if House Stock Watcher is back up) +echo "[1/4] Fetching congressional trades..." | tee -a "$LOG_FILE" +if python scripts/fetch_congressional_trades.py --days 7 >> "$LOG_FILE" 2>&1; then + echo "āœ“ Trades fetched successfully" | tee -a "$LOG_FILE" +else + echo "āœ— Trade fetch failed (API might be down)" | tee -a "$LOG_FILE" +fi + +# 2. Enrich any new securities +echo "[2/4] Enriching securities..." | tee -a "$LOG_FILE" +if python scripts/enrich_securities.py >> "$LOG_FILE" 2>&1; then + echo "āœ“ Securities enriched" | tee -a "$LOG_FILE" +else + echo "āœ— Security enrichment failed" | tee -a "$LOG_FILE" +fi + +# 3. Update prices for all securities +echo "[3/4] Fetching price data..." | tee -a "$LOG_FILE" +if python scripts/fetch_sample_prices.py >> "$LOG_FILE" 2>&1; then + echo "āœ“ Prices updated" | tee -a "$LOG_FILE" +else + echo "āœ— Price fetch failed" | tee -a "$LOG_FILE" +fi + +# 4. Generate summary +echo "[4/4] Generating summary..." 
| tee -a "$LOG_FILE" +python << 'EOF' | tee -a "$LOG_FILE" +from sqlalchemy import text +from pote.db import engine +from datetime import datetime, timedelta + +with engine.connect() as conn: + # Get counts + officials = conn.execute(text("SELECT COUNT(*) FROM officials")).scalar() + trades = conn.execute(text("SELECT COUNT(*) FROM trades")).scalar() + securities = conn.execute(text("SELECT COUNT(*) FROM securities")).scalar() + + # Get new trades in last 7 days + week_ago = (datetime.now() - timedelta(days=7)).strftime('%Y-%m-%d') + new_trades = conn.execute( + text(f"SELECT COUNT(*) FROM trades WHERE created_at >= '{week_ago}'") + ).scalar() + + print(f"\nšŸ“Š Database Summary:") + print(f" Officials: {officials:,}") + print(f" Securities: {securities:,}") + print(f" Trades: {trades:,}") + print(f" New (7d): {new_trades:,}") +EOF + +echo "" | tee -a "$LOG_FILE" +echo "=== Update Complete: $(date) ===" | tee -a "$LOG_FILE" +echo "" | tee -a "$LOG_FILE" + +# Keep only last 30 days of logs +find "$LOG_DIR" -name "daily_update_*.log" -mtime +30 -delete + diff --git a/scripts/scrape_alternative_sources.py b/scripts/scrape_alternative_sources.py new file mode 100644 index 0000000..a011a87 --- /dev/null +++ b/scripts/scrape_alternative_sources.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 +""" +Scrape congressional trades from alternative sources. +Options: +1. Senate Stock Watcher (if available) +2. QuiverQuant (requires API key) +3. Capitol Trades (web scraping - be careful) +4. Manual CSV import +""" + +import csv +import logging +from datetime import datetime +from pathlib import Path + +from pote.db import get_session +from pote.ingestion.trade_loader import TradeLoader + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +def import_from_csv(csv_path: str): + """ + Import trades from CSV file. 
+ + CSV format: + name,party,chamber,state,ticker,side,value_min,value_max,transaction_date,disclosure_date + """ + + logger.info(f"Reading trades from {csv_path}") + + with open(csv_path, 'r') as f: + reader = csv.DictReader(f) + transactions = [] + + for row in reader: + # Convert CSV row to transaction format + txn = { + "representative": row["name"], + "party": row["party"], + "house": row["chamber"], # "House" or "Senate" + "state": row.get("state", ""), + "district": row.get("district", ""), + "ticker": row["ticker"], + "transaction": row["side"].capitalize(), # "Purchase" or "Sale" + "amount": f"${row['value_min']} - ${row['value_max']}", + "transaction_date": row["transaction_date"], + "disclosure_date": row.get("disclosure_date", row["transaction_date"]), + } + transactions.append(txn) + + logger.info(f"Loaded {len(transactions)} transactions from CSV") + + # Ingest into database + with next(get_session()) as session: + loader = TradeLoader(session) + stats = loader.ingest_transactions(transactions, source="csv_import") + + logger.info(f"āœ… Ingested: {stats['officials_created']} officials, " + f"{stats['securities_created']} securities, " + f"{stats['trades_ingested']} trades") + + +def create_sample_csv(output_path: str = "trades_template.csv"): + """Create a template CSV file for manual entry.""" + + template_data = [ + { + "name": "Bernie Sanders", + "party": "Independent", + "chamber": "Senate", + "state": "VT", + "district": "", + "ticker": "COIN", + "side": "sell", + "value_min": "15001", + "value_max": "50000", + "transaction_date": "2024-12-01", + "disclosure_date": "2024-12-15", + }, + { + "name": "Alexandria Ocasio-Cortez", + "party": "Democrat", + "chamber": "House", + "state": "NY", + "district": "NY-14", + "ticker": "PLTR", + "side": "buy", + "value_min": "1001", + "value_max": "15000", + "transaction_date": "2024-11-15", + "disclosure_date": "2024-12-01", + }, + ] + + with open(output_path, 'w', newline='') as f: + writer = csv.DictWriter(f, 
fieldnames=template_data[0].keys()) + writer.writeheader() + writer.writerows(template_data) + + logger.info(f"āœ… Created template CSV: {output_path}") + logger.info("Edit this file and run: python scripts/scrape_alternative_sources.py import ") + + +def main(): + """Main entry point.""" + import sys + + if len(sys.argv) < 2: + print("Usage:") + print(" python scripts/scrape_alternative_sources.py template # Create CSV template") + print(" python scripts/scrape_alternative_sources.py import # Import from CSV") + sys.exit(1) + + command = sys.argv[1] + + if command == "template": + create_sample_csv() + elif command == "import": + if len(sys.argv) < 3: + print("Error: Please specify CSV file to import") + sys.exit(1) + import_from_csv(sys.argv[2]) + else: + print(f"Unknown command: {command}") + sys.exit(1) + + +if __name__ == "__main__": + main() +