#!/usr/bin/env python3
"""
wa_extractor.py — WhatsApp Pending Requests → wa.me Link Generator

Paste raw HTML from the pending requests div, extract all phone numbers,
output batched wa.me links ready for community admin invites.
"""

import re
import tkinter as tk
from tkinter import filedialog, messagebox, scrolledtext
from pathlib import Path

# ── Optional: use BeautifulSoup if available, else fall back to raw regex ──
try:
    from bs4 import BeautifulSoup
    HAS_BS4 = True
except ImportError:
    HAS_BS4 = False


# ─────────────────────────────────────────────
#  CORE EXTRACTION LOGIC
# ─────────────────────────────────────────────

# Matches international phone numbers: "+" or "00" prefix, country code,
# then digits optionally separated by single spaces/hyphens.
# Handles formats like:
#   +27821234567   +27 82 123 4567   +1-800-555-0199   0027821234567
#
# NOTE(review): the original pattern was lost when this file was pasted
# (everything between "<" and ">" was stripped).  This is a reconstruction
# from the comment above and from the UI hint that numbers must carry an
# international prefix — confirm it against real pending-request HTML.
PHONE_REGEX = re.compile(
    r'(?<![\w+])'              # must not continue a word, a number or a "+"
    r'(?:\+|00)'               # international prefix
    r'[1-9]'                   # first country-code digit is never 0
    r'(?:[\s\-]?[0-9]){6,14}'  # 6–14 more digits, single separators allowed
    r'(?![0-9])'               # never stop in the middle of a digit run
)

# Batch size used when the UI value is missing or unusable.
DEFAULT_BATCH_SIZE = 20


def strip_non_digits(s: str) -> str:
    """Return *s* with every character except ASCII digits removed."""
    return re.sub(r'[^0-9]', '', s)


def extract_numbers_from_html(html: str) -> list[str]:
    """Extract and normalise phone numbers from raw HTML.

    With BeautifulSoup installed, both the visible text and every attribute
    value (title, aria-label, data-*) are searched.  Without it, tags are
    stripped with a regex and only the remaining text is searched.

    Returns:
        Unique numbers in first-seen order, each as ``'+'`` followed by
        7–15 digits.
    """
    if HAS_BS4:
        soup = BeautifulSoup(html, 'html.parser')
        # Collect pieces and join once instead of growing a string in a loop.
        parts = [soup.get_text(separator=' ')]
        for tag in soup.find_all(True):
            for attr_val in tag.attrs.values():
                if isinstance(attr_val, str):
                    parts.append(attr_val)
                elif isinstance(attr_val, list):
                    # Multi-valued attributes (e.g. class) come back as lists.
                    parts.append(' '.join(attr_val))
        text = ' '.join(parts)
    else:
        # Strip HTML tags with regex if bs4 not available
        text = re.sub(r'<[^>]+>', ' ', html)

    seen: set[str] = set()
    numbers: list[str] = []
    for match in PHONE_REGEX.finditer(text):
        digits = strip_non_digits(match.group(0))

        # Normalise 00XX → +XX
        if digits.startswith('00'):
            digits = digits[2:]

        # Sanity check: 7–15 digits (E.164 range), de-duplicated on digits
        if 7 <= len(digits) <= 15 and digits not in seen:
            seen.add(digits)
            numbers.append('+' + digits)

    return numbers


def format_output(numbers: list[str], batch_size: int = 20) -> str:
    """Format numbers as wa.me links, one per line.

    A blank line, a 40-character rule and another blank line are inserted
    after every ``batch_size`` links, except after the final link.

    Raises:
        ValueError: if ``batch_size`` is smaller than 1.
    """
    if batch_size < 1:
        raise ValueError('batch_size must be at least 1')

    separator = ['', '─' * 40, '']
    total = len(numbers)
    lines = []
    for position, num in enumerate(numbers, start=1):
        lines.append(f"https://wa.me/{num}")
        if position % batch_size == 0 and position < total:
            lines.extend(separator)
    return '\n'.join(lines)


# ─────────────────────────────────────────────
#  TKINTER UI
# ─────────────────────────────────────────────

BG = '#0e0e0e'
BG_PANEL = '#161616'
BG_INPUT = '#1a1a1a'
ACCENT = '#25d366'   # WhatsApp green
ACCENT2 = '#128c7e'
TEXT = '#e8e8e8'
TEXT_DIM = '#666666'
BORDER = '#2a2a2a'

FONT_MONO = ('JetBrains Mono', 10)
FONT_UI = ('JetBrains Mono', 10)
FONT_TITLE = ('JetBrains Mono', 15, 'bold')
FONT_SMALL = ('JetBrains Mono', 8)


class App(tk.Tk):
    """Main window: HTML input on the left, generated wa.me links on the right."""

    def __init__(self):
        super().__init__()
        self.title('wa.me Extractor')
        self.configure(bg=BG)
        self.resizable(True, True)
        self.minsize(740, 600)

        self._html_path: Path | None = None   # last file loaded via the dialog
        self._numbers: list[str] = []         # result of the last extraction

        self._build_ui()
        self._center()

    def _center(self):
        """Size the window to 860x680 and centre it on the screen."""
        self.update_idletasks()
        w, h = 860, 680
        sw = self.winfo_screenwidth()
        sh = self.winfo_screenheight()
        self.geometry(f'{w}x{h}+{(sw-w)//2}+{(sh-h)//2}')

    # ── Layout ──────────────────────────────

    def _build_ui(self):
        """Create the header, controls row, input/output panes and footer."""
        self.columnconfigure(0, weight=1)
        self.rowconfigure(2, weight=1)   # only the panes row stretches

        # ── Header ──
        hdr = tk.Frame(self, bg=BG, pady=18)
        hdr.grid(row=0, column=0, sticky='ew', padx=28)

        tk.Label(hdr, text='⬡ wa.me extractor', font=FONT_TITLE,
                 fg=ACCENT, bg=BG).pack(side='left')

        self._bs4_badge(hdr)

        # ── Controls row ──
        ctrl = tk.Frame(self, bg=BG_PANEL, pady=14, padx=20)
        ctrl.grid(row=1, column=0, sticky='ew', padx=0)
        ctrl.columnconfigure(1, weight=1)

        tk.Label(ctrl, text='HTML source:', font=FONT_UI,
                 fg=TEXT_DIM, bg=BG_PANEL).grid(row=0, column=0, sticky='w',
                                                padx=(0, 10))

        self._path_var = tk.StringVar(value='no file selected')
        tk.Label(ctrl, textvariable=self._path_var, font=FONT_SMALL,
                 fg=TEXT_DIM, bg=BG_PANEL, anchor='w').grid(row=0, column=1,
                                                            sticky='ew')

        btn_open = self._btn(ctrl, '📂 open .txt', self._open_file)
        btn_open.grid(row=0, column=2, padx=(12, 6))

        btn_run = self._btn(ctrl, '⚡ extract', self._run, primary=True)
        btn_run.grid(row=0, column=3, padx=(0, 0))

        # Batch size
        tk.Label(ctrl, text=' batch size:', font=FONT_UI,
                 fg=TEXT_DIM, bg=BG_PANEL).grid(row=0, column=4, padx=(18, 6))
        self._batch_var = tk.StringVar(value='20')
        batch_spin = tk.Spinbox(ctrl, from_=5, to=50, increment=5,
                                textvariable=self._batch_var, width=4,
                                font=FONT_UI, bg=BG_INPUT, fg=ACCENT,
                                insertbackground=ACCENT,
                                buttonbackground=BG_INPUT,
                                relief='flat', highlightthickness=1,
                                highlightcolor=BORDER,
                                highlightbackground=BORDER)
        batch_spin.grid(row=0, column=5, padx=(0, 2))

        # ── Main panes ──
        panes = tk.Frame(self, bg=BG)
        panes.grid(row=2, column=0, sticky='nsew', padx=20, pady=(0, 12))
        panes.columnconfigure(0, weight=1)
        panes.columnconfigure(2, weight=1)
        panes.rowconfigure(1, weight=1)

        # Left — raw HTML input
        self._label(panes, 'paste html / or open file above', 0, 0)
        self._html_box = self._text_area(panes, 1, 0)

        # Divider
        div = tk.Frame(panes, bg=BORDER, width=1)
        div.grid(row=0, column=1, rowspan=2, sticky='ns', padx=10)

        # Right — output
        self._out_label_var = tk.StringVar(value='wa.me links')
        self._label_var(panes, self._out_label_var, 0, 2)
        self._out_box = self._text_area(panes, 1, 2, editable=False)

        # ── Footer ──
        foot = tk.Frame(self, bg=BG, pady=8)
        foot.grid(row=3, column=0, sticky='ew', padx=20)
        foot.columnconfigure(0, weight=1)

        self._status_var = tk.StringVar(
            value='ready — open an HTML dump or paste below')
        tk.Label(foot, textvariable=self._status_var, font=FONT_SMALL,
                 fg=TEXT_DIM, bg=BG, anchor='w').grid(row=0, column=0,
                                                      sticky='w')

        btn_copy = self._btn(foot, '⎘ copy all', self._copy_output)
        btn_copy.grid(row=0, column=1, padx=(6, 4))

        btn_save = self._btn(foot, '💾 save .txt', self._save_output,
                             primary=True)
        btn_save.grid(row=0, column=2)

    # ── Helpers ─────────────────────────────

    def _btn(self, parent, text, cmd, primary=False):
        """Return a flat themed button; ``primary`` selects the accent colours."""
        fg = BG if primary else TEXT
        bg = ACCENT if primary else BG_INPUT
        abg = ACCENT2 if primary else BORDER
        b = tk.Button(parent, text=text, command=cmd,
                      font=FONT_UI, fg=fg, bg=bg,
                      activeforeground=fg, activebackground=abg,
                      relief='flat', padx=12, pady=5,
                      cursor='hand2', bd=0)
        return b

    def _label(self, parent, text, row, col):
        """Grid a small dim caption showing ``text`` in upper case."""
        tk.Label(parent, text=text.upper(), font=FONT_SMALL,
                 fg=TEXT_DIM, bg=BG, anchor='w'
                 ).grid(row=row, column=col, sticky='w', pady=(0, 4))

    def _label_var(self, parent, var, row, col):
        """Grid a small dim caption bound to the Tk variable ``var``."""
        tk.Label(parent, textvariable=var, font=FONT_SMALL,
                 fg=TEXT_DIM, bg=BG, anchor='w'
                 ).grid(row=row, column=col, sticky='w', pady=(0, 4))

    def _text_area(self, parent, row, col, editable=True):
        """Grid a bordered ScrolledText and return the text widget."""
        # The 1px-padded frame in BORDER colour acts as the widget's outline.
        frame = tk.Frame(parent, bg=BORDER, padx=1, pady=1)
        frame.grid(row=row, column=col, sticky='nsew')
        frame.rowconfigure(0, weight=1)
        frame.columnconfigure(0, weight=1)

        t = scrolledtext.ScrolledText(
            frame, font=FONT_MONO,
            bg=BG_INPUT, fg=TEXT,
            insertbackground=ACCENT,
            selectbackground=ACCENT2,
            relief='flat', bd=0,
            padx=10, pady=10,
            wrap='none',
            state='normal' if editable else 'disabled'
        )
        t.grid(row=0, column=0, sticky='nsew')
        return t

    def _bs4_badge(self, parent):
        """Show whether BeautifulSoup is in use or the regex fallback is."""
        colour = ACCENT if HAS_BS4 else '#e05252'
        label = 'bs4 ✓' if HAS_BS4 else 'bs4 ✗ (regex fallback)'
        tk.Label(parent, text=label, font=FONT_SMALL,
                 fg=colour, bg=BG).pack(side='right', padx=4)

    def _read_batch_size(self) -> int:
        """Return the spinbox value, or the default if it is not an int >= 1."""
        try:
            batch = int(self._batch_var.get())
        except ValueError:
            return DEFAULT_BATCH_SIZE
        # The spinbox is free-text: 0 or a negative would break the batching.
        return batch if batch >= 1 else DEFAULT_BATCH_SIZE

    # ── Actions ─────────────────────────────

    def _open_file(self):
        """Ask for a file and load its contents into the HTML input box."""
        path = filedialog.askopenfilename(
            title='Select HTML dump (.txt)',
            filetypes=[('Text files', '*.txt'), ('All files', '*.*')]
        )
        if not path:
            return

        chosen = Path(path)
        try:
            content = chosen.read_text(encoding='utf-8', errors='replace')
        except OSError as exc:
            # Report the failure instead of letting it escape the Tk callback.
            messagebox.showerror('Could not open file', f'{chosen}\n\n{exc}')
            self._status_var.set('⚠ could not read the selected file')
            return

        self._html_path = chosen
        self._path_var.set(str(chosen))
        self._html_box.delete('1.0', tk.END)
        self._html_box.insert('1.0', content)
        self._status_var.set(f'loaded {len(content):,} chars — hit ⚡ extract')

    def _run(self):
        """Extract numbers from the input box and show the batched links."""
        html = self._html_box.get('1.0', tk.END).strip()
        if not html:
            messagebox.showwarning('Nothing to parse',
                                   'Paste HTML or open a .txt file first.')
            return

        batch = self._read_batch_size()

        self._numbers = extract_numbers_from_html(html)
        count = len(self._numbers)

        if count == 0:
            self._status_var.set('⚠ no phone numbers found — check your HTML')
            messagebox.showinfo(
                'No numbers found',
                'Could not find any international phone numbers.\n'
                'Make sure the HTML includes the +XX prefixed numbers.')
            return

        output = format_output(self._numbers, batch_size=batch)
        batches = (count + batch - 1) // batch   # ceiling division

        # The output box is read-only; unlock it only while rewriting it.
        self._out_box.config(state='normal')
        self._out_box.delete('1.0', tk.END)
        self._out_box.insert('1.0', output)
        self._out_box.config(state='disabled')

        self._out_label_var.set(
            f'wa.me links [{count} numbers · {batches} batches]')
        self._status_var.set(
            f'✓ extracted {count} numbers → {batches} batches of ≤{batch}'
        )

    def _copy_output(self):
        """Copy the generated links to the clipboard."""
        text = self._out_box.get('1.0', tk.END).strip()
        if not text:
            self._status_var.set('nothing to copy yet')
            return
        self.clipboard_clear()
        self.clipboard_append(text)
        self._status_var.set('✓ copied to clipboard')

    def _save_output(self):
        """Ask for a destination and write the generated links to it."""
        text = self._out_box.get('1.0', tk.END).strip()
        if not text:
            messagebox.showwarning('Nothing to save', 'Run extraction first.')
            return
        path = filedialog.asksaveasfilename(
            title='Save wa.me links',
            defaultextension='.txt',
            initialfile='wame_links.txt',
            filetypes=[('Text files', '*.txt'), ('All files', '*.*')]
        )
        if not path:
            return
        try:
            Path(path).write_text(text, encoding='utf-8')
        except OSError as exc:
            # Report the failure instead of letting it escape the Tk callback.
            messagebox.showerror('Could not save file', f'{path}\n\n{exc}')
            self._status_var.set('⚠ could not save the links')
            return
        self._status_var.set(f'✓ saved → {path}')


# ─────────────────────────────────────────────
#  ENTRY POINT
# ─────────────────────────────────────────────

if __name__ == '__main__':
    app = App()
    app.mainloop()