from __future__ import annotations

import json
import re
from typing import Any, Dict, Optional, Tuple

# Reuse executor's registry for original (untrimmed) requests
try:
    from agentui.pipeline.executor import register_http_request as _reg_http_req  # type: ignore
except Exception:  # pragma: no cover
    _reg_http_req = None  # type: ignore


# -------- HTTP editable text parser (safe) --------
def parse_editable_http(s: str) -> Tuple[str, str, Dict[str, str], str]:
    """
    Parse text pasted from Request area into (method, url, headers, body_text).

    The first line is treated as a request line only when it looks like
    ``METHOD <url> [HTTP/x.y]`` with an upper-case method; otherwise parsing
    starts directly with headers/body and the defaults ("POST", "") are kept.

    Header parsing stops at the first blank line (which is consumed) or at the
    first line that is not a valid HTTP header key (prevents treating JSON like
    '"contents": ...' as header). Everything after that point is the body.

    Any unexpected error is swallowed and whatever was parsed so far is returned.
    """
    method, url = "POST", ""
    headers: Dict[str, str] = {}
    body = ""
    try:
        if not isinstance(s, str) or not s.strip():
            return method, url, headers, body
        txt = s.replace("\r\n", "\n")
        lines = txt.split("\n")
        if not lines:
            return method, url, headers, body
        first = (lines[0] or "").strip()
        m = re.match(r"^([A-Z]+)\s+(\S+)(?:\s+HTTP/\d+(?:\.\d+)?)?$", first)
        i = 1
        if m:
            method = (m.group(1) or "POST").strip().upper()
            url = (m.group(2) or "").strip()
        else:
            i = 0  # no start-line -> treat as headers/body only

        def _is_header_line(ln: str) -> bool:
            if ":" not in ln:
                return False
            name = ln.split(":", 1)[0].strip()
            # HTTP token: only letters/digits/hyphen.
            # Prevents JSON keys like "contents": from being treated as headers.
            return bool(re.fullmatch(r"[A-Za-z0-9\-]+", name))

        # Read headers until blank line OR until line not looking like header (start of body)
        while i < len(lines):
            ln = lines[i]
            if ln.strip() == "":
                i += 1
                break
            if not _is_header_line(ln):
                break
            k, v = ln.split(":", 1)
            headers[str(k).strip()] = str(v).strip()
            i += 1

        # Remainder is body (JSON or text)
        body = "\n".join(lines[i:]) if i < len(lines) else ""
    except Exception:
        pass
    return method, url, headers, body


# -------- Headers helpers --------
def dedupe_headers(h: Dict[str, Any]) -> Dict[str, Any]:
    """
    Case-insensitive dedupe; drop Host/Content-Length (httpx will set proper).

    Last value wins, and the spelling of the last occurrence of a header name
    is the one kept. On any error a shallow copy of the input is returned.
    """
    try:
        dedup: Dict[str, Tuple[str, Any]] = {}
        for k, v in (h or {}).items():
            lk = str(k).strip().lower()
            if lk in {"host", "content-length"}:
                continue
            dedup[lk] = (k, v)
        return {orig_k: val for (_, (orig_k, val)) in dedup.items()}
    except Exception:
        return dict(h or {})


def content_type_is_json(h: Dict[str, Any]) -> bool:
    """Return True when a Content-Type header (any case) has a value containing 'json'."""
    try:
        return any(
            str(k).lower() == "content-type" and "json" in str(v).lower()
            for k, v in (h or {}).items()
        )
    except Exception:
        return False


# -------- JSON parsing & normalization helpers --------
def try_parse_json(s: Any) -> Optional[Any]:
    """
    Best-effort JSON parse.

    Dicts and lists are returned unchanged; non-blank strings go through
    json.loads. Anything else, and any parse failure, yields None.
    """
    try:
        if isinstance(s, (dict, list)):
            return s
        if isinstance(s, str) and s.strip():
            return json.loads(s)
    except Exception:
        return None
    return None


def normalize_jsonish_text(s: Any) -> str:
    r"""
    Normalize JSON-looking text safely:
    - If whole text is a quoted JSON string, decode via json.loads to inner string.
    - Replace visible \n/\r/\t outside JSON string literals with real control chars.
    - Escape raw CR/LF/TAB inside JSON string literals as \n/\r/\t to keep JSON valid
      (a raw CRLF pair inside a literal collapses to a single \n).
    """
    try:
        txt = str(s if s is not None else "")
    except Exception:
        return ""

    # If whole text looks like a quoted JSON string: decode to inner string
    try:
        if len(txt) >= 2 and txt[0] == '"' and txt[-1] == '"':
            v = json.loads(txt)
            if isinstance(v, str):
                txt = v
    except Exception:
        pass

    out_chars = []
    i = 0
    n = len(txt)
    in_str = False  # currently inside a "..." literal
    esc = False  # previous char inside the literal was an unconsumed backslash
    while i < n:
        ch = txt[i]
        if in_str:
            # escape raw control chars within JSON string literal
            if ch == "\r":
                # CRLF -> \n
                if (i + 1) < n and txt[i + 1] == "\n":
                    out_chars.append("\\n")
                    i += 2
                    esc = False
                    continue
                out_chars.append("\\r")
                i += 1
                esc = False
                continue
            if ch == "\n":
                out_chars.append("\\n")
                i += 1
                esc = False
                continue
            if ch == "\t":
                out_chars.append("\\t")
                i += 1
                esc = False
                continue
            out_chars.append(ch)
            if esc:
                esc = False
            else:
                if ch == "\\":
                    esc = True
                elif ch == '"':
                    in_str = False
            i += 1
            continue

        # not in string literal
        if ch == '"':
            in_str = True
            out_chars.append(ch)
            i += 1
            continue
        if ch == "\\" and (i + 1) < n:
            nx = txt[i + 1]
            if nx == "n":
                out_chars.append("\n")
                i += 2
                continue
            if nx == "r":
                out_chars.append("\r")
                i += 2
                continue
            if nx == "t":
                out_chars.append("\t")
                i += 2
                continue
        out_chars.append(ch)
        i += 1
    return "".join(out_chars)


def extract_json_trailing(s: str) -> Optional[Any]:
    """
    Pull trailing JSON object/array from mixed text:
    - Try whole text first
    - Then scan from last '{' backward, and after that from last '[' backward,
      returning the first suffix that parses as JSON.

    Returns None for non-string input or when nothing parses.
    """
    try:
        if not isinstance(s, str):
            return None
        txt = s.strip()
        try:
            return json.loads(txt)
        except Exception:
            pass

        idx = txt.rfind("{")
        while idx >= 0:
            seg = txt[idx:]
            try:
                return json.loads(seg)
            except Exception:
                idx = txt.rfind("{", 0, idx)

        idx = txt.rfind("[")
        while idx >= 0:
            seg = txt[idx:]
            try:
                return json.loads(seg)
            except Exception:
                idx = txt.rfind("[", 0, idx)
        return None
    except Exception:
        return None


# Escapes handled by the fallback path of global_unescape_jsonish.
_SIMPLE_ESCAPES = {"n": "\n", "r": "\r", "t": "\t", '"': '"', "\\": "\\"}


def global_unescape_jsonish(s: str) -> str:
    r"""
    Last-resort: decode backslash escapes in the whole text, converting
    \n -> newline, \" -> ", \\ -> \, \uXXXX -> char, etc.

    The text is first encoded to Latin-1 with ``backslashreplace`` so that every
    character survives the ``unicode_escape`` round trip. Decoding a ``str``
    directly with ``unicode_escape`` would go through UTF-8 bytes and turn any
    non-ASCII character into mojibake.

    If decoding fails (e.g. a truncated escape or a trailing backslash), fall
    back to a single left-to-right pass that handles only \n, \r, \t, \" and \\
    and leaves every other sequence untouched. If that fails too (e.g. the
    input is not a string), the input is returned unchanged.
    """
    try:
        return s.encode("latin-1", "backslashreplace").decode("unicode_escape")
    except Exception:
        pass
    try:
        # Single pass so that an escaped backslash followed by 'n' is not
        # mistaken for an escaped newline.
        return re.sub(
            r"\\(.)",
            lambda m: _SIMPLE_ESCAPES.get(m.group(1), m.group(0)),
            s,
            flags=re.DOTALL,
        )
    except Exception:
        return s


def looks_jsonish(txt: Any) -> bool:
    """
    Heuristic: True when the text contains '{' or '[', or a whitespace-preceded
    ``key:`` pattern (optionally quoted key).
    """
    try:
        s = str(txt or "")
        if "{" in s or "[" in s:
            return True
        # also patterns like key:
        return bool(re.search(r'\s["\']?[A-Za-z0-9_\-]+["\']?\s*:', s))
    except Exception:
        return False


def deep_merge_dicts(a: Any, b: Any) -> Any:
    """
    Merge dicts (b over a, recursively). Lists or non-dicts are replaced by b.

    Neither input is mutated; a new dict is built at every merged level.
    """
    if isinstance(a, dict) and isinstance(b, dict):
        out = dict(a)
        for k, v in b.items():
            if (k in a) and isinstance(a.get(k), dict) and isinstance(v, dict):
                out[k] = deep_merge_dicts(a.get(k), v)
            else:
                out[k] = v
        return out
    return b


# ---- Trim-aware merge that preserves original binary/base64 fields ----
def is_trimmed_b64_string(s: Any) -> bool:
    """Return True when ``s`` is a string containing the '(trimmed ' placeholder marker."""
    try:
        if not isinstance(s, str):
            return False
        return "(trimmed " in s
    except Exception:
        return False


def looks_base64ish(s: Any) -> bool:
    """
    Return True for strings of at least 64 chars made only of base64 alphabet
    characters, '=' padding and CR/LF.
    """
    try:
        if not isinstance(s, str) or len(s) < 64:
            return False
        return bool(re.fullmatch(r"[A-Za-z0-9+/=\r\n]+", s))
    except Exception:
        return False


def merge_lists_preserving_b64(orig_list: Any, edited_list: Any) -> Any:
    """
    Merge lists with base64-trimmed preservation but DO NOT pad from original:
    - Result length equals edited_list length (indices beyond edited are dropped).
    - At each index:
        * If edited value is a trimmed placeholder string and original has a string → keep original.
        * If both dicts → recurse via deep_merge_preserving_b64.
        * If both lists → recurse via merge_lists_preserving_b64.
        * Else → take edited value as-is.

    A non-list ``edited_list`` is returned unchanged; a non-list ``orig_list``
    is treated as empty.
    """
    if not isinstance(edited_list, list):
        return edited_list
    if not isinstance(orig_list, list):
        orig_list = []
    out = []
    for i, ev in enumerate(edited_list):
        ov = orig_list[i] if i < len(orig_list) else None
        if isinstance(ev, str) and is_trimmed_b64_string(ev) and isinstance(ov, str):
            out.append(ov)
        elif isinstance(ev, dict) and isinstance(ov, dict):
            out.append(deep_merge_preserving_b64(ov, ev))
        elif isinstance(ev, list) and isinstance(ov, list):
            out.append(merge_lists_preserving_b64(ov, ev))
        else:
            out.append(ev)
    return out


def deep_merge_preserving_b64(orig: Any, edited: Any) -> Any:
    """
    Merge preserving original base64/data_url only for trimmed placeholders, with strict edited-shape:
    - If edited is a trimmed placeholder string and orig is a string → keep orig.
    - Dicts: RESULT CONTAINS ONLY KEYS FROM EDITED. Keys missing in edited are treated as deleted.
      For each present key: recurse (dict/list) or take edited value; for trimmed strings keep orig.
    - Lists: delegate to merge_lists_preserving_b64 (result length = edited length).
    - Other types: replace with edited.
    """
    if isinstance(edited, str) and is_trimmed_b64_string(edited) and isinstance(orig, str):
        return orig
    if isinstance(orig, dict) and isinstance(edited, dict):
        out: Dict[str, Any] = {}
        for k, ev in edited.items():
            ov = orig.get(k)
            if isinstance(ev, str) and is_trimmed_b64_string(ev) and isinstance(ov, str):
                out[k] = ov
            elif isinstance(ev, dict) and isinstance(ov, dict):
                out[k] = deep_merge_preserving_b64(ov, ev)
            elif isinstance(ev, list) and isinstance(ov, list):
                out[k] = merge_lists_preserving_b64(ov, ev)
            else:
                out[k] = ev
        return out
    if isinstance(orig, list) and isinstance(edited, list):
        return merge_lists_preserving_b64(orig, edited)
    return edited


def salvage_json_for_send(
    edited_body_text: Any,
    headers: Dict[str, Any],
    orig_json: Optional[Any],
    prefer_registry_original: bool = True,
) -> Tuple[Optional[Any], Optional[str]]:
    """
    Build (final_json, final_text) for outgoing request body.

    Exactly one element of the returned pair is meant to be used: when
    final_json is returned, final_text is None, and vice versa.

    Strategy:
    - Normalize text for JSON.
    - Try parse; then try trailing extract; then unicode_escape unescape and retry.
    - If prefer_registry_original=True and orig_json present:
        * If edited_json present: deep-merge with base64 preservation, but ONLY keep keys present in edited;
          lists are limited to the edited length (no padding from original).
        * If not: DO NOT resurrect original. Empty/whitespace → send empty text; otherwise send raw text as-is.
    - Else:
        * If edited_json present => final_json = edited_json
        * Else: if content-type is json and orig_json present => final_json = orig_json else send raw text.
    """
    # Normalize and attempt parse
    norm = normalize_jsonish_text(edited_body_text)
    edited_json = try_parse_json(norm)
    if edited_json is None:
        edited_json = extract_json_trailing(norm)
    if edited_json is None:
        ue = global_unescape_jsonish(str(edited_body_text or ""))
        if isinstance(ue, str) and ue != edited_body_text:
            ue_norm = normalize_jsonish_text(ue)
            edited_json = try_parse_json(ue_norm) or extract_json_trailing(ue_norm)

    json_ct = content_type_is_json(headers)

    # Prefer original registry JSON where applicable
    if prefer_registry_original and orig_json is not None:
        if edited_json is None:
            # Respect full manual control: do NOT resurrect original JSON.
            # Empty/whitespace → send empty text; otherwise send raw text as-is.
            if isinstance(norm, str) and not norm.strip():
                return None, ""
            else:
                return None, str(edited_body_text or "")
        else:
            # Merge edits over original with trimmed-b64 preservation, but keep only keys present in edited
            # and limit lists to the edited length.
            return deep_merge_preserving_b64(orig_json, edited_json), None

    # No prefer or no orig_json
    if edited_json is not None:
        return edited_json, None

    if json_ct and orig_json is not None:
        # Hard salvage for declared JSON payloads
        maybe = try_parse_json(norm) or extract_json_trailing(norm)
        return (maybe if maybe is not None else orig_json), None

    # Plain text fallback
    return None, str(edited_body_text or "")


# -------- Registry wrapper --------
def register_manual_request(req_id: str, info: Dict[str, Any]) -> None:
    """
    Forward (req_id, info) to the executor's request registry when it was importable.

    Best-effort: a missing registry or any error raised by it is silently ignored.
    """
    try:
        if _reg_http_req:
            _reg_http_req(req_id, info)
    except Exception:
        pass