banner("PASO 3: Creación del marco de datos de análisis")


def process_example(ex):
    """Flatten one raw example into a flat record for the analysis frame.

    Args:
        ex: Mapping-like object exposing ``.get``; the fields read here are
            ``trajectory``, ``model_patch``, ``metadata``, ``instance_id``,
            ``repo``, ``resolved``, ``_agent`` and ``_model`` (plus the
            keys flagged in the review note below).

    Returns:
        A dict with identity fields, per-role message counts, patch size
        statistics, a token count for the whole trajectory text, selected
        metadata fields and the tool names found in the trajectory.
    """
    traj = normalize_trajectory(ex.get("trajectory"))
    rc = role_counts(traj)
    # Only the first three values returned by parse_patch are used here.
    nf, add, dele, _files, _exts = parse_patch(ex.get("model_patch"))
    meta = normalize_metadata(ex.get("metadata"))
    full_text = "\n".join(message_text(m) for m in traj)

    # NOTE(review): the keys "idioma", "licencia", "agente" and "categoría"
    # (and the "desconocido" default) look like machine-translated versions
    # of English field names. They are kept as found because their exact
    # original spelling is not visible here -- confirm against the dataset
    # schema and any downstream column lookups.
    return {
        "instance_id": ex.get("instance_id"),
        "repo": ex.get("repo"),
        "idioma": (ex.get("idioma") or "desconocido").lower(),
        "licencia": ex.get("licencia"),
        # Downstream code reads this column as "resolved".
        "resolved": ex.get("resolved"),
        "agente": ex.get("_agent"),
        "model": ex.get("_model"),
        "n_messages": len(traj),
        "n_system": rc.get("system", 0),
        "n_user": rc.get("user", 0),
        "n_assistant": rc.get("assistant", 0),
        "n_tool": rc.get("tool", 0),
        "patch_files": nf,
        "patch_add": add,
        "patch_del": dele,
        "patch_churn": add + dele,
        "traj_tokens": count_tokens(full_text),
        "categoría": meta.get("categoría"),
        "meta_files": meta.get("num_modified_files"),
        "meta_lines": meta.get("num_modified_lines"),
        # NOTE(review): helper name kept as found; presumably this is
        # extract_tool_names -- verify against its definition.
        "_tools": extraer_tool_names(traj),
    }


registros = [process_example(ex) for ex in raw_rows]
# Build the analysis frame from the per-example records; each record is
# expected to carry a "resolved" field.
df = pd.DataFrame(registros)

# is_resolved: True only where "resolved" equals 1.
df["is_resolved"] = df["resolved"] == 1
# known_label: True where "resolved" is 0 or 1, False for anything else.
df["known_label"] = df["resolved"].isin([0, 1])

print(f"Marco de datos: {df.shape[0]} filas x {df.shape[1]} cols")
print("\nResumen numérico:")

# Descriptive statistics for the size-related columns, rounded to one decimal.
summary_cols = [
    "n_messages",
    "n_assistant",
    "n_tool",
    "patch_files",
    "patch_churn",
    "traj_tokens",
]
print(df[summary_cols].describe().round(1))