A-A+
Burp Suite插件:美化JSON/JSONP、解码Base64和Unicode

【注意:此文章为博主原创文章!转载需注意,请带原文链接,至少也要是txt格式!】
此版本主要修复了编码问题,具体改进如下:
- 智能编码检测:
  - 从HTTP响应头的 Content-Type 中提取 charset 信息
  - 按优先级尝试多种编码(UTF-8, GBK, GB2312, Big5等)
  - 验证解码结果的有效性
- 中文字符处理:
  - 在JSON格式化时使用 ensure_ascii=False 保持中文字符
  - 扩展可打印字符检测范围,包含中文字符
  - 智能处理Base64解码后的中文内容
- 乱码检测:
- 检测常见乱码模式如"锟斤拷"
- 统计替换字符比例判断解码质量
- 失败时使用错误替换而非抛出异常
- 编码保持:
- 处理完成后使用原始编码重新编码
- 避免在处理过程中丢失编码信息
最新的这个版本应该能够很好地处理中文等多字节字符,避免出现乱码问题。
请把以下内容保存为python脚本,然后通过burp加载即可。
# -*- coding:utf-8 -*-
# __ver__ = python2/jython compatible
from burp import IBurpExtender
from burp import IHttpListener
import re
import json
import base64
import sys
# jsbeautifier is optional: record whether it could be imported so the
# extension can fall back to json.dumps-based formatting when it is missing.
try:
    import jsbeautifier
    JS_BEAUTIFIER_AVAILABLE = True
except ImportError:
    JS_BEAUTIFIER_AVAILABLE = False
    print("WARNING: jsbeautifier not available. JSON formatting will be basic.")
class BurpExtender(IBurpExtender, IHttpListener):
    """Burp Suite extension that rewrites response bodies for readability.

    For responses handled by Proxy, Scanner, Intruder and Repeater the body
    is decoded to text, JSON/JSONP payloads are pretty-printed, Base64-looking
    tokens are replaced by their decoded text, and runs of ``\\uXXXX`` escapes
    are replaced by the characters they denote.  The rewritten body is then
    re-encoded with the charset it was decoded with.

    The code targets Python 2 / Jython: it relies on ``unicode`` and on
    ``str.decode``.
    """

    # Burp tool flags whose responses are processed:
    # 4=Proxy, 16=Scanner, 32=Intruder, 64=Repeater.
    _HANDLED_TOOLS = (4, 16, 32, 64)

    # "charset=..." parameter of a (lower-cased) Content-Type header.
    _CHARSET_RE = re.compile(r'charset=([^;\s]+)')
    # Outermost {...} or [...] span; lets a JSONP wrapper surround the JSON.
    _JSON_RE = re.compile(r'({.*}|\[.*\])', re.DOTALL)
    # Candidate Base64 tokens (at least 8 alphabet characters).
    _BASE64_RE = re.compile(r'[A-Za-z0-9+/]{8,}(?:==|=)?')
    # One or more consecutive \uXXXX escape sequences.
    _UNICODE_ESCAPE_RE = re.compile(r'(?:\\u[0-9a-fA-F]{4})+')

    # Header charset names mapped to the codec name actually used.
    _CHARSET_ALIASES = {
        'gb2312': 'gbk',
        'gbk': 'gbk',
        'gb18030': 'gbk',
        'utf-8': 'utf-8',
        'utf8': 'utf-8',
        'big5': 'big5',
        'big5-hkscs': 'big5',
        'iso-8859-1': 'iso-8859-1',
        'latin-1': 'iso-8859-1',
    }

    # Typical mojibake produced when the UTF-8 bytes of U+FFFD are read as
    # GBK.  Written with escapes so it is a unicode literal: comparing a
    # non-ASCII byte-string literal against unicode text raises
    # UnicodeDecodeError on Python 2.
    _MOJIBAKE_MARKER = u'\u951f\u65a4\u62f7'

    def __init__(self):
        """Initialise size limits and the ordered list of fallback codecs."""
        self.python_version = sys.version_info[0]
        self.max_body_size = 150000  # largest body that will be processed
        self.min_json_size = 5  # shortest body considered as JSON
        # Candidate encodings, tried in this order when the header is no help.
        self.encoding_list = ['utf-8', 'gbk', 'gb2312', 'big5', 'iso-8859-1', 'windows-1252']

    def registerExtenderCallbacks(self, callbacks):
        """Store the Burp callbacks, name the extension and register as listener.

        Args:
            callbacks: the callbacks object handed over by Burp.
        """
        self._helpers = callbacks.getHelpers()
        self._callbacks = callbacks
        self._callbacks.setExtensionName("Enhanced JSON-Base64-Unicode Beautifier")
        callbacks.registerHttpListener(self)
        # Formatting options are only created when jsbeautifier was imported.
        if JS_BEAUTIFIER_AVAILABLE:
            self.js_opts = jsbeautifier.default_options()
            self.js_opts.indent_size = 2
        print("Enhanced JSON-Base64-Unicode Beautifier loaded successfully!")

    def processHttpMessage(self, toolFlag, messageIsRequest, messageInfo):
        """Rewrite the response body of a message when processing changes it.

        Requests and messages from tools other than Proxy, Scanner, Intruder
        and Repeater are ignored.  Any exception is logged, not propagated.

        Args:
            toolFlag: Burp tool flag (4=Proxy, 16=Scanner, 32=Intruder,
                64=Repeater).
            messageIsRequest: true when the callback is for a request.
            messageInfo: the message whose response may be replaced.
        """
        if messageIsRequest or toolFlag not in self._HANDLED_TOOLS:
            return
        try:
            response = messageInfo.getResponse()
            if not response:
                return
            analyzed_response = self._helpers.analyzeResponse(response)
            headers = analyzed_response.getHeaders()
            body = response[analyzed_response.getBodyOffset():]
            if not body or len(body) == 0:
                return
            # Apply the size limit to the raw bytes, before paying for a
            # decode of a body that would be skipped anyway.
            if len(body) > self.max_body_size:
                self._print_debug("Body too large (" + str(len(body)) + " bytes), skipping processing")
                return
            body_string, detected_encoding = self._smart_decode_bytes(body, headers)
            processed_body = self._process_response_body(body_string, headers)
            if processed_body != body_string:
                # Re-encode with the charset used for decoding.
                processed_bytes = self._encode_string_to_bytes(processed_body, detected_encoding)
                new_response = self._helpers.buildHttpMessage(headers, processed_bytes)
                messageInfo.setResponse(new_response)
                self._print_debug("Response updated with processed content (encoding: " + detected_encoding + ")")
        except Exception as e:
            self._print_error("Error processing HTTP message: " + str(e))

    @staticmethod
    def _to_raw_string(byte_data):
        """Return ``byte_data`` as a string via ``tostring()`` or ``str()``."""
        if hasattr(byte_data, 'tostring'):
            return byte_data.tostring()
        return str(byte_data)

    def _smart_decode_bytes(self, byte_data, headers):
        """Decode a response body to text and report the codec used.

        The charset from the Content-Type header is tried first.  If it is
        missing or fails, each entry of ``self.encoding_list`` is tried in
        order and the first result accepted by ``_is_valid_decoded_string``
        wins.  As a last resort UTF-8 with replacement characters is used.

        Args:
            byte_data: the raw body.
            headers: the response header lines.

        Returns:
            A ``(text, encoding_name)`` tuple; ``("", "utf-8")`` for an empty
            body.
        """
        if not byte_data:
            return "", "utf-8"

        header_encoding = self._get_encoding_from_headers(headers)
        raw_string = self._to_raw_string(byte_data)

        if not isinstance(raw_string, str):
            # Not a byte string, so there is nothing to decode.
            return raw_string, header_encoding or self.encoding_list[0]

        if header_encoding:
            try:
                return raw_string.decode(header_encoding), header_encoding
            except Exception as e:
                self._print_debug("Failed to decode with detected encoding " + header_encoding + ": " + str(e))

        for encoding in self.encoding_list:
            try:
                decoded_string = raw_string.decode(encoding)
            except Exception:
                continue
            if self._is_valid_decoded_string(decoded_string):
                self._print_debug("Successfully decoded with encoding: " + encoding)
                return decoded_string, encoding

        try:
            decoded_string = raw_string.decode('utf-8', 'replace')
            self._print_debug("Using UTF-8 with error replacement")
            return decoded_string, 'utf-8'
        except Exception:
            return str(raw_string), 'utf-8'

    def _get_encoding_from_headers(self, headers):
        """Return the codec named by the Content-Type charset, or None.

        Known charset names are normalised through ``_CHARSET_ALIASES``;
        unknown names are returned as found (lower-cased, quotes stripped).
        """
        for header in headers:
            header_lower = header.lower()
            if not header_lower.startswith("content-type:"):
                continue
            charset_match = self._CHARSET_RE.search(header_lower)
            if charset_match:
                charset = charset_match.group(1).strip('"\'')
                return self._CHARSET_ALIASES.get(charset, charset)
        return None

    def _is_valid_decoded_string(self, decoded_string):
        """Heuristically decide whether decoded text looks like mojibake.

        Returns False when more than 10% of the characters are U+FFFD, or
        when the text contains the known mojibake marker or ``????``.  Empty
        input counts as valid.
        """
        if not decoded_string:
            return True
        replacement_char_count = decoded_string.count(u'\ufffd')
        if float(replacement_char_count) / len(decoded_string) > 0.1:
            return False
        # Unicode literals on purpose: see _MOJIBAKE_MARKER.
        if self._MOJIBAKE_MARKER in decoded_string or u'????' in decoded_string:
            return False
        return True

    def _encode_string_to_bytes(self, string_data, encoding):
        """Encode text back to bytes with ``encoding``.

        Unencodable characters are replaced.  If encoding fails altogether,
        UTF-8 is used; non-unicode input is passed through ``str()``.
        """
        try:
            if isinstance(string_data, unicode):
                return string_data.encode(encoding, 'replace')
            return str(string_data)
        except Exception as e:
            self._print_debug("Error encoding string: " + str(e))
            try:
                return string_data.encode('utf-8', 'replace')
            except Exception:
                return str(string_data)

    def _process_response_body(self, body_string, headers):
        """Run the JSON, Base64 and unicode-escape passes over the body.

        Returns the processed text.  If a step raises, the error is logged
        and the text as processed so far is returned.
        """
        try:
            # 1. Pretty-print JSON/JSONP when the body looks like JSON.
            if self._is_json_content(body_string, headers):
                body_string = self._beautify_json(body_string)
            # 2. Replace Base64 tokens by their decoded text.
            body_string = self._decode_base64(body_string)
            # 3. Replace \uXXXX escapes by the characters they denote.
            body_string = self._decode_unicode(body_string)
            return body_string
        except Exception as e:
            self._print_error("Error processing response body: " + str(e))
            return body_string

    def _is_json_content(self, body_string, headers):
        """Return True when the headers or the body shape suggest JSON.

        A Content-Type mentioning JSON or JavaScript is enough; otherwise the
        body must be at least ``self.min_json_size`` long and, once stripped,
        start with ``{``/``[`` and end with ``}``/``]``.
        """
        json_types = ("application/json", "text/javascript", "application/javascript")
        for header in headers:
            content_type = header.lower()
            if content_type.startswith("content-type:"):
                if any(ct in content_type for ct in json_types):
                    return True
        stripped_body = body_string.strip()
        return (len(body_string) >= self.min_json_size and
                stripped_body.startswith(("{", "[")) and
                stripped_body.endswith(("}", "]")))

    def _beautify_json(self, content):
        """Pretty-print the JSON part of ``content``.

        The outermost ``{...}`` or ``[...]`` span is parsed and re-serialised
        with non-ASCII characters kept as-is; text around it (for example a
        JSONP callback wrapper) is preserved.  If that span is not valid
        JSON the content is returned unchanged.
        """
        try:
            json_match = self._JSON_RE.search(content)
            if not json_match:
                return content
            parsed_json = json.loads(json_match.group(1))
            if JS_BEAUTIFIER_AVAILABLE:
                compact_json = json.dumps(parsed_json, ensure_ascii=False)
                # Pass the options prepared in registerExtenderCallbacks so
                # the configured indent size is actually applied.
                js_opts = getattr(self, 'js_opts', None)
                if js_opts is not None:
                    formatted_json = jsbeautifier.beautify(compact_json, js_opts)
                else:
                    formatted_json = jsbeautifier.beautify(compact_json)
            else:
                formatted_json = json.dumps(parsed_json, indent=2, sort_keys=True, ensure_ascii=False)
            # Keep whatever surrounds the JSON span.
            content = content[:json_match.start()] + formatted_json + content[json_match.end():]
            self._print_debug("JSON content beautified")
        except ValueError as e:
            self._print_debug("Not valid JSON content: " + str(e))
        except Exception as e:
            self._print_error("Error beautifying JSON: " + str(e))
        return content

    def _decode_base64(self, content):
        """Replace Base64-looking tokens in ``content`` by their decoded text.

        A token is replaced only when it decodes, the decoded bytes pass
        ``_is_printable_content`` and they can be turned into non-empty text.
        All replacements happen in one ``re.sub`` pass, so a token is
        rewritten only where it was matched, never inside another token or
        inside text that was already decoded.

        NOTE(review): the candidate pattern also matches ordinary
        alphanumeric words of 8+ characters; consider tightening it.
        """
        replacements = {}  # token -> decoded text, or None when rejected

        def decode_token(token):
            try:
                decoded_bytes = base64.b64decode(token)
                if not self._is_printable_content(decoded_bytes):
                    return None
                decoded_string = self._decode_bytes_smart(decoded_bytes)
            except Exception:
                return None
            if not decoded_string:
                return None
            match_preview = token[:20] + "..." if len(token) > 20 else token
            decoded_preview = decoded_string[:20] + "..." if len(decoded_string) > 20 else decoded_string
            self._print_debug("Base64 decoded: " + match_preview + " -> " + decoded_preview)
            return decoded_string

        def substitute(match_obj):
            token = match_obj.group(0)
            if token not in replacements:
                replacements[token] = decode_token(token)
            decoded_string = replacements[token]
            return token if decoded_string is None else decoded_string

        try:
            modified_content = self._BASE64_RE.sub(substitute, content)
        except Exception as e:
            self._print_error("Error decoding Base64: " + str(e))
            return content

        decoded_count = sum(1 for value in replacements.values() if value is not None)
        if decoded_count > 0:
            self._print_debug("Total Base64 strings decoded: " + str(decoded_count))
        return modified_content

    def _decode_bytes_smart(self, byte_data):
        """Decode bytes using the first codec that yields acceptable text.

        Codecs from ``self.encoding_list`` are tried in order; the fallback
        is UTF-8 with replacement characters.  Empty input gives ``""``.
        """
        if not byte_data:
            return ""
        for encoding in self.encoding_list:
            try:
                decoded = byte_data.decode(encoding)
            except Exception:
                continue
            if self._is_valid_decoded_string(decoded):
                return decoded
        try:
            return byte_data.decode('utf-8', 'replace')
        except Exception:
            return str(byte_data)

    def _decode_unicode(self, content):
        """Replace runs of ``\\uXXXX`` escapes by the characters they denote.

        Done in one ``re.sub`` pass so that a short run is never substituted
        inside a longer one.  Runs that fail to decode are left unchanged.
        """
        replacements = {}  # escaped run -> decoded text, or None on failure

        def substitute(match_obj):
            escaped = match_obj.group(0)
            if escaped not in replacements:
                try:
                    decoded_string = escaped.decode('unicode_escape')
                    self._print_debug("Unicode decoded: " + escaped + " -> " + decoded_string)
                except Exception:
                    decoded_string = None
                replacements[escaped] = decoded_string
            decoded_string = replacements[escaped]
            return escaped if decoded_string is None else decoded_string

        try:
            modified_content = self._UNICODE_ESCAPE_RE.sub(substitute, content)
        except Exception as e:
            self._print_error("Error decoding Unicode: " + str(e))
            return content

        decoded_count = sum(1 for value in replacements.values() if value is not None)
        if decoded_count > 0:
            self._print_debug("Total Unicode sequences decoded: " + str(decoded_count))
        return modified_content

    def _is_printable_content(self, data):
        """Return True when more than 70% of the bytes look like text.

        Printable ASCII, tab/LF/CR and every byte >= 128 (possible multi-byte
        characters such as Chinese) count as text.  Empty or non-byte input
        gives False.
        """
        try:
            # bytearray yields integer byte values regardless of how the
            # underlying byte string iterates.
            byte_values = bytearray(data)
        except Exception:
            return False
        if len(byte_values) == 0:
            return False
        printable_count = sum(
            1 for code in byte_values
            if 32 <= code <= 126 or code in (9, 10, 13) or code >= 128
        )
        return float(printable_count) / len(byte_values) > 0.7

    def _emit(self, prefix, message):
        """Print ``prefix + message``, falling back to ``repr`` of the message
        if printing raises a UnicodeError."""
        try:
            print(prefix + message)
        except UnicodeError:
            print(prefix + repr(message))

    def _print_debug(self, message):
        """Print a debug message."""
        self._emit("[DEBUG] ", message)

    def _print_error(self, message):
        """Print an error message."""
        self._emit("[ERROR] ", message)
布施恩德可便相知重
微信扫一扫打赏
支付宝扫一扫打赏