A-A+

Burp Suite插件：美化JSON/JSONP、解码Base64和Unicode

2025年06月20日 11:04 学习笔记暂无评论共9647字 (阅读836 views次)

Shrimp-虾米-原创文章

【注意：此文章为博主原创文章！转载需注意，请带原文链接，至少也要是txt格式！】

此版本主要编码问题修复

智能编码检测：
- 从HTTP响应头的Content-Type中提取charset信息
- 按优先级尝试多种编码（UTF-8, GBK, GB2312, Big5等）
- 验证解码结果的有效性
中文字符处理：
- 在JSON格式化时使用ensure_ascii=False保持中文字符
- 扩展可打印字符检测范围，包含中文字符
- 智能处理Base64解码后的中文内容
乱码检测：
- 检测常见乱码模式如"锟斤拷"
- 统计替换字符比例判断解码质量
- 失败时使用错误替换而非抛出异常
编码保持：
- 处理完成后使用原始编码重新编码
- 避免在处理过程中丢失编码信息

最新的这个版本应该能够很好地处理中文等多字节字符，避免出现乱码问题。

请把以下内容保存为python脚本，然后通过burp加载即可。

# -*- coding:utf-8 -*-
# __ver__ = python2/jython compatible

from burp import IBurpExtender
from burp import IHttpListener
import re
import json
import base64
import sys

# jsbeautifier is optional: record whether the import succeeded in
# JS_BEAUTIFIER_AVAILABLE and print a warning when it is missing.
try:
    import jsbeautifier
    JS_BEAUTIFIER_AVAILABLE = True
except ImportError:
    JS_BEAUTIFIER_AVAILABLE = False
    print("WARNING: jsbeautifier not available. JSON formatting will be basic.")

class BurpExtender(IBurpExtender, IHttpListener):
    """
    Burp Suite插件：美化JSON/JSONP、解码Base64和Unicode
    支持的工具：Proxy, Scanner, Intruder, Repeater
    """
    
    def __init__(self):
        """Initialise the size limits and the ordered list of fallback encodings."""
        # Encodings to try, in priority order, when decoding bytes.
        self.encoding_list = ['utf-8', 'gbk', 'gb2312', 'big5', 'iso-8859-1', 'windows-1252']
        # Decoded bodies longer than this are left untouched.
        self.max_body_size = 150000
        # Minimum body length for the content-shape JSON check.
        self.min_json_size = 5
        # Major version number of the running interpreter.
        self.python_version = sys.version_info[0]
        
    def registerExtenderCallbacks(self, callbacks):
        """Keep the Burp callbacks, name the extension and register the listener.

        Args:
            callbacks: callbacks object passed in when the extension is loaded;
                its helpers are stored on ``self._helpers``.
        """
        helpers = callbacks.getHelpers()
        self._helpers = helpers
        self._callbacks = callbacks

        # Extension name, then HTTP listener registration.
        callbacks.setExtensionName("Enhanced JSON-Base64-Unicode Beautifier")
        callbacks.registerHttpListener(self)

        # Beautifier options are only built when the optional module imported.
        if JS_BEAUTIFIER_AVAILABLE:
            beautifier_options = jsbeautifier.default_options()
            beautifier_options.indent_size = 2
            self.js_opts = beautifier_options

        print("Enhanced JSON-Base64-Unicode Beautifier loaded successfully!")

    def processHttpMessage(self, toolFlag, messageIsRequest, messageInfo):
        """Post-process a response body and write it back when it changed.

        Args:
            toolFlag: tool identifier; only 4 (Proxy), 16 (Scanner),
                32 (Intruder) and 64 (Repeater) are handled.
            messageIsRequest: truthy for requests, which are skipped.
            messageInfo: message object whose response is read and, when the
                processed body differs, replaced.
        """
        handled_tools = [4, 16, 32, 64]
        if messageIsRequest or toolFlag not in handled_tools:
            return

        try:
            raw_response = messageInfo.getResponse()
            if not raw_response:
                return

            response_info = self._helpers.analyzeResponse(raw_response)
            headers = response_info.getHeaders()
            offset = response_info.getBodyOffset()
            body = raw_response[offset:]

            if not body or len(body) == 0:
                return

            # Decode to text, remembering which encoding was used.
            body_string, detected_encoding = self._smart_decode_bytes(body, headers)

            body_length = len(body_string)
            if body_length > self.max_body_size:
                self._print_debug("Body too large (" + str(body_length) + " bytes), skipping processing")
                return

            processed_body = self._process_response_body(body_string, headers)
            if processed_body == body_string:
                return

            # Re-encode with the detected encoding and rebuild the message.
            processed_bytes = self._encode_string_to_bytes(processed_body, detected_encoding)
            rebuilt = self._helpers.buildHttpMessage(headers, processed_bytes)
            messageInfo.setResponse(rebuilt)
            self._print_debug("Response updated with processed content (encoding: " + detected_encoding + ")")

        except Exception as e:
            self._print_error("Error processing HTTP message: " + str(e))

    def _smart_decode_bytes(self, byte_data, headers):
        """Decode a raw body to text and report which encoding was used.

        The charset named in the headers is tried first. If that is missing or
        fails, each entry of ``self.encoding_list`` is tried in order and the
        first result accepted by ``_is_valid_decoded_string`` wins. As a last
        resort UTF-8 with replacement characters is used.

        Args:
            byte_data: raw body; objects exposing ``tostring()`` are converted
                through it, anything else through ``str()``.
            headers: header lines passed to ``_get_encoding_from_headers``.

        Returns:
            A ``(text, encoding_name)`` pair; ``("", "utf-8")`` for empty input.
        """
        if not byte_data:
            return "", "utf-8"

        def to_raw(data):
            # Same conversion is needed on both the header and the fallback path.
            if hasattr(data, 'tostring'):
                return data.tostring()
            return str(data)

        # Step 1: charset declared in the headers, if any.
        header_encoding = self._get_encoding_from_headers(headers)
        if header_encoding:
            try:
                raw = to_raw(byte_data)
                if not isinstance(raw, str):
                    return raw, header_encoding
                return raw.decode(header_encoding), header_encoding
            except Exception as e:
                self._print_debug("Failed to decode with detected encoding " + header_encoding + ": " + str(e))

        # Step 2: walk the configured fallback encodings.
        raw = to_raw(byte_data)
        for candidate in self.encoding_list:
            try:
                if not isinstance(raw, str):
                    return raw, candidate
                text = raw.decode(candidate)
                # Reject results that look like mojibake and keep trying.
                if self._is_valid_decoded_string(text):
                    self._print_debug("Successfully decoded with encoding: " + candidate)
                    return text, candidate
            except Exception:
                continue

        # Step 3: nothing was accepted, so decode as UTF-8 with replacement.
        try:
            text = raw.decode('utf-8', 'replace') if isinstance(raw, str) else raw
            self._print_debug("Using UTF-8 with error replacement")
            return text, 'utf-8'
        except Exception:
            return str(raw), 'utf-8'

    def _get_encoding_from_headers(self, headers):
        """Return the charset declared in the Content-Type header, if any.

        Common aliases are folded to one canonical codec name. ``gb18030`` is
        kept as ``gb18030`` rather than being mapped to ``gbk``: GBK cannot
        represent GB18030 four-byte sequences, so the downgrade made such
        bodies fail to decode and be re-encoded with ``?`` placeholders.

        Args:
            headers: iterable of header lines (``"Name: value"`` strings).

        Returns:
            The lower-cased (and possibly normalised) charset name from the
            first Content-Type header that carries a ``charset=`` parameter,
            or ``None`` when no such parameter is present.
        """
        canonical_names = {
            'gb2312': 'gbk',
            'gbk': 'gbk',
            'gb18030': 'gb18030',
            'utf-8': 'utf-8',
            'utf8': 'utf-8',
            'big5': 'big5',
            'big5-hkscs': 'big5',
            'iso-8859-1': 'iso-8859-1',
            'latin-1': 'iso-8859-1',
        }

        for header in headers:
            header_lower = header.lower()
            if not header_lower.startswith("content-type:"):
                continue

            charset_match = re.search(r'charset=([^;\s]+)', header_lower)
            if charset_match:
                # The value may be quoted, e.g. charset="utf-8".
                charset = charset_match.group(1).strip('"\'')
                # Unknown names are passed through for the codec lookup to judge.
                return canonical_names.get(charset, charset)

        return None

    def _is_valid_decoded_string(self, decoded_string):
        """Heuristically decide whether decoded text looks like real text.

        The mojibake markers are unicode literals. The original used byte-string
        literals, which under Python 2 are implicitly ASCII-decoded when tested
        against unicode text; for the non-ASCII marker that either raises
        ``UnicodeDecodeError`` or never matches.

        Args:
            decoded_string: text produced by a decode attempt.

        Returns:
            ``True`` for empty input or plausible text; ``False`` when more
            than 10% of the characters are U+FFFD or a known mojibake marker
            is present.
        """
        if not decoded_string:
            return True

        # A high share of replacement characters means the codec was wrong.
        replacement_char_count = decoded_string.count(u'\ufffd')
        if float(replacement_char_count) / len(decoded_string) > 0.1:
            return False

        # U+951F U+65A4 U+62F7 is the marker the original checked as a byte
        # literal; a run of four question marks is the second marker.
        mojibake_markers = (u'\u951f\u65a4\u62f7', u'????')
        for marker in mojibake_markers:
            if marker in decoded_string:
                return False

        return True

    def _encode_string_to_bytes(self, string_data, encoding):
        """Encode processed text back to bytes.

        Unicode input is encoded with *encoding*, replacing characters that
        cannot be represented; any other input is passed through ``str()``.
        If that fails (for example because the codec name is unknown), UTF-8
        is used, and as a last resort ``str()``.

        The innermost handler catches ``Exception`` instead of using a bare
        ``except:``, so ``KeyboardInterrupt`` and ``SystemExit`` are no longer
        swallowed.

        Args:
            string_data: text to encode.
            encoding: codec name to encode with.

        Returns:
            The encoded data.
        """
        try:
            if isinstance(string_data, unicode):
                return string_data.encode(encoding, 'replace')
            return str(string_data)
        except Exception as e:
            self._print_debug("Error encoding string: " + str(e))
            try:
                return string_data.encode('utf-8', 'replace')
            except Exception:
                return str(string_data)

    def _process_response_body(self, body_string, headers):
        """Run the body through JSON beautifying, Base64 and Unicode decoding.

        Args:
            body_string: decoded response body.
            headers: header lines, used for the JSON content check.

        Returns:
            The processed body. If a step raises, the error is reported and
            the result of the last step that completed is returned.
        """
        current = body_string
        try:
            # 1. Beautify only when the body is considered JSON/JSONP.
            if self._is_json_content(current, headers):
                current = self._beautify_json(current)

            # 2. Base64 decoding, then 3. Unicode escape decoding.
            for step in (self._decode_base64, self._decode_unicode):
                current = step(current)

            return current

        except Exception as e:
            self._print_error("Error processing response body: " + str(e))
            return current

    def _is_json_content(self, body_string, headers):
        """Tell whether the body should be treated as JSON/JSONP.

        Args:
            body_string: decoded response body.
            headers: iterable of header lines.

        Returns:
            ``True`` when a Content-Type header mentions a JSON/JavaScript
            media type, or when the body is at least ``self.min_json_size``
            long and, once stripped, starts with ``{``/``[`` and ends with
            ``}``/``]``; otherwise ``False``.
        """
        json_media_types = ("application/json", "text/javascript", "application/javascript")

        # Header-based detection.
        for header in headers:
            lowered = header.lower()
            if lowered.startswith("content-type:"):
                for media_type in json_media_types:
                    if media_type in lowered:
                        return True

        # Shape-based detection on the body itself.
        trimmed = body_string.strip()
        if len(body_string) < self.min_json_size:
            return False
        if not trimmed.startswith(("{", "[")):
            return False
        return trimmed.endswith(("}", "]"))

    def _beautify_json(self, content):
        """Pretty-print the JSON payload found in *content*.

        The widest ``{...}`` or ``[...]`` span is parsed as JSON, re-serialised
        with ``ensure_ascii=False`` so non-ASCII text stays readable, and
        spliced back in place; any JSONP wrapper around it is preserved.

        When jsbeautifier is available, the options stored on ``self.js_opts``
        are passed to it. They were configured but never used before, so the
        configured indent size had no effect.

        Args:
            content: response body text.

        Returns:
            The body with its JSON part reformatted, or *content* unchanged
            when no JSON span is found or it does not parse.
        """
        try:
            # Greedy match: from the first opening bracket to the last closing one.
            json_match = re.search(r'({.*}|\[.*\])', content, re.DOTALL)
            if not json_match:
                return content

            parsed_json = json.loads(json_match.group(1))

            if JS_BEAUTIFIER_AVAILABLE:
                compact_json = json.dumps(parsed_json, ensure_ascii=False)
                # js_opts only exists once registerExtenderCallbacks has run.
                beautifier_options = getattr(self, 'js_opts', None)
                if beautifier_options is not None:
                    formatted_json = jsbeautifier.beautify(compact_json, beautifier_options)
                else:
                    formatted_json = jsbeautifier.beautify(compact_json)
            else:
                formatted_json = json.dumps(parsed_json, indent=2, sort_keys=True, ensure_ascii=False)

            # Splicing also covers the plain-JSON case (empty prefix and suffix).
            content = content[:json_match.start()] + formatted_json + content[json_match.end():]
            self._print_debug("JSON content beautified")

        except ValueError as e:
            self._print_debug("Not valid JSON content: " + str(e))
        except Exception as e:
            self._print_error("Error beautifying JSON: " + str(e))

        return content

    def _decode_base64(self, content):
        """Replace Base64-looking tokens with their decoded, printable text.

        Candidates are runs of at least eight Base64 alphabet characters with
        optional padding. A candidate is replaced only when it decodes, the
        decoded bytes pass ``_is_printable_content`` and
        ``_decode_bytes_smart`` yields non-empty text.

        Replacement is done in one ``re.sub`` pass over the same pattern. The
        previous ``str.replace`` loop over a ``set`` rewrote a token wherever
        it occurred as a substring, including inside other, longer candidates,
        and its result depended on set iteration order.

        NOTE(review): the pattern also matches ordinary alphanumeric words, so
        false positives remain possible; the acceptance heuristics are
        unchanged here.

        Args:
            content: response body text.

        Returns:
            The body with accepted tokens decoded, or *content* unchanged when
            nothing qualifies or an unexpected error occurs.
        """
        try:
            base64_pattern = re.compile(r'[A-Za-z0-9+/]{8,}(?:==|=)?')
            candidates = set(base64_pattern.findall(content))
            if not candidates:
                return content

            # Phase 1: decide, per unique token, what it should become.
            replacements = {}
            for candidate in candidates:
                try:
                    decoded_bytes = base64.b64decode(candidate)
                    if not self._is_printable_content(decoded_bytes):
                        continue

                    decoded_string = self._decode_bytes_smart(decoded_bytes)
                    if not decoded_string:
                        continue

                    replacements[candidate] = decoded_string

                    match_preview = candidate[:20] + "..." if len(candidate) > 20 else candidate
                    decoded_preview = decoded_string[:20] + "..." if len(decoded_string) > 20 else decoded_string
                    self._print_debug("Base64 decoded: " + match_preview + " -> " + decoded_preview)
                except Exception:
                    # Not valid Base64 (or not loggable); leave the token as is.
                    continue

            if not replacements:
                return content

            self._print_debug("Total Base64 strings decoded: " + str(len(replacements)))

            # Phase 2: substitute each occurrence exactly once, left to right.
            def substitute(found):
                token = found.group(0)
                return replacements.get(token, token)

            return base64_pattern.sub(substitute, content)

        except Exception as e:
            self._print_error("Error decoding Base64: " + str(e))
            return content

    def _decode_bytes_smart(self, byte_data):
        """Decode bytes to text using the configured fallback encodings.

        Each entry of ``self.encoding_list`` is tried in order and the first
        result accepted by ``_is_valid_decoded_string`` is returned. If none
        is accepted, UTF-8 with replacement characters is used.

        The final handler catches ``Exception`` instead of using a bare
        ``except:``, so ``KeyboardInterrupt`` and ``SystemExit`` propagate.

        Args:
            byte_data: byte string to decode.

        Returns:
            The decoded text, or ``""`` for empty input.
        """
        if not byte_data:
            return ""

        for encoding in self.encoding_list:
            try:
                decoded = byte_data.decode(encoding)
                if self._is_valid_decoded_string(decoded):
                    return decoded
            except Exception:
                # Wrong or unknown codec for this data; try the next one.
                continue

        # Nothing was accepted: decode leniently rather than fail.
        try:
            return byte_data.decode('utf-8', 'replace')
        except Exception:
            return str(byte_data)

    def _decode_unicode(self, content):
        """Replace runs of ``\\uXXXX`` escapes with the characters they denote.

        Each distinct run is decoded once with the ``unicode_escape`` codec
        and then substituted in a single ``re.sub`` pass. The previous
        ``str.replace`` loop over a ``set`` could decode a short run inside a
        longer one first, after which the longer run no longer matched and
        part of it stayed escaped; the outcome depended on set iteration
        order.

        Args:
            content: response body text.

        Returns:
            The body with escape runs decoded, or *content* unchanged when
            there are none or an unexpected error occurs.
        """
        try:
            unicode_pattern = re.compile(r'(?:\\u[0-9a-fA-F]{4})+')
            escape_runs = set(unicode_pattern.findall(content))
            if not escape_runs:
                return content

            # Phase 1: decode every distinct run.
            replacements = {}
            for run in escape_runs:
                try:
                    decoded_string = run.decode('unicode_escape')
                    replacements[run] = decoded_string
                    self._print_debug("Unicode decoded: " + run + " -> " + decoded_string)
                except Exception:
                    # Undecodable (or not loggable) run; move on to the next.
                    continue

            if not replacements:
                return content

            self._print_debug("Total Unicode sequences decoded: " + str(len(replacements)))

            # Phase 2: substitute each occurrence exactly once, left to right.
            def substitute(found):
                run = found.group(0)
                return replacements.get(run, run)

            return unicode_pattern.sub(substitute, content)

        except Exception as e:
            self._print_error("Error decoding Unicode: " + str(e))
            return content

    def _is_printable_content(self, data):
        """Report whether more than 70% of *data* counts as printable.

        An element counts as printable when its ordinal is in 32..126, is one
        of 9, 10 or 13, or is 128 or above.

        Args:
            data: byte string to inspect.

        Returns:
            ``True`` when the printable share exceeds 0.7; ``False`` for empty
            input or when the data cannot be inspected.
        """
        def counts_as_printable(code):
            if code >= 128:
                # Non-ASCII values are accepted so multi-byte text passes.
                return True
            if code in (9, 10, 13):
                return True
            return 32 <= code <= 126

        try:
            total = len(data)
            if total == 0:
                return False

            hits = sum(1 for item in data if counts_as_printable(ord(item)))
            return float(hits) / total > 0.7

        except Exception:
            return False

    def _print_debug(self, message):
        """Print *message* prefixed with ``[DEBUG] ``."""
        line = "[DEBUG] " + message
        print(line)

    def _print_error(self, message):
        """Print *message* prefixed with ``[ERROR] ``."""
        line = "[ERROR] " + message
        print(line)

版权声明：本站原创文章，由gdd发表在学习笔记分类下，于2025年06月20日最后更新
转载请注明：Burp Suite插件：美化JSON/JSONP、解码Base64和Unicode> | 蜗居 - [复制链接]

给我留言取消回复