Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Codegen generating deserializers and serializers for the JDWP commands #73

Open
wants to merge 13 commits into
base: main
Choose a base branch
from
Empty file.
31 changes: 31 additions & 0 deletions projects/jdwp/serializers/jdwp_packet_erializer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
""" JDWP serializer classes. """
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How do you see the classes in this file being used? They are not used by the codegen, so it's not yet clear to me whether they should be included in this PR.



class JDWPPacketHeader:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's probably a good idea to make this a dataclasses.dataclass or a typing.NamedTuple.

def __init__(self, length: int, id: int, flags: int, command_set: int, command: int):
    """Store the five header fields exactly as supplied.

    Nothing is validated or converted here; each argument is kept on the
    instance under the attribute of the same name.
    """
    # ``id`` shadows the builtin, but it is part of the public signature.
    (
        self.length,
        self.id,
        self.flags,
        self.command_set,
        self.command,
    ) = (length, id, flags, command_set, command)

def serialize(self) -> bytes:
    """Encode the header as 11 big-endian bytes.

    Layout, in order: ``length`` (4 bytes), ``id`` (4 bytes), ``flags``
    (1 byte), ``command_set`` (1 byte), ``command`` (1 byte).

    Raises:
        OverflowError: if a field is negative or too large for its width.
    """
    # (value, width-in-bytes) pairs in wire order.
    layout = (
        (self.length, 4),
        (self.id, 4),
        (self.flags, 1),
        (self.command_set, 1),
        (self.command, 1),
    )
    return b"".join(value.to_bytes(width, byteorder='big') for value, width in layout)


class JDWPPacket:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

And this too.

def __init__(self, header: JDWPPacketHeader, payload: bytes):
    """Pair a packet header with its payload bytes.

    Both arguments are stored as given; neither is copied or validated.
    """
    self.header, self.payload = header, payload

def serialize(self) -> bytes:
    """Return the serialized header immediately followed by the payload."""
    return self.header.serialize() + self.payload

def serialize_jdwp_packet(packet: JDWPPacket) -> bytes:
    """Function-style wrapper: return whatever ``packet.serialize()`` produces."""
    encoded = packet.serialize()
    return encoded
21 changes: 21 additions & 0 deletions projects/jdwp/serializers/reference_type_serializer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
"""Command Set: ReferenceType """
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please don't check generated code in; we'll set up Buck rules that generate those files on the fly when the debugger is built.


from projects.jdwp.defs.command_sets.reference_type import Signature


class SignatureCommand:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think we want to generate something more like this:

@dataclasses.dataclass
class SignatureCommand(Command):
    types: ReferenceTypeId
    
    async def serialize(...):
        ...

    @staticmethod
    async def parse(...):
        ...

    async def parse_response():
       ...

@staticmethod
def serialize(command):
    """Encode ``command.refType`` as an 8-byte big-endian ``bytearray``."""
    return bytearray(command.refType.to_bytes(8, 'big'))



@staticmethod
def deserialize(data):
    """Decode a ``Signature`` command from the front of ``data``.

    Returns:
        A ``(command, rest)`` pair: ``command.refType`` holds the big-endian
        integer read from the first 8 bytes, and ``rest`` is ``data[8:]``.
    """
    command = Signature()
    command.refType = int.from_bytes(data[:8], 'big')
    remainder = data[8:]
    return command, remainder

99 changes: 99 additions & 0 deletions projects/jdwp/serializers/serializer_codegen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
import re
import os
from projects.jdwp.defs.schema import Type, Struct, CommandSet, Command, Field
from projects.jdwp.defs.command_sets.reference_type import ReferenceType


class CommandSerializerGenerator:
def __init__(self) -> None:
    """Create a generator with an empty generated-source buffer."""
    # Overwritten by generate_command_set_file() with the source it builds.
    self.serializer_code: str = ""

def generate_field_serializer(self, field: Field) -> str:
serializer_code: str = ""
if isinstance(field, Struct):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should never be true; it should be (and I believe is) guaranteed at the type-system level.

for subfield in field.fields:
serializer_code += self.generate_field_serializer(subfield)
elif field.type == Type.INT:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What could work pretty well here is pattern matching:

match field.type:
    case Type.INT:
        return f"out.writeInt(self.{field.name})"

    case Type.STRING:
        return f"out.writeString(self.{field.name})"

    ...

    case _:
        raise Exception(f"Unrecognized type: {field.type}")

serializer_code = (
f" serialized_data += command.{field.name}.to_bytes(4, 'big')"
)
elif field.type == Type.STRING:
serializer_code = (
f" serialized_data += command.{field.name}.encode('utf-8')"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not the format that we want. The spec says:

A UTF-8 encoded string, not zero terminated, preceded by a four-byte integer length.
I think it would be a good approach to introduce some kind of input and output stream abstractions that know how to read/write values of all the types represented by the PrimitiveType union.

)
elif field.type == Type.OBJECT_ID:
serializer_code = (
f" serialized_data += command.{field.name}.to_bytes(8, 'big')"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is not that simple. The spec says:

Object ids, reference type ids, field ids, method ids, and frame ids may be sized differently in different target VM implementations. Typically, their sizes correspond to size of the native identifiers used for these items in JNI and JVMDI calls. The maximum size of any of these types is 8 bytes. The "idSizes" command in the VirtualMachine command set is used by the debugger to determine the size of each of these types.

This is one more reason to have input/output stream abstractions. If we do that then we'll be able to configure them with output of "idSizes" command.

)
elif field.type == Type.REFERENCE_TYPE_ID:
serializer_code = (
f" serialized_data += command.{field.name}.to_bytes(8, 'big')"
)
return serializer_code

def generate_field_deserializer(self, field: Field) -> str:
    """Return generated statements that decode ``field`` from ``data``.

    The statements are meant for the body of a generated ``deserialize``
    method, where ``command`` is the object being filled in and ``data``
    holds the bytes not consumed yet. Every snippet stores the decoded
    value on the ``command`` attribute named after the field and rebinds
    ``data`` to the remaining bytes.

    Args:
        field: A schema field, or a ``Struct`` whose ``fields`` are handled
            recursively.

    Returns:
        Newline-separated statements without a trailing newline, or an
        empty string when the field type is not recognised.
    """
    # Generated statements live in a method body nested inside a class.
    # NOTE(review): assumes the command templates use 4-space indentation
    # (class -> method -> body = 8 spaces) -- confirm.
    indent = " " * 8

    if isinstance(field, Struct):
        snippets = [self.generate_field_deserializer(sub) for sub in field.fields]
        # Newline-join so consecutive fields never share a source line.
        return "\n".join(snippet for snippet in snippets if snippet)

    name = field.name
    if field.type == Type.STRING:
        # JDWP strings are a four-byte big-endian length followed by that
        # many UTF-8 bytes, with no terminator.
        statements = [
            "string_length = int.from_bytes(data[:4], 'big')",
            f"command.{name} = data[4:4 + string_length].decode('utf-8')",
            "data = data[4 + string_length:]",
        ]
    elif field.type == Type.INT:
        statements = [
            f"command.{name} = int.from_bytes(data[:4], 'big')",
            "data = data[4:]",
        ]
    elif field.type in (Type.OBJECT_ID, Type.REFERENCE_TYPE_ID):
        # NOTE(review): JDWP lets the target VM choose ID sizes (reported by
        # the VirtualMachine IDSizes command); the fixed width of 8 is kept
        # from the original implementation.
        statements = [
            f"command.{name} = int.from_bytes(data[:8], 'big')",
            "data = data[8:]",
        ]
    else:
        # Unrecognised types produce no code, as before.
        return ""

    return "\n".join(indent + statement for statement in statements)

def generate_command_serializer(self, command: Command) -> str:
    """Return the source of a ``<Name>Command`` class with a ``serialize`` method.

    The generated static method creates a ``bytearray`` named
    ``serialized_data``, appends every field of ``command.out`` through the
    statements produced by ``generate_field_serializer`` and returns it.

    Args:
        command: Command definition; ``command.name`` names the class and
            ``command.out.fields`` lists the fields to encode, the same
            access pattern ``generate_command_deserializer`` uses.

    Returns:
        Class source that starts with a blank line and ends with a newline.
    """
    body_indent = " " * 8
    # Field snippets are flat statements. Emit one per line at method-body
    # depth so several fields never run together and the result parses
    # regardless of the leading whitespace the field generator used.
    body = "\n".join(
        body_indent + line.strip()
        for field in command.out.fields
        for line in self.generate_field_serializer(field).splitlines()
        if line.strip()
    )
    lines = [
        "",
        f"class {command.name}Command:",
        "    @staticmethod",
        "    def serialize(command):",
        "        serialized_data = bytearray()",
        body,
        "        return serialized_data",
        "",
    ]
    return "\n".join(lines)

def generate_command_deserializer(self, command: Command) -> str:
    """Return the source of a static ``deserialize`` method for ``command``.

    The method is emitted at class-member indentation so it can be appended
    to the class produced by ``generate_command_serializer``. It
    instantiates ``command.name``, decodes every field of
    ``command.out.fields`` with the statements from
    ``generate_field_deserializer`` and returns ``(command, data)``, where
    ``data`` is whatever input was not consumed.

    Args:
        command: Command definition providing ``name`` and ``out.fields``.

    Returns:
        Method source that starts with a blank line and ends with a newline.
    """
    body_indent = " " * 8
    # Emit one statement per line. The per-field snippets carry no trailing
    # newline, so plain concatenation would glue the next field onto the
    # previous ``data = ...`` line.
    body = "\n".join(
        body_indent + line.strip()
        for field in command.out.fields
        for line in self.generate_field_deserializer(field).splitlines()
        if line.strip()
    )
    lines = [
        "",
        "    @staticmethod",
        "    def deserialize(data):",
        f"        command = {command.name}()",
        body,
        "        return command, data",
        "",
    ]
    return "\n".join(lines)

def _convert_to_snake_case(self, name: str) -> str:
    """Lower-case ``name``, putting ``_`` before each capital that follows a
    lower-case letter or digit (``ReferenceType`` -> ``reference_type``)."""
    with_underscores = re.sub("([a-z0-9])([A-Z])", r"\1_\2", name)
    return with_underscores.lower()

def generate_command_set_file(self, command_set: CommandSet) -> str:
    """Build the full source of the serializer module for ``command_set``.

    The module consists of a docstring, one import of every command name
    from ``projects.jdwp.defs.command_sets.<snake_case set name>``, and,
    for each command, its serializer class directly followed by its
    ``deserialize`` method. Keeping the two together matters: the
    deserializer is emitted as a bare method, so it becomes part of
    whichever class precedes it in the file.

    The result is also stored on ``self.serializer_code``.

    Args:
        command_set: Command set providing ``name`` and ``commands``.

    Returns:
        The generated module source.
    """
    module_name = self._convert_to_snake_case(command_set.name)
    imported_names = ", ".join(command.name for command in command_set.commands)
    header = (
        f'"""Command Set: {command_set.name} """\n'
        "\n"
        f"from projects.jdwp.defs.command_sets.{module_name} import {imported_names}\n"
        "\n"
    )
    # One section per command: the class, then the method that belongs to it.
    sections = [
        self.generate_command_serializer(command)
        + "\n\n"
        + self.generate_command_deserializer(command)
        for command in command_set.commands
    ]
    self.serializer_code = header + "\n".join(sections) + "\n"
    return self.serializer_code

def generate_serializer_file(self, command_set: CommandSet) -> None:
    """Generate the serializer module for ``command_set`` and write it to disk.

    The file is named ``<snake_case set name>_serializer.py`` and is written
    into the directory that contains this module, replacing any existing
    file of that name. A confirmation line is printed afterwards.
    """
    file_stem = self._convert_to_snake_case(command_set.name)
    serializer_file_name: str = f"{file_stem}_serializer.py"
    source_text: str = self.generate_command_set_file(command_set)

    # Resolve symlinks so the output lands next to the real module file.
    module_dir = os.path.dirname(os.path.realpath(__file__))
    with open(os.path.join(module_dir, serializer_file_name), "w") as output_file:
        output_file.write(source_text)

    print(f"Generated serializer code saved to {serializer_file_name}")


# Module-level driver: these statements execute whenever this module is run or
# imported, and write the ReferenceType serializer file as a side effect.
serializer_generator = CommandSerializerGenerator()
# generate_serializer_file() is annotated ``-> None`` and has no return
# statement, so gen_file is always None.
gen_file = serializer_generator.generate_serializer_file(ReferenceType)
Empty file added projects/jdwp/tests/__init__.py
Empty file.
17 changes: 17 additions & 0 deletions projects/jdwp/tests/test_jdwp_serialization.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import unittest
from projects.jdwp.serializers.jdwp_packet_erializer import JDWPPacketHeader, JDWPPacket, serialize_jdwp_packet


class TestJDWPPacketSerialization(unittest.TestCase):
    """Checks the size and layout of a serialized JDWP packet."""

    def test_jdwp_packet_serialization(self):
        """A packet serializes to its 11-byte header followed by the payload."""
        payload = b"Sample Payload"
        # JDWPPacketHeader.serialize() emits 4 + 4 + 1 + 1 + 1 = 11 bytes, and
        # the JDWP length field counts the whole packet, header included.
        header_size = 11
        total_length = header_size + len(payload)

        header = JDWPPacketHeader(total_length, 1, 0x80, 2, 3)
        packet = JDWPPacket(header, payload)

        serialized_packet = serialize_jdwp_packet(packet)

        self.assertEqual(len(serialized_packet), total_length)
        self.assertEqual(
            serialized_packet[:4], total_length.to_bytes(4, byteorder="big")
        )
        self.assertEqual(serialized_packet[header_size:], payload)


# Run the tests in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()
Loading