-
Notifications
You must be signed in to change notification settings - Fork 293
Aruha 473 check size of events #515
Changes from 40 commits
9dabfa2
cc50d0c
761d455
146d6a0
8849782
a061392
0a791da
df5b675
e261cb8
217be7f
ddd7a4f
957938b
49c1846
cc15d1a
3f9bf87
6f7a450
f315836
8871dea
f5e6367
d5e6f3d
12253ed
30da84a
7287c40
d8af912
0fbaa46
20aed22
b9c36f8
33bbc03
f6fcfe9
f90e477
43a337a
bf90d6e
0330145
4cc0bcd
30db7f5
781db1e
ffd729a
e3a4562
58cb450
3f2a2ac
57e67d3
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,19 +1,82 @@ | ||
package org.zalando.nakadi.domain; | ||
|
||
import org.json.JSONArray; | ||
import org.json.JSONException; | ||
|
||
import java.util.ArrayList; | ||
import java.util.List; | ||
|
||
public class BatchFactory { | ||
|
||
public static List<BatchItem> from(final JSONArray events) { | ||
final List<BatchItem> batch = new ArrayList<>(events.length()); | ||
for (int i = 0; i < events.length(); i++) { | ||
batch.add(new BatchItem(events.getJSONObject(i))); | ||
public static List<BatchItem> from(final String events) { | ||
final List<BatchItem> batch = new ArrayList<>(); | ||
StringBuilder sb = new StringBuilder(); | ||
int brackets = 0; | ||
boolean insideQuote = false; | ||
boolean escaped = false; | ||
int start = 0; | ||
final int length = events.length(); | ||
int end = length - 1; | ||
|
||
while (isEmptyCharacter(events.charAt(start)) && start < end) { | ||
start++; | ||
} | ||
while (isEmptyCharacter(events.charAt(end)) && end > start) { | ||
end--; | ||
} | ||
if (!(events.charAt(start) == '[')) { | ||
throw new JSONException(String.format("Unexpected character %s in position %d, expected '['", | ||
events.charAt(start), start)); | ||
} | ||
start++; | ||
if (!(events.charAt(end) == ']')) { | ||
throw new JSONException(String.format("Unexpected character %s in position %d, expected ']'", | ||
events.charAt(end), end)); | ||
} | ||
|
||
for (int i = start; i < end; i++) { | ||
if (!escaped && events.charAt(i) == '"') { | ||
if (insideQuote) { | ||
insideQuote = false; | ||
} else { | ||
insideQuote = true; | ||
} | ||
} | ||
if (escaped) { | ||
sb.append(events.charAt(i)); | ||
escaped = false; | ||
} else if (!escaped && events.charAt(i) == '\\') { | ||
sb.append(events.charAt(i)); | ||
escaped = true; | ||
} else if (insideQuote) { | ||
sb.append(events.charAt(i)); | ||
} else { | ||
if (events.charAt(i) == '{') { | ||
brackets++; | ||
} | ||
if (events.charAt(i) == '}') { | ||
brackets--; | ||
} | ||
if (!((brackets == 0) && ((events.charAt(i) == ',') | ||
|| isEmptyCharacter(events.charAt(i))))) { | ||
sb.append(events.charAt(i)); | ||
} | ||
if (brackets == 0 && !isEmptyCharacter(events.charAt(i))) { | ||
if (sb.length() > 0) { | ||
batch.add(new BatchItem(sb.toString())); | ||
} | ||
sb = new StringBuilder(); | ||
} | ||
} | ||
} | ||
|
||
if (sb.length() != 0) { | ||
batch.add(new BatchItem(sb.toString())); | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Well, I completely disagree with this (I mean the fact that we are measuring user input), but there is a bug here: There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @antban As for measuring user input, I thought it was what we had agreed on earlier this week (or was it last week?). Happy to discuss it offline if I'm wrong. |
||
} | ||
|
||
return batch; | ||
} | ||
|
||
private static boolean isEmptyCharacter(final char c) { | ||
return (c == ' ' || c == '\t' || c == '\n' || c == '\r'); | ||
} | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3,19 +3,22 @@ | |
import org.json.JSONObject; | ||
|
||
import javax.annotation.Nullable; | ||
import java.nio.charset.StandardCharsets; | ||
import java.util.Optional; | ||
|
||
public class BatchItem { | ||
private final BatchItemResponse response; | ||
private final JSONObject event; | ||
private String partition; | ||
private String brokerId; | ||
private int eventSize; | ||
|
||
public BatchItem(final JSONObject event) { | ||
public BatchItem(final String event) { | ||
this.event = new JSONObject(event); | ||
this.eventSize = event.getBytes(StandardCharsets.UTF_8).length; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I think this method also generates a copy of the original array. |
||
this.response = new BatchItemResponse(); | ||
this.event = event; | ||
|
||
Optional.ofNullable(event.optJSONObject("metadata")) | ||
Optional.ofNullable(this.event.optJSONObject("metadata")) | ||
.map(e -> e.optString("eid", null)) | ||
.ifPresent(this.response::setEid); | ||
} | ||
|
@@ -58,4 +61,8 @@ public void updateStatusAndDetail(final EventPublishingStatus publishingStatus, | |
response.setPublishingStatus(publishingStatus); | ||
response.setDetail(detail); | ||
} | ||
|
||
public int getEventSize() { | ||
return eventSize; | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was wondering if isn't there a way to build this substring with zero copy. I couldn't find, because since Java 7 String.substring creates a copy of the original char array.
I think it would be valuable to look at
JSONTokener
and try to figure out if isn't there a way to avoid extra copies of this string. This is the most executed code path in the project. I would expect that this change has an impact on garbage collection and overall performance.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Maybe if it wasn't a string but an array of bytes, then we could just create strings using
String(bytes[] bytes, int offset, int length)
. I'm not sure, I think we should investigate more and reduce memory footprint.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good point. I wonder if we could use
CharBuffer.wrap(..)
.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I looked into this, and short of using another JSON parser, or doing some ugly size validation inside the controller, I don't see how we can improve this. We should discuss this in more detail.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ok. So let's move forward with this solution and we can test the performance impact on staging.