From f603307d3285c5495a081eca243b80612698e696 Mon Sep 17 00:00:00 2001 From: John Plaisted Date: Thu, 29 Oct 2020 13:00:28 -0700 Subject: [PATCH] [Breaking] Update definition of URN to match LI's internal definition. (#27) This should help us avoid more breakages when bringing in changes / pushing out changes. We're less likely to hit compiler errors due to bad references to Urn. --- .../com/linkedin/common/urn/TupleKey.java | 344 ++++++++++++++ .../com/linkedin/common/urn/Urn.java | 444 +++++++++++++++--- .../com/linkedin/common/urn/UrnCoercer.java | 28 ++ .../com/linkedin/common/urn/Urns.java | 37 ++ .../metadata/dao/ImmutableLocalDAO.java | 3 +- .../metadata/dao/EbeanLocalDAOTest.java | 41 +- .../BaseVersionedAspectResourceTest.java | 9 +- version.properties | 2 +- 8 files changed, 821 insertions(+), 87 deletions(-) create mode 100644 core-models/src/main/javaPegasus/com/linkedin/common/urn/TupleKey.java create mode 100644 core-models/src/main/javaPegasus/com/linkedin/common/urn/UrnCoercer.java create mode 100644 core-models/src/main/javaPegasus/com/linkedin/common/urn/Urns.java diff --git a/core-models/src/main/javaPegasus/com/linkedin/common/urn/TupleKey.java b/core-models/src/main/javaPegasus/com/linkedin/common/urn/TupleKey.java new file mode 100644 index 000000000..10183ecbd --- /dev/null +++ b/core-models/src/main/javaPegasus/com/linkedin/common/urn/TupleKey.java @@ -0,0 +1,344 @@ +package com.linkedin.common.urn; + +import com.linkedin.data.template.DataTemplateUtil; +import java.net.URISyntaxException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collection; +import java.util.Collections; +import java.util.List; + + +/** + * Represents the entity key portion of a Urn, encoded as a tuple of Strings. + * A single-element tuple is encoded simply as the value of that element. A tuple with multiple + * elements is encoded as a parenthesized list of strings, comma-delimited. 
+ */ +public class TupleKey { + public static final char START_TUPLE = '('; + public static final char END_TUPLE = ')'; + public static final char DELIMITER = ','; + + private List _tuple; + + public TupleKey(String... tuple) { + _tuple = Arrays.asList(checkStringsNotNull(tuple)); + } + + public TupleKey(List tuple) { + this(tuple, true); + } + + /** + * Constructs a {@code TupleKey} given a list of tuple parts. + *

+ * When {@code calledFromExternal} is {@code false}, it means the constructor + * was called from within this class, where we can ensure our implementation + * satisfies some constraints and skip some work. + *

+ * The work we skip is checking that no tuple parts are null and wrapping the + * list with an unmodifiable view. + *

+ * For context, an earlier performance optimization introduced Guava's + * {@code ImmutableList}, which gives both of those for free. Since then, we + * have encountered complications with Guava (specifically, Hadoop at the time + * of this writing requires using Guava 11 -- see LIHADOOP-44200). In order to + * resolve that with minimal effect, we copy this behavior here. + *

+ * Whether this optimization is meaningful can be examined later, if time is + * permitting, or {@code List#copyOf} from JDK 10 can be used to recover the + * benefits more elegantly when it is available for us to use. + * + * @param tuple tuple parts + * @param calledFromExternal whether the constructions is invoked from outside + * of this class + */ + private TupleKey(List tuple, boolean calledFromExternal) { + _tuple = calledFromExternal ? Collections.unmodifiableList(checkStringsNotNull(tuple)) : tuple; + } + + // This constructor is intentionally made non-public and should only be + // invoked by the convenient method createWithOneKeyPart. + // The reason why the String-vararg overload is insufficient is because it + // creates needless garbage in the case of a single element. That vararg + // methods allocates an array for the call, then copies that into a list. + private TupleKey(String oneElement) { + if (oneElement == null) { + throw new NullPointerException("Cannot create URN with null part."); + } + _tuple = Collections.singletonList(oneElement); + } + + public static TupleKey createWithOneKeyPart(String input) { + return new TupleKey(input); + } + + /** + * Create a tuple key from a sequence of Objects. The resulting tuple + * consists of the sequence of String values resulting from calling .toString() on each + * object in the input sequence + * + * @param tuple - a sequence of Objects to be represented in the tuple + * @return - a TupleKey representation of the object sequence + */ + public static TupleKey create(Object... 
tuple) { + List parts = new ArrayList(tuple.length); + + for (Object o : tuple) { + if (o == null) { + throw new NullPointerException("Cannot create a Urn from tuple with null parameter."); + } + + String objString = o.toString(); + if (objString.isEmpty()) { + throw new IllegalArgumentException("Cannot create a Urn from tuple with an empty value."); + } + parts.add(objString); + } + return new TupleKey(Collections.unmodifiableList(parts), false); + } + + /** + * Create a tuple key from a sequence of Objects. The resulting tuple + * consists of the sequence of String values resulting from calling .toString() on each + * object in the input sequence + * + * @param tuple - a sequence of Objects to be represented in the tuple + * @return - a TupleKey representation of the object sequence + */ + public static TupleKey create(Collection tuple) { + List parts = new ArrayList(tuple.size()); + + for (Object o : tuple) { + if (o == null) { + throw new NullPointerException("Cannot create a Urn from tuple with null parameter."); + } + parts.add(o.toString()); + } + return new TupleKey(Collections.unmodifiableList(parts), false); + } + + public String getFirst() { + return _tuple.get(0); + } + + public String get(int index) { + return _tuple.get(index); + } + + /** + * Return a tuple element coerced to a specific type + * + * @param index - the index of the tuple element to be returned + * @param clazz - the Class object for the return type. Must be String, Short, Boolean, Integer, Long, or an Enum subclass + * @param - the desired type for the returned object. + * @return The specified element of the tuple, coerced to the specified type T. 
+ */ + public T getAs(int index, Class clazz) { + String value = get(index); + + Object result; + + if (value == null) { + return null; + } else if (String.class.equals(clazz)) { + result = value; + } else if (Short.TYPE.equals(clazz) || Short.class.equals(clazz)) { + result = Short.valueOf(value); + } else if (Boolean.class.equals(clazz) || Boolean.TYPE.equals(clazz)) { + if (!value.equalsIgnoreCase("true") && !value.equalsIgnoreCase("false")) { + throw new IllegalArgumentException("Invalid boolean value: " + value); + } + result = Boolean.valueOf(value); + } else if (Integer.TYPE.equals(clazz) || Integer.class.equals(clazz)) { + result = Integer.valueOf(value); + } else if (Long.TYPE.equals(clazz) || Long.class.equals(clazz)) { + result = Long.valueOf(value); + } else if (Enum.class.isAssignableFrom(clazz)) { + final Class enumClazz = clazz.asSubclass(Enum.class); + @SuppressWarnings("unchecked") + Enum enumValue = Enum.valueOf(enumClazz, value); + result = enumValue; + } else if (DataTemplateUtil.hasCoercer(clazz)) { + result = DataTemplateUtil.coerceOutput(value, clazz); + } else { + throw new IllegalArgumentException("Cannot coerce String to type: " + clazz.getName()); + } + @SuppressWarnings("unchecked") + T rv = (T) result; + return rv; + } + + public int size() { + return _tuple.size(); + } + + public List getParts() { + return _tuple; + } + + @Override + public String toString() { + if (_tuple.size() == 1) { + return _tuple.get(0); + } else { + StringBuilder result = new StringBuilder(); + + result.append(START_TUPLE); + boolean delimit = false; + for (String value : _tuple) { + if (delimit) { + result.append(DELIMITER); + } + result.append(value); + delimit = true; + } + result.append(END_TUPLE); + return result.toString(); + } + } + + @Override + public int hashCode() { + return _tuple.hashCode(); + } + + @Override + public boolean equals(Object obj) { + if (this == obj) { + return true; + } + + if (obj == null || getClass() != obj.getClass()) { + return 
false; + } + + return _tuple.equals(((TupleKey) obj)._tuple); + } + + public static TupleKey fromString(String s) throws URISyntaxException { + return new TupleKey(parseKeyParts(s, 0), false); + } + + /** + * Create a tuple key from a string starting at the given index. + * @param s raw urn string or urn type specific string. + * @param startIndex index where urn type specific string starts. + * @return entity tuple key. + * @throws URISyntaxException if type specific string format is invalid. + */ + public static TupleKey fromString(String s, int startIndex) throws URISyntaxException { + return new TupleKey(parseKeyParts(s, startIndex), false); + } + + private static List parseKeyParts(String input, int startIndex) throws URISyntaxException { + if (startIndex >= input.length()) { + return Collections.emptyList(); + } + + // If there's no opening paren, there's only one tuple part. This is a very + // common case so we special-case it for perf. We must still verify that + // parens are balanced though. + if (input.charAt(startIndex) != START_TUPLE) { + if (!hasBalancedParens(input, startIndex)) { + throw new URISyntaxException(input, "mismatched paren nesting"); + } + return Collections.singletonList(input.substring(startIndex)); + } + + /* URNs with multiple-part ids overwhelmingly have just two or three parts. As of May 5, a check of + * existing typed URNs showed + * + * 890 single-part URN ids + * 397 two-part URN ids + * 86 three-part URN ids + * 10 four-part URN ids + * 1 five-part URN id + * 1 seven-part URN id + * + * One-part URN ids should not even reach this point. + * Specifying an initial capacity of three limits the wasted space for two-part URNs to one slot rather than + * eight (as it would be for a default ArrayList capacity of 10) while providing enough slots for the 97.5% + * of URN types which use three parts or fewer -- the rest will require some array expansion. 
+ */ + List parts = new ArrayList<>(3); + + int numStartedParenPairs = 1; // We know we have at least one starting paren + int partStart = startIndex + 1; // +1 to skip opening paren + for (int i = startIndex + 1; i < input.length(); i++) { + char c = input.charAt(i); + if (c == START_TUPLE) { + numStartedParenPairs++; + } else if (c == END_TUPLE) { + numStartedParenPairs--; + if (numStartedParenPairs < 0) { + throw new URISyntaxException(input, "mismatched paren nesting"); + } + } else if (c == DELIMITER) { + // If numStartedParenPairs == 0, then a comma is ignored because + // we're not in parens. If numStartedParenPairs >= 2, we're inside an + // nested paren pair and should also ignore the comma. + // Don't forget: (foo,bar(zoo,moo)) parsed is ["foo", "bar(zoo,moo)"]! + if (numStartedParenPairs != 1) { + continue; + } + + // Case: "(,,)" or "(,foo)" etc + if (i - partStart <= 0) { + throw new URISyntaxException(input, "empty part disallowed"); + } + parts.add(input.substring(partStart, i)); + partStart = i + 1; + } + } + + if (numStartedParenPairs != 0) { + throw new URISyntaxException(input, "mismatched paren nesting"); + } + + int lastPartEnd = input.charAt(input.length() - 1) == END_TUPLE ? input.length() - 1 : input.length(); + + if (lastPartEnd - partStart <= 0) { + throw new URISyntaxException(input, "empty part disallowed"); + } + + parts.add(input.substring(partStart, lastPartEnd)); + return Collections.unmodifiableList(parts); + } + + private static boolean hasBalancedParens(String input, int startIndex) { + int numStartedParenPairs = 0; + for (int i = startIndex; i < input.length(); i++) { + char c = input.charAt(i); + if (c == START_TUPLE) { + numStartedParenPairs++; + } else if (c == END_TUPLE) { + numStartedParenPairs--; + if (numStartedParenPairs < 0) { + return false; + } + } + } + return numStartedParenPairs == 0; + } + + private static String[] checkStringsNotNull(String... 
array) { + for (int i = 0; i < array.length; i++) { + if (array[i] == null) { + throw new NullPointerException("at index " + i); + } + } + return array; + } + + private static List checkStringsNotNull(List list) { + int i = 0; + for (String str : list) { + if (str == null) { + throw new NullPointerException("at index " + i); + } + i++; + } + return list; + } +} \ No newline at end of file diff --git a/core-models/src/main/javaPegasus/com/linkedin/common/urn/Urn.java b/core-models/src/main/javaPegasus/com/linkedin/common/urn/Urn.java index aa9418fe3..84231fdf3 100644 --- a/core-models/src/main/javaPegasus/com/linkedin/common/urn/Urn.java +++ b/core-models/src/main/javaPegasus/com/linkedin/common/urn/Urn.java @@ -1,117 +1,439 @@ package com.linkedin.common.urn; import com.linkedin.data.template.Custom; -import com.linkedin.data.template.DirectCoercer; -import com.linkedin.data.template.TemplateOutputCastException; - -import javax.annotation.Nonnull; +import com.linkedin.util.ArgumentUtil; import java.net.URISyntaxException; -import java.util.regex.Matcher; -import java.util.regex.Pattern; +import java.util.Collection; +import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import javax.annotation.Nullable; +/** + * Represents a URN (Uniform Resource Name) for a Linkedin entity, in the spirit of RFC 2141. + * Our default URN format uses the non-standard namespace identifier "li", and hence default URNs + * begin with "urn:li:". Note that the namespace according to + * RFC 2141 [Section 2.1] is case-insensitive and + * for safety we only allow lower-case letters in our implementation. + * + *

Our URNs all consist of an "entity type", which denotes an internal namespace for the resource, + * as well as an entity key, formatted as a tuple of parts. The full format of a URN is: + * + *

<URN> ::= urn:<namespace>:<entityType>:<entityKey> + * + *

The entity key is represented as a tuple of strings. If the tuple is of length 1, the + * key is encoded directly. If the tuple has multiple parts, the parts are enclosed in + * parentheses and comma-delimited, e.g., a URN whose key is the tuple [1, 2, 3] would be + * encoded as: + *

urn:li:example:(1,2,3) + */ public class Urn { + /** + * + * @deprecated Don't create the Urn string manually, use Typed Urns or {@link #create(String entityType, Object... + * tupleParts)} + */ + @Deprecated + public static final String URN_PREFIX = "urn:li:"; - static final String URN_PREFIX = "urn:li:"; - - // the URN format is urn:li:: - private static final Pattern URN_PATTERN = Pattern.compile("^" + URN_PREFIX + "(\\w+?):(.+)$"); + private static final String URN_START = "urn:"; + private static final String DEFAULT_NAMESPACE = "li"; private final String _entityType; - private final String _urn; - private final String _content; + private final TupleKey _entityKey; + private final String _namespace; + + // Used to speed up toString() in the common case where the Urn is built up + // from parsing an input string. + @Nullable + private String _cachedStringUrn; + + static { + Custom.registerCoercer(new UrnCoercer(), Urn.class); + } + + /** + * Customized interner for all strings that may be used for _entityType. + * Urn._entityType is by nature a pretty small set of values, such as "member", + * "company" etc. Due to this fact, when an app creates and keeps in memory a + * large number of Urn's, it may end up with a very big number of identical strings. + * Thus it's worth saving memory by interning _entityType when an Urn is instantiated. + * String.intern() would be a natural choice, but it takes a few microseconds, and + * thus may become too expensive when many (temporary) Urns are generated in very + * quick succession. Thus we use a faster CHM below. Compared to the internal table + * used by String.intern() it has a bigger memory overhead per each interned string, + * but for a small set of canonical strings it doesn't matter. + */ + private static final Map ENTITY_TYPE_INTERNER = new ConcurrentHashMap<>(); + /** + * Create a Urn given its raw String representation. + * @param rawUrn - the String representation of a Urn. 
+ * @throws URISyntaxException - if the String is not a valid Urn. + */ public Urn(String rawUrn) throws URISyntaxException { - Matcher matcher = URN_PATTERN.matcher(rawUrn); - if (matcher.find()) { - this._urn = rawUrn; - this._entityType = matcher.group(1); - this._content = matcher.group(2); - } else { - throw new URISyntaxException(rawUrn, "URN deserialization error"); + ArgumentUtil.notNull(rawUrn, "rawUrn"); + _cachedStringUrn = rawUrn; + + if (!rawUrn.startsWith(URN_START)) { + throw new URISyntaxException( + rawUrn, + "Urn doesn't start with 'urn:'. Urn: " + rawUrn, + 0); + } + + int secondColonIndex = rawUrn.indexOf(':', URN_START.length() + 1); + _namespace = validateAndExtractNamespace(rawUrn, secondColonIndex); + + // First char of entityType must be [a-z] + if (!charIsLowerCaseAlphabet(rawUrn, secondColonIndex + 1)) { + throw new URISyntaxException( + rawUrn, + "First char of entityType must be [a-z]! Urn: " + rawUrn, + secondColonIndex + 1); + } + + int thirdColonIndex = rawUrn.indexOf(':', secondColonIndex + 2); + + // Case: urn:li:foo + if (thirdColonIndex == -1) { + _entityType = rawUrn.substring(secondColonIndex + 1); + if (!charsAreWordClass(_entityType)) { + throw new URISyntaxException( + rawUrn, + "entityType must have only [a-zA-Z0-9] chars. Urn: " + rawUrn); + } + _entityKey = new TupleKey(); + return; + } + + String entityType = rawUrn.substring(secondColonIndex + 1, thirdColonIndex); + if (!charsAreWordClass(entityType)) { + throw new URISyntaxException( + rawUrn, + "entityType must have only [a-zA-Z_0-9] chars. Urn: " + rawUrn); + } + + int numEntityKeyChars = rawUrn.length() - (thirdColonIndex + 1); + if (numEntityKeyChars <= 0) { + throw new URISyntaxException( + rawUrn, + "Urns with empty entityKey are not allowed. 
Urn: " + rawUrn); } + + _entityType = internEntityType(entityType); + _entityKey = TupleKey.fromString(rawUrn, thirdColonIndex + 1); + + // For the sake of backwards compatibility, we must ensure that + // new Urn("urn:li:y:(urn:li:z:1)").toString() == "urn:li:y:urn:li:z:1" + // Thus, if we detect a TupleKey with 1 part AND we had a paren in the + // input, we abort our optimization of storing the original URN. + if (_entityKey.size() == 1 && rawUrn.charAt(thirdColonIndex + 1) == '(') { + _cachedStringUrn = null; + } + } + + /** + * Create a Urn from an entity type and an encoded String key. The key is converted to a + * Tuple by parsing using @see TupleKey#fromString + * + * @param entityType - the entity type for the Urn + * @param typeSpecificString - the encoded string representation of a TupleKey + * @throws URISyntaxException if the typeSpecificString is not a valid encoding of a TupleKey + */ + public Urn(String entityType, String typeSpecificString) throws URISyntaxException { + this(DEFAULT_NAMESPACE, entityType, TupleKey.fromString(typeSpecificString)); } - public Urn(String entityType, String content) { - this._entityType = entityType; - this._content = content; - this._urn = URN_PREFIX + entityType + ":" + content; + public Urn(String entityType, TupleKey entityKey) { + this(DEFAULT_NAMESPACE, entityType, entityKey); } + public Urn(String namespace, String entityType, TupleKey entityKey) { + _namespace = namespace; + _entityType = entityType; + _entityKey = entityKey; + _cachedStringUrn = null; + } + + /** + * DEPRECATED - use {@link #createFromTuple(String, Object...)} + * Create a Urn from an entity type and a sequence of key parts. The key parts are converted + * to a tuple using @see TupleKey#create + * + * @param entityType - the entity type for the Urn + * @param tupleParts - a sequence of objects representing the key of the Urn + * @return - a new Urn object + */ + @Deprecated + public static Urn create(String entityType, Object... 
tupleParts) { + return new Urn(entityType, TupleKey.create(tupleParts)); + } + + /** + * DEPRECATED - use {@link #createFromTuple(String, java.util.Collection)} + * Create a Urn from an entity type and a sequence of key parts. The key parts are converted + * to a tuple using @see TupleKey#create + * + * @param entityType - the entity type for the Urn + * @param tupleParts - a sequence of objects representing the key of the Urn + * @return - a new Urn object + */ + @Deprecated + public static Urn create(String entityType, Collection tupleParts) { + return new Urn(entityType, TupleKey.create(tupleParts)); + } + + /** + * Create a Urn from an entity type and a sequence of key parts. The key parts are converted + * to a tuple using @see TupleKey#create + * + * @param entityType - the entity type for the Urn + * @param tupleParts - a sequence of objects representing the key of the Urn + * @return - a new Urn object + */ + public static Urn createFromTuple(String entityType, Object... tupleParts) { + return new Urn(entityType, TupleKey.create(tupleParts)); + } + + /** + * Create a Urn from an namespace, entity type and a sequence of key parts. The key parts are converted + * to a tuple using @see TupleKey#create + * + * @param namespace - The namespace of this urn. + * @param entityType - the entity type for the Urn + * @param tupleParts - a sequence of objects representing the key of the Urn + * @return - a new Urn object + */ + public static Urn createFromTupleWithNamespace(String namespace, String entityType, Object... tupleParts) { + return new Urn(namespace, entityType, TupleKey.create(tupleParts)); + } + + /** + * Create a Urn from an entity type and a sequence of key parts. 
The key parts are converted + * to a tuple using @see TupleKey#create + * + * @param entityType - the entity type for the Urn + * @param tupleParts - a sequence of objects representing the key of the Urn + * @return - a new Urn object + */ + public static Urn createFromTuple(String entityType, Collection tupleParts) { + return new Urn(entityType, TupleKey.create(tupleParts)); + } + + /** + * Create a Urn given its raw String representation. + * @param rawUrn - the String representation of a Urn. + * @throws URISyntaxException - if the String is not a valid Urn. + */ public static Urn createFromString(String rawUrn) throws URISyntaxException { return new Urn(rawUrn); } + /** + * Create a Urn given its raw CharSequence representation. + * @param rawUrn - the Char Sequence representation of a Urn. + * @throws URISyntaxException - if the String is not a valid Urn. + */ + public static Urn createFromCharSequence(CharSequence rawUrn) throws URISyntaxException { + ArgumentUtil.notNull(rawUrn, "rawUrn"); + return new Urn(rawUrn.toString()); + } + + /** + * Create a Urn from an entity type and an encoded String key. 
The key is converted to a + * Tuple by parsing using @see TupleKey#fromString + * + * @param entityType - the entity type for the Urn + * @param typeSpecificString - the encoded string representation of a TupleKey + * @throws URISyntaxException if the typeSpecificString is not a valid encoding of a TupleKey + */ + public static Urn createFromTypeSpecificString(String entityType, String typeSpecificString) + throws URISyntaxException { + return new Urn(entityType, typeSpecificString); + } + public String getEntityType() { return _entityType; } - public String getContent() { - return _content; + public String getNamespace() { + return _namespace; } - public Long getIdAsLong() { - return Long.valueOf(_content); + public TupleKey getEntityKey() { + return _entityKey; + } + + /** + * Convenience method to get the key's first tuple element as a String + * + * @return key's first tuple element + */ + public String getId() { + return _entityKey.getAs(0, String.class); } + /** + * Convenience method to get the key's first tuple element as an Integer + * + * @return key's first tuple element, coerced to Integer + */ public Integer getIdAsInt() { - return Integer.valueOf(_content); + return _entityKey.getAs(0, Integer.class); + } + + /** + * Convenience method to get the key's first tuple element as a Long + * + * @return key's first tuple element, coerced to Long + */ + public Long getIdAsLong() { + return _entityKey.getAs(0, Long.class); + } + + public Urn getIdAsUrn() { + return _entityKey.getAs(0, Urn.class); + } + + /** + * Return the namespace-specific string portion of this URN, i.e., + * everything following the "urn:<namespace>:" prefix. + * + * @return The namespace-specific string portion of this URN + */ + public String getNSS() { + return _entityType + (_entityKey.size() > 0 ? 
':' + _entityKey.toString() : ""); + } + + @Override + public String toString() { + if (_cachedStringUrn != null) { + return _cachedStringUrn; + } + // This can be written to by multiple threads, but that's actually safe + // because Urn is immutable and all the threads will compute the same + // logical String (even though they may produce different String objects). + // So whichever thread "wins" the write race, the result is the same. + // This field also doesn't need to be volatile for memory visibility + // because it's just a cache, so if one thread sees a null here while + // another sees non-null, it's still fine: the thread seeing non-null + // uses the cache and the other thread computes a "new" value for the + // field which is again the same logical String. + _cachedStringUrn = URN_START + _namespace + ':' + getNSS(); + return _cachedStringUrn; } @Override public boolean equals(Object obj) { - if (obj != null && Urn.class.isAssignableFrom(obj.getClass())) { - Urn other = (Urn) obj; - return this._urn.equals(other._urn); - } else { + if (obj == null || !Urn.class.isAssignableFrom(obj.getClass())) { return false; } + Urn other = (Urn) obj; + return _entityType.equals(other._entityType) + && _entityKey.equals(other._entityKey) + && _namespace.equals(other._namespace); } @Override public int hashCode() { - return this._urn.hashCode(); + final int prime = 31; + int result = _entityType.hashCode(); + result = prime * result + _entityKey.hashCode(); + return result; } - @Override - public String toString() { - return _urn; + private static String validateAndExtractNamespace(String rawUrn, + int secondColonIndex) + throws URISyntaxException { + if (!charIsLowerCaseAlphabet(rawUrn, URN_START.length())) { + throw new URISyntaxException( + rawUrn, + "First char of Urn namespace must be [a-z]! Urn: " + rawUrn, + URN_START.length()); + } + + if (secondColonIndex == -1) { + throw new URISyntaxException( + rawUrn, + "Missing second ':' char. 
Urn: " + rawUrn); + } + + int namespaceLen = secondColonIndex - URN_START.length(); + if (namespaceLen > 32) { + throw new URISyntaxException( + rawUrn, + "Namespace length > 32 chars. Urn: " + rawUrn, + secondColonIndex); + } + + if (namespaceLen == 2 + && rawUrn.charAt(URN_START.length()) == 'l' + && rawUrn.charAt(URN_START.length() + 1) == 'i') { + // We want to avoid an allocation for the ultra-common "li" namespace! + return DEFAULT_NAMESPACE; + } + + String namespace = rawUrn.substring(URN_START.length(), secondColonIndex); + if (!charsAreValidNamespace(namespace)) { + throw new URISyntaxException( + rawUrn, + "Chars in namespace must be [a-z0-9-]!. Urn: " + rawUrn); + } + return namespace; } - public static boolean isUrn(@Nonnull String urn) { - try { - final Urn dummy = Urn.createFromString(urn); - return true; - } catch (URISyntaxException e) { + // Not using Character.isLowerCase on purpose because that is unicode-aware + // and we only need ASCII. Handling only ASCII is faster. + private static boolean charIsLowerCaseAlphabet(String input, int index) { + if (index >= input.length()) { return false; } + char c = input.charAt(index); + return c >= 'a' && c <= 'z'; } - public static Urn deserialize(String rawUrn) throws URISyntaxException { - return createFromString(rawUrn); + // These are [a-z0-9-] + private static boolean charsAreValidNamespace(String input) { + for (int index = 0; index < input.length(); index++) { + char c = input.charAt(index); + // Not using Character.isLowerCase etc on purpose because that is + // unicode-aware and we only need ASCII. Handling only ASCII is faster. 
+ if (!((c >= 'a' && c <= 'z') || (c >= '0' && c <= '9') || c == '-')) { + return false; + } + } + return true; } - public static void validateUrn(@Nonnull Urn urn, @Nonnull String entityType) - throws URISyntaxException { - if (!entityType.equals(urn.getEntityType())) { - throw new URISyntaxException(urn.toString(), - String.format("This is not a valid %s urn", entityType)); + // Regex word class (\w) is defined as: [a-zA-Z_0-9] + // Source: https://docs.oracle.com/javase/8/docs/api/java/util/regex/Pattern.html + private static boolean charsAreWordClass(String input) { + for (int index = 0; index < input.length(); index++) { + char c = input.charAt(index); + // Not using Character.isLowerCase etc on purpose because that is + // unicode-aware and we only need ASCII. Handling only ASCII is faster. + if (!((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') + || (c >= '0' && c <= '9') || c == '_')) { + return false; + } } + return true; } - static { - Custom.registerCoercer(new DirectCoercer() { - public Object coerceInput(Urn object) throws ClassCastException { - return object.toString(); - } + /** + * Intern a string to be assigned to the _entityType field. + */ + private static String internEntityType(String et) { + // Most of the times this method is called, the canonical string is already + // in the table, so let's do a quick get() first. + String canonicalET = ENTITY_TYPE_INTERNER.get(et); + if (canonicalET != null) { + return canonicalET; + } - public Urn coerceOutput(Object object) throws TemplateOutputCastException { - try { - return Urn.createFromString((String) object); - } catch (URISyntaxException e) { - throw new TemplateOutputCastException("Invalid URN syntax: " + e.getMessage(), e); - } - } - }, Urn.class); + canonicalET = ENTITY_TYPE_INTERNER.putIfAbsent(et, et); + return canonicalET != null ? 
canonicalET : et; } } \ No newline at end of file diff --git a/core-models/src/main/javaPegasus/com/linkedin/common/urn/UrnCoercer.java b/core-models/src/main/javaPegasus/com/linkedin/common/urn/UrnCoercer.java new file mode 100644 index 000000000..b69b51964 --- /dev/null +++ b/core-models/src/main/javaPegasus/com/linkedin/common/urn/UrnCoercer.java @@ -0,0 +1,28 @@ +package com.linkedin.common.urn; + +import com.linkedin.data.template.DirectCoercer; +import com.linkedin.data.template.TemplateOutputCastException; +import java.net.URISyntaxException; + + +/** + * @author Josh Walker + */ +public class UrnCoercer implements DirectCoercer { + @Override + public Object coerceInput(Urn object) throws ClassCastException { + return object.toString(); + } + + @Override + public Urn coerceOutput(Object object) throws TemplateOutputCastException { + if (object.getClass() != String.class) { + throw new TemplateOutputCastException("Urn not backed by String"); + } + try { + return Urn.createFromString((String) object); + } catch (URISyntaxException e) { + throw new TemplateOutputCastException("Invalid URN syntax: " + e.getMessage(), e); + } + } +} diff --git a/core-models/src/main/javaPegasus/com/linkedin/common/urn/Urns.java b/core-models/src/main/javaPegasus/com/linkedin/common/urn/Urns.java new file mode 100644 index 000000000..d6993c598 --- /dev/null +++ b/core-models/src/main/javaPegasus/com/linkedin/common/urn/Urns.java @@ -0,0 +1,37 @@ +package com.linkedin.common.urn; + +import java.net.URISyntaxException; +import javax.annotation.Nonnull; + + +/** + * Static utilities for {@link Urn}. + */ +public final class Urns { + private Urns() { + } + + /** + * Create a Urn from an entity type and an encoded String key. The key is converted to a Tuple by parsing using {@link + * TupleKey#fromString(String)}. + * + *

<p>This differs from the {@link Urn#Urn(String, String)} (and {@link + * Urn#createFromTypeSpecificString(String, String)}) in that this does not have a checked {@link + * java.net.URISyntaxException}, and instead will throw an {@link IllegalArgumentException} if the {@code + * typeSpecificString} fails to parse. + * + *
<p>
The ideal usage for this is when calling this method with compile time constant strings that are known to be + * good. If using dynamic or user input strings, it may be wiser to handle the {@link URISyntaxException}. + * + * @param entityType - the entity type for the Urn + * @param typeSpecificString - the encoded string representation of a TupleKey + * @throws IllegalArgumentException if the typeSpecificString is not a valid encoding of a TupleKey + */ + public static Urn createFromTypeSpecificString(@Nonnull String entityType, @Nonnull String typeSpecificString) { + try { + return new Urn(entityType, typeSpecificString); + } catch (URISyntaxException e) { + throw new IllegalArgumentException("Failed to create Urn.", e); + } + } +} diff --git a/dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/ImmutableLocalDAO.java b/dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/ImmutableLocalDAO.java index 53b58977e..d07c2f23c 100644 --- a/dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/ImmutableLocalDAO.java +++ b/dao-impl/ebean-dao/src/main/java/com/linkedin/metadata/dao/ImmutableLocalDAO.java @@ -4,6 +4,7 @@ import com.google.common.io.Resources; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.Urns; import com.linkedin.data.template.RecordTemplate; import com.linkedin.data.template.UnionTemplate; import com.linkedin.metadata.dao.producer.DummyMetadataEventProducer; @@ -33,7 +34,7 @@ public class ImmutableLocalDAO expectedVersions = Arrays.asList(0L, 1L, 2L, 3L, 4L); List expectedUrns = Arrays.asList(makeFooUrn(0), makeFooUrn(1), makeFooUrn(2), makeFooUrn(3), makeFooUrn(4)); - assertVersionMetadata(results.getMetadata(), expectedVersions, expectedUrns, 1234L, new Urn("test", "foo"), - new Urn("test", "bar")); + assertVersionMetadata(results.getMetadata(), expectedVersions, expectedUrns, 1234L, Urns.createFromTypeSpecificString("test", "foo"), + Urns.createFromTypeSpecificString("test", 
"bar")); // List next page results = dao.list(AspectFoo.class, urn0, 5, 9); @@ -938,7 +939,7 @@ public void testListAspectsForAllUrns() { assertNotNull(results.getMetadata()); assertVersionMetadata(results.getMetadata(), Arrays.asList(0L), Arrays.asList(makeFooUrn(0)), 1234L, - new Urn("test", "foo"), new Urn("test", "bar")); + Urns.createFromTypeSpecificString("test", "foo"), Urns.createFromTypeSpecificString("test", "bar")); // Test list latest aspects ListResult latestResults = dao.list(AspectFoo.class, 0, 2); @@ -965,7 +966,7 @@ public void testListAspectsForAllUrns() { assertNotNull(results.getMetadata()); assertVersionMetadata(results.getMetadata(), Arrays.asList(1L), Arrays.asList(makeUrn(2)), 1234L, - new Urn("test", "foo"), new Urn("test", "bar")); + Urns.createFromTypeSpecificString("test", "foo"), Urns.createFromTypeSpecificString("test", "bar")); } @Test @@ -1471,10 +1472,10 @@ public void testGetWithExtraInfoLatestVersion() { new EbeanLocalDAO<>(EntityAspectUnion.class, _mockProducer, _server, FooUrn.class); FooUrn urn = makeFooUrn(1); AspectFoo v0 = new AspectFoo().setValue("foo"); - Urn creator1 = new Urn("test", "testCreator1"); - Urn impersonator1 = new Urn("test", "testImpersonator1"); - Urn creator2 = new Urn("test", "testCreator2"); - Urn impersonator2 = new Urn("test", "testImpersonator2"); + Urn creator1 = Urns.createFromTypeSpecificString("test", "testCreator1"); + Urn impersonator1 = Urns.createFromTypeSpecificString("test", "testImpersonator1"); + Urn creator2 = Urns.createFromTypeSpecificString("test", "testCreator2"); + Urn impersonator2 = Urns.createFromTypeSpecificString("test", "testImpersonator2"); addMetadataWithAuditStamp(urn, AspectFoo.class.getCanonicalName(), 0, v0, 123, creator1.toString(), impersonator1.toString()); AspectFoo v1 = new AspectFoo().setValue("bar"); @@ -1494,10 +1495,10 @@ public void testGetWithExtraInfoSpecificVersion() { new EbeanLocalDAO<>(EntityAspectUnion.class, _mockProducer, _server, FooUrn.class); FooUrn 
urn = makeFooUrn(1); AspectFoo v0 = new AspectFoo().setValue("foo"); - Urn creator1 = new Urn("test", "testCreator1"); - Urn impersonator1 = new Urn("test", "testImpersonator1"); - Urn creator2 = new Urn("test", "testCreator2"); - Urn impersonator2 = new Urn("test", "testImpersonator2"); + Urn creator1 = Urns.createFromTypeSpecificString("test", "testCreator1"); + Urn impersonator1 = Urns.createFromTypeSpecificString("test", "testImpersonator1"); + Urn creator2 = Urns.createFromTypeSpecificString("test", "testCreator2"); + Urn impersonator2 = Urns.createFromTypeSpecificString("test", "testImpersonator2"); addMetadataWithAuditStamp(urn, AspectFoo.class.getCanonicalName(), 0, v0, 123, creator1.toString(), impersonator1.toString()); AspectFoo v1 = new AspectFoo().setValue("bar"); @@ -1516,12 +1517,12 @@ public void testGetWithExtraInfoMultipleKeys() { EbeanLocalDAO dao = new EbeanLocalDAO<>(EntityAspectUnion.class, _mockProducer, _server, FooUrn.class); FooUrn urn = makeFooUrn(1); - Urn creator1 = new Urn("test", "testCreator1"); - Urn impersonator1 = new Urn("test", "testImpersonator1"); - Urn creator2 = new Urn("test", "testCreator2"); - Urn impersonator2 = new Urn("test", "testImpersonator2"); - Urn creator3 = new Urn("test", "testCreator3"); - Urn impersonator3 = new Urn("test", "testImpersonator3"); + Urn creator1 = Urns.createFromTypeSpecificString("test", "testCreator1"); + Urn impersonator1 = Urns.createFromTypeSpecificString("test", "testImpersonator1"); + Urn creator2 = Urns.createFromTypeSpecificString("test", "testCreator2"); + Urn impersonator2 = Urns.createFromTypeSpecificString("test", "testImpersonator2"); + Urn creator3 = Urns.createFromTypeSpecificString("test", "testCreator3"); + Urn impersonator3 = Urns.createFromTypeSpecificString("test", "testImpersonator3"); AspectFoo fooV0 = new AspectFoo().setValue("foo"); addMetadataWithAuditStamp(urn, AspectFoo.class.getCanonicalName(), 0, fooV0, 123, creator1.toString(), impersonator1.toString()); diff 
--git a/restli-resources/src/test/java/com/linkedin/metadata/restli/BaseVersionedAspectResourceTest.java b/restli-resources/src/test/java/com/linkedin/metadata/restli/BaseVersionedAspectResourceTest.java index 0b8e42f26..418590dcc 100644 --- a/restli-resources/src/test/java/com/linkedin/metadata/restli/BaseVersionedAspectResourceTest.java +++ b/restli-resources/src/test/java/com/linkedin/metadata/restli/BaseVersionedAspectResourceTest.java @@ -3,6 +3,7 @@ import com.google.common.collect.ImmutableList; import com.linkedin.common.AuditStamp; import com.linkedin.common.urn.Urn; +import com.linkedin.common.urn.Urns; import com.linkedin.data.template.RecordTemplate; import com.linkedin.metadata.dao.AspectKey; import com.linkedin.metadata.dao.BaseLocalDAO; @@ -81,10 +82,10 @@ public void testGet() { @Test public void testGetAllWithMetadata() { List foos = ImmutableList.of(new AspectFoo().setValue("v1"), new AspectFoo().setValue("v2")); - ExtraInfo extraInfo1 = - makeExtraInfo(ENTITY_URN, 1L, new AuditStamp().setActor(new Urn("testUser", "bar1")).setTime(0L)); - ExtraInfo extraInfo2 = - makeExtraInfo(ENTITY_URN, 2L, new AuditStamp().setActor(new Urn("testUser", "bar2")).setTime(0L)); + ExtraInfo extraInfo1 = makeExtraInfo(ENTITY_URN, 1L, + new AuditStamp().setActor(Urns.createFromTypeSpecificString("testUser", "bar1")).setTime(0L)); + ExtraInfo extraInfo2 = makeExtraInfo(ENTITY_URN, 2L, + new AuditStamp().setActor(Urns.createFromTypeSpecificString("testUser", "bar2")).setTime(0L)); ListResultMetadata listResultMetadata = new ListResultMetadata().setExtraInfos(new ExtraInfoArray(ImmutableList.of(extraInfo1, extraInfo2))); ListResult listResult = ListResult.builder().values(foos).metadata(listResultMetadata).build(); diff --git a/version.properties b/version.properties index 96ff41b64..67c606965 100644 --- a/version.properties +++ b/version.properties @@ -1 +1 @@ -version=0.1.* +version=0.2.*