diff --git a/check_add_vr.py b/check_add_vr.py
new file mode 100644
index 0000000..b6b9496
--- /dev/null
+++ b/check_add_vr.py
@@ -0,0 +1,188 @@
+'''
+    This code checks each entry in the dicom_ontology.owl file for an
+    explicit VR assignment. The issue is that the "new" entries -
+    the entries/tags not in the original set that were scraped from
+    the NEMA website and instantiated in Neurolex - do not have
+    VR values attached. The reason for this is that in the DICOM
+    standard documentation a listing of all of the tags is given in Parts
+    06 and 07, but these lists do not include the definitions. The
+    definitions are given in tables in other Parts, so they have to
+    be extracted from those tables.
+
+    This code checks to see if an entry specifies the VR value and,
+    if not, retrieves it from the file
+    /home/karl/Work/INCF/XML_code/dicom_dict_vr.py
+    which was created by the code:
+    /home/karl/Work/INCF/XML_code/vr_generate_dict.py
+
+    This code skips lines until it finds
+    a line containing "Datatype Properties" and continues to read lines
+    until it finds a line that contains "dicom#dicom". It collects
+    lines in the variable "entry" until it encounters a line containing
+    " ." which is the ending marker for an entry. If it finds
+    a line containing "dicom:VR" then it writes the entry after removing
+    an extra blank line that was written into the original owl file as
+    the line previous to the line containing the VR info. The code then
+    writes that entry to the output file and then reads on, collecting the
+    next entry. If the entry does not contain a VR line, it queries the
+    dicom_dict_vr.py file to find the VR value and writes a line
+    containing the VR value in the correct format.
+
+    Note that since the code starts collecting entries with the "Datatype
+    Properties" section, everything above that needs to be pasted into
+    the resulting file to make a complete owl file. Also, the Class section
+    will be unchanged since there are no VR values for these terms
+    (since they are not official DICOM tags, but terms I extracted from the
+    official Part documents). The code checks for the presence of "xxxx" in
+    the dicom tag. If that string is in the tag then the entry is written
+    unchanged into the output file.
+
+
+Sample Complete Entry
+------------------------------------------------
+### http://purl.org/nidash/dicom#dicom_00280011
+
+dicom:dicom_00280011 rdf:type owl:DatatypeProperty ;
+
+        rdfs:label "Columns"^^xsd:string ;
+
+        obo:IAO_0000114 obo:IAO_0000428 ;
+
+        obo:IAO_0000115 "Number of columns in the image."^^xsd:string ;
+
+        dicom:dicom_xxxx0065 "(0028,0011)"^^xsd:string ;
+
+        dicom:VR "US"^^xsd:string ;
+
+        rdfs:subClassOf dc:identifier .
+
+
+2018-09-04 - started
+2018-09-14 - ran on full owl file and checked result into GitHub repo
+
+Karl Helmer
+Athinoula A. Martinos Center for Biomedical Imaging
+Massachusetts General Hospital, 2018
+
+'''
+
+import os, sys
+import re
+import ast
+
+#************************************************
+#input parameters
+inDir = '/home/karl/Work/INCF/dicom-ontology/'
+inFilename = 'dicom_ontology.owl'
+outDir = '/home/karl/Work/INCF/dicom-ontology/'
+outFilename = 'dicom_ontology_new.owl'
+vrDir = '/home/karl/Work/INCF/XML_code/'
+vrFilename = 'dicom_dict_vr.dict'
+startEntry = 'dicom#dicom'
+endEntry = ' .'
+startPlace = 'Datatype Properties'
+#************************************************
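+
+# A minimal sketch (illustration only; this hypothetical helper is not called
+# anywhere - get_vr() below does the real work) of the lookup this script performs.
+# The dict file written by vr_generate_dict.py is assumed to hold a single Python
+# dict literal mapping "GGGGEEEE" tag strings to
+# ('VR', 'VM', "Name", 'is_retired', 'Keyword') tuples.
+def example_vr_lookup(dictPath, tag):
+    dicomDict = ast.literal_eval(open(dictPath, 'r').read())
+    return dicomDict.get(tag, ('',))[0]   # e.g. '00280011' -> 'US' (Columns)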
+
+def search_vr(entry):
+    #make 1 string rather than searching in each string individually
+    #this is faster than using some version of "any"
+    combined = ' '.join(entry)
+    test = 'dicom:VR' in combined
+
+    return test
+
+
+def get_tag(entry):
+
+    # check each line in entry for the dicom tag
+    for e in entry:
+        if 'rdf:type' in e:
+            t = re.search('dicom_(.+?) ', e)
+            tag = t.group(1)
+            if tag:
+                #print "The DICOM tag is: ", tag
+                return tag
+            else:
+                #this will crash the program since no tag value is returned
+                print "no dicom tag value found in: ", entry
+
+
+def get_vr(vrDir, vrFilename, tag):
+    if tag:
+        vrF = open(vrDir+vrFilename, 'r').read()
+        dicomDict = ast.literal_eval(vrF)
+        if tag in dicomDict:
+            vr = dicomDict.get(tag,'')[0]
+            #print "vr value is = ", vr
+            return vr
+        else:
+            print "vr value not found for tag = ", tag
+            return None
+
+
+def add_vr_to_entry(vr, entry):
+    vrLine = ' dicom:VR "{}"^^xsd:string ;\n'.format(vr)
+    entry.insert(4,vrLine)
+    entry.insert(5,'\n')
+
+    return entry
+
+
+def write_entry(entry, outFile):
+    for line in entry:
+        outFile.write(line)
+    outFile.write('\n\n')
+
+
+def remove_sequential_blanks_in_entry(entry):
+    for i in range(len(entry)-1):
+        if entry[i] == entry[i+1]:
+            del entry[i]
+            break
+
+    return entry
+
+
+
+def main():
+
+    # open the dicom ontology file and start reading
+    with open(inDir+inFilename, 'r') as inFile, open(outDir+outFilename, 'w') as outFile:
+        entry = []
+        copy = False
+        dt = False
+        for line in inFile:
+            if startPlace in line:   #find "Datatype Properties" line and start here
+                dt = True
+                print "starting place is:", startPlace
+
+            if dt == True:   #start check after Datatype Prop line
+                if startEntry in line:
+                    copy = True
+                    #print 'start of entry'
+                if endEntry in line:
+                    copy = False
+                    entry.append(line)   #append the last line of entry
+                    #print 'end of entry'
+
+                    #now check the entry list as a whole
+                    vrFlag = search_vr(entry)   #see if the entry has a VR line
+                    tag = get_tag(entry)   #extract the tag from entry
+                    print tag, vrFlag
+                    if vrFlag == False and ("xxxx" not in tag):
+                        vr = get_vr(vrDir, vrFilename, tag)   #get vr value from the dict
+                        entry1 = add_vr_to_entry(vr, entry)
+                        write_entry(entry1, outFile)   #write entry with added vr to outfile
+                        entry = []   #clear entry list when finished
+                    else:
+                        entry2 = remove_sequential_blanks_in_entry(entry)
+                        write_entry(entry2, outFile)   #write unchanged entry to outfile
+                        entry = []
+                elif copy:
+                    entry.append(line)
+
+
+
+##############################################################
+if __name__ == "__main__":
+    main()
diff --git a/create_dicom_ttl.0.4.py b/create_dicom_ttl.0.4.py
new file mode 100644
index 0000000..ea020ba
--- /dev/null
+++ b/create_dicom_ttl.0.4.py
@@ -0,0 +1,401 @@
+'''
+    This code takes the DICOM terms from the DICOM XML docbook
+    (provided by David Clunie) and the CSV file of the DICOM terms
+    from Neurolex and creates a basic turtle file. Note that in the
+    definitions there are XML links that don't show up in the extracted
+    text file from the DICOM docbook. There are then phrases like
+    "See " that need to be removed from the definitions at the very end.
+
+ver 0.1  2017-03-14 - original; terms are camelcase labels
+ver 0.2  2017-03-28 - retrieve Neurolex ID's using term labels
+ver 0.3  2017-03-29 - retrieve neurolex ID using DICOM tags
+ver 0.4  2017-04-19 - change ID system to non-tag-based ID's
+                      reserve first 500 for other terms, rest for tags
+
+Karl Helmer
+Athinoula A. Martinos Center for Biomedical Imaging
+Massachusetts General Hospital
+
+'''
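+
+# For orientation, the two inputs are assumed to look roughly as follows (illustration
+# only, reconstructed from the regular expressions used in main() below).
+# Clunie text file - one tab-separated line per term:
+#   Name="Columns"  Tag="(0028,0011)"  Description="Number of columns in the image."
+# Neurolex CSV - one line per term containing, among other fields:
+#   ...:Category:Columns,...,DICOM:0028_0011,...,nlx_12345,...,"US"
+# (nlx_12345 is a made-up placeholder Neurolex ID.)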
+
+import os, sys
+import re
+from operator import itemgetter
+import pickle
+
+#************************************************
+#input parameters
+#outDir = '/home/karl/Work/INCF/nidm/nidm/nidm/nidm-experiment/imports/'
+outDir = '/home/karl/Work/INCF/dicom-ontology/'
+outFile = 'dicom_numericalID.ttl'
+tagDefFile = 'all_tag_definition.txt'
+# The following file version is the one that replaces the greek mu with "u"
+# Use of mu means dealing with unicode processing
+inFile = '/home/karl/Work/INCF/dicom-ontology/Clunie_DICOM_definitions-us.txt'
+nlxFile = '/home/karl/Work/INCF/dicom-ontology/Neurolex_dicom_terms_result.csv'
+dicomNS = 'dicom:'
+dicomPrefix = 'dicom_'
+rdfType = 'rdf:type'
+owlClass = 'owl:Class'
+owlDatatypeProperty = 'owl:DatatypeProperty'
+owlSameAs = 'owl:sameAs'
+rdfsLabel = 'rdfs:label'
+rdfsSub = 'rdfs:subClassOf'
+dicomTag = dicomNS+'dicom_00000065'  #'Tag'
+vrInDicom = dicomNS+'VR'
+nlxID = 'nidm:neurolexID'
+dcID = 'dc:identifier'
+labelStr = 'label'
+subClass = 'subClassOf'
+provNS = 'prov:'
+xsdString = '^^xsd:string '
+definitionStr = 'obo:IAO_0000115'
+editorNote = 'obo:IAO_0000116 "To be discussed."'
+curationStatusReady = 'obo:IAO_0000114 obo:IAO_0000122 '
+curationStatusReqDisc = 'obo:IAO_0000114 obo:IAO_0000428 '
+classLink = 'http://purl.org/nidash/dicom#'
+nlxLink = 'http://uri.neuinfo.org/nif/nifstd/'
+idStart = 500
+#************************************************
+
+def write_ontology_header(ttlFile):
+
+    # W3C/DCMI/OBO prefixes use the standard IRIs; the dicom and nlx IRIs follow
+    # classLink/nlxLink above, and the nidm and dc IRIs are assumed
+    ttlFile.write("@prefix : <http://purl.org/nidash/dicom#> .\n")
+    ttlFile.write("@prefix owl: <http://www.w3.org/2002/07/owl#> .\n")
+    ttlFile.write("@prefix rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#> .\n")
+    ttlFile.write("@prefix xml: <http://www.w3.org/XML/1998/namespace> .\n")
+    ttlFile.write("@prefix xsd: <http://www.w3.org/2001/XMLSchema#> .\n")
+    ttlFile.write("@prefix rdfs: <http://www.w3.org/2000/01/rdf-schema#> .\n")
+    ttlFile.write("@prefix nidm: <http://purl.org/nidash/nidm#> .\n")
+    ttlFile.write("@prefix dc: <http://purl.org/dc/elements/1.1/> .\n")
+    ttlFile.write("@prefix obo: <http://purl.obolibrary.org/obo/> .\n")
+    ttlFile.write("@prefix nlx: <http://uri.neuinfo.org/nif/nifstd/> .\n")
+    ttlFile.write("@base <http://purl.org/nidash/dicom> .\n")
+    ttlFile.write("\n")
+    ttlFile.write("<"+classLink+"> rdf:type owl:Ontology .\n")
+
+
+def write_class_header(ttlFile):
+    ttlFile.write('\n')
+    ttlFile.write('#################################################################\n')
+    ttlFile.write('#\n')
+    ttlFile.write('#    Datatype Properties\n')
+    ttlFile.write('#\n')
+    ttlFile.write('#################################################################\n')
+    ttlFile.write('\n')
+
+
+# The following two functions are used to create camelCase version of DICOM tag label
+#def repl_func(m):
+#    """process regular expression match groups for word upper-casing problem"""
+#    return m.group(1) + m.group(2).upper()
+
+
+#def create_camelcase_label(s):
+#    '''Capitalizes each word, removes non-alphanumeric characters
+#       and spaces from the label '''
+#    s = re.sub("(^|\s)(\S)", repl_func, s)
+#    s = re.sub('[^a-zA-Z0-9]+',"", s)
+#    s.replace(" ", "")
+#    if s[0].isalpha:
+#        s = s[0].lower() + s[1:]
+#
+#    return s
+
+
+# The following two functions are used to match term labels from the two input files
+# Used in string_match function
+def max_list_value(list,i):
+    # this function returns a tuple of the (index, maxValue) for a list
+    # you supply the list and the index of the place within the list that you
+    # want the max of.
+    return max(enumerate(sub[i] for sub in list), key=itemgetter(1))
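+
+# Example (illustration): max_list_value([['a', 2, 1], ['b', 0, 5]], -1) returns (1, 5),
+# i.e. the second sub-list holds the largest value in the last position; string_match()
+# below calls it with i=-1 to pick the candidate with the highest word-match count.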
+
+
+def string_match(label,nlxData):
+    # this code takes an input string (dicom tag label) and tries to find an
+    # exact match in another list of labels.  If no exact match is found, finds
+    # the closest match from a list of labels in which there is at least one
+    # match between the original label and the possible label.
+
+    # find the length of the list of possible labels
+    neuroLines = len(nlxData)
+    exactMatch = 'False'
+    noMatch = 'True'
+
+    print "considering DICOM file label = "+label
+
+    for i in range(neuroLines):
+        partMatch = 'False'
+        tempStore = []
+        # check for exact match
+        #print "DICOM_label=",label, " nlxData_label=",nlxData[i][0]
+        if nlxData[i][0] == label:
+            vrCode = nlxData[i][3]
+            dicomTagID = nlxData[i][2]
+            neurolexID = nlxData[i][1]
+            print "match for ", label
+            exactMatch = 'True'
+            noMatch = 'False'
+            break
+        # if no exact match, find how many words in the orig label are in the possible label
+        # if none, go to next nlxLabel in nlxData
+        else:
+            matchCount = 0
+            labelPart = label.split()
+            filteredLabelPart = [s for s in labelPart if len(s) > 2]   #don't match 2-or-less length words
+            for lp in filteredLabelPart:
+                #print lp
+                if lp in nlxData[i][0]:
+                    matchCount = 1+matchCount
+
+            # if at least one matching word, store needed info as list in list tempStore
+            if matchCount != 0:
+                partMatch = 'True'
+                tempStore.append([nlxData[i][0], nlxData[i][2], nlxData[i][1], nlxData[i][3], matchCount])
+
+        if (partMatch == 'True') and (exactMatch == 'False'):
+            print "Dicom label = "+label+"\n"
+            print "Neurolex entry = ", tempStore
+
+            if len(tempStore) > 1:   #if only a single term matches then assume that it's not a match
+                isMatch = input("Is this a match (1/0)?")
+                if isMatch:
+                    print 'partial match for '+label
+                    maxAndWhere = max_list_value(tempStore,-1)   # tuple
+                    print maxAndWhere
+                    k=maxAndWhere[0]   #put the index of the best match into k
+                    # put values for best match into variables for return
+                    neurolexID = tempStore[k][2]
+                    dicomTagID = tempStore[k][1]
+                    vrCode = tempStore[k][3]
+                    noMatch = 'False'
+                else:
+                    partMatch = 'False'
+            else:
+                partMatch = 'False'
+
+    if (partMatch == 'False') and (exactMatch == 'False'):
+        noMatch = 'True'
+        neurolexID = 'NF'
+        dicomTagID = 'NF'
+        vrCode = 'NF'
+        print "no match for "+label
+
+    return neurolexID, dicomTagID, vrCode, noMatch
+
+
+
+def tag_match(tag,nlxData):
+    '''
+    This code takes an input string (dicom tag) and tries to find an
+    exact match in another list of labels.  The two strings have different
+    initial formats so first have to put them in common format (8char string,
+    no non-alphanumeric characters)
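+    (For illustration: the Clunie file gives a tag as "(0028,0011)" while the
+    Neurolex CSV stores it as 0028_0011; both are reduced to 00280011 here
+    before comparison.)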
+    '''
+
+    # set match status flags
+    neuroLines = len(nlxData)
+    noMatch = 'True'
+    neurolexID = 'NF'
+    vrCode = 'NF'
+
+    # get the DICOM tag from the Clunie file in the format (XXXX,XXXX)
+    dicomTagIDGroup = re.search(r'.*\(([A-Za-z0-9\,]*)\)', tag)
+    if not dicomTagIDGroup:
+        print "bad dicom tag format for: "+tag
+    else:
+        dicomTagPartsList = dicomTagIDGroup.group(1).split(",")
+        dicomTagID = dicomTagPartsList[0]+dicomTagPartsList[1]
+        #print dicomTag
+
+    for i in range(neuroLines):
+        # This assumes that the correctly formatted tag is present
+        # (already checked in main)
+        # To get from XXXX_XXXX to XXXXXXXX
+        nlxDicomTagPartsList = nlxData[i][2].split("_")
+        nlxDicomTagID = nlxDicomTagPartsList[0]+nlxDicomTagPartsList[1]
+        #print nlxDicomTag
+
+        # check for exact match
+        if nlxDicomTagID == dicomTagID:
+            vrCode = nlxData[i][3]
+            neurolexID = nlxData[i][1]
+            noMatch = 'False'
+            break
+        else:
+            pass
+
+
+    if noMatch == 'True':
+        print "no match for "+dicomTagID
+    else:
+        print "match for "+dicomTagID
+
+    return neurolexID, dicomTagID, vrCode, noMatch
+
+
+
+def main():
+    nlxData = []
+    neurolexID = ''
+    dicomTagID = ''
+    vrCode = ''
+    ttlFile = open(outDir+outFile, "w")
+
+    write_ontology_header(ttlFile)
+    write_class_header(ttlFile)
+
+    # Neurolex/Interlex section*****************************
+    # put the label, Neurolex ID (if present), DICOM ID, and VR into a file that will be
+    # matched up to the label from the DICOM (Clunie-supplied) file
+    nlxFileData = open(nlxFile, "r")
+    entries = nlxFileData.readlines()
+    for entry in entries:
+
+        dicomIDGroup = re.search(r'.*DICOM:([A-Za-z0-9\_]*),', entry)
+        if not dicomIDGroup:
+            print "no dicom ID found in: ", entry
+            dicomID = "NF "
+        else:
+            dicomID = dicomIDGroup.group(1)
+            #print dicomID
+
+
+        nlxIDGroup = re.search(r'.*,(nlx_[0-9]*),', entry)
+        if not nlxIDGroup:
+            print "no nlx ID found in: ", entry
+            nlxID = "NF "
+        else:
+            nlxID = nlxIDGroup.group(1)
+            #print nlxID
+
+
+        vr = entry[-3:].rstrip("\n")   #get rid of newline character
+        vrGroup = re.search(r'(\"\,*)', vr)
+        if vrGroup:
+            if "US or SS" in entry:
+                vr = "US or SS"
+            elif "OB or OW" in entry:
+                vr = "OB or OW"
+            elif "OW or OB" in entry:
+                vr = "OB or OW"
+            elif "OP or OW" in entry:
+                vr = "OP or OW"
+            elif "US,SS,or OW" in entry:
+                vr = "US or SS"
+            elif "US or SS or OW" in entry:
+                vr = "US or SS or OW"
+            elif "does not exist" in entry:
+                vr = "does not exist"
+            else:
+                print "bad or missing VR value found in: ", entry
+                vr = "NF "
+        else:
+            vr = vr
+
+        #vr = vr.rstrip("\n")   #get rid of the newline character that appears
+
+        # problem here is that sometimes there are "" around Category and sometimes not
+        dicomLabelGroup = re.search(r'.*:Category:([A-Za-z0-9\s\-\/\(\)\'\&\"]*),', entry)
+        if not dicomLabelGroup:
+            print "no dicom label found in: ", entry
+            dicomLabel = "NF "
+        else:
+            dicomLabel = dicomLabelGroup.group(1)
+            if dicomLabel[-1] == '"':
+                dicomLabel = dicomLabel[:-1]
+            #print dicomLabel
+
+        # store extracted strings in a list for future retrieval - this is all relevant NLX data
+        nlxData.append([dicomLabel, nlxID, dicomID, vr])
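+
+    # At this point each nlxData element is [dicomLabel, nlxID, dicomID, vr], e.g.
+    # (illustration - the Neurolex ID is a made-up placeholder):
+    #   ['Columns', 'nlx_12345', '0028_0011', 'US']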
+
+    # DICOM document section************************************
+    # get the label, tag, definition for each term
+    tagList = []
+    multiTags = []
+    allEntries = []
+    idStart = 500
+    dicomFileData = open(inFile, "r")
+    lines = dicomFileData.readlines()
+    for line in lines:
+
+        # create a 5 digit ID with leading zeros to ID the tags
+        idStart = idStart + 1
+        numericalTagID = str(idStart).zfill(5)
+
+        # get the label
+        labelGroup = re.search(r'.*Name="([A-Za-z0-9\s\-\/\(\)\'\&]*)"\t', line)
+        label = labelGroup.group(1)
+        # get the tag
+        tagGroup = re.search(r'.*Tag=("[A-Za-z0-9\s\,\(\)]*")\t', line)
+        tag = tagGroup.group(1)   #left the quotes around the tag
+        # get the definition
+        definitionGroup = re.search(r'.*Description=(".*)', line)
+        definition = definitionGroup.group(1)   # has quotes already
+
+        # find the corresponding term from the extracted Neurolex info
+        #neurolexID, dicomTagID, vrCode, noMatch = string_match(label,nlxData)
+        neurolexID, dicomTagID, vrCode, noMatch = tag_match(tag,nlxData)
+
+        #tempList = [dicomTagID, definition]
+        #allEntries.append(tempList)
+
+        # determine which tags have multiple entries and create a non-repeating list
+        # of the multiple-entry tags (multiTags). tagList is a non-repeating list of all tags.
+        # {just store tag}
+        #if dicomTagID in tagList and dicomTagID not in multiTags:
+        #    multiTags.append(dicomTagID)
+        #else:
+        #    tagList.append(dicomTagID)
+
+        #{store all multiple tags and their definitions} - HOW TO STORE FIRST ONE OF MULTIPLE?
+        # look at each tag in turn and all tags after that tag.
+        #if dicomTagID in tagList and dicomTagID not in multiTags:
+        #    tempList = [dicomTagID, definition]
+        #    multiTags.append(tempList)
+        #else:
+        #    tagList.append(dicomTagID)
+
+        #labelCC = create_camelcase_label(label)
+        #print label
+        ttlFile.write("### "+classLink+dicomPrefix+numericalTagID+"\n")
+        ttlFile.write("\n")
+        ttlFile.write(dicomNS+dicomPrefix+numericalTagID+" "+rdfType+" "+owlDatatypeProperty+" ;\n")
+        ttlFile.write("\n")
+        ttlFile.write(" "+rdfsLabel+" "+'"'+label+'"'+xsdString+";\n")
+        ttlFile.write("\n")
+        ttlFile.write(" "+curationStatusReqDisc+";\n")
+        ttlFile.write("\n")
+        ttlFile.write(" "+definitionStr+" "+definition+xsdString+";\n")
+        ttlFile.write("\n")
+        ttlFile.write(" "+dicomTag+" "+tag+xsdString+";\n")
+        ttlFile.write("\n")
+
+        if noMatch == 'False':
+            ttlFile.write(" "+owlSameAs+" "+neurolexID+" ;\n")
+            ttlFile.write("\n")
+            ttlFile.write(" "+vrInDicom+" "+'"'+vrCode+'"'+xsdString+" ;\n")
+            ttlFile.write("\n")
+            ttlFile.write(" "+rdfsSub+" "+dcID+" .\n")
+        else:
+            ttlFile.write(" "+rdfsSub+" "+dcID+" .\n")
+
+        ttlFile.write("\n")
+
+    ttlFile.write("\n")
+    ttlFile.write("\n")
+
+    ttlFile.close()
+
+    #print multiTags
+    #print len(multiTags)
+
+    # write out the list of all tag and defs for later sorting
+    #with open(outDir+tagDefFile, "wb") as fp:
+    #    pickle.dump(allEntries,fp)
+    #fp.close()
+##############################################################
+if __name__ == "__main__":
+    main()
diff --git a/vr_generate_dict.py b/vr_generate_dict.py
new file mode 100644
index 0000000..bb8587c
--- /dev/null
+++ b/vr_generate_dict.py
@@ -0,0 +1,249 @@
+#!/usr/bin/python
+# -*- coding: utf-8 -*-
+
+# generate_dict_2015b.py
+
+"""
+    Reformat the DICOM dictionary PS3.6 and PS3.7 docbook xml files (from e.g. standard docs) to Python syntax
+    Write the main DICOM dictionary elements as a python dict called main_attributes with format:
+        Tag: ('VR', 'VM', "Name", 'is_retired', 'Keyword')
+    Where
+        Tag is a 32-bit representation of the group, element as 0xggggeeee (e.g. 0x00181600)
+        VR is the Value Representation (e.g. 'OB' or 'OB or UI' or 'NONE')
+        VM is the Value Multiplicity (e.g. '1' or '2-2n' or '3-n' or '1-32')
+        Name is the DICOM Element Name (or Message Field for Command Elements) (e.g. 'Tomo Time' or 'Retired-blank' or 'Time Source')
+        is_retired is '' if not retired, 'Retired' otherwise (e.g. '' or 'Retired')
+        Keyword is the DICOM Keyword (e.g. 'TomoTime' or 'TimeSource')
+    Also write the repeating groups or elements (e.g. group "50xx") as a python dict called
+    mask_attributes as masks that can be tested later for tag lookups that didn't work
+    using format:
+        'Tag': ('VR', 'VM', "Name", 'is_retired', 'Keyword')
+    Where
+        Tag is a string representation of the element (e.g. '002031xx' or '50xx0022')
+"""
+
+# Based on Rickard Holmberg's docbook_to_dict2013.py
+# http://code.google.com/r/rickardholmberg-pydicom/
+# but rewritten for not using bs4 (and slight change for standard v2015b)
+
+# Based on Rickard Holmberg's generate_dict_2015b.py - found online as part of the "pydicom" package.
+# Note that this doesn't grab the definitions - the dictionary in Part 06 doesn't
+# include them in the table that I pull info from. This code is used to generate a python
+# dict that contains tags and VR's. I keep the code to get the latest docbook from a URL,
+# but currently pull from an offline/local version of the latest docbook so I don't have to be online.
+# Also note that this code pulls from the tables in Part 06 and the "Command Fields"
+# and "Retired Command Fields" tables in Part 07. Originally, this was written out as two separate
+# dictionaries in one named dictionary. I decided to simplify this to one single dictionary
+# with no name, since I don't need the elements in the 2nd dict.
+# K. Helmer
+# Massachusetts General Hospital, 2018
+
+import urllib2
+import xml.etree.ElementTree as ET
+import os
+
+# pydict_filename = '../dicom/_dicom_dict.py'   #this is the filename format expected for pydicom codebase
+pydict_filename = 'dicom_dict_vr.dict'   # KGH
+main_dict_name = 'DicomDictionary'   #KGH - not used; only want dict in file, not "name = "
+mask_dict_name = 'RepeatersDictionary'
+
+def write_dict(f, dict_name, attributes, tagIsString):   #KGH-write out the tag as a string in both cases
+    if tagIsString:
+        #entry_format = """'{Tag}': ('{VR}', '{VM}', '{Name}', '{Retired}', '{Keyword}')"""
+        entry_format = """"{Tag}": ("{VR}", "{VM}", "{Name}", "{Retired}", "{Keyword}")"""   #KGH - try double quotes because some Names have apostrophes in them, e.g., "Referring Physician's Name"
+    else:
+        #entry_format = """{Tag}: ('{VR}', '{VM}', '{Name}', '{Retired}', '{Keyword}')"""   #original
+        #entry_format = """'{Tag}': ('{VR}', '{VM}', '{Name}', '{Retired}', '{Keyword}')"""   #KGH - make tag a string
+        entry_format = """"{Tag}": ("{VR}", "{VM}", "{Name}", "{Retired}", "{Keyword}")"""   #KGH - try double quotes because some Names have apostrophes in them, e.g., "Referring Physician's Name"
+
+    #f.write("\n%s = {\n " % dict_name)
+    #f.write("%s = {\n " % dict_name)   #KGH - no initial newline necessary + don't want "name = {}"
+    f.write("{\n ")   #KGH - just start with dict "{"
+    f.write(",\n ".join(entry_format.format(**attr) for attr in attributes))
+    f.write("\n}\n")
+
+
+def parse_docbook_table(book_root, caption, empty_field_name="Retired"):
+    """ Parses the given XML book_root for the table with caption matching caption for DICOM Element data
+        Returns a list of dicts with each dict representing the data for an Element from the table
+    """
+
+    br = '{http://docbook.org/ns/docbook}'   # Shorthand variable for book_root
+
+    # Find the table in book_root with caption
+    for table in book_root.iter('%stable' %br):
+        if table.find('%scaption' %br).text == caption:
+
+            def parse_header(header_row):
+                """ Parses the table's thead/tr row, header_row, for the column headers """
+                field_names = []
+
+                # The header_row should be <thead><tr>...</tr></thead>
+                # Which leaves the following:
+                #   <th><para><emphasis>Header 1</emphasis></para></th>
+                #   <th><para><emphasis>Header 2</emphasis></para></th>
+                #   etc...
+                # Note that for the part06 tables the last col header (Retired) is:
+                #   <th><para/></th>
+                for x in header_row.iter('%sth' %br):
+                    # If there is an emphasis tag under the para tag then its text is the column header
+                    if x.find('%spara' %br).find('%semphasis' %br) is not None:
+                        col_label = x.find('%spara' %br).find('%semphasis' %br).text
+                        field_names.append(col_label)
+
+                    # If there isn't an emphasis tag under the para tag then it must be the Retired header
+                    else:
+                        field_names.append("Retired")
+
+                return field_names
+
+            # Get the column headers
+            field_names = parse_header(table.find('%sthead' %br).find('%str' %br))
+
+            def parse_row(field_names, row):
+                """ Parses the table's tbody tr row, row, for the DICOM Element data
+                    Returns a dict {header1 : val1, header2 : val2, ...} for one Element
+                """
+
+                cell_values = []
+
+                # The row should be <tbody><tr>...</tr></tbody>
+                # Which leaves the following:
+                #   <td><para>Value 1</para></td>
+                #   <td><para>Value 2</para></td>
+                #   etc...
+                # Some rows are
+                #   <td><para><emphasis>Value 1</emphasis></para></td>
+                #   <td><para><emphasis>Value 2</emphasis></para></td>
+                #   etc...
+                # There are also some without text values
+                #   <td><para/></td>
+                #   <td><para><emphasis/></para></td>
+
+                for cell in row.iter('%spara' %br):
+                    # If we have an emphasis tag under the para tag
+                    emph_value = cell.find('%semphasis' %br)
+                    if emph_value is not None:
+                        # If there is a text value add it, otherwise add ""
+                        if emph_value.text is not None:
+                            cell_values.append(emph_value.text.strip().replace(u"\u200b", ""))   #200b is a zero width space
+                        else:
+                            cell_values.append("")
+                    # Otherwise just grab the para tag text
+                    else:
+                        if cell.text is not None:
+                            cell_values.append(cell.text.strip().replace(u"\u200b", ""))
+                        else:
+                            cell_values.append("")
+
+                return {key : value for key, value in zip(field_names, cell_values)}
+
+            # Get all the Element data from the table
+            attrs = [parse_row(field_names, row) for row in table.find('%stbody' %br).iter('%str' %br)]
+            return attrs
+
+attrs = []
+
+# KGH - first look in Part 06 for three specific tables (see attrs += statements for table names)
+#url = 'http://medical.nema.org/medical/dicom/current/source/docbook/part06/part06.xml'
+#response = urllib2.urlopen(url)
+fLoc = '/home/karl/Work/INCF/DICOM_docbook_latest/source/docbook/part06/part06.xml'   #KGH
+response = open(fLoc)   #KGH
+tree = ET.parse(response)
+root = tree.getroot()
+response.close()   # KGH
+
+attrs += parse_docbook_table(root, "Registry of DICOM Data Elements")
+attrs += parse_docbook_table(root, "Registry of DICOM File Meta Elements")
+attrs += parse_docbook_table(root, "Registry of DICOM Directory Structuring Elements")
+#KGH ---------------------------------------------------------------
+
+#KGH - Then look at Part 07 that has the command field tables
+fLoc = '/home/karl/Work/INCF/DICOM_docbook_latest/source/docbook/part07/part07.xml'   #KGH
+response = open(fLoc)   #KGH
+#url = 'http://medical.nema.org/medical/dicom/current/source/docbook/part07/part07.xml'
+#response = urllib2.urlopen(url)
+tree = ET.parse(response)
+root = tree.getroot()
+
+command_attrs = parse_docbook_table(root, "Command Fields")   # Changed from 2013 standard
+for attr in command_attrs:
+    attr["Name"] = attr["Message Field"]
+    attr["Retired"] = ""
+
+retired_command_attrs = parse_docbook_table(root, "Retired Command Fields")
+for attr in retired_command_attrs:
+    attr["Name"] = attr["Message Field"]
+    attr["Retired"] = "Retired"
+
+attrs += command_attrs
+attrs += retired_command_attrs
+#KGH -------------------------------------------------------------------------------
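+
+# Each element of attrs is now a dict keyed by the table column headers, e.g.
+# (illustration): {'Tag': '(0028,0011)', 'Name': 'Columns', 'Keyword': 'Columns',
+# 'VR': 'US', 'VM': '1', 'Retired': ''}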
+
+# KGH - attrs list now populated; sort by tag value
+attrs = sorted(attrs, key=lambda x: x["Tag"])
+
+main_attributes = []
+mask_attributes = []
+
+#KGH - check to see format of attrs key-value pair
+#print attrs[0]["Description of Field"]
+
+for attr in attrs:
+    group, elem = attr['Tag'][1:-1].split(",")
+
+    #KGH - unused, as the tables in Part 06 don't include definitions
+    #KGH check to see if Description of Field exists; if not create key and make value a blank string
+    #if 'Description of Field' in attr:
+    #    pass
+    #else:
+    #    attr['Description of Field'] = 'None'
+
+    # e.g. (FFFE,E000)
+    if attr['VR'] == 'See Note':
+        attr['VR'] = 'NONE'
+
+    # e.g. (0018,1153), (0018,8150) and (0018,8151)
+    attr["Name"] = attr["Name"].replace(u"µ", "u")   # replace micro symbol
+
+    # e.g. (0014,0023) and (0018,9445)
+    if attr['Retired'] in ['RET', 'RET - See Note']:
+        attr['Retired'] = 'Retired'
+
+    # e.g. (0008,0102), (0014,0025), (0040, A170)
+    if attr['Retired'] in ['DICOS', 'DICONDE', 'See Note']:
+        attr['Retired'] = ''
+
+    # e.g. (0028,1200)
+    attr['VM'] = attr['VM'].replace(" or ", " ")
+
+    # If blank then add dummy vals
+    # e.g. (0018,9445) and (0028,0020)
+    if attr['VR'] == '' and attr['VM'] == '':
+        attr['VR'] = 'OB'
+        attr['VM'] = '1'
+        attr['Name'] = 'Retired-blank'
+
+    # handle retired 'repeating group' tags
+    # e.g. (50xx,eeee) or (gggg,31xx)
+    if 'x' in group or 'x' in elem:
+        attr["Tag"] = group + elem
+        mask_attributes.append(attr)
+    else:
+        #attr["Tag"] = '0x%s%s' %(group, elem)
+        attr["Tag"] = '%s%s' %(group, elem)   #KGH - writing out as string; don't need 32-bit value
+        main_attributes.append(attr)
+
+py_file = file(pydict_filename, "wb")
+#KGH - the following 3 write lines are for pydicom only and not needed for NIDM
+#py_file.write("# %s\n" % os.path.basename(pydict_filename))
+#py_file.write('"""DICOM data dictionary auto-generated by %s"""\n' % os.path.basename(__file__))
+#py_file.write('from __future__ import absolute_import\n')
+write_dict(py_file, main_dict_name, main_attributes, tagIsString=False)
+#write_dict(py_file, mask_dict_name, mask_attributes, tagIsString=True)
+
+py_file.close()
+
+print ("Finished creating python file %s containing the dicom dictionary" % pydict_filename)
+print ("Wrote %d tags" % (len(main_attributes) + len(mask_attributes)))
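+
+# Example of the resulting dicom_dict_vr.dict contents (illustration only; the exact
+# entries depend on the docbook version parsed):
+# {
+#  "00080018": ("UI", "1", "SOP Instance UID", "", "SOPInstanceUID"),
+#  "00280011": ("US", "1", "Columns", "", "Columns")
+# }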