Bouke · TNGPS · May 17, 2018 · May 18, 2018 · May 18, 2018 · May 18, 2018
diff --git a/mailmerge.py b/mailmerge.py
@@ -4,11 +4,15 @@
 from lxml.etree import Element
 from lxml import etree
 from zipfile import ZipFile, ZIP_DEFLATED
+from random import randint
 
 NAMESPACES = {
     'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main',
     'mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
     'ct': 'http://schemas.openxmlformats.org/package/2006/content-types',
+    'wp': 'http://schemas.openxmlformats.org/drawingml/2006/wordprocessingDrawing',
+    'a': 'http://schemas.openxmlformats.org/drawingml/2006/main',
+    'pic': 'http://schemas.openxmlformats.org/drawingml/2006/picture',
 }
 
 CONTENT_TYPES_PARTS = (
@@ -26,6 +30,12 @@ def __init__(self, file, remove_empty_tables=False):
         self.parts = {}
         self.settings = None
         self._settings_info = None
+
+        self.media = {}         # new images to add indexed by embed id
+        self.rels = None        # etree for relationships
+        self._rels_info = None  # ZipInfo entry for the rels part
+        self.RELS_NAMESPACES = {'ns': None, 'od': None}
+
         self.remove_empty_tables = remove_empty_tables
 
         try:
@@ -37,6 +47,13 @@ def __init__(self, file, remove_empty_tables=False):
                 elif type == CONTENT_TYPE_SETTINGS:
                     self._settings_info, self.settings = self.__get_tree_of_file(file)
 
+            # get the rels for image mappings
+            try:
+                self._rels_info, self.rels = self.__get_tree_of_file('word/_rels/document.xml.rels')
+                self.RELS_NAMESPACES['ns'] = self.rels.getroot().nsmap.get(None)
+                self.RELS_NAMESPACES['od'] = self.rels.getroot().nsmap.get(None).replace('package', 'officeDocument')
+            except:
+                pass
             to_delete = []
 
             r = re.compile(r' MERGEFIELD +"?([^ ]+?)"? +(|\\\* MERGEFORMAT )', re.I)
@@ -108,7 +125,10 @@ def __init__(self, file, remove_empty_tables=False):
             raise
 
     def __get_tree_of_file(self, file):
-        fn = file.attrib['PartName' % NAMESPACES].split('/', 1)[1]
+        if isinstance(file, etree._Element):
+            fn = file.get('PartName').split('/', 1)[1]
+        else:
+            fn = file
         zi = self.zip.getinfo(fn)
         return zi, etree.parse(self.zip.open(zi))
 
@@ -125,8 +145,14 @@ def write(self, file):
                 elif zi == self._settings_info:
                     xml = etree.tostring(self.settings.getroot())
                     output.writestr(zi.filename, xml)
+                elif zi == self._rels_info:
+                    xml = etree.tostring(self.rels.getroot())
+                    output.writestr(zi.filename, xml)
                 else:
                     output.writestr(zi.filename, self.zip.read(zi))
+            # add new images to the media folder if any images were merged
+            for img_id, img_data in self.media.items():
+                output.writestr('media/{}.png'.format(img_id), img_data)
 
     def get_merge_fields(self, parts=None):
         if not parts:
@@ -141,7 +167,7 @@ def merge_templates(self, replacements, separator):
         """
         Duplicate template. Creates a copy of the template, does a merge, and separates them by a new paragraph, a new break or a new section break.
         separator must be :
-        - page_break : Page Break. 
+        - page_break : Page Break.
         - column_break : Column Break. ONLY HAVE EFFECT IF DOCUMENT HAVE COLUMNS
         - textWrapping_break : Line Break.
         - continuous_section : Continuous section break. Begins the section on the next paragraph.
@@ -151,58 +177,58 @@ def merge_templates(self, replacements, separator):
         - oddPage_section : oddPage section break. section begins on the next odd-numbered page, leaving the next even page blank if necessary.
         """
 
-        #TYPE PARAM CONTROL AND SPLIT
-        valid_separators = {'page_break', 'column_break', 'textWrapping_break', 'continuous_section', 'evenPage_section', 'nextColumn_section', 'nextPage_section', 'oddPage_section'}
+        # TYPE PARAM CONTROL AND SPLIT
+        valid_separators = {'page_break', 'column_break', 'textWrapping_break', 'continuous_section',
+                            'evenPage_section', 'nextColumn_section', 'nextPage_section', 'oddPage_section'}
         if not separator in valid_separators:
             raise ValueError("Invalid separator argument")
         type, sepClass = separator.split("_")
-
 
-        #GET ROOT - WORK WITH DOCUMENT
+        # GET ROOT - WORK WITH DOCUMENT
         for part in self.parts.values():
             root = part.getroot()
             tag = root.tag
             if tag == '{%(w)s}ftr' % NAMESPACES or tag == '{%(w)s}hdr' % NAMESPACES:
                 continue
-		
+
             if sepClass == 'section':
 
-                #FINDING FIRST SECTION OF THE DOCUMENT
+                # FINDING FIRST SECTION OF THE DOCUMENT
                 firstSection = root.find("w:body/w:p/w:pPr/w:sectPr", namespaces=NAMESPACES)
                 if firstSection == None:
                     firstSection = root.find("w:body/w:sectPr", namespaces=NAMESPACES)
-			
-                #MODIFY TYPE ATTRIBUTE OF FIRST SECTION FOR MERGING
+
+                # MODIFY TYPE ATTRIBUTE OF FIRST SECTION FOR MERGING
                 nextPageSec = deepcopy(firstSection)
                 for child in nextPageSec:
-                #Delete old type if exist
+                    # Delete the old type if it exists
                     if child.tag == '{%(w)s}type' % NAMESPACES:
                         nextPageSec.remove(child)
-                #Create new type (def parameter)
-                newType = etree.SubElement(nextPageSec, '{%(w)s}type'  % NAMESPACES)
-                newType.set('{%(w)s}val'  % NAMESPACES, type)
+                # Create new type (def parameter)
+                newType = etree.SubElement(nextPageSec, '{%(w)s}type' % NAMESPACES)
+                newType.set('{%(w)s}val' % NAMESPACES, type)
 
-                #REPLACING FIRST SECTION
+                # REPLACING FIRST SECTION
                 secRoot = firstSection.getparent()
                 secRoot.replace(firstSection, nextPageSec)
 
-            #FINDING LAST SECTION OF THE DOCUMENT
+            # FINDING LAST SECTION OF THE DOCUMENT
             lastSection = root.find("w:body/w:sectPr", namespaces=NAMESPACES)
 
-            #SAVING LAST SECTION
+            # SAVING LAST SECTION
             mainSection = deepcopy(lastSection)
             lsecRoot = lastSection.getparent()
             lsecRoot.remove(lastSection)
 
-            #COPY CHILDREN ELEMENTS OF BODY IN A LIST
+            # COPY CHILDREN ELEMENTS OF BODY IN A LIST
             childrenList = root.findall('w:body/*', namespaces=NAMESPACES)
 
-            #DELETE ALL CHILDREN OF BODY
+            # DELETE ALL CHILDREN OF BODY
             for child in root:
                 if child.tag == '{%(w)s}body' % NAMESPACES:
                     child.clear()
 
-            #REFILL BODY AND MERGE DOCS - ADD LAST SECTION ENCAPSULATED OR NOT
+            # REFILL BODY AND MERGE DOCS - ADD LAST SECTION ENCAPSULATED OR NOT
             lr = len(replacements)
             lc = len(childrenList)
             parts = []
@@ -220,27 +246,27 @@ def merge_templates(self, replacements, separator):
                                 else:
                                     if sepClass == 'section':
                                         intSection = deepcopy(mainSection)
-                                        p   = etree.SubElement(child, '{%(w)s}p'  % NAMESPACES)
-                                        pPr = etree.SubElement(p, '{%(w)s}pPr'  % NAMESPACES)
+                                        p = etree.SubElement(child, '{%(w)s}p' % NAMESPACES)
+                                        pPr = etree.SubElement(p, '{%(w)s}pPr' % NAMESPACES)
                                         pPr.append(intSection)
                                         parts.append(p)
                                     elif sepClass == 'break':
-                                        pb   = etree.SubElement(child, '{%(w)s}p'  % NAMESPACES)
-                                        r = etree.SubElement(pb, '{%(w)s}r'  % NAMESPACES)
+                                        pb = etree.SubElement(child, '{%(w)s}p' % NAMESPACES)
+                                        r = etree.SubElement(pb, '{%(w)s}r' % NAMESPACES)
                                         nbreak = Element('{%(w)s}br' % NAMESPACES)
                                         nbreak.attrib['{%(w)s}type' % NAMESPACES] = type
                                         r.append(nbreak)
 
                     self.merge(parts, **repl)
 
     def merge_pages(self, replacements):
-         """
-         Deprecated method.
-         """
-         warnings.warn("merge_pages has been deprecated in favour of merge_templates",
+        """
+        Deprecated method.
+        """
+        warnings.warn("merge_pages has been deprecated in favour of merge_templates",
                       category=DeprecationWarning,
-                      stacklevel=2)         
-         self.merge_templates(replacements, "page_break")
+                      stacklevel=2)
+        self.merge_templates(replacements, "page_break")
 
     def merge(self, parts=None, **replacements):
         if not parts:
@@ -254,6 +280,33 @@ def merge(self, parts=None, **replacements):
                     self.__merge_field(part, field, replacement)
 
     def __merge_field(self, part, field, text):
+        if field.startswith('IMAGE:'):
+            _, img_name = field.split(':')
+            inline_img_el = part.find('.//wp:docPr[@title="{}"]/..'.format(img_name), namespaces=NAMESPACES)
+            if inline_img_el:
+                embed_node = inline_img_el.find('.//a:blip', namespaces=NAMESPACES)
+                if embed_node:
+                    # generate a random id and add to the media list for later export to the media folder in the zip file
+                    img_id = 'MMR{}'.format(randint(10000000, 999999999))
+                    self.media[img_id] = text
+
+                    # add a relationship
+                    last_img_relationship = \
+                    self.rels.findall('{%(ns)s}Relationship[@Type="%(od)s/image"]' % self.RELS_NAMESPACES)[-1]
+                    new_img_relationship = deepcopy(last_img_relationship)
+                    new_img_relationship.set('Id', img_id)
+                    new_img_relationship.set('Target', '/media/{}.png'.format(img_id))
+                    self.rels.getroot().append(new_img_relationship)
+
+                    # replace the embed attrib with the new image_id
+                    embed_node = inline_img_el.find('.//a:blip', namespaces=NAMESPACES)
+                    embed_attr = embed_node.attrib.keys()[0]
+                    embed_node.attrib[embed_attr] = img_id
+                # mark as done
+                inline_img_el.find('wp:docPr', namespaces=NAMESPACES).attrib['title'] = 'replaced_image_{}'.format(
+                    img_id)
+            return
+
         for mf in part.findall('.//MergeField[@name="%s"]' % field):
             children = list(mf)
             mf.clear()  # clear away the attributes