Update Multiple HTML Files with Python

You make a change to the index.html file and need to make all the other pages consistent with it. The pages' JavaScript and CSS can each live in its own file that every other page references, but what about changes in HTML and text? This program, with the proper tags embedded in each file, updates any number of files based on a template. The content that is to be updated, for example a navigation section, is wrapped in these IN and OUT tags, exactly:

<!--pyTagI [a-z]*-->section to be updated<!--pyTagO [a-z]*-->

Where [a-z]* is any run of lowercase letters (no spaces or special characters), e.g. "menu" or "navbar".

Download the site's pages into the proper folders, per the structure outlined in the Python program.

# MULTIPLE HTML FILES UPDATER
# All files must adhere to tagI and tagO format below,
# wrapped around templated content.
# Template dictionary and files-to-be-updated are referenced
# to build a new HTML string, written to file.

# (c) John Oberlin 2017
# oberl.info

# Directory architecture:

#   + html-files-update [folder]
#       - updater.py
#       + html-files [folder]
#       + template-html-file [folder]
#       + updated-html-files [folder]


import os
import re
from os import listdir
# May want to soup final string if HTML arrangement gets wonky
#from bs4 import BeautifulSoup as soup


# PyTag string format that wraps template content in HTML files.
# Both are regex patterns: a fixed prefix, a tag name made of lowercase
# letters, and the closing "-->" of an HTML comment.
tagI = "<!--pyTagI [a-z]*-->"  # Make name more regex dynamic
tagO = "<!--pyTagO [a-z]*-->"
### CREATE DICTIONARY FROM TEMPLATE HTML
# Get template HTML file (will take first file in directory)
tempFiles = listdir("template-html-file")
if not tempFiles:
    # Fail with a readable message instead of a bare IndexError
    raise SystemExit("No template file found in template-html-file")

# os.path.join uses the platform's separator; a hard-coded backslash
# only forms a valid path on Windows
tempFile = os.path.join("template-html-file", tempFiles[0])
with open(tempFile,"r") as f:
    template = f.read()

# The dictionary: tag name -> template content found between that
# tag's pyTagI and the pyTagO that follows it
tmplt = {}

# Compile the patterns once so they can be searched from a position
tagIRe = re.compile(tagI)
tagORe = re.compile(tagO)

# Scan the template left to right; pos is where the next search starts
pos = 0

while True:

    # First pyTagI at or after the current position
    tagIGh = tagIRe.search(template, pos)
    if not tagIGh:
        break

    # The closing tag is looked for only AFTER the opening tag, so a
    # stray pyTagO earlier in the text cannot be paired with it
    tagOGh = tagORe.search(template, tagIGh.end())
    if not tagOGh:
        # Unterminated section: nothing sensible can be extracted
        print("Template tag " + tagIGh.group() + " has no closing pyTagO.")
        break

    # Tag name sits between "<!--pyTagI " (11 chars) and "-->" (3 chars)
    tmplt[tagIGh.group()[11:-3]] = template[tagIGh.end():tagOGh.start()]

    # Continue after the section just parsed
    pos = tagOGh.end()
### GET FILES TO BE UPDATED
origFiles = listdir("html-files")

# Compiled once and reused for every file
openTagRe = re.compile(tagI)

for i in origFiles:

    # Build the path with the platform's separator and read the file
    with open(os.path.join("html-files", i),"r") as f:
        oFile = f.read()

    # Make a list of [tag name, content start index, content end index].
    # Indices are absolute offsets into oFile: content starts right after
    # the pyTagI and ends right before its matching pyTagO.
    oFileLst = []

    # Absolute position in oFile where the next search begins. Scanning
    # by position (instead of re-searching the whole file by tag name)
    # keeps the indices correct when a tag name is used more than once.
    scanPos = 0

    while True:

        # Next pyTagI at or after the scan position
        tagIGh = openTagRe.search(oFile, scanPos)
        if not tagIGh:
            break

        # Tag name sits between "<!--pyTagI " (11 chars) and "-->" (3 chars)
        tagName = tagIGh.group()[11:-3]

        # The name is plain lowercase letters, so the matching closing
        # tag is a literal string; look for it after the opening tag
        closeTag = "<!--pyTagO " + tagName + "-->"
        closeAt = oFile.find(closeTag, tagIGh.end())

        if closeAt == -1:
            # No closing tag: report it and keep scanning past this tag
            print("Tag " + tagName + " in " + i + " has no closing pyTagO.")
            scanPos = tagIGh.end()
            continue

        oFileLst.append([tagName, tagIGh.end(), closeAt])

        # Resume after the section just recorded
        scanPos = closeAt + len(closeTag)

Notice that the following is indented under the previous code block.

    ### CONSTRUCT NEW HTML STRING TO WRITE TO NEW FILE
    # Walk the recorded sections in order. For every tag known to the
    # template, copy the file's text up to the end of its pyTagI, then
    # the template's content in place of the file's own content.
    # Sections whose tag is not in the template are left untouched.
    pieces = []

    # Index in oFile from which unchanged text is copied next; starts at
    # the beginning of the file and jumps to each replaced section's pyTagO
    stFrom = 0

    for tagName, contStart, contEnd in oFileLst:
        if tagName in tmplt:
            # File's own text up to the end of this pyTagI
            pieces.append(oFile[stFrom:contStart])

            # Template's associated content
            pieces.append(tmplt[tagName])

            # Next copy resumes at this section's pyTagO
            stFrom = contEnd
        else:
            print("Tag " + tagName + " not in template dictionary.")

    # Always append the rest of the file, so nothing after the last
    # replaced section is lost (also covers files with one or no tags)
    pieces.append(oFile[stFrom:])
    newHtml = "".join(pieces)

    # Write new string into new file in updated files folder
    newFile = os.path.join("updated-html-files", i)
    with open(newFile,"w") as f:
        f.write(newHtml)


# Wait for Enter so a console window opened just for this script does
# not close before the user sees the result.
# raw_input exists only on Python 2; on Python 3 the equivalent is input.
try:
    _prompt = raw_input
except NameError:
    _prompt = input
done = _prompt("Finished")