# props2csv.py
# usage:
#    props2csv.py -d <directory> -c <csv file path>
#
#    <directory> is a folder containing .fzp files.
#    save a csv file of props, tags, etc. to <csv file path>
import csv
import getopt
import os
import re
import sys
import xml.dom
import xml.dom.minidom
import xml.parsers.expat
  7. def usage():
  8. print """
  9. usage:
  10. props2csv.py -d <directory> -c <csv file path>
  11. <directory> is a folder containing .fzp files.
  12. save a csv file of props, tags, etc. to <csv file path>
  13. """
  14. def main():
  15. try:
  16. opts, args = getopt.getopt(sys.argv[1:], "hd:c:", ["help", "directory", "csv"])
  17. except getopt.GetoptError, err:
  18. # print help information and exit:
  19. print str(err) # will print something like "option -a not recognized"
  20. usage()
  21. sys.exit(2)
  22. outputDir = None
  23. csvPath = None
  24. for o, a in opts:
  25. #print o
  26. #print a
  27. if o in ("-d", "--directory"):
  28. outputDir = a
  29. elif o in ("-c", "--csv"):
  30. csvPath = a
  31. elif o in ("-h", "--help"):
  32. usage()
  33. sys.exit(2)
  34. else:
  35. assert False, "unhandled option"
  36. if not outputDir:
  37. usage()
  38. sys.exit(2)
  39. if not csvPath:
  40. usage()
  41. sys.exit(2)
  42. writer = None
  43. file = None
  44. try:
  45. file = open(csvPath, 'wb')
  46. writer = csv.writer(file, delimiter=',')
  47. writer.writerow(["fzp","location","title","description","family","props","tags","taxonomy"] )
  48. except:
  49. print "unable to save to", csvPath
  50. sys.exit(2)
  51. names = []
  52. for filename in os.listdir(outputDir):
  53. if (filename.endswith(".fzp")):
  54. fzpFilename = os.path.join(outputDir, filename)
  55. try:
  56. dom = xml.dom.minidom.parse(fzpFilename)
  57. except xml.parsers.expat.ExpatError, err:
  58. print str(err), fzpFilename
  59. continue
  60. theLine = filename + ","
  61. fzp = dom.documentElement
  62. titleText = ""
  63. titles = fzp.getElementsByTagName("title")
  64. for title in titles:
  65. titleText = getText(title.childNodes)
  66. break # assume only one title
  67. taxonomyText = ""
  68. taxonomies = fzp.getElementsByTagName("taxonomy")
  69. for taxonomy in taxonomies:
  70. taxonomyText = getText(taxonomy.childNodes)
  71. break # assume only one title
  72. location = "core"
  73. if "contrib" in fzpFilename:
  74. location = "contrib"
  75. elif "resource" in fzpFilename:
  76. location = "resources"
  77. descriptionText = ""
  78. descriptions = fzp.getElementsByTagName("description")
  79. for description in descriptions:
  80. descriptionText = getText(description.childNodes)
  81. break # assume only one description
  82. tagsText = ""
  83. tags = fzp.getElementsByTagName("tag")
  84. for tag in tags:
  85. tagsText += getText(tag.childNodes) + "\n"
  86. familyText = ""
  87. propertiesText = ""
  88. properties = fzp.getElementsByTagName("property")
  89. for property in properties:
  90. name = property.getAttribute('name')
  91. value = getText(property.childNodes)
  92. propertiesText += name + ":" + value + "\n"
  93. if name == "family":
  94. familyText = value
  95. writer.writerow([filename, location, titleText.encode("utf-8"), descriptionText.encode("utf-8"), familyText.encode("utf-8"), propertiesText.encode("utf-8"), tagsText.encode("utf-8"), taxonomyText.encode("utf-8")])
  96. if file:
  97. file.close()
  98. def getText(nodelist):
  99. rc = []
  100. for node in nodelist:
  101. if node.nodeType == node.TEXT_NODE:
  102. rc.append(node.data)
  103. return ''.join(rc)
  104. if __name__ == "__main__":
  105. main()