You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
91 lines
2.3 KiB
91 lines
2.3 KiB
#!/usr/bin/env python
|
|
#
|
|
# This parser parses the output from Phil Harvey's exiftool (version 9.02)
|
|
# and converts it to xml format. It reads exiftool's output from stdin and
|
|
# writes the xml format to stdout.
|
|
#
|
|
# In order to get the raw information from exiftool, we need to enable the verbose
|
|
# flag (-v2) of exiftool.
|
|
#
|
|
# Usage:
|
|
# exiftool -v2 img.jpg | ./parser.py >> output.xml
|
|
#
|
|
#
|
|
|
|
import os
|
|
import sys
|
|
import re
|
|
|
|
from xml.sax.saxutils import escape

# The scanner looks for the following two kinds of text in exiftool's
# verbose (-v2) output:
#
# 1. A tag entry, which spans two lines:
#
#        | | | x) name = value
#        | | | - Tag 0x1234
#
#    (the first line may instead read "x) name (SubDirectory) -->").
#    Group 1 is the whole entry, group 2 is its run of "| " prefixes.
#
# 2. An IFD indicator:
#
#        | | | + [xxx directory with xx entries]
#
#    Group 3 is the whole indicator, group 4 is its run of "| " prefixes.
#
# Raw strings are used so the regex escapes are not misread as (invalid)
# Python string escapes.
_ENTRY_RE = re.compile(
    r"(((?:\| )+)[0-9]*\)(?:(?:.*? = .*?)|(?:.*? \(SubDirectory\) -->))\n.*?- Tag 0x[0-9a-f]{4})"
    r"|"
    r"(((?:\| )*)\+ \[.*? directory with [0-9]+ entries]$)",
    re.M)

# Patterns applied to a single tag entry; compiled once instead of per tag.
_TAG_ID_RE = re.compile(r"0x[0-9a-f]{4}")
_TAG_NAME_RE = re.compile(r"[0-9]*?\).*?(?:(?: = )|(?: \(SubDirectory\) -->))")
_SUBDIR_RE = re.compile(r"\(SubDirectory\) -->")
_PAREN_VALUE_RE = re.compile(r"\(.*\)\n")
_PLAIN_VALUE_RE = re.compile(r"=.*\n")

# Placeholder written when a tag has no usable value.
_NO_VALUE = "NO_VALUE"


def _xml_text(s):
    """Escape ``s`` for use as XML character data."""
    return escape(s)


def _xml_attr(s):
    """Escape ``s`` for use inside a double-quoted XML attribute."""
    return escape(s, {'"': "&quot;"})


def _tag_value(name, entry):
    """Return the value to report for one tag entry.

    ``entry`` is the two-line text matched by group 1 of ``_ENTRY_RE``.
    SubDirectory pointers and values containing "[snip]" yield "NO_VALUE".
    Otherwise the text inside a trailing "(...)" on the first line is
    preferred; if there is none, or the tag is named 'Model', everything
    after "= " is used.
    """
    if _SUBDIR_RE.search(entry):
        return _NO_VALUE
    in_parens = _PAREN_VALUE_RE.search(entry)
    if name != 'Model' and in_parens:
        # Strip the opening "(" and the closing ")\n".
        value = in_parens.group(0)[1:-2]
    else:
        # Strip the leading "= " and the trailing newline.
        value = _PLAIN_VALUE_RE.search(entry).group(0)[2:-1]
    if "[snip]" in value:
        return _NO_VALUE
    return value


def exif_to_xml_lines(text):
    """Convert exiftool -v2 output into the lines of an XML document.

    Args:
        text: the complete verbose output of exiftool as one string.

    Returns:
        A list of strings (without trailing newlines): the XML declaration,
        the opening ``<exif>``, one ``<tag>`` element per tag entry found,
        and the closing ``</exif>``. Attribute values and element text are
        XML-escaped.
    """
    lines = ['<?xml version="1.0" encoding="utf-8"?>', "<exif>"]

    # Stack of the IFD names enclosing the current position; the number of
    # "| " prefixes on a line gives its nesting depth.
    ifds = []

    for entry, entry_prefix, indicator, indicator_prefix in _ENTRY_RE.findall(text):
        if indicator:
            skip = len(indicator_prefix)
            # Skip the prefixes plus "+ [" to reach the directory name.
            ifd = indicator[skip + 3:].split()[0]
            # "//" keeps the depth an int on Python 3 as well as Python 2.
            depth = skip // 2 + 1
            # Drop every IFD at this depth or deeper, then enter the new one.
            # Slicing never raises, unlike popping a possibly short stack.
            del ifds[depth - 1:]
            ifds.append(ifd)
            continue

        # A tag at depth d belongs to the d-th IFD on the stack; leave any
        # deeper IFDs that have ended.
        del ifds[len(entry_prefix) // 2:]

        tag_id = _TAG_ID_RE.search(entry).group(0)
        name = _TAG_NAME_RE.search(entry).group(0).split()[1]
        value = _tag_value(name, entry)
        # A tag seen before any IFD indicator gets an empty ifd attribute
        # rather than crashing on an empty stack.
        ifd = ifds[-1] if ifds else ""

        lines.append(' <tag ifd="%s" id="%s" name="%s">%s</tag>' % (
            _xml_attr(ifd), _xml_attr(tag_id), _xml_attr(name),
            _xml_text(value)))

    lines.append("</exif>")
    return lines


def main():
    """Read exiftool -v2 output from stdin and write the XML to stdout."""
    for line in exif_to_xml_lines(sys.stdin.read()):
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print(line)


if __name__ == "__main__":
    main()
|