# gitarch.py # # Copyright (C) 2011 Jan Stancek <jan@stancek.eu> # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA """ git-arch pure python implementation """ __author__ = """Copyright Jan Stancek 2011""" import socket import errno import os import sys import getopt import traceback import logging BUF_SIZE = 1024*16 log = logging def recv_safe(sock, bufSize, flags=0): n = 0 while True: try: n = sock.recv(bufSize, flags) break except socket.error, (value, message): if value != errno.EINTR and value != errno.EAGAIN: raise else: log.info('recv_safe: got an EINTR/AGAIN, trying again') return n def send_safe(sock, str): n = 0 while True: try: n = sock.send(str) break except socket.error, (value, message): if value != errno.EINTR and value != errno.EAGAIN: raise else: log.info('send_safe: got an EINTR/AGAIN, trying again') return n def send_fully(s, str): buflen = len(str) n = 0 while buflen > 0: n = send_safe(s, str) if (n < 0): log.info('send_fully: exception should be thrown in case of error') return False str = str[n:] buflen = buflen - n return True class GitRemoteClient: HDR_LEN_SIZE = 4 def __init__(self): self.callBack = None self.callBackData = None self.msgLen = 0 self.msgLenRecieved = 0 self.previousChunk = '' self.lastMsg = '' self.sideBand = False self.sideBandType = 0 def getArchive(self, outname, host, port, basedir, treeish, format): log.debug('outname: %s, host: %s, port: %s, basedir: %s, treeish: %s, format: %s', outname, host, port, basedir, treeish, format) request = self._prepareRequest(host, basedir, treeish, format) log.debug('Request for remote side %s', request) outfile = open(outname, 'wb') sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) sock.connect((host, port)) send_fully(sock, request) ret = self._readSocket(sock, self._archiveWriteCallBack, outfile) sock.close() outfile.flush() outfile.close() return ret def _archiveWriteCallBack(self, outfile, msg, sideBandType): log.debug('archiveWriteCallBack %s', len(msg)) if sideBandType == '\1': outfile.write(msg) pass def _prepareRequest(self, host, basepath, treeish, format): op = "git-upload-archive %s\0host=%s\0" % (basepath, host) arg1 = "argument %s\0" % treeish arg2 = "argument --format=%s\0" % format arg3 = "" op = self._wrapToGitMsg(op) arg1 = self._wrapToGitMsg(arg1) arg2 = self._wrapToGitMsg(arg2) arg3 = '0000' return op+arg1+arg2+arg3 def _wrapToGitMsg(self, msg, sideBand = 0): msglen= len(msg) + GitRemoteClient.HDR_LEN_SIZE if sideBand != 0: msg = sideBand + msg msglen = msglen + 1 msglenhex = hex(msglen)[2:] msglenhex = msglenhex.zfill(4) return '%s%s' % (msglenhex, msg) def _readSocket(self, sock, msgCallBack, msgCallBackData): data = ' ' self.callBack = msgCallBack self.callBackData = msgCallBackData while True: try: data = '' data = recv_safe(sock, BUF_SIZE) log.debug('sock read: %s', len(data)) if len(data) <= 0: break self._processChunk(data) except socket.error, (value,message): log.warn('info: socket.error %s %s', value, message) raise log.debug('Socket finished reading') if self.msgLen == self.msgLenRecieved: log.debug('Checking if last message is 0000') if self.previousChunk=='0000': log.debug('It is, all is OK') return 0 return 1 def _processChunk(self, data): if len(self.previousChunk) > 0: log.debug('previous chunk len: %s', len(self.previousChunk)) data = self.previousChunk + data self.previousChunk = '' chunkLen = len(data) chunkRemaining = chunkLen chunkPos = 0 headerSize = 4 log.debug('processChunk, chunk len: %s', len(data)) while chunkRemaining > 0: if self.sideBand: headerSize = 5 if self.msgLen == self.msgLenRecieved: log.debug('new message') if chunkRemaining >= headerSize: msgLenStr = data[chunkPos:chunkPos + GitRemoteClient.HDR_LEN_SIZE] chunkPos = chunkPos + GitRemoteClient.HDR_LEN_SIZE chunkRemaining = chunkRemaining - GitRemoteClient.HDR_LEN_SIZE self.lastMsg = '' self.msgLen = int(msgLenStr, 16) self.msgLenRecieved = GitRemoteClient.HDR_LEN_SIZE if self.sideBand: self.sideBandType = data[chunkPos:chunkPos + 1] chunkPos = chunkPos + 1 chunkRemaining = chunkRemaining - 1 self.msgLenRecieved = self.msgLenRecieved + 1 # empty message denotes start of data in sideband format if self.msgLen == 0: self.msgLen = 4 self.sideBand = True log.debug('start of sideband data') log.debug('read header') log.debug('msgLen: %s', self.msgLen) log.debug('self.msgLenRecieved: %s, chunkPos: %s, chunkRemaining: %s', self.msgLenRecieved, chunkPos, chunkRemaining) else: log.debug('msg too short, skipping this round') self.previousChunk = data[chunkPos:] chunkPos = chunkLen chunkRemaining = 0 else: msgRemaining = self.msgLen - self.msgLenRecieved if chunkRemaining >= msgRemaining: log.debug('the whole message was in chunk') msg = data[chunkPos:chunkPos+msgRemaining] self.lastMsg = self.lastMsg + msg if self.callBack: self.callBack(self.callBackData, self.lastMsg, self.sideBandType) self.msgLenRecieved = self.msgLenRecieved + msgRemaining chunkPos = chunkPos + msgRemaining chunkRemaining = chunkRemaining - msgRemaining log.debug('self.msgLenRecieved: %s, chunkPos: %s, chunkRemaining: %s', self.msgLenRecieved, chunkPos, chunkRemaining) else: log.debug('some of the message was in chunk') msg = data[chunkPos:] self.lastMsg = self.lastMsg + msg self.msgLenRecieved = self.msgLenRecieved + chunkRemaining chunkPos = chunkLen chunkRemaining = 0 log.debug('self.msgLenRecieved: %s, chunkPos: %s, chunkRemaining: %s', self.msgLenRecieved, chunkPos, chunkRemaining) def getArchive(outname, host, port, basedir, treeish): ret = 1 format = outname[-3:] if format != 'zip' and format != 'tar': return ret try: git = GitRemoteClient() log.info('Getting archive %s %s %s %s %s %s', outname, host, port, basedir, treeish, format) ret = git.getArchive(outname, host, port, basedir, treeish, format) if ret == 0: if format == 'zip': ret = os.system('zip -T %s' % outname) if format == 'tar': ret = os.system('tar -tf %s' % outname) except: traceback.print_exc() if ret > 255: ret = 1 log.info('Getting archive, ret code: %s', ret) return ret def installFromGit( host, port, basedir, treeish, outname, targetdir): log.info('installFromGit %s %s %s %s %s %s', host, port, basedir, treeish, outname, targetdir) ret = getArchive(outname, host, port, basedir, treeish) if ret == 0: ret = installArchive(outname, targetdir) return ret def installArchive(outname, targetdir): ret = 1 log.info('Installing archive %s to %s', outname, targetdir) if not os.path.exists(targetdir): log.info('Creating dir %s', targetdir) os.makedirs(targetdir) if not os.path.exists(targetdir): return ret else: log.warn('Directory already exist') archtype = outname[-3:] if archtype == 'zip': ret = installZip(outname, targetdir) if archtype == 'tar': ret = installTar(outname, targetdir) log.info('installArchive ret code: %s', ret) return ret def installZip(outname, targetdir): cmd = 'unzip -o %s -d %s' % (outname, targetdir) log.info(cmd) return os.system(cmd) def installTar(outname, targetdir): cmd = 'tar -C %s --overwrite xf %s' % (targetdir, outname) return os.system(cmd) user_options = [('help', 'prints help'), ('outname', 'output file name, must end with tar or zip'), ('hostname', 'target host where git daemon is running'), ('port', 'port where git daemon is running'), ('basedir', 'location of git repo on host'), ('treeish', 'git tree-ish, which identifies some tree'), ('debug', 'show debug messages'), ('quiet', 'less messages'), ] def usage(): print 'Common switches:' for line in user_options: print ' --%s %s' % (line[0], line[1]) print print 'Example: ' print ' You have your git repo on host (192.168.0.118) at /usr/src/test' print ' You start git daemon:' print ' git daemon --verbose --base-path=/usr/src --enable=upload-archive --export-all /usr/src/test' print ' Using git you would do:' print ' git archive --remote=git://192.168.0.118/test master:etc/yum > /tmp/dir.tar ' print ' Using gitarch.py:' print ' python gitarch.py --outname=/tmp/dir.zip --hostname=192.168.0.118 --basedir=/test --treeish=HEAD:etc/yum' def main(): outname = None hostname = None port = 9418 basedir = None treeish = None debug = False quiet = False try: opts, args = getopt.getopt(sys.argv[1:], "hdqv", ["help", "outname=", "hostname=", "port=", "basedir=", "treeish=", "debug", "quiet"]) except getopt.GetoptError, err: print str(err) usage() sys.exit(2) for o, a in opts: if o in ("-h", "--help"): usage() sys.exit() elif o in ("-d", "--debug", "-v"): debug = True elif o in ("-q", "--quiet"): quiet = True elif o in ("--hostname="): hostname = a elif o in ("--outname="): outname = a elif o in ("--port="): port = a elif o in ("--basedir="): basedir = a elif o in ("--treeish="): treeish = a else: print o, a assert False, "unhandled option" if not outname or not hostname or not port or not basedir or not treeish: usage() sys.exit() LOG_FORMAT = '%(asctime)s - %(levelname)s - %(filename)s - ' \ '%(funcName)s:%(lineno)s - %(message)s' if (debug): LOG_LEVEL = log.DEBUG elif (quiet): LOG_LEVEL = log.CRITICAL else: LOG_LEVEL = log.INFO LOG_FORMAT = '%(message)s' formatter = logging.Formatter(LOG_FORMAT) stdout_handler = logging.StreamHandler(sys.stdout) stdout_handler.setFormatter(formatter) logger = logging.getLogger('') logger.addHandler(stdout_handler) logger.setLevel(LOG_LEVEL) ret = getArchive(outname, hostname, port, basedir, treeish) sys.exit(ret) if __name__ == '__main__': main()