# gitarch.py
#
# Copyright (C) 2011 Jan Stancek <jan@stancek.eu>
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
"""
git-arch pure python implementation
"""
__author__ = """Copyright Jan Stancek 2011"""


import errno
import getopt
import logging
import os
import socket
import subprocess
import sys
import traceback

# Maximum number of bytes requested from the socket per recv() call (16 KiB).
BUF_SIZE = 16 * 1024
# The logging module itself is used as the module-wide logger object.
log = logging


def recv_safe(sock, bufSize, flags=0):
    """Receive up to ``bufSize`` bytes from ``sock``, retrying on EINTR/EAGAIN.

    sock    -- object with a socket-like ``recv(bufSize, flags)`` method
    bufSize -- maximum number of bytes to read in one call
    flags   -- flags passed through to ``recv``

    Returns whatever ``sock.recv`` returns (an empty string means the peer
    closed the connection).  Any ``socket.error`` other than EINTR/EAGAIN is
    re-raised unchanged.
    """
    while True:
        try:
            return sock.recv(bufSize, flags)
        except socket.error as err:
            # Look at the errno attribute instead of unpacking the exception:
            # errors carrying a single argument (e.g. socket.timeout) cannot
            # be unpacked into (value, message) and must simply propagate.
            if err.errno not in (errno.EINTR, errno.EAGAIN):
                raise
            log.info('recv_safe: got an EINTR/AGAIN, trying again')

def send_safe(sock, str):
    """Send ``str`` on ``sock`` once, retrying on EINTR/EAGAIN.

    sock -- object with a socket-like ``send(data)`` method
    str  -- data to send (the name shadows the builtin; kept for callers)

    Returns the number of bytes ``sock.send`` reports as sent, which may be
    less than ``len(str)``.  Any ``socket.error`` other than EINTR/EAGAIN is
    re-raised unchanged.
    """
    while True:
        try:
            return sock.send(str)
        except socket.error as err:
            # Look at the errno attribute instead of unpacking the exception:
            # errors carrying a single argument (e.g. socket.timeout) cannot
            # be unpacked into (value, message) and must simply propagate.
            if err.errno not in (errno.EINTR, errno.EAGAIN):
                raise
            log.info('send_safe: got an EINTR/AGAIN, trying again')

def send_fully(s, str):
    """Send all of ``str`` on socket ``s``, looping over partial sends.

    Each round hands the still-unsent tail to send_safe() and drops the
    part that was accepted.  Returns True once everything has been sent and
    False if send_safe() ever reports a negative count (errors are normally
    expected to surface as exceptions instead).
    """
    pending = str
    remaining = len(pending)

    while remaining > 0:
        sent = send_safe(s, pending)
        if sent < 0:
            log.info('send_fully: exception should be thrown in case of error')
            return False
        pending = pending[sent:]
        remaining -= sent
    return True


class GitRemoteClient:
    """Client for the ``git-upload-archive`` service of a git daemon.

    Messages on the wire are length-prefixed: a 4 character hexadecimal
    length (which counts the 4 header characters themselves) followed by the
    payload.  After the server sends an empty ``0000`` message the stream
    switches to side-band mode, in which every message carries one extra
    byte after the length that identifies the band the payload belongs to.

    The parser compares received data against plain string literals, i.e.
    it expects socket data to be ``str`` as on Python 2.
    """
    HDR_LEN_SIZE = 4

    def __init__(self):
        # Callback invoked as callBack(callBackData, message, sideBandType)
        # for every completely received message.
        self.callBack = None
        self.callBackData = None
        self._resetState()

    def _resetState(self):
        """Put the incremental message parser back into its initial state."""
        self.msgLen = 0            # total length of the current message
        self.msgLenRecieved = 0    # how much of the current message was seen
        self.previousChunk = ''    # unparsed tail carried over to next chunk
        self.lastMsg = ''          # payload of the current message so far
        self.sideBand = False      # True once side-band framing is active
        self.sideBandType = 0      # band byte of the current message

    def getArchive(self, outname, host, port, basedir, treeish, format):
        """Download an archive of ``treeish`` from a git daemon into a file.

        outname -- path of the file the archive data is written to
        host    -- host name or address of the git daemon
        port    -- TCP port of the git daemon (int or numeric string)
        basedir -- repository path as seen by the daemon
        treeish -- tree-ish to archive
        format  -- value for the remote ``--format=`` argument

        Returns 0 when the stream ended with the terminating ``0000``
        message, 1 otherwise.  Socket and file errors propagate as
        exceptions; the socket and the output file are closed either way.
        """
        log.debug('outname: %s, host: %s, port: %s, basedir: %s, treeish: %s, format: %s',
                  outname, host, port, basedir, treeish, format)
        request = self._prepareRequest(host, basedir, treeish, format)
        log.debug('Request for remote side %s', request)

        outfile = open(outname, 'wb')
        try:
            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
            try:
                # The command line hands the port over as a string, but
                # connect() requires an integer.
                sock.connect((host, int(port)))
                send_fully(sock, request)
                ret = self._readSocket(sock, self._archiveWriteCallBack, outfile)
            finally:
                sock.close()
            outfile.flush()
        finally:
            outfile.close()
        return ret

    def _archiveWriteCallBack(self, outfile, msg, sideBandType):
        """Message callback: store band 1 payloads, log the other bands.

        Only messages tagged with band ``'\\1'`` are archive data.  In git's
        side-band convention band 2 carries progress output and band 3 an
        error message from the remote side; both are logged so that a remote
        failure is visible.  Messages received before side-band mode starts
        (band value 0) are ignored.
        """
        log.debug('archiveWriteCallBack %s', len(msg))
        if sideBandType == '\1':
            outfile.write(msg)
        elif sideBandType == '\2':
            log.debug('remote: %s', msg)
        elif sideBandType == '\3':
            log.warning('remote error: %s', msg)

    def _prepareRequest(self, host, basepath, treeish, format):
        """Build the complete upload-archive request as one string.

        The request consists of the service line, one ``argument`` message
        for the tree-ish, one for ``--format=<format>`` and a closing
        ``0000`` message.
        """
        parts = [
            "git-upload-archive %s\0host=%s\0" % (basepath, host),
            "argument %s\0" % treeish,
            "argument --format=%s\0" % format,
        ]
        wrapped = [self._wrapToGitMsg(part) for part in parts]
        # '0000' is an empty message that terminates the argument list.
        return ''.join(wrapped) + '0000'

    def _wrapToGitMsg(self, msg, sideBand = 0):
        """Prefix ``msg`` with its 4 hex digit length header.

        msg      -- payload
        sideBand -- optional one character band identifier that is placed
                    between the header and the payload; 0 means none

        The encoded length covers the header, the band character (if any)
        and the payload.  Raises ValueError when that length does not fit
        into four hexadecimal digits, because such a header could not be
        parsed by the receiver.
        """
        msglen = len(msg) + GitRemoteClient.HDR_LEN_SIZE
        if sideBand != 0:
            msg = sideBand + msg
            msglen = msglen + 1

        if msglen > 0xffff:
            raise ValueError('message too long for a 4 digit length header: %d'
                             % msglen)
        return '%04x%s' % (msglen, msg)

    def _readSocket(self, sock, msgCallBack, msgCallBackData):
        """Read ``sock`` until the peer closes it, dispatching each message.

        msgCallBack     -- called as msgCallBack(msgCallBackData, msg, band)
                           for every complete message
        msgCallBackData -- opaque first argument for the callback

        Returns 0 if the last message was complete and the stream ended with
        a ``0000`` message, 1 otherwise.  Socket errors are logged and
        re-raised.
        """
        self.callBack = msgCallBack
        self.callBackData = msgCallBackData
        # Start from a clean parser so leftovers of an earlier transfer on
        # the same instance cannot be glued onto this stream.
        self._resetState()

        while True:
            try:
                data = recv_safe(sock, BUF_SIZE)
            except socket.error as err:
                log.warning('info: socket.error %s', err)
                raise
            log.debug('sock read: %s', len(data))
            if len(data) <= 0:
                break
            self._processChunk(data)

        log.debug('Socket finished reading')
        if self.msgLen == self.msgLenRecieved:
            log.debug('Checking if last message is 0000')
            # In side-band mode the bare 4 character '0000' is shorter than
            # a side-band header, so _processChunk leaves it in
            # previousChunk; finding it there marks a clean end of stream.
            if self.previousChunk == '0000':
                log.debug('It is, all is OK')
                return 0
        return 1

    def _processChunk(self, data):
        """Feed one chunk of received data into the message parser.

        Chunks may split messages and headers at arbitrary positions: an
        incomplete header is kept in ``self.previousChunk`` and prepended to
        the next chunk, a partial payload is accumulated in
        ``self.lastMsg``.  The callback fires once per completed message.
        """
        if len(self.previousChunk) > 0:
            log.debug('previous chunk len: %s', len(self.previousChunk))
            data = self.previousChunk + data
            self.previousChunk = ''

        hdrLen = GitRemoteClient.HDR_LEN_SIZE
        chunkLen = len(data)
        chunkRemaining = chunkLen
        chunkPos = 0

        log.debug('processChunk, chunk len: %s', chunkLen)

        while chunkRemaining > 0:
            # Evaluated on every round because side-band mode can be
            # switched on in the middle of a chunk.
            if self.sideBand:
                headerSize = hdrLen + 1
            else:
                headerSize = hdrLen

            if self.msgLen == self.msgLenRecieved:
                # Previous message is complete, a header is expected next.
                log.debug('new message')
                if chunkRemaining < headerSize:
                    log.debug('msg too short, skipping this round')
                    self.previousChunk = data[chunkPos:]
                    break

                msgLenStr = data[chunkPos:chunkPos + hdrLen]
                chunkPos = chunkPos + hdrLen
                chunkRemaining = chunkRemaining - hdrLen
                self.lastMsg = ''
                self.msgLen = int(msgLenStr, 16)
                self.msgLenRecieved = hdrLen

                if self.sideBand:
                    self.sideBandType = data[chunkPos:chunkPos + 1]
                    chunkPos = chunkPos + 1
                    chunkRemaining = chunkRemaining - 1
                    self.msgLenRecieved = self.msgLenRecieved + 1

                # empty message denotes start of data in sideband format
                if self.msgLen == 0:
                    # Count the header as the whole message so that the
                    # "message complete" test above holds again.
                    self.msgLen = 4
                    self.sideBand = True
                    log.debug('start of sideband data')

                log.debug('read header')
                log.debug('msgLen: %s', self.msgLen)
                log.debug('self.msgLenRecieved: %s, chunkPos: %s, chunkRemaining: %s',
                          self.msgLenRecieved, chunkPos, chunkRemaining)
            else:
                msgRemaining = self.msgLen - self.msgLenRecieved
                if chunkRemaining >= msgRemaining:
                    log.debug('the whole message was in chunk')
                    msg = data[chunkPos:chunkPos + msgRemaining]
                    self.lastMsg = self.lastMsg + msg

                    if self.callBack:
                        self.callBack(self.callBackData, self.lastMsg, self.sideBandType)

                    self.msgLenRecieved = self.msgLenRecieved + msgRemaining
                    chunkPos = chunkPos + msgRemaining
                    chunkRemaining = chunkRemaining - msgRemaining
                    log.debug('self.msgLenRecieved: %s, chunkPos: %s, chunkRemaining: %s',
                              self.msgLenRecieved, chunkPos, chunkRemaining)
                else:
                    log.debug('some of the message was in chunk')
                    msg = data[chunkPos:]
                    self.lastMsg = self.lastMsg + msg
                    self.msgLenRecieved = self.msgLenRecieved + chunkRemaining
                    chunkPos = chunkLen
                    chunkRemaining = 0
                    log.debug('self.msgLenRecieved: %s, chunkPos: %s, chunkRemaining: %s',
                              self.msgLenRecieved, chunkPos, chunkRemaining)



def getArchive(outname, host, port, basedir, treeish):
    """Download an archive from a git daemon and verify it.

    outname -- output file; its last three characters ('zip' or 'tar')
               select the archive format
    host, port, basedir, treeish -- passed to GitRemoteClient.getArchive

    After a successful download the file is checked with ``zip -T`` or
    ``tar -tf``.  Returns 0 on success and 1 on any failure (unsupported
    format, download error, failed verification or an exception, whose
    traceback is printed).
    """
    ret = 1
    archFormat = outname[-3:]

    if archFormat not in ('zip', 'tar'):
        return ret

    try:
        git = GitRemoteClient()
        log.info('Getting archive %s %s %s %s %s %s', outname, host, port, basedir, treeish, archFormat)
        ret = git.getArchive(outname, host, port, basedir, treeish, archFormat)

        if ret == 0:
            # Run the checker without a shell so that spaces or shell
            # metacharacters in the file name cannot alter the command.
            if archFormat == 'zip':
                verifyCmd = ['zip', '-T', outname]
            else:
                verifyCmd = ['tar', '-tf', outname]
            ret = subprocess.call(verifyCmd)
    except Exception:
        # Only ordinary errors are turned into a return code;
        # KeyboardInterrupt and SystemExit are left to propagate.
        traceback.print_exc()
        ret = 1

    # Collapse every failure (including checker exit codes) to 1.
    if ret != 0:
        ret = 1

    log.info('Getting archive, ret code: %s', ret)
    return ret

def installFromGit( host, port, basedir, treeish, outname, targetdir):
    """Fetch an archive with getArchive() and unpack it with installArchive().

    Returns the getArchive() result if it is non-zero, otherwise the result
    of installArchive().
    """
    log.info('installFromGit %s %s %s %s %s %s',
             host, port, basedir, treeish, outname, targetdir)
    status = getArchive(outname, host, port, basedir, treeish)
    if status != 0:
        # download or verification failed, nothing to install
        return status
    return installArchive(outname, targetdir)

def installArchive(outname, targetdir):
    """Unpack archive ``outname`` into ``targetdir``, creating it if needed.

    The last three characters of ``outname`` select the unpacker: 'zip'
    uses installZip(), 'tar' uses installTar().  Returns the unpacker's
    result, or 1 when the directory cannot be created or the archive type
    is not recognised.
    """
    ret = 1
    log.info('Installing archive %s to %s', outname, targetdir)
    if os.path.exists(targetdir):
        log.warning('Directory already exist')
    else:
        log.info('Creating dir %s', targetdir)
        try:
            os.makedirs(targetdir)
        except OSError as err:
            # Report the failure through the return code like every other
            # error of this function instead of letting it escape.
            log.error('Cannot create dir %s: %s', targetdir, err)
            return ret

    archtype = outname[-3:]
    if archtype == 'zip':
        ret = installZip(outname, targetdir)
    elif archtype == 'tar':
        ret = installTar(outname, targetdir)
    log.info('installArchive ret code: %s', ret)
    return ret

def installZip(outname, targetdir):
    """Extract zip archive ``outname`` into ``targetdir`` with ``unzip -o``.

    The command is run without a shell, so file names containing spaces or
    shell metacharacters are passed to unzip literally.  Returns the exit
    code of unzip (0 on success), or 127 if unzip could not be started.
    """
    cmd = ['unzip', '-o', outname, '-d', targetdir]
    log.info(' '.join(cmd))
    try:
        return subprocess.call(cmd)
    except OSError as err:
        log.error('Cannot run unzip: %s', err)
        return 127

def installTar(outname, targetdir):
    """Extract tar archive ``outname`` into ``targetdir``, overwriting files.

    Runs ``tar -C <targetdir> --overwrite -xf <outname>`` without a shell.
    The extract options are given in dashed form: a dash-less ``xf`` is only
    recognised by tar as the very first argument, anywhere else it is taken
    as a file name and tar is left without an operation to perform.
    Returns the exit code of tar (0 on success), or 127 if tar could not be
    started.
    """
    cmd = ['tar', '-C', targetdir, '--overwrite', '-xf', outname]
    log.info(' '.join(cmd))
    try:
        return subprocess.call(cmd)
    except OSError as err:
        log.error('Cannot run tar: %s', err)
        return 127

# (long option name, help text) pairs listed by usage(), in display order.
user_options = [
    ('help', 'prints help'),
    ('outname', 'output file name, must end with tar or zip'),
    ('hostname', 'target host where git daemon is running'),
    ('port', 'port where git daemon is running'),
    ('basedir', 'location of git repo on host'),
    ('treeish', 'git tree-ish, which identifies some tree'),
    ('debug', 'show debug messages'),
    ('quiet', 'less messages'),
]

def usage():
    """Print the option list from ``user_options`` and a usage example.

    The text is assembled first and emitted with a single parenthesised
    print of one string, a form that produces the same output whether
    print is a statement or a function.
    """
    lines = ['Common switches:']
    for name, description in user_options:
        lines.append('    --%s %s' % (name, description))
    lines.extend([
        '',
        'Example: ',
        '  You have your git repo on host (192.168.0.118) at /usr/src/test',
        '  You start git daemon:',
        '      git daemon --verbose --base-path=/usr/src --enable=upload-archive --export-all /usr/src/test',
        '  Using git you would do:',
        '      git archive --remote=git://192.168.0.118/test master:etc/yum > /tmp/dir.tar ',
        '  Using gitarch.py:',
        '      python gitarch.py --outname=/tmp/dir.zip --hostname=192.168.0.118 --basedir=/test --treeish=HEAD:etc/yum',
    ])
    print('\n'.join(lines))

def main():
    """Command line entry point: parse options, set up logging, fetch archive.

    Requires --outname, --hostname, --basedir and --treeish; --port
    defaults to 9418.  Exits with the return code of getArchive(), with 2
    on a command line error (unknown option, invalid port, missing required
    option) and with 0 after --help.
    """
    outname = None
    hostname = None
    port = 9418
    basedir = None
    treeish = None
    debug = False
    quiet = False

    try:
        opts, args = getopt.getopt(sys.argv[1:],
                                   "hdqv",
                                   ["help",
                                    "outname=",
                                    "hostname=",
                                    "port=",
                                    "basedir=",
                                    "treeish=",
                                    "debug",
                                    "quiet"])
    except getopt.GetoptError as err:
        print(str(err))
        usage()
        sys.exit(2)

    for o, a in opts:
        if o in ("-h", "--help"):
            usage()
            sys.exit()
        elif o in ("-d", "--debug", "-v"):
            debug = True
        elif o in ("-q", "--quiet"):
            quiet = True
        # getopt reports long options without the trailing '=', so compare
        # against the bare name.
        elif o == "--hostname":
            hostname = a
        elif o == "--outname":
            outname = a
        elif o == "--port":
            # The socket layer needs an integer port.
            try:
                port = int(a)
            except ValueError:
                print('invalid port: %s' % a)
                usage()
                sys.exit(2)
        elif o == "--basedir":
            basedir = a
        elif o == "--treeish":
            treeish = a
        else:
            print('%s %s' % (o, a))
            raise AssertionError("unhandled option")

    if not outname or not hostname or not port or not basedir or not treeish:
        usage()
        # A missing required option is a usage error, not a success.
        sys.exit(2)

    LOG_FORMAT = '%(asctime)s - %(levelname)s - %(filename)s - ' \
        '%(funcName)s:%(lineno)s - %(message)s'
    if debug:
        LOG_LEVEL = log.DEBUG
    elif quiet:
        LOG_LEVEL = log.CRITICAL
    else:
        # Normal runs print the bare messages only.
        LOG_LEVEL = log.INFO
        LOG_FORMAT = '%(message)s'

    formatter = logging.Formatter(LOG_FORMAT)
    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(formatter)
    logger = logging.getLogger('')
    logger.addHandler(stdout_handler)
    logger.setLevel(LOG_LEVEL)

    ret = getArchive(outname, hostname, port, basedir, treeish)
    sys.exit(ret)

# Run the command line interface only when this file is executed as a
# script; importing it as a module just defines the functions above.
if __name__ == '__main__':
    main()

# Source: old_gitarch.py.txt - last modified 2015/10/05 16:06 by Jan Stancek