#!/usr/bin/env python3

#    Copyright (C) 2012 Jeremy S. Sanders
#    Email: Jeremy Sanders <jeremy@jeremysanders.net>
#
#    This file is part of Veusz.
#
#    Veusz is free software: you can redistribute it and/or modify it
#    under the terms of the GNU General Public License as published by
#    the Free Software Foundation, either version 2 of the License, or
#    (at your option) any later version.
#
#    Veusz is distributed in the hope that it will be useful, but
#    WITHOUT ANY WARRANTY; without even the implied warranty of
#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
#    General Public License for more details.
#
#    You should have received a copy of the GNU General Public License
#    along with Veusz. If not, see <https://www.gnu.org/licenses/>.
#
##############################################################################

"""
pyqt_find_translatable
----------------------

Find translatable strings in PyQt source files and UI files. This
code uses the ast module to interpret python source code.

In your source code define a function for translating the text using a
specific context. Translation functions are identified by this program
by having context and disambiguation optional arguments.

def trans(text, disambiguation=None, context='trans_context'):
   return QCoreApplication.translate(context, text,
                                     disambiguation=disambiguation)

In your source code you can use the trans function, optionally
overriding the optional arguments.

trans("text")
trans("other text", "some disambiguation")
trans("more text", context="other_context")

This script will also identify strings in UI files. The class in the
UI file is the context in the output TS file.

Usage:
pyqt_find_translatable --output=file.ts --verbose *.py *.ui
"""

import argparse
import ast
import sys
import os.path

from lxml import etree

class Message:
    '''Record of one translatable string and where it was found.

    string is the source text, filename and lineno give its location,
    comment is the optional disambiguation and n is true when the
    string has a numerus form.
    '''

    def __init__(self, string, filename=None, lineno=None, comment=None, n=False):
        # the text itself
        self.string = string
        # where the text was found
        self.filename, self.lineno = filename, lineno
        # disambiguation and numerus flag
        self.comment, self.n = comment, n

class PythonMessageVisitor(ast.NodeVisitor):
    '''A visitor which visits function calls and definitions in source.

    Function definitions which have 'disambiguation' and 'context'
    parameters are registered as translation functions. Calls made by
    bare name to a registered function are recorded as Message objects
    in the output mapping.
    '''

    def __init__(self, filename, outmessages, verbose=True):
        '''filename is file being read (used in messages and warnings).
        outmessages is a mapping of context to a list of Messages,
        which is updated in place.
        If verbose is set, progress is written to stdout.'''

        self.filename = filename

        # map translation functions to contexts
        self.fn2context = {}
        # arguments for functions
        self.fnargs = {}

        self.messages = outmessages
        self.verbose = verbose

    def _literal(self, node, fn, what, lineno):
        '''Get the value of a literal argument node.

        Returns (True, value) if node is a literal constant. Otherwise
        a warning naming the argument (what) is written to stderr and
        (False, None) is returned.'''

        if isinstance(node, ast.Constant):
            return True, node.value
        sys.stderr.write(
            'WARNING: Translatable call to %s in %s:%i has '
            'non-literal %s, so it is skipped\n' %
            (repr(fn), self.filename, lineno, what))
        return False, None

    def visit_Call(self, obj):
        '''Function call made.

        If the call is to a known translation function, add a Message
        for its text to the output mapping. Calls whose text, context
        or disambiguation are not literals are skipped with a warning.'''

        # this needs to be called to walk the tree
        self.generic_visit(obj)

        try:
            fn = obj.func.id
        except AttributeError:
            # no name known
            return

        if fn not in self.fn2context:
            return

        if len(obj.args)+len(obj.keywords) not in (1,2,3,4) or len(obj.args) < 1:
            sys.stderr.write(
                'WARNING: Translatable call to %s in %s:%i '
                'requires 1 to 4 parameters\n' %
                (repr(fn), self.filename, obj.lineno))
            return

        # the text to translate has to be a literal string, as it
        # cannot be known otherwise
        textnode = obj.args[0]
        if not (isinstance(textnode, ast.Constant) and
                isinstance(textnode.value, str)):
            sys.stderr.write(
                'WARNING: Translatable call to %s in %s:%i has '
                'non-literal text, so it is skipped\n' %
                (repr(fn), self.filename, obj.lineno))
            return
        text = textnode.value

        # convert named arguments to values if possible
        keywords = {}
        for a in obj.keywords:
            if a.arg in ('context', 'disambiguation'):
                ok, value = self._literal(a.value, fn, a.arg, obj.lineno)
                if not ok:
                    return
                keywords[a.arg] = value
            elif a.arg == 'n':
                keywords[a.arg] = True
            else:
                sys.stderr.write(
                    'WARNING: Invalid keyword %s to translatable call\n' % a.arg)
                return

        # defaults
        context = self.fn2context[fn]
        comment = None
        argnames = self.fnargs[fn]

        # examine any unnamed arguments
        ctxidx = argnames.index('context')
        if len(obj.args) > ctxidx:
            ok, context = self._literal(
                obj.args[ctxidx], fn, 'context', obj.lineno)
            if not ok:
                return
        disidx = argnames.index('disambiguation')
        if len(obj.args) > disidx:
            ok, comment = self._literal(
                obj.args[disidx], fn, 'disambiguation', obj.lineno)
            if not ok:
                return

        # now look at named arguments which override defaults
        context = keywords.get('context', context)
        comment = keywords.get('disambiguation', comment)

        # numerus form if n is given by name or by position (its value
        # does not need to be a literal)
        n = 'n' in keywords
        if 'n' in argnames and len(obj.args) > argnames.index('n'):
            n = True

        # create new message
        self.messages.setdefault(context, []).append(
            Message(text, filename=self.filename, lineno=obj.lineno,
                    comment=comment, n=n) )

        if self.verbose:
            sys.stdout.write(
                'Found text %s (context=%s, disambiguation=%s) in %s:%i\n' %
                (repr(text), repr(context), repr(comment),
                 self.filename, obj.lineno))

    def visit_FunctionDef(self, obj):
        '''Function definition made.

        Register the function as a translation function if it has 3 or
        4 parameters, 2 or 3 defaults, and parameters called
        'disambiguation' and 'context'. The literal default of
        'context' becomes the default context of the function.'''

        # this needs to be called to walk the tree
        self.generic_visit(obj)

        name = obj.name

        args = obj.args
        # want a three-parameter function with two default values
        # (or one more of each if there is an n parameter)
        if len(args.args) not in (3,4) or len(args.defaults) not in (2,3):
            return

        argids = [a.arg.lower() for a in args.args]
        # only functions with disambiguation and context as optional arguments
        if 'disambiguation' not in argids or 'context' not in argids:
            return
        if len(args.defaults)==3 and 'n' not in argids:
            return

        # defaults belong to the last len(defaults) parameters, so
        # convert the parameter index to an index into defaults
        defidx = argids.index('context') - (len(args.args)-len(args.defaults))
        default = args.defaults[defidx] if defidx >= 0 else None
        if not isinstance(default, ast.Constant):
            sys.stderr.write(
                "WARNING: Translation function definition %s in "
                "%s:%i does not have default string for 'context'\n" %
                (repr(name), self.filename, obj.lineno))
            return
        context = default.value

        if name in self.fn2context:
            sys.stderr.write(
                'WARNING: Duplicate translation function %s '
                'in %s:%i\n' % (repr(name), self.filename, obj.lineno))
            return

        if self.verbose:
            sys.stdout.write(
                'Found translation function %s with default '
                'context %s in %s:%i\n' %
                (repr(name), repr(context), self.filename, obj.lineno))

        # map function name to default context
        self.fn2context[name] = context
        self.fnargs[name] = argids

class TSWriter:
    """Class for writing ts files.

    The writer either creates a new ts file from the collected
    messages (write_new) or merges them into an existing one (update).
    """

    def __init__(self, filename, contexts, filenames=None, verbose=True):
        """filename is the ts file to write or update.
        contexts maps each context name to a list of Message objects.
        filenames lists the source files which were examined; messages
        located in other files are not marked obsolete on update.
        If verbose is set, progress is written to stdout."""
        self.filename = filename
        self.contexts = contexts
        self.filenames = [] if filenames is None else filenames
        self.verbose = verbose

    def add_message_to_context(self, ncontext, message):
        '''Given the context xml node, add a new message.'''

        if self.verbose:
            sys.stdout.write(
                'Adding new message %s to context %s\n' % (
                    repr(message.string),
                    repr(ncontext.find('name').text)))

        nmessage = etree.SubElement(ncontext, 'message')

        # where the message is in the source
        nlocation = etree.SubElement(nmessage, 'location')
        nlocation.set('filename', message.filename)
        nlocation.set('line', str(message.lineno))

        # original message
        nsource = etree.SubElement(nmessage, 'source')
        nsource.text = message.string

        if message.comment:
            # the disambiguation
            ncomment = etree.SubElement(nmessage, 'comment')
            ncomment.text = message.comment

        # blank translated text
        ntranslation = etree.SubElement(nmessage, 'translation')

        if message.n:
            # blank numerus form
            etree.SubElement(ntranslation, 'numerusform')
            nmessage.set('numerus', 'yes')
            ntranslation.set('type', 'unfinished')
        else:
            # make default translation input
            ntranslation.text = message.string

    def add_context_to_root(self, nroot, contextname, messages):
        '''Add a new context tag to the root node given.

        Messages are written in order of line number, and only the
        first message for each (text, disambiguation) pair is written.'''

        if self.verbose:
            sys.stdout.write('Adding new context %s\n' % repr(contextname))

        ncontext = etree.SubElement(nroot, 'context')
        nname = etree.SubElement(ncontext, 'name')
        nname.text = contextname

        # avoid writing message twice
        done = set()

        # iterate over messages in context
        for msg in sorted(messages, key=lambda s: s.lineno):

            # only write unique messages
            k = (msg.string, msg.comment)
            if k in done:
                continue
            done.add(k)

            self.add_message_to_context(ncontext, msg)

    def mark_message_obsolete(self, nmessage):
        '''Mark message as obsolete.

        The message is left alone if its location is in a file which
        was not examined in this run.'''

        nlocation = nmessage.find('location')
        nsource = nmessage.find('source')
        source = None if nsource is None else nsource.text

        # if we didn't examine this file, then do not mark
        # as obsolete
        if nlocation is not None:
            filename = nlocation.get('filename')
            if filename not in self.filenames:
                if self.verbose:
                    sys.stdout.write(
                        'Not marking %s as obsolete as file not processed\n' %
                        repr(source))
                return

        # message no longer exists, so obsolete
        if self.verbose:
            sys.stdout.write('Marking %s as obsolete\n' % repr(source))

        ntranslation = nmessage.find('translation')
        if ntranslation is None:
            ntranslation = etree.SubElement(nmessage, 'translation')
        ntranslation.set('type', 'obsolete')
        # remove obsolete location
        if nlocation is not None:
            nmessage.remove(nlocation)

    def update_message(self, nmessage, message):
        '''Update message node.

        The location is refreshed, an obsolete marking is removed and
        the translation is reset if the numerus state has changed.'''
        nlocation = nmessage.find('location')
        if nlocation is None:
            nlocation = etree.SubElement(nmessage, 'location')
        nlocation.set('filename', message.filename)
        nlocation.set('line', str(message.lineno))

        # this is the translation itself
        ntranslation = nmessage.find('translation')
        if ntranslation is None:
            ntranslation = etree.SubElement(nmessage, 'translation')
            ntranslation.set('type', 'unfinished')

        # unmark as obsolete, if appropriate
        if ntranslation.get('type') == 'obsolete':
            ntranslation.set('type', 'unfinished')
            if self.verbose:
                sys.stdout.write(
                    'Unmarking %s as obsolete\n' %
                    nmessage.find('source').text)

        if message.n and nmessage.get('numerus', 'no')=='no':
            # make numerus if previously not
            nmessage.set('numerus', 'yes')
            ntranslation.set('type', 'unfinished')
            ntranslation.text = ''
            etree.SubElement(ntranslation, 'numerusform')
        elif not message.n and nmessage.get('numerus', 'no')=='yes':
            # now not numerus
            del nmessage.attrib['numerus']
            ntranslation.set('type', 'unfinished')
            ntranslation.text = ''
            etree.strip_elements(ntranslation, 'numerusform')

    def write_new(self):
        '''Write a new output .ts file.'''

        if self.verbose:
            sys.stdout.write('Writing %s\n' % repr(self.filename))

        nroot = etree.Element('TS')
        nroot.set('version', '2.0')

        # iterate over contexts and write them
        for context, messages in sorted(self.contexts.items()):
            self.add_context_to_root(nroot, context, messages)

        tree = etree.ElementTree(nroot)
        with open(self.filename, 'wb') as f:
            f.write(b'<!DOCTYPE TS>\n')
            tree.write(f, pretty_print=True, encoding='utf8')

    def update(self):
        '''Update an existing ts file with the contexts given.

        Existing messages are updated or marked obsolete, and new
        messages and contexts are appended. Nothing is written if the
        root node of the file is not TS.'''

        if self.verbose:
            sys.stdout.write('Updating %s\n' % repr(self.filename))

        parser = etree.XMLParser(remove_blank_text=True)
        tree = etree.parse(self.filename, parser)
        nroot = tree.getroot()
        if nroot.tag != 'TS':
            sys.stderr.write('Root node in ts file is not TS\n')
            return

        # iterate over context nodes (taking a copy of the list, as
        # the tree is modified below)
        processedcontexts = set()
        for ncontext in list(nroot.iter('context')):
            nname = ncontext.find('name')
            if nname is None:
                continue
            contextname = nname.text
            processedcontexts.add(contextname)

            if contextname not in self.contexts:
                # FIXME: make obsolete in messages if context no longer exists?
                continue

            # make copy of messages to add/update in context
            toprocess = {}
            for message in self.contexts[contextname]:
                toprocess[ (message.string, message.comment) ] = message

            # iterate over message nodes
            for nmessage in list(ncontext.iter('message')):
                nsource = nmessage.find('source')
                source = None if nsource is None else nsource.text
                ncomment = nmessage.find('comment')
                comment = None if ncomment is None else ncomment.text

                key = (source, comment)
                if key in toprocess:
                    # existing key, so update details
                    self.update_message(nmessage, toprocess[key])

                    # don't need to add at end
                    del toprocess[key]
                else:
                    self.mark_message_obsolete(nmessage)

            # add remaining items in this context; a missing
            # disambiguation (None) is sorted as an empty string, as
            # None cannot be compared with a string
            remaining = sorted(
                toprocess.items(),
                key=lambda item: (item[0][0], item[0][1] or ''))
            for key, message in remaining:
                self.add_message_to_context(ncontext, message)

        # add any new contexts
        for context, messages in sorted(self.contexts.items()):
            if context not in processedcontexts:
                self.add_context_to_root(nroot, context, messages)

        # write tree back to file
        tree.write(self.filename, encoding='utf-8', pretty_print=True)

def python_find_strings(filename, retn, verbose=True, gcontext=None, gargs=None):
    '''Update output in retn with strings in filename.

    filename is a Python source file to parse.
    retn is a mapping of context to a list of Messages, updated in place.
    gcontext optionally maps names of predefined translation functions
    to their default context, and gargs maps the same names to their
    list of parameter names. Copies are used, so translation functions
    found in this file are not added to the mappings passed.
    '''

    if verbose:
        sys.stdout.write('Examining file %s\n' % repr(filename))

    # read as bytes so that the parser decodes the source itself (using
    # any coding declaration) instead of relying on the locale encoding
    with open(filename, 'rb') as f:
        source = f.read()

    tree = ast.parse(source, filename)

    v = PythonMessageVisitor(filename, retn, verbose=verbose)
    v.fn2context = dict(gcontext) if gcontext else {}
    v.fnargs = dict(gargs) if gargs else {}
    v.visit(tree)

def ui_find_strings(filename, retn, verbose=True):
    '''Find strings in UI files.

    filename is the UI (xml) file to parse.
    retn is a mapping of context to a list of Messages, updated in
    place. The text of the class element of the file is used as the
    context; a warning is written and nothing is added if there is no
    class element.
    '''

    if verbose:
        sys.stdout.write('Examining UI file %s\n' % repr(filename))

    tree = etree.parse(filename)
    nroot = tree.getroot()

    try:
        context = nroot.find('class').text
    except AttributeError:
        sys.stderr.write('Could not find class element in %s\n' % filename)
        return

    # iter() replaces the deprecated getiterator()
    for string in nroot.iter('string'):
        # ignore empty string elements
        if not string.text:
            continue

        message = Message(
            string.text, filename=filename,
            lineno=string.sourceline)

        if verbose:
            sys.stdout.write(
                'Found string %s in %s:%i\n' % (
                    repr(message.string), filename,
                    string.sourceline))

        retn.setdefault(context, []).append(message)

def main():
    '''Run the program from the command line.

    Strings are collected from the .py and .ui files given, then the
    output ts file is updated if it exists, or written as new if it
    does not exist or --reset is given.
    '''
    parser = argparse.ArgumentParser(
        description='Find translatable strings in PyQt programs',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        'files', metavar='FILE', nargs='+',
        help='input source file')
    parser.add_argument(
        '--verbose', action='store_true',
        help='display verbose output')
    parser.add_argument(
        '--reset', action='store_true',
        help='write output file as new')
    parser.add_argument(
        '--output', metavar='FILE',
        default='output.ts',
        help='output ts file')
    parser.add_argument(
        '--defn', metavar='FNAME:CTX',nargs='+',default=[],
        help='force global function name:context')
    args = parser.parse_args()

    # Prepare forced global translation functions
    gcontext = {}
    gargs = {}
    for defn in args.defn:
        # split at the first colon only, so that a context which itself
        # contains colons is kept intact
        fname, sep, ctx = defn.partition(':')
        if not sep:
            # Default context is 'global'
            ctx = 'global'
        gcontext[fname] = ctx
        # Always use default argids for forced functions
        gargs[fname] = ['text','disambiguation','context']

    retn = {}
    for infile in args.files:
        if not os.path.isfile(infile):
            sys.stderr.write('Cannot access file %s\n' % infile)
            continue

        # choose how to read the file from its extension
        ext = os.path.splitext(infile)[1]
        if ext == '.py':
            python_find_strings(
                infile, retn, verbose=args.verbose,
                gcontext=gcontext, gargs=gargs)
        elif ext == '.ui':
            ui_find_strings(infile, retn, verbose=args.verbose)
        else:
            sys.stderr.write(
                'Unknown file extension %s in %s\n' %
                (ext, infile))

    writer = TSWriter(
        args.output, retn, filenames=args.files,
        verbose=args.verbose)

    # merge into an existing output file unless asked to start again
    if os.path.isfile(args.output) and not args.reset:
        writer.update()
    else:
        writer.write_new()

# run the command-line program only when executed as a script
if __name__ == '__main__':
    main()
