From 123b33efd40e409a66d43e01a88d1c3104b1e3d1 Mon Sep 17 00:00:00 2001 From: "Markus F.X.J. Oberhumer" Date: Thu, 8 Apr 2004 18:16:41 +0000 Subject: [PATCH] New upstream version. committer: mfx 1081448201 +0000 --- scripts/cvs2cl.pl | 3923 ++++++++++++++++++++++++++------------------- 1 file changed, 2290 insertions(+), 1633 deletions(-) diff --git a/scripts/cvs2cl.pl b/scripts/cvs2cl.pl index 6d45b66a..d6637f54 100644 --- a/scripts/cvs2cl.pl +++ b/scripts/cvs2cl.pl @@ -9,39 +9,19 @@ exec perl -w -x $0 ${1+"$@"} # -*- mode: perl; perl-indent-level: 2; -*- ### ### ############################################################## -## $Revision: 2.50 $ -## $Date: 2003/08/25 10:52:04 $ +## $Revision: 2.53 $ +## $Date: 2004/03/07 11:56:25 $ ## $Author: fluffy $ ## -## (C) 2001,2002,2003 Martyn J. Pearce , under the GNU GPL. -## (C) 1999 Karl Fogel , under the GNU GPL. -## -## (Extensively hacked on by Melissa O'Neill .) -## (Gecos hacking by Robin Johnson .) -## -## cvs2cl.pl is free software; you can redistribute it and/or modify -## it under the terms of the GNU General Public License as published by -## the Free Software Foundation; either version 2, or (at your option) -## any later version. -## -## cvs2cl.pl is distributed in the hope that it will be useful, -## but WITHOUT ANY WARRANTY; without even the implied warranty of -## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -## GNU General Public License for more details. -## -## You may have received a copy of the GNU General Public License -## along with cvs2cl.pl; see the file COPYING. If not, write to the -## Free Software Foundation, Inc., 59 Temple Place - Suite 330, -## Boston, MA 02111-1307, USA. - use strict; -use Text::Wrap qw( ); -use Time::Local; -use File::Basename qw( fileparse ); -use User::pwent; - +use File::Basename qw( fileparse ); +use Getopt::Long qw( GetOptions ); +use Text::Wrap qw( ); +use Time::Local qw( timegm ); +use User::pwent qw( getpwnam ); + # The Plan: # # Read in the logs for multiple files, spit out a nice ChangeLog that @@ -73,16 +53,99 @@ use User::pwent; # If we're not using the `--distributed' flag, the directory is always # considered to be `./', even as descend into subdirectories. - -############### Globals ################ +# Call Tree + +# name number of lines (10.xii.03) +# parse_options 192 +# derive_changelog 13 +# +-maybe_grab_accumulation_date 38 +# +-read_changelog 277 +# +-maybe_read_user_map_file 94 +# +-run_ext 9 +# +-read_file_path 29 +# +-read_symbolic_name 43 +# +-read_revision 49 +# +-read_date_author_and_state 25 +# +-parse_date_author_and_state 20 +# +-read_branches 36 +# +-output_changelog 424 +# +-pretty_file_list 290 +# +-common_path_prefix 35 +# +-preprocess_msg_text 30 +# +-min 1 +# +-mywrap 16 +# +-last_line_len 5 +# +-wrap_log_entry 177 +# +# Utilities +# +# xml_escape 6 +# slurp_file 11 +# debug 5 +# version 2 +# usage 142 + +# -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- +# +# Note about a bug-slash-opportunity: +# ----------------------------------- +# +# There's a bug in Text::Wrap, which affects cvs2cl. This script +# reveals it: +# +# #!/usr/bin/perl -w +# +# use Text::Wrap; +# +# my $test_text = +# "This script demonstrates a bug in Text::Wrap. The very long line +# following this paragraph will be relocated relative to the surrounding +# text: +# +# ==================================================================== +# +# See? When the bug happens, we'll get the line of equal signs below +# this paragraph, even though it should be above."; +# +# +# # Print out the test text with no wrapping: +# print "$test_text"; +# print "\n"; +# print "\n"; +# +# # Now print it out wrapped, and see the bug: +# print wrap ("\t", " ", "$test_text"); +# print "\n"; +# print "\n"; +# +# If the line of equal signs were one shorter, then the bug doesn't +# happen. Interesting. +# +# Anyway, rather than fix this in Text::Wrap, we might as well write a +# new wrap() which has the following much-needed features: +# +# * initial indentation, like current Text::Wrap() +# * subsequent line indentation, like current Text::Wrap() +# * user chooses among: force-break long words, leave them alone, or die()? +# * preserve existing indentation: chopped chunks from an indented line +# are indented by same (like this line, not counting the asterisk!) +# * optional list of things to preserve on line starts, default ">" +# +# Note that the last two are essentially the same concept, so unify in +# implementation and give a good interface to controlling them. +# +# And how about: +# +# Optionally, when encounter a line pre-indented by same as previous +# line, then strip the newline and refill, but indent by the same. +# Yeah... + +# Globals -------------------------------------------------------------------- use constant MAILNAME => "/etc/mailname"; -# What we run to generate it: -my $Log_Source_Command = "cvs log"; - # In case we have to print it out: -my $VERSION = '$Revision: 2.50 $'; +my $VERSION = '$Revision: 2.53 $'; $VERSION =~ s/\S+\s+(\S+)\s+\S+/$1/; ## Vars set by options: @@ -96,9 +159,6 @@ my $Print_Version = 0; # Just print usage message and exit? my $Print_Usage = 0; -# Single top-level ChangeLog, or one per subdirectory? -my $Distributed = 0; - # What file should we generate (defaults to "ChangeLog")? my $Log_File_Name = "ChangeLog"; @@ -116,7 +176,7 @@ my $Cumulative = 0; my $Update = 0; # Expand usernames to email addresses based on a map file? -my $User_Map_File = ""; +my $User_Map_File = ''; my $User_Passwd_File; my $Mail_Domain; @@ -144,6 +204,9 @@ my %show_tags; # Don't call Text::Wrap on the body of the message my $No_Wrap = 0; +# Indentation of log messages +my $Indent = "\t"; + # Don't do any pretty print processing my $Summary = 0; @@ -158,6 +221,7 @@ my $XML_Encoding = ''; # Format more for programs than for humans. my $XML_Output = 0; my $No_XML_Namespace = 0; +my $No_XML_ISO_Date = 0; # Do some special tweaks for log data that was written in FSF # ChangeLog style. @@ -202,13 +266,13 @@ my @Ignore_Files; my $Case_Insensitive = 0; # Maybe only show log messages matching a certain regular expression. -my $Regexp_Gate = ""; +my $Regexp_Gate = ''; # Pass this global option string along to cvs, to the left of `log': -my $Global_Opts = ""; +my $Global_Opts = ''; # Pass this option string along to the cvs log subcommand: -my $Command_Opts = ""; +my $Command_Opts = ''; # Read log output from stdin instead of invoking cvs log? my $Input_From_Stdin = 0; @@ -227,19 +291,18 @@ my $Common_Dir = 1; my $Max_Checkin_Duration = 180; # What to put at the front of [each] ChangeLog. -my $ChangeLog_Header = ""; +my $ChangeLog_Header = ''; # Whether to enable 'delta' mode, and for what start/end tags. my $Delta_Mode = 0; -my $Delta_From = ""; -my $Delta_To = ""; +my $Delta_From = ''; +my $Delta_To = ''; my $TestCode; # Whether to parse filenames from the RCS filename, and if so what # prefix to strip. -my $RCS_Mode = 0; -my $RCS_Root = ""; +my $RCS_Root; ## end vars set by options. @@ -258,1160 +321,130 @@ my $logmsg_separator = "----------------------------"; my $No_Ancestors = 0; -############### End globals ############ +my $No_Extra_Indent = 0; - +my $GroupWithinDate = 0; -&parse_options (); -if ( defined $TestCode ) { - eval $TestCode; - die "Eval failed: '$@'\n" - if $@; -} else { - &derive_change_log (); +# ---------------------------------------------------------------------------- + +package CVS::Utils::ChangeLog::EntrySet; + +sub new { + my $class = shift; + my %self; + bless \%self, $class; } - -### Everything below is subroutine definitions. ### +# ------------------------------------- -sub run_ext { - my ($cmd) = @_; - $cmd = [$cmd] - unless ref $cmd; - local $" = ' '; - my $out = qx"@$cmd 2>&1"; - my $rv = $?; - my ($sig, $core, $exit) = ($? & 127, $? & 128, $? >> 8); - return $out, $exit, $sig, $core; +sub output_changelog { + my $output_type = $XML_Output ? 'XML' : 'Text'; + my $output_class = "CVS::Utils::ChangeLog::EntrySet::Output::${output_type}"; + $output_class->new->output_changelog(@_); } -# If accumulating, grab the boundary date from pre-existing ChangeLog. -sub maybe_grab_accumulation_date () -{ - if (! $Cumulative || $Update) { - return ""; - } +# ---------------------------------------------------------------------------- - # else +package CVS::Utils::ChangeLog::EntrySet::Output::Text; - open (LOG, "$Log_File_Name") - or die ("trouble opening $Log_File_Name for reading ($!)"); +use base qw( CVS::Utils::ChangeLog::EntrySet::Output ); - my $boundary_date; - while () - { - if (/^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/) - { - $boundary_date = "$1"; - last; - } - } +use File::Basename qw( fileparse ); - close (LOG); - - # convert time from utc to local timezone if the ChangeLog has - # dates/times in utc - if ($UTC_Times && $boundary_date) - { - # convert the utc time to a time value - my ($year,$mon,$mday,$hour,$min) = $boundary_date =~ - m#(\d+)-(\d+)-(\d+)\s+(\d+):(\d+)#; - my $time = timegm(0,$min,$hour,$mday,$mon-1,$year-1900); - # print the timevalue in the local timezone - my ($ignore,$wday); - ($ignore,$min,$hour,$mday,$mon,$year,$wday) = localtime($time); - $boundary_date=sprintf ("%4u-%02u-%02u %02u:%02u", - $year+1900,$mon+1,$mday,$hour,$min); - } - - return $boundary_date; +sub new { + my $class = shift; + bless \(my($ self)), $class; } -# Don't call this wrap, because with 5.5.3, that clashes with the -# (unconditional :-( ) export of wrap() from Text::Wrap -sub mywrap { - my ($indent1, $indent2, @text) = @_; - # If incoming text looks preformatted, don't get clever - my $text = Text::Wrap::wrap($indent1, $indent2, @text); - if ( grep /^\s+/m, @text ) { - return $text; - } - my @lines = split /\n/, $text; - $indent2 =~ s!^((?: {8})+)!"\t" x (length($1)/8)!e; - $lines[0] =~ s/^$indent1\s+/$indent1/; - s/^$indent2\s+/$indent2/ - for @lines[1..$#lines]; - my $newtext = join "\n", @lines; - $newtext .= "\n" - if substr($text, -1) eq "\n"; - return $newtext; +# ------------------------------------- + +sub wday { + my $self = shift; my $class = ref $self; + my ($wday) = @_; + + return $Show_Day_Of_Week ? ' ' . $class->weekday_en($wday) : ''; } -# Fills up a ChangeLog structure in the current directory. -sub derive_change_log () -{ - # See "The Plan" above for a full explanation. +# ------------------------------------- - my %grand_poobah; +sub header_line { + my $self = shift; + my ($time, $author, $lastdate) = @_; - my $file_full_path; - my $time; - my $revision; - my $author; - my $state; - my $lines; - my $cvsstate; - my $msg_txt; - my $detected_file_separator; + my $header_line = ''; - my %tag_date_printed; + my (undef,$min,$hour,$mday,$mon,$year,$wday) + = $UTC_Times ? gmtime($time) : localtime($time); - # Might be adding to an existing ChangeLog - my $accumulation_date = &maybe_grab_accumulation_date (); - if ($accumulation_date) { - # Insert -d immediately after 'cvs log' - my $Log_Date_Command = "-d\'>${accumulation_date}\'"; - $Log_Source_Command =~ s/(^.*log\S*)/$1 $Log_Date_Command/; - &debug ("(adding log msg starting from $accumulation_date)\n"); - } + my $date = $self->fdatetime($time); - # We might be expanding usernames - my %usermap; - - # In general, it's probably not very maintainable to use state - # variables like this to tell the loop what it's doing at any given - # moment, but this is only the first one, and if we never have more - # than a few of these, it's okay. - my $collecting_symbolic_names = 0; - my %symbolic_names; # Where tag names get stored. - my %branch_names; # We'll grab branch names while we're at it. - my %branch_numbers; # Save some revisions for @Follow_Branches - my @branch_roots; # For showing which files are branch ancestors. - - # Bleargh. Compensate for a deficiency of custom wrapping. - if (($After_Header ne " ") and $FSF_Style) - { - $After_Header .= "\t"; - } - - if (! $Input_From_Stdin) { - &debug ("(run \"${Log_Source_Command}\")\n"); - open (LOG_SOURCE, "$Log_Source_Command |") - or die "unable to run \"${Log_Source_Command}\""; - } - else { - open (LOG_SOURCE, "-") or die "unable to open stdin for reading"; - } - - binmode LOG_SOURCE; - - %usermap = &maybe_read_user_map_file (); - - while () - { - # Canonicalize line endings - s/\r$//; - my $new_full_path; - - # If on a new file and don't see filename, skip until we find it, and - # when we find it, grab it. - if (! (defined $file_full_path)) - { - if (/^Working file: (.*)/) { - $new_full_path = $1; - } elsif ($RCS_Mode && m|^RCS file: $RCS_Root[/\\](.*),v$|) { - $new_full_path = $1; + if ($Show_Times) { + $header_line = + sprintf "%s %s\n\n", $date, $author; + } else { + if ( ! defined $lastdate or $date ne $lastdate or ! $GroupWithinDate ) { + if ( $GroupWithinDate ) { + $header_line = "$date\n\n"; + } else { + $header_line = "$date $author\n\n"; } - } - - if (defined $new_full_path) - { - $file_full_path = $new_full_path; - if (@Ignore_Files) - { - my $base; - ($base, undef, undef) = fileparse ($file_full_path); - # Ouch, I wish trailing operators in regexps could be - # evaluated on the fly! - if ($Case_Insensitive) { - if (grep ($file_full_path =~ m|$_|i, @Ignore_Files)) { - undef $file_full_path; - } - } - elsif (grep ($file_full_path =~ m|$_|, @Ignore_Files)) { - undef $file_full_path; - } - } - next; - } - - # Just spin wheels if no file defined yet. - next if (! $file_full_path); - - # Collect tag names in case we're asked to print them in the output. - if (/^symbolic names:$/) { - $collecting_symbolic_names = 1; - next; # There's no more info on this line, so skip to next - } - if ($collecting_symbolic_names) - { - # All tag names are listed with whitespace in front in cvs log - # output; so if see non-whitespace, then we're done collecting. - if (/^\S/) { - $collecting_symbolic_names = 0; - } - else # we're looking at a tag name, so parse & store it - { - # According to the Cederqvist manual, in node "Tags", tag - # names must start with an uppercase or lowercase letter and - # can contain uppercase and lowercase letters, digits, `-', - # and `_'. However, it's not our place to enforce that, so - # we'll allow anything CVS hands us to be a tag: - /^\s+([^:]+): ([\d.]+)$/; - my $tag_name = $1; - my $tag_rev = $2; - - # A branch number either has an odd number of digit sections - # (and hence an even number of dots), or has ".0." as the - # second-to-last digit section. Test for these conditions. - my $real_branch_rev = ""; - if (($tag_rev =~ /^(\d+\.\d+\.)+\d+$/) # Even number of dots... - and (! ($tag_rev =~ /^(1\.)+1$/))) # ...but not "1.[1.]1" - { - $real_branch_rev = $tag_rev; - } - elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/) # Has ".0." - { - $real_branch_rev = $1 . $3; - } - # If we got a branch, record its number. - if ($real_branch_rev) - { - $branch_names{$real_branch_rev} = $tag_name; - if (@Follow_Branches) { - if (grep ($_ eq $tag_name, @Follow_Branches)) { - $branch_numbers{$tag_name} = $real_branch_rev; - } - } - } - else { - # Else it's just a regular (non-branch) tag. - push (@{$symbolic_names{$tag_rev}}, $tag_name); - } - } - } - # End of code for collecting tag names. - - # If have file name, but not revision, and see revision, then grab - # it. (We collect unconditionally, even though we may or may not - # ever use it.) - if ((! (defined $revision)) and (/^revision (\d+\.[\d.]+)/)) - { - $revision = $1; - - if (@Follow_Branches) - { - foreach my $branch (@Follow_Branches) - { - # Special case for following trunk revisions - if (($branch =~ /^trunk$/i) and ($revision =~ /^[0-9]+\.[0-9]+$/)) - { - goto dengo; - } - - my $branch_number = $branch_numbers{$branch}; - if ($branch_number) - { - # Are we on one of the follow branches or an ancestor of - # same? - # - # If this revision is a prefix of the branch number, or - # possibly is less in the minormost number, OR if this - # branch number is a prefix of the revision, then yes. - # Otherwise, no. - # - # So below, we determine if any of those conditions are - # met. - - # Trivial case: is this revision on the branch? - # (Compare this way to avoid regexps that screw up Emacs - # indentation, argh.) - if ((substr ($revision, 0, ((length ($branch_number)) + 1))) - eq ($branch_number . ".")) - { - goto dengo; - } - # Non-trivial case: check if rev is ancestral to branch - elsif ((length ($branch_number)) > (length ($revision)) - and - $No_Ancestors) - { - $revision =~ /^((?:\d+\.)+)(\d+)$/; - my $r_left = $1; # still has the trailing "." - my $r_end = $2; - - $branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/; - my $b_left = $1; # still has trailing "." - my $b_mid = $2; # has no trailing "." - - if (($r_left eq $b_left) - && ($r_end <= $b_mid)) - { - goto dengo; - } - } - } - } - } - else # (! @Follow_Branches) - { - next; - } - - # Else we are following branches, but this revision isn't on the - # path. So skip it. - undef $revision; - dengo: - next; - } - - # If we don't have a revision right now, we couldn't possibly - # be looking at anything useful. - if (! (defined ($revision))) { - $detected_file_separator = /^$file_separator$/o; - if ($detected_file_separator) { - # No revisions for this file; can happen, e.g. "cvs log -d DATE" - goto CLEAR; - } - else { - next; - } - } - - # If have file name but not date and author, and see date or - # author, then grab them: - unless (defined $time) - { - if (/^date: .*/) - { - ($time, $author, $state, $lines) = - &parse_date_author_and_state ($_); - if (defined ($usermap{$author}) and $usermap{$author}) { - $author = $usermap{$author}; - } elsif(defined $Domain or $Gecos == 1) { - my $email = $author; - if(defined $Domain && $Domain ne '') { - $email = $author."@".$Domain; - } - my $pw = getpwnam($author); - my $fullname; - my $office; - my $workphone; - my $homephone; - for (($fullname, $office, $workphone, $homephone) = split /\s*,\s*/, $pw->gecos) { - s/&/ucfirst(lc($pw->name))/ge; - } - if($fullname ne "") { - $author = $fullname . " <" . $email . ">"; - } - } - } - else { - $detected_file_separator = /^$file_separator$/o; - if ($detected_file_separator) { - # No revisions for this file; can happen, e.g. "cvs log -d DATE" - goto CLEAR; - } - } - # If the date/time/author hasn't been found yet, we couldn't - # possibly care about anything we see. So skip: - next; - } - - # A "branches: ..." line here indicates that one or more branches - # are rooted at this revision. If we're showing branches, then we - # want to show that fact as well, so we collect all the branches - # that this is the latest ancestor of and store them in - # @branch_roots. Just for reference, the format of the line we're - # seeing at this point is: - # - # branches: 1.5.2; 1.5.4; ...; - # - # Okay, here goes: - - if (/^branches:\s+(.*);$/) - { - if ($Show_Branches) - { - my $lst = $1; - $lst =~ s/(1\.)+1;|(1\.)+1$//; # ignore the trivial branch 1.1.1 - if ($lst) { - @branch_roots = split (/;\s+/, $lst); - } - else { - undef @branch_roots; - } - next; - } - else - { - # Ugh. This really bothers me. Suppose we see a log entry - # like this: - # - # ---------------------------- - # revision 1.1 - # date: 1999/10/17 03:07:38; author: jrandom; state: Exp; - # branches: 1.1.2; - # Intended first line of log message begins here. - # ---------------------------- - # - # The question is, how we can tell the difference between that - # log message and a *two*-line log message whose first line is - # - # "branches: 1.1.2;" - # - # See the problem? The output of "cvs log" is inherently - # ambiguous. - # - # For now, we punt: we liberally assume that people don't - # write log messages like that, and just toss a "branches:" - # line if we see it but are not showing branches. I hope no - # one ever loses real log data because of this. - next; - } - } - - # If have file name, time, and author, then we're just grabbing - # log message texts: - $detected_file_separator = /^$file_separator$/o; - if ($detected_file_separator && ! (defined $revision)) { - # No revisions for this file; can happen, e.g. "cvs log -d DATE" - goto CLEAR; - } - unless ($detected_file_separator || /^$logmsg_separator$/o) - { - $msg_txt .= $_; # Normally, just accumulate the message... - next; - } - # ... until a msg separator is encountered: - # Ensure the message contains something: - if ((! $msg_txt) - || ($msg_txt =~ /^\s*\.\s*$|^\s*$/) - || ($msg_txt =~ /\*\*\* empty log message \*\*\*/)) - { - if ($Prune_Empty_Msgs) { - goto CLEAR; - } - # else - $msg_txt = "[no log message]\n"; - } - - ### Store it all in the Grand Poobah: - { - my $dir_key; # key into %grand_poobah - my %qunk; # complicated little jobbie, see below - - # Each revision of a file has a little data structure (a `qunk') - # associated with it. That data structure holds not only the - # file's name, but any additional information about the file - # that might be needed in the output, such as the revision - # number, tags, branches, etc. The reason to have these things - # arranged in a data structure, instead of just appending them - # textually to the file's name, is that we may want to do a - # little rearranging later as we write the output. For example, - # all the files on a given tag/branch will go together, followed - # by the tag in parentheses (so trunk or otherwise non-tagged - # files would go at the end of the file list for a given log - # message). This rearrangement is a lot easier to do if we - # don't have to reparse the text. - # - # A qunk looks like this: - # - # { - # filename => "hello.c", - # revision => "1.4.3.2", - # time => a timegm() return value (moment of commit) - # tags => [ "tag1", "tag2", ... ], - # branch => "branchname" # There should be only one, right? - # branchroots => [ "branchtag1", "branchtag2", ... ] - # } - - if ($Distributed) { - # Just the basename, don't include the path. - ($qunk{'filename'}, $dir_key, undef) = fileparse ($file_full_path); - } - else { - $dir_key = "./"; - $qunk{'filename'} = $file_full_path; - } - - # This may someday be used in a more sophisticated calculation - # of what other files are involved in this commit. For now, we - # don't use it much except for delta mode, because the - # common-commit-detection algorithm is hypothesized to be - # "good enough" as it stands. - $qunk{'time'} = $time; - - # We might be including revision numbers and/or tags and/or - # branch names in the output. Most of the code from here to - # loop-end deals with organizing these in qunk. - - $qunk{'revision'} = $revision; - $qunk{'state'} = $state; - if ( defined( $lines )) { - $qunk{'lines'} = $lines; - } - - # Grab the branch, even though we may or may not need it: - $qunk{'revision'} =~ /((?:\d+\.)+)\d+/; - my $branch_prefix = $1; - $branch_prefix =~ s/\.$//; # strip off final dot - if ($branch_names{$branch_prefix}) { - $qunk{'branch'} = $branch_names{$branch_prefix}; - } - - # Keep a record of the file's cvs state. - $qunk{'cvsstate'} = $state; - - # If there's anything in the @branch_roots array, then this - # revision is the root of at least one branch. We'll display - # them as branch names instead of revision numbers, the - # substitution for which is done directly in the array: - if (@branch_roots) { - my @roots = map { $branch_names{$_} } @branch_roots; - $qunk{'branchroots'} = \@roots; - } - - # Save tags too. - if (defined ($symbolic_names{$revision})) { - $qunk{'tags'} = $symbolic_names{$revision}; - delete $symbolic_names{$revision}; - - # If we're in 'delta' mode, update the latest observed - # times for the beginning and ending tags, and - # when we get around to printing output, we will simply restrict - # ourselves to that timeframe... - - if ($Delta_Mode) { - if (($time > $Delta_StartTime) && - (grep { $_ eq $Delta_From } @{$qunk{'tags'}})) - { - $Delta_StartTime = $time; - } - - if (($time > $Delta_EndTime) && - (grep { $_ eq $Delta_To } @{$qunk{'tags'}})) - { - $Delta_EndTime = $time; - } - } - } - - unless ($Hide_Branch_Additions and $msg_txt =~ /file \S+ was initially added on branch \S+./) { - # Add this file to the list - # (We use many spoonfuls of autovivication magic. Hashes and arrays - # will spring into existence if they aren't there already.) - - &debug ("(pushing log msg for ${dir_key}$qunk{'filename'})\n"); - - # Store with the files in this commit. Later we'll loop through - # again, making sure that revisions with the same log message - # and nearby commit times are grouped together as one commit. - push (@{$grand_poobah{$dir_key}{$author}{$time}{$msg_txt}}, \%qunk); - } - } - - CLEAR: - # Make way for the next message - undef $msg_txt; - undef $time; - undef $revision; - undef $author; - undef @branch_roots; - - # Maybe even make way for the next file: - if ($detected_file_separator) { - undef $file_full_path; - undef %branch_names; - undef %branch_numbers; - undef %symbolic_names; - } - } - - close (LOG_SOURCE); - - ### Process each ChangeLog - - while (my ($dir,$authorhash) = each %grand_poobah) - { - &debug ("DOING DIR: $dir\n"); - - # Here we twist our hash around, from being - # author => time => message => filelist - # in %$authorhash to - # time => author => message => filelist - # in %changelog. - # - # This is also where we merge entries. The algorithm proceeds - # through the timeline of the changelog with a sliding window of - # $Max_Checkin_Duration seconds; within that window, entries that - # have the same log message are merged. - # - # (To save space, we zap %$authorhash after we've copied - # everything out of it.) - - my %changelog; - while (my ($author,$timehash) = each %$authorhash) - { - my $lasttime; - my %stamptime; - foreach my $time (sort {$main::a <=> $main::b} (keys %$timehash)) - { - my $msghash = $timehash->{$time}; - while (my ($msg,$qunklist) = each %$msghash) - { - my $stamptime = $stamptime{$msg}; - if ((defined $stamptime) - and (($time - $stamptime) < $Max_Checkin_Duration) - and (defined $changelog{$stamptime}{$author}{$msg})) - { - push(@{$changelog{$stamptime}{$author}{$msg}}, @$qunklist); - } - else { - $changelog{$time}{$author}{$msg} = $qunklist; - $stamptime{$msg} = $time; - } - } - } - } - undef (%$authorhash); - - ### Now we can write out the ChangeLog! - - my ($logfile_here, $logfile_bak, $tmpfile); - - if (! $Output_To_Stdout) { - $logfile_here = $dir . $Log_File_Name; - $logfile_here =~ s/^\.\/\//\//; # fix any leading ".//" problem - $tmpfile = "${logfile_here}.cvs2cl$$.tmp"; - $logfile_bak = "${logfile_here}.bak"; - - open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\""; - } - else { - open (LOG_OUT, ">-") or die "Unable to open stdout for writing"; - } - - print LOG_OUT $ChangeLog_Header; - - if ($XML_Output) { - my $encoding = - length $XML_Encoding ? qq'encoding="$XML_Encoding"' : ''; - my $version = 'version="1.0"'; - my $declaration = - sprintf '', join ' ', grep length, $version, $encoding; - my $root = - $No_XML_Namespace ? - '' : - ''; - print LOG_OUT "$declaration\n\n$root\n\n"; - } - - my @key_list = (); - if($Chronological_Order) { - @key_list = sort {$main::a <=> $main::b} (keys %changelog); } else { - @key_list = sort {$main::b <=> $main::a} (keys %changelog); - } - foreach my $time (@key_list) - { - next if ($Delta_Mode && - (($time <= $Delta_StartTime) || - ($time > $Delta_EndTime && $Delta_EndTime))); - - # Set up the date/author line. - # kff todo: do some more XML munging here, on the header - # part of the entry: - my ($ignore,$min,$hour,$mday,$mon,$year,$wday) - = $UTC_Times ? gmtime($time) : localtime($time); - - # XML output includes everything else, we might as well make - # it always include Day Of Week too, for consistency. - if ($Show_Day_Of_Week or $XML_Output) { - $wday = ("Sunday", "Monday", "Tuesday", "Wednesday", - "Thursday", "Friday", "Saturday")[$wday]; - $wday = ($XML_Output) ? "${wday}\n" : " $wday"; - } - else { - $wday = ""; - } - - my $authorhash = $changelog{$time}; - if ($Show_Tag_Dates) { - my %tags; - while (my ($author,$mesghash) = each %$authorhash) { - while (my ($msg,$qunk) = each %$mesghash) { - foreach my $qunkref2 (@$qunk) { - if (defined ($$qunkref2{'tags'})) { - foreach my $tag (@{$$qunkref2{'tags'}}) { - $tags{$tag} = 1; - } - } - } - } - } - # Sort here for determinism to ease testing - foreach my $tag (sort keys %tags) { - if (!defined $tag_date_printed{$tag}) { - $tag_date_printed{$tag} = $time; - if ($XML_Output) { - # NOT YET DONE - } - else { - if ($Show_Times) { - printf LOG_OUT ("%4u-%02u-%02u${wday} %02u:%02u tag %s\n\n", - $year+1900, $mon+1, $mday, $hour, $min, $tag); - } else { - printf LOG_OUT ("%4u-%02u-%02u${wday} tag %s\n\n", - $year+1900, $mon+1, $mday, $tag); - } - } - } - } - } - while (my ($author,$mesghash) = each %$authorhash) - { - # If XML, escape in outer loop to avoid compound quoting: - if ($XML_Output) { - $author = &xml_escape ($author); - } - - FOOBIE: - # We sort here to enable predictable ordering for the testing porpoises - for my $msg (sort keys %$mesghash) - { - my $qunklist = $mesghash->{$msg}; - - ## MJP: 19.xii.01 : Exclude @ignore_tags - for my $ignore_tag (keys %ignore_tags) { - next FOOBIE - if grep($_ eq $ignore_tag, map(@{$_->{tags}}, - grep(defined $_->{tags}, - @$qunklist))); - } - ## MJP: 19.xii.01 : End exclude @ignore_tags - - # show only files with tag --show-tag $show_tag - if ( keys %show_tags ) { - next FOOBIE - if !grep(exists $show_tags{$_}, map(@{$_->{tags}}, - grep(defined $_->{tags}, - @$qunklist))); - } - - my $files = &pretty_file_list ($qunklist); - my $header_line; # date and author - my $body; # see below - my $wholething; # $header_line + $body - - if ($XML_Output) { - $header_line = - sprintf ("%4u-%02u-%02u\n" - . "${wday}" - . "\n" - . "%s\n", - $year+1900, $mon+1, $mday, $hour, $min, $author); - } - else { - if ($Show_Times) { - $header_line = - sprintf ("%4u-%02u-%02u${wday} %02u:%02u %s\n\n", - $year+1900, $mon+1, $mday, $hour, $min, $author); - } else { - $header_line = - sprintf ("%4u-%02u-%02u${wday} %s\n\n", - $year+1900, $mon+1, $mday, $author); - } - } - - $Text::Wrap::huge = 'overflow' - if $Text::Wrap::VERSION >= 2001.0130; - # Reshape the body according to user preferences. - if ($XML_Output) - { - $msg = &preprocess_msg_text ($msg); - $body = $files . $msg; - } - elsif ($No_Wrap && !$Summary) - { - $msg = &preprocess_msg_text ($msg); - $files = mywrap ("\t", "\t ", "* $files"); - $msg =~ s/\n(.+)/\n\t$1/g; - unless ($After_Header eq " ") { - $msg =~ s/^(.+)/\t$1/g; - } - $body = $files . $After_Header . $msg; - } - elsif ($Summary) - { - my( $filelist, $qunk ); - my( @DeletedQunks, @AddedQunks, @ChangedQunks ); - - $msg = &preprocess_msg_text ($msg); - # - # Sort the files (qunks) according to the operation that was - # performed. Files which were added have no line change - # indicator, whereas deleted files have state dead. - # - foreach $qunk ( @$qunklist ) - { - if ( "dead" eq $qunk->{'state'}) - { - push( @DeletedQunks, $qunk ); - } - elsif ( !exists( $qunk->{'lines'})) - { - push( @AddedQunks, $qunk ); - } - else - { - push( @ChangedQunks, $qunk ); - } - } - # - # The qunks list was originally in tree search order. Let's - # get that back. The lists, if they exist, will be reversed upon - # processing. - # - - # - # Now write the three sections onto $filelist - # - if ( @DeletedQunks ) - { - $filelist .= "\tDeleted:\n"; - foreach $qunk ( @DeletedQunks ) - { - $filelist .= "\t\t" . $qunk->{'filename'}; - $filelist .= " (" . $qunk->{'revision'} . ")"; - $filelist .= "\n"; - } - undef( @DeletedQunks ); - } - if ( @AddedQunks ) - { - $filelist .= "\tAdded:\n"; - foreach $qunk ( @AddedQunks ) - { - $filelist .= "\t\t" . $qunk->{'filename'}; - $filelist .= " (" . $qunk->{'revision'} . ")"; - $filelist .= "\n"; - } - undef( @AddedQunks ); - } - if ( @ChangedQunks ) - { - $filelist .= "\tChanged:\n"; - foreach $qunk ( @ChangedQunks ) - { - $filelist .= "\t\t" . $qunk->{'filename'}; - $filelist .= " (" . $qunk->{'revision'} . ")"; - $filelist .= ", \"" . $qunk->{'state'} . "\""; - $filelist .= ", lines: " . $qunk->{'lines'}; - $filelist .= "\n"; - } - undef( @ChangedQunks ); - } - chomp( $filelist ); - $msg =~ s/\n(.*)/\n\t$1/g; - unless ($After_Header eq " ") { - $msg =~ s/^(.*)/\t$1/g; - } - $body = $filelist . $After_Header . $msg; - } - else # do wrapping, either FSF-style or regular - { - if ($FSF_Style) - { - $files = mywrap ("\t", "\t", "* $files"); - - my $files_last_line_len = 0; - if ($After_Header eq " ") - { - $files_last_line_len = &last_line_len ($files); - $files_last_line_len += 1; # for $After_Header - } - - $msg = &wrap_log_entry - ($msg, "\t", 69 - $files_last_line_len, 69); - $body = $files . $After_Header . $msg; - } - else # not FSF-style - { - $msg = &preprocess_msg_text ($msg); - $body = $files . $After_Header . $msg; - $body = mywrap ("\t", "\t ", "* $body"); - $body =~ s/[ \t]+\n/\n/g; - } - } - - $body =~ s/[ \t]+\n/\n/g; - $wholething = $header_line . $body; - - if ($XML_Output) { - $wholething = "\n${wholething}\n"; - } - - # One last check: make sure it passes the regexp test, if the - # user asked for that. We have to do it here, so that the - # test can match against information in the header as well - # as in the text of the log message. - - # How annoying to duplicate so much code just because I - # can't figure out a way to evaluate scalars on the trailing - # operator portion of a regular expression. Grrr. - if ($Case_Insensitive) { - unless ($Regexp_Gate && ($wholething !~ /$Regexp_Gate/oi)) { - print LOG_OUT "${wholething}\n"; - } - } - else { - unless ($Regexp_Gate && ($wholething !~ /$Regexp_Gate/o)) { - print LOG_OUT "${wholething}\n"; - } - } - } - } - } - - if ($XML_Output) { - print LOG_OUT "\n"; - } - - close (LOG_OUT); - - if (! $Output_To_Stdout) - { - # If accumulating, append old data to new before renaming. But - # don't append the most recent entry, since it's already in the - # new log due to CVS's idiosyncratic interpretation of "log -d". - if ($Cumulative && -f $logfile_here) - { - open (NEW_LOG, ">>$tmpfile") - or die "trouble appending to $tmpfile ($!)"; - - open (OLD_LOG, "<$logfile_here") - or die "trouble reading from $logfile_here ($!)"; - - my $started_first_entry = 0; - my $passed_first_entry = 0; - while () - { - if (! $passed_first_entry) - { - if ((! $started_first_entry) - && /^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/) { - $started_first_entry = 1; - } - elsif (/^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/) { - $passed_first_entry = 1; - print NEW_LOG $_; - } - } - else { - print NEW_LOG $_; - } - } - - close (NEW_LOG); - close (OLD_LOG); - } - - if (-f $logfile_here) { - rename ($logfile_here, $logfile_bak); - } - rename ($tmpfile, $logfile_here); + $header_line = ''; } } } -sub parse_date_author_and_state () -{ - # Parses the date/time and author out of a line like: - # - # date: 1999/02/19 23:29:05; author: apharris; state: Exp; +# ------------------------------------- - my $line = shift; +sub preprocess_msg_text { + my $self = shift; + my ($text) = @_; - my ($year, $mon, $mday, $hours, $min, $secs, $author, $state, $rest) = - $line =~ - m#(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+);\s+author:\s+([^;]+);\s+state:\s+([^;]+);(.*)# - or die "Couldn't parse date ``$line''"; - die "Bad date or Y2K issues" unless ($year > 1969 and $year < 2258); - # Kinda arbitrary, but useful as a sanity check - my $time = timegm($secs,$min,$hours,$mday,$mon-1,$year-1900); - my $lines; - if ( $rest =~ m#\s+lines:\s+(.*)# ) - { - $lines =$1; - } - return ($time, $author, $state, $lines); + $text = $self->SUPER::preprocess_msg_text($text); + + unless ( $No_Wrap ) { + # Strip off lone newlines, but only for lines that don't begin with + # whitespace or a mail-quoting character, since we want to preserve + # that kind of formatting. Also don't strip newlines that follow a + # period; we handle those specially next. And don't strip + # newlines that precede an open paren. + 1 while $text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g; + + # If a newline follows a period, make sure that when we bring up the + # bottom sentence, it begins with two spaces. + 1 while $text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2 $3/g; + } + + return $text; } +# ------------------------------------- + # Here we take a bunch of qunks and convert them into printed # summary that will include all the information the user asked for. -sub pretty_file_list () -{ - if ($Hide_Filenames and (! $XML_Output)) { - return ""; - } +sub pretty_file_list { + my $self = shift; + + return '' + if $Hide_Filenames; my $qunksref = shift; - my @qunkrefs = - grep +((! exists $_->{'tags'} or - ! grep exists $ignore_tags{$_}, @{$_->{'tags'}}) and - (! keys %show_tags or - (exists $_->{'tags'} and - grep exists $show_tags{$_}, @{$_->{'tags'}})) - ), - @$qunksref; my @filenames; - my $beauty = ""; # The accumulating header string for this entry. + my $beauty = ''; # The accumulating header string for this entry. my %non_unanimous_tags; # Tags found in a proper subset of qunks my %unanimous_tags; # Tags found in all qunks my %all_branches; # Branches found in any qunk - my $common_dir = undef; # Dir prefix common to all files ("" if none) my $fbegun = 0; # Did we begin printing filenames yet? - # First, loop over the qunks gathering all the tag/branch names. - # We'll put them all in non_unanimous_tags, and take out the - # unanimous ones later. - QUNKREF: - foreach my $qunkref (@qunkrefs) - { - # Keep track of whether all the files in this commit were in the - # same directory, and memorize it if so. We can make the output a - # little more compact by mentioning the directory only once. - if ($Common_Dir && (scalar (@qunkrefs)) > 1) - { - if (! (defined ($common_dir))) - { - my ($base, $dir); - ($base, $dir, undef) = fileparse ($$qunkref{'filename'}); + my ($common_dir, $qunkrefs) = + $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches), $qunksref); - if ((! (defined ($dir))) # this first case is sheer paranoia - or ($dir eq "") - or ($dir eq "./") - or ($dir eq ".\\")) - { - $common_dir = ""; - } - else - { - $common_dir = $dir; - } - } - elsif ($common_dir ne "") - { - # Already have a common dir prefix, so how much of it can we preserve? - $common_dir = &common_path_prefix ($$qunkref{'filename'}, $common_dir); - } - } - else # only one file in this entry anyway, so common dir not an issue - { - $common_dir = ""; - } + my @qunkrefs = @$qunkrefs; - if (defined ($$qunkref{'branch'})) { - $all_branches{$$qunkref{'branch'}} = 1; - } - if (defined ($$qunkref{'tags'})) { - foreach my $tag (@{$$qunkref{'tags'}}) { - $non_unanimous_tags{$tag} = 1; - } - } - } - - # Any tag held by all qunks will be printed specially... but only if - # there are multiple qunks in the first place! - if ((scalar (@qunkrefs)) > 1) { - foreach my $tag (keys (%non_unanimous_tags)) { - my $everyone_has_this_tag = 1; - foreach my $qunkref (@qunkrefs) { - if ((! (defined ($$qunkref{'tags'}))) - or (! (grep ($_ eq $tag, @{$$qunkref{'tags'}})))) { - $everyone_has_this_tag = 0; - } - } - if ($everyone_has_this_tag) { - $unanimous_tags{$tag} = 1; - delete $non_unanimous_tags{$tag}; - } - } - } - - if ($XML_Output) - { - # If outputting XML, then our task is pretty simple, because we - # don't have to detect common dir, common tags, branch prefixing, - # etc. We just output exactly what we have, and don't worry about - # redundancy or readability. - - foreach my $qunkref (@qunkrefs) - { - my $filename = $$qunkref{'filename'}; - my $cvsstate = $$qunkref{'cvsstate'}; - my $revision = $$qunkref{'revision'}; - my $tags = $$qunkref{'tags'}; - my $branch = $$qunkref{'branch'}; - my $branchroots = $$qunkref{'branchroots'}; - - $filename = &xml_escape ($filename); # probably paranoia - $revision = &xml_escape ($revision); # definitely paranoia - - $beauty .= "\n"; - $beauty .= "${filename}\n"; - $beauty .= "${cvsstate}\n"; - $beauty .= "${revision}\n"; - if ($branch) { - $branch = &xml_escape ($branch); # more paranoia - $beauty .= "${branch}\n"; - } - foreach my $tag (@$tags) { - $tag = &xml_escape ($tag); # by now you're used to the paranoia - $beauty .= "${tag}\n"; - } - foreach my $root (@$branchroots) { - $root = &xml_escape ($root); # which is good, because it will continue - $beauty .= "${root}\n"; - } - $beauty .= "\n"; - } - - # Theoretically, we could go home now. But as long as we're here, - # let's print out the common_dir and utags, as a convenience to - # the receiver (after all, earlier code calculated that stuff - # anyway, so we might as well take advantage of it). - - if ((scalar (keys (%unanimous_tags))) > 1) { - foreach my $utag ((keys (%unanimous_tags))) { - $utag = &xml_escape ($utag); # the usual paranoia - $beauty .= "${utag}\n"; - } - } - if ($common_dir) { - $common_dir = &xml_escape ($common_dir); - $beauty .= "${common_dir}\n"; - } - - # That's enough for XML, time to go home: - return $beauty; - } - - # Else not XML output, so complexly compactify for chordate - # consumption. At this point we have enough global information - # about all the qunks to organize them non-redundantly for output. + # Not XML output, so complexly compactify for chordate consumption. At this + # point we have enough global information about all the qunks to organize + # them non-redundantly for output. if ($common_dir) { # Note that $common_dir still has its trailing slash @@ -1427,8 +460,8 @@ sub pretty_file_list () { foreach my $qunkref (@qunkrefs) { - if ((defined ($$qunkref{'branch'})) - and ($$qunkref{'branch'} eq $branch)) + if ((defined ($qunkref->branch)) + and ($qunkref->branch eq $branch)) { if ($fbegun) { # kff todo: comma-delimited in XML too? Sure. @@ -1437,12 +470,12 @@ sub pretty_file_list () else { $fbegun = 1; } - my $fname = substr ($$qunkref{'filename'}, length ($common_dir)); + my $fname = substr ($qunkref->filename, length ($common_dir)); $beauty .= $fname; - $$qunkref{'printed'} = 1; # Just setting a mark bit, basically + $qunkref->{'printed'} = 1; # Just setting a mark bit, basically - if ($Show_Tags && (defined @{$$qunkref{'tags'}})) { - my @tags = grep ($non_unanimous_tags{$_}, @{$$qunkref{'tags'}}); + if ( $Show_Tags and defined $qunkref->tags ) { + my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags}); if (@tags) { $beauty .= " (tags: "; @@ -1455,7 +488,7 @@ sub pretty_file_list () # Collect the revision numbers' last components, but don't # print them -- they'll get printed with the branch name # later. - $$qunkref{'revision'} =~ /.+\.([\d]+)$/; + $qunkref->revision =~ /.+\.([\d]+)$/; push (@brevisions, $1); # todo: we're still collecting branch roots, but we're not @@ -1488,13 +521,13 @@ sub pretty_file_list () my %fileinfo_printed; foreach my $qunkref (@qunkrefs) { - next if (defined ($$qunkref{'printed'})); # skip if already printed + next if (defined ($qunkref->{'printed'})); # skip if already printed - my $b = substr ($$qunkref{'filename'}, length ($common_dir)); + my $b = substr ($qunkref->filename, length ($common_dir)); # todo: Shlomo's change was this: - # $beauty .= substr ($$qunkref{'filename'}, - # (($common_dir eq "./") ? "" : length ($common_dir))); - $$qunkref{'printed'} = 1; # Set a mark bit. + # $beauty .= substr ($qunkref->filename, + # (($common_dir eq "./") ? '' : length ($common_dir))); + $qunkref->{'printed'} = 1; # Set a mark bit. if ($Show_Revisions || $Show_Tags || $Show_Dead) { @@ -1503,15 +536,15 @@ sub pretty_file_list () if ($Show_Revisions) { $started_addendum = 1; $b .= " ("; - $b .= "$$qunkref{'revision'}"; + $b .= $qunkref->revision; } - if ($Show_Dead && $$qunkref{'cvsstate'} =~ /dead/) + if ($Show_Dead && $qunkref->state =~ /dead/) { # Deliberately not using $started_addendum. Keeping it simple. $b .= "[DEAD]"; } - if ($Show_Tags && (defined $$qunkref{'tags'})) { - my @tags = grep ($non_unanimous_tags{$_}, @{$$qunkref{'tags'}}); + if ($Show_Tags && (defined $qunkref->tags)) { + my @tags = grep ($non_unanimous_tags{$_}, @{$qunkref->tags}); if ((scalar (@tags)) > 0) { if ($started_addendum) { $b .= ", "; @@ -1553,91 +586,677 @@ sub pretty_file_list () return $beauty; } -sub min ($$) { $_[0] < $_[1] ? $_[0] : $_[1] } +# ------------------------------------- -sub common_path_prefix ($$) -{ - my ($path1, $path2) = @_; +sub output_tagdate { + my $self = shift; + my ($fh, $time, $tag) = @_; - # For compatibility (with older versions of cvs2cl.pl), we think in UN*X - # terms, and mould windoze filenames to match. Is this really appropriate? - # If a file is checked in under UN*X, and cvs log run on windoze, which way - # do the path separators slope? Can we use fileparse as per the local - # conventions? If so, we should probably have a user option to specify an - # OS to emulate to handle stdin-fed logs. If we did this, we could avoid - # the nasty \-/ transmogrification below. + my $fdatetime = $self->fdatetime($time); + print $fh "$fdatetime tag $tag\n\n"; + return; +} - my ($dir1, $dir2) = map +(fileparse($_))[1], $path1, $path2; +# ------------------------------------- - # Transmogrify Windows filenames to look like Unix. - # (It is far more likely that someone is running cvs2cl.pl under - # Windows than that they would genuinely have backslashes in their - # filenames.) - tr!\\!/! - for $dir1, $dir2; +sub format_body { + my $self = shift; + my ($msg, $files, $qunklist) = @_; - my ($accum1, $accum2, $last_common_prefix) = ('') x 3; + my $body; - my @path1 = grep length($_), split qr!/!, $dir1; - my @path2 = grep length($_), split qr!/!, $dir2; - - my @common_path; - for (0..min($#path1,$#path2)) { - if ( $path1[$_] eq $path2[$_]) { - push @common_path, $path1[$_]; + if ( $No_Wrap and ! $Summary ) { + $msg = $self->preprocess_msg_text($msg); + $files = $self->mywrap("\t", "\t ", "* $files"); + $msg =~ s/\n(.+)/\n$Indent$1/g; + unless ($After_Header eq " ") { + $msg =~ s/^(.+)/$Indent$1/g; + } + if ( $Hide_Filenames ) { + $body = $After_Header . $msg; } else { - last; + $body = $files . $After_Header . $msg; + } + } elsif ( $Summary ) { + my ($filelist, $qunk); + my (@DeletedQunks, @AddedQunks, @ChangedQunks); + + $msg = $self->preprocess_msg_text($msg); + # + # Sort the files (qunks) according to the operation that was + # performed. Files which were added have no line change + # indicator, whereas deleted files have state dead. + # + foreach $qunk ( @$qunklist ) { + if ( "dead" eq $qunk->state) { + push @DeletedQunks, $qunk; + } elsif ( ! defined $qunk->lines ) { + push @AddedQunks, $qunk; + } else { + push @ChangedQunks, $qunk; + } + } + # + # The qunks list was originally in tree search order. Let's + # get that back. The lists, if they exist, will be reversed upon + # processing. + # + + # + # Now write the three sections onto $filelist + # + if ( @DeletedQunks ) { + $filelist .= "\tDeleted:\n"; + foreach $qunk ( @DeletedQunks ) { + $filelist .= "\t\t" . $qunk->filename; + $filelist .= " (" . $qunk->revision . ")"; + $filelist .= "\n"; + } + undef @DeletedQunks; + } + + if ( @AddedQunks ) { + $filelist .= "\tAdded:\n"; + foreach $qunk (@AddedQunks) { + $filelist .= "\t\t" . $qunk->filename; + $filelist .= " (" . $qunk->revision . ")"; + $filelist .= "\n"; + } + undef @AddedQunks ; + } + + if ( @ChangedQunks ) { + $filelist .= "\tChanged:\n"; + foreach $qunk (@ChangedQunks) { + $filelist .= "\t\t" . $qunk->filename; + $filelist .= " (" . $qunk->revision . ")"; + $filelist .= ", \"" . $qunk->state . "\""; + $filelist .= ", lines: " . $qunk->lines; + $filelist .= "\n"; + } + undef @ChangedQunks; + } + + chomp $filelist; + + if ( $Hide_Filenames ) { + $filelist = ''; + } + + $msg =~ s/\n(.*)/\n$Indent$1/g; + unless ( $After_Header eq " " or $FSF_Style ) { + $msg =~ s/^(.*)/$Indent$1/g; + } + + unless ( $No_Wrap ) { + if ( $FSF_Style ) { + $msg = $self->wrap_log_entry($msg, '', 69, 69); + chomp($msg); + chomp($msg); + } else { + $msg = $self->mywrap('', $Indent, "$msg"); + $msg =~ s/[ \t]+\n/\n/g; + } + } + + $body = $filelist . $After_Header . $msg; + } else { # do wrapping, either FSF-style or regular + my $latter_wrap = $No_Extra_Indent ? $Indent : "$Indent "; + + if ( $FSF_Style ) { + $files = $self->mywrap($Indent, $latter_wrap, "* $files"); + + my $files_last_line_len = 0; + if ( $After_Header eq " " ) { + $files_last_line_len = $self->last_line_len($files); + $files_last_line_len += 1; # for $After_Header + } + + $msg = $self->wrap_log_entry($msg, $latter_wrap, 69-$files_last_line_len, 69); + $body = $files . $After_Header . $msg; + } else { # not FSF-style + $msg = $self->preprocess_msg_text($msg); + $body = $files . $After_Header . $msg; + $body = $self->mywrap($Indent, $latter_wrap, "* $body"); + $body =~ s/[ \t]+\n/\n/g; } } - return join '', map "$_/", @common_path; + return $body; } -sub preprocess_msg_text () -{ - my $text = shift; +# ---------------------------------------------------------------------------- - # Strip out carriage returns (as they probably result from DOSsy editors). - $text =~ s/\r\n/\n/g; +package CVS::Utils::ChangeLog::EntrySet::Output::XML; - # If it *looks* like two newlines, make it *be* two newlines: - $text =~ s/\n\s*\n/\n\n/g; +use base qw( CVS::Utils::ChangeLog::EntrySet::Output ); - if ($XML_Output) - { - $text = &xml_escape ($text); - chomp $text; - $text = "${text}\n"; - } - elsif (! $No_Wrap) - { - # Strip off lone newlines, but only for lines that don't begin with - # whitespace or a mail-quoting character, since we want to preserve - # that kind of formatting. Also don't strip newlines that follow a - # period; we handle those specially next. And don't strip - # newlines that precede an open paren. - 1 while ($text =~ s/(^|\n)([^>\s].*[^.\n])\n([^>\n])/$1$2 $3/g); +use File::Basename qw( fileparse ); - # If a newline follows a period, make sure that when we bring up the - # bottom sentence, it begins with two spaces. - 1 while ($text =~ s/(^|\n)([^>\s].*)\n([^>\n])/$1$2 $3/g); - } +sub new { + my $class = shift; + bless \(my($ self)), $class; +} + +# ------------------------------------- + +sub header_line { + my $self = shift; + my ($time, $author, $lastdate) = @_; + + my $header_line = ''; + + my $isoDate; + + my ($y, $m, $d, $H, $M, $S) = (gmtime($time))[5,4,3,2,1,0]; + + # Ideally, this would honor $UTC_Times and use +HH:MM syntax + $isoDate = sprintf("%04d-%02d-%02dT%02d:%02d:%02dZ", + $y + 1900, $m + 1, $d, $H, $M, $S); + + my (undef,$min,$hour,$mday,$mon,$year,$wday) + = $UTC_Times ? gmtime($time) : localtime($time); + + my $date = $self->fdatetime($time); + $wday = $self->wday($wday); + + $header_line = + sprintf ("%4u-%02u-%02u\n${wday}\n", + $year+1900, $mon+1, $mday, $hour, $min); + $header_line .= "$isoDate\n" + unless $No_XML_ISO_Date; + $header_line .= sprintf("%s\n" , $author); +} + +# ------------------------------------- + +sub wday { + my $self = shift; my $class = ref $self; + my ($wday) = @_; + + return '' . $class->weekday_en($wday) . "\n"; +} + +# ------------------------------------- + +sub escape { + my $self = shift; + + my $txt = shift; + $txt =~ s/&/&/g; + $txt =~ s//>/g; + return $txt; +} + +# ------------------------------------- + +sub output_header { + my $self = shift; + my ($fh) = @_; + + my $encoding = + length $XML_Encoding ? qq'encoding="$XML_Encoding"' : ''; + my $version = 'version="1.0"'; + my $declaration = + sprintf '', join ' ', grep length, $version, $encoding; + my $root = + $No_XML_Namespace ? + '' : + ''; + print $fh "$declaration\n\n$root\n\n"; +} + +# ------------------------------------- + +sub output_footer { + my $self = shift; + my ($fh) = @_; + + print $fh "\n"; +} + +# ------------------------------------- + +sub preprocess_msg_text { + my $self = shift; + my ($text) = @_; + + $text = $self->SUPER::preprocess_msg_text($text); + + $text = $self->escape($text); + chomp $text; + $text = "${text}\n"; return $text; } -sub last_line_len () -{ +# ------------------------------------- + +# Here we take a bunch of qunks and convert them into printed +# summary that will include all the information the user asked for. +sub pretty_file_list { + my $self = shift; + my ($qunksref) = @_; + + my $beauty = ''; # The accumulating header string for this entry. + my %non_unanimous_tags; # Tags found in a proper subset of qunks + my %unanimous_tags; # Tags found in all qunks + my %all_branches; # Branches found in any qunk + my $fbegun = 0; # Did we begin printing filenames yet? + + my ($common_dir, $qunkrefs) = + $self->_pretty_file_list(\(%unanimous_tags, %non_unanimous_tags, %all_branches), + $qunksref); + + my @qunkrefs = @$qunkrefs; + + # If outputting XML, then our task is pretty simple, because we + # don't have to detect common dir, common tags, branch prefixing, + # etc. We just output exactly what we have, and don't worry about + # redundancy or readability. + + foreach my $qunkref (@qunkrefs) + { + my $filename = $qunkref->filename; + my $state = $qunkref->state; + my $revision = $qunkref->revision; + my $tags = $qunkref->tags; + my $branch = $qunkref->branch; + my $branchroots = $qunkref->roots; + + $filename = $self->escape($filename); # probably paranoia + $revision = $self->escape($revision); # definitely paranoia + + $beauty .= "\n"; + $beauty .= "${filename}\n"; + $beauty .= "${state}\n"; + $beauty .= "${revision}\n"; + if ($branch) { + $branch = $self->escape($branch); # more paranoia + $beauty .= "${branch}\n"; + } + foreach my $tag (@$tags) { + $tag = $self->escape($tag); # by now you're used to the paranoia + $beauty .= "${tag}\n"; + } + foreach my $root (@$branchroots) { + $root = $self->escape($root); # which is good, because it will continue + $beauty .= "${root}\n"; + } + $beauty .= "\n"; + } + + # Theoretically, we could go home now. But as long as we're here, + # let's print out the common_dir and utags, as a convenience to + # the receiver (after all, earlier code calculated that stuff + # anyway, so we might as well take advantage of it). + + if ((scalar (keys (%unanimous_tags))) > 1) { + foreach my $utag ((keys (%unanimous_tags))) { + $utag = $self->escape($utag); # the usual paranoia + $beauty .= "${utag}\n"; + } + } + if ($common_dir) { + $common_dir = $self->escape($common_dir); + $beauty .= "${common_dir}\n"; + } + + # That's enough for XML, time to go home: + return $beauty; +} + +# ------------------------------------- + +sub output_tagdate { + # NOT YET DONE +} + +# ------------------------------------- + +sub output_entry { + my $self = shift; + my ($fh, $entry) = @_; + print $fh "\n$entry\n\n"; +} + +# ------------------------------------- + +sub format_body { + my $self = shift; + my ($msg, $files, $qunklist) = @_; + + $msg = $self->preprocess_msg_text($msg); + return $files . $msg; +} + +# ---------------------------------------------------------------------------- + +package CVS::Utils::ChangeLog::EntrySet::Output; + +use Carp qw( croak ); +use File::Basename qw( fileparse ); + +# Class Utility Functions ------------- + +{ # form closure + +my @weekdays = (qw(Sunday Monday Tuesday Wednesday Thursday Friday Saturday)); +sub weekday_en { + my $class = shift; + return $weekdays[$_[0]]; +} + +} + +# Abstract Subrs ---------------------- + +sub wday { croak "Whoops. Abtract method call (wday).\n" } +sub pretty_file_list { croak "Whoops. Abtract method call (pretty_file_list).\n" } +sub output_tagdate { croak "Whoops. Abtract method call (output_tagdate).\n" } +sub header_line { croak "Whoops. Abtract method call (header_line).\n" } + +# Instance Subrs ---------------------- + +sub output_header { } + +# ------------------------------------- + +sub output_entry { + my $self = shift; + my ($fh, $entry) = @_; + print $fh "$entry\n"; +} + +# ------------------------------------- + +sub output_footer { } + +# ------------------------------------- + +sub escape { return $_[1] } + +# ------------------------------------- + +sub output_changelog { +my $self = shift; my $class = ref $self; + my ($grand_poobah) = @_; + ### Process each ChangeLog + + while (my ($dir,$authorhash) = each %$grand_poobah) + { + &main::debug ("DOING DIR: $dir\n"); + + # Here we twist our hash around, from being + # author => time => message => filelist + # in %$authorhash to + # time => author => message => filelist + # in %changelog. + # + # This is also where we merge entries. The algorithm proceeds + # through the timeline of the changelog with a sliding window of + # $Max_Checkin_Duration seconds; within that window, entries that + # have the same log message are merged. + # + # (To save space, we zap %$authorhash after we've copied + # everything out of it.) + + my %changelog; + while (my ($author,$timehash) = each %$authorhash) + { + my %stamptime; + foreach my $time (sort {$a <=> $b} (keys %$timehash)) + { + my $msghash = $timehash->{$time}; + while (my ($msg,$qunklist) = each %$msghash) + { + my $stamptime = $stamptime{$msg}; + if ((defined $stamptime) + and (($time - $stamptime) < $Max_Checkin_Duration) + and (defined $changelog{$stamptime}{$author}{$msg})) + { + push(@{$changelog{$stamptime}{$author}{$msg}}, $qunklist->files); + } + else { + $changelog{$time}{$author}{$msg} = $qunklist->files; + $stamptime{$msg} = $time; + } + } + } + } + undef (%$authorhash); + + ### Now we can write out the ChangeLog! + + my ($logfile_here, $logfile_bak, $tmpfile); + my $lastdate; + + if (! $Output_To_Stdout) { + $logfile_here = $dir . $Log_File_Name; + $logfile_here =~ s/^\.\/\//\//; # fix any leading ".//" problem + $tmpfile = "${logfile_here}.cvs2cl$$.tmp"; + $logfile_bak = "${logfile_here}.bak"; + + open (LOG_OUT, ">$tmpfile") or die "Unable to open \"$tmpfile\""; + } + else { + open (LOG_OUT, ">-") or die "Unable to open stdout for writing"; + } + + print LOG_OUT $ChangeLog_Header; + + my %tag_date_printed; + + $self->output_header(\*LOG_OUT); + + my @key_list = (); + if($Chronological_Order) { + @key_list = sort {$a <=> $b} (keys %changelog); + } else { + @key_list = sort {$b <=> $a} (keys %changelog); + } + foreach my $time (@key_list) + { + next if ($Delta_Mode && + (($time <= $Delta_StartTime) || + ($time > $Delta_EndTime && $Delta_EndTime))); + + # Set up the date/author line. + # kff todo: do some more XML munging here, on the header + # part of the entry: + my (undef,$min,$hour,$mday,$mon,$year,$wday) + = $UTC_Times ? gmtime($time) : localtime($time); + + $wday = $self->wday($wday); + # XML output includes everything else, we might as well make + # it always include Day Of Week too, for consistency. + my $authorhash = $changelog{$time}; + if ($Show_Tag_Dates) { + my %tags; + while (my ($author,$mesghash) = each %$authorhash) { + while (my ($msg,$qunk) = each %$mesghash) { + foreach my $qunkref2 (@$qunk) { + if (defined ($qunkref2->tags)) { + foreach my $tag (@{$qunkref2->tags}) { + $tags{$tag} = 1; + } + } + } + } + } + # Sort here for determinism to ease testing + foreach my $tag (sort keys %tags) { + if ( ! defined $tag_date_printed{$tag} ) { + $tag_date_printed{$tag} = $time; + $self->output_tagdate(\*LOG_OUT, $time, $tag); + } + } + } + while (my ($author,$mesghash) = each %$authorhash) + { + # If XML, escape in outer loop to avoid compound quoting: + $author = $self->escape($author); + + FOOBIE: + # We sort here to enable predictable ordering for the testing porpoises + for my $msg (sort keys %$mesghash) + { + my $qunklist = $mesghash->{$msg}; + + ## MJP: 19.xii.01 : Exclude @ignore_tags + for my $ignore_tag (keys %ignore_tags) { + next FOOBIE + if grep($_ eq $ignore_tag, map(@{$_->{tags}}, + grep(defined $_->{tags}, + @$qunklist))); + } + ## MJP: 19.xii.01 : End exclude @ignore_tags + + # show only files with tag --show-tag $show_tag + if ( keys %show_tags ) { + next FOOBIE + if !grep(exists $show_tags{$_}, map(@{$_->{tags}}, + grep(defined $_->{tags}, + @$qunklist))); + } + + my $files = $self->pretty_file_list($qunklist); + my $header_line; # date and author + my $wholething; # $header_line + $body + + my $date = $self->fdatetime($time); + $header_line = $self->header_line($time, $author, $lastdate); + $lastdate = $date; + + $Text::Wrap::huge = 'overflow' + if $Text::Wrap::VERSION >= 2001.0130; + # Reshape the body according to user preferences. + my $body = $self->format_body($msg, $files, $qunklist); + + $body =~ s/[ \t]+\n/\n/g; + $wholething = $header_line . $body; + + # One last check: make sure it passes the regexp test, if the + # user asked for that. We have to do it here, so that the + # test can match against information in the header as well + # as in the text of the log message. + + # How annoying to duplicate so much code just because I + # can't figure out a way to evaluate scalars on the trailing + # operator portion of a regular expression. Grrr. + if ($Case_Insensitive) { + unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/oi ) ) { + $self->output_entry(\*LOG_OUT, $wholething); + } + } + else { + unless ( $Regexp_Gate and ( $wholething !~ /$Regexp_Gate/o ) ) { + $self->output_entry(\*LOG_OUT, $wholething); + } + } + } + } + } + + $self->output_footer(\*LOG_OUT); + + close (LOG_OUT); + + if ( ! $Output_To_Stdout ) { + # If accumulating, append old data to new before renaming. But + # don't append the most recent entry, since it's already in the + # new log due to CVS's idiosyncratic interpretation of "log -d". + if ($Cumulative && -f $logfile_here) { + open NEW_LOG, ">>$tmpfile" + or die "trouble appending to $tmpfile ($!)"; + + open OLD_LOG, "<$logfile_here" + or die "trouble reading from $logfile_here ($!)"; + + my $started_first_entry = 0; + my $passed_first_entry = 0; + while () { + if ( ! $passed_first_entry ) { + if ( ( ! $started_first_entry ) + and /^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/ ) { + $started_first_entry = 1; + } elsif ( /^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/ ) { + $passed_first_entry = 1; + print NEW_LOG $_; + } + } else { + print NEW_LOG $_; + } + } + + close NEW_LOG; + close OLD_LOG; + } + + if ( -f $logfile_here ) { + rename $logfile_here, $logfile_bak; + } + rename $tmpfile, $logfile_here; + } + } +} + +# ------------------------------------- + +# Don't call this wrap, because with 5.5.3, that clashes with the +# (unconditional :-( ) export of wrap() from Text::Wrap +sub mywrap { + my $self = shift; + my ($indent1, $indent2, @text) = @_; + # If incoming text looks preformatted, don't get clever + my $text = Text::Wrap::wrap($indent1, $indent2, @text); + if ( grep /^\s+/m, @text ) { + return $text; + } + my @lines = split /\n/, $text; + $indent2 =~ s!^((?: {8})+)!"\t" x (length($1)/8)!e; + $lines[0] =~ s/^$indent1\s+/$indent1/; + s/^$indent2\s+/$indent2/ + for @lines[1..$#lines]; + my $newtext = join "\n", @lines; + $newtext .= "\n" + if substr($text, -1) eq "\n"; + return $newtext; +} + +# ------------------------------------- + +sub preprocess_msg_text { + my $self = shift; + my ($text) = @_; + + # Strip out carriage returns (as they probably result from DOSsy editors). + $text =~ s/\r\n/\n/g; + # If it *looks* like two newlines, make it *be* two newlines: + $text =~ s/\n\s*\n/\n\n/g; + + return $text; +} + +# ------------------------------------- + +sub last_line_len { + my $self = shift; + my $files_list = shift; my @lines = split (/\n/, $files_list); my $last_line = pop (@lines); return length ($last_line); } +# ------------------------------------- + # A custom wrap function, sensitive to some common constructs used in # log entries. -sub wrap_log_entry () -{ +sub wrap_log_entry { + my $self = shift; + my $text = shift; # The text to wrap. my $left_pad_str = shift; # String to pad with on the left. @@ -1645,8 +1264,8 @@ sub wrap_log_entry () my $length_remaining = shift; # Amount left on current line. my $max_line_length = shift; # Amount left for a blank line. - my $wrapped_text = ""; # The accumulating wrapped entry. - my $user_indent = ""; # Inherited user_indent from prev line. + my $wrapped_text = ''; # The accumulating wrapped entry. + my $user_indent = ''; # Inherited user_indent from prev line. my $first_time = 1; # First iteration of the loop? my $suppress_line_start_match = 0; # Set to disable line start checks. @@ -1661,7 +1280,7 @@ sub wrap_log_entry () $user_indent = $1; } else { - $user_indent = ""; + $user_indent = ''; } # If it matches any of the line-start regexps, print a newline now... @@ -1704,7 +1323,7 @@ sub wrap_log_entry () if ($this_len == 0) { # Blank lines should cancel any user_indent level. - $user_indent = ""; + $user_indent = ''; $length_remaining = $max_line_length; } elsif ($this_len >= $length_remaining) # Line too long, try breaking it. @@ -1816,17 +1435,359 @@ sub wrap_log_entry () return $wrapped_text; } -sub xml_escape () -{ - my $txt = shift; - $txt =~ s/&/&/g; - $txt =~ s//>/g; - return $txt; +# ------------------------------------- + +sub _pretty_file_list { + my $self = shift; + + my ($unanimous_tags, $non_unanimous_tags, $all_branches, $qunksref) = @_; + + my @qunkrefs = + grep +( ( ! $_->tags_exists + or + ! grep exists $ignore_tags{$_}, @{$_->tags}) + and + ( ! keys %show_tags + or + ( $_->tags_exists + and + grep exists $show_tags{$_}, @{$_->tags} ) + ) + ), + @$qunksref; + + my $common_dir; # Dir prefix common to all files ('' if none) + + # First, loop over the qunks gathering all the tag/branch names. + # We'll put them all in non_unanimous_tags, and take out the + # unanimous ones later. + QUNKREF: + foreach my $qunkref (@qunkrefs) + { + # Keep track of whether all the files in this commit were in the + # same directory, and memorize it if so. We can make the output a + # little more compact by mentioning the directory only once. + if ($Common_Dir && (scalar (@qunkrefs)) > 1) + { + if (! (defined ($common_dir))) + { + my ($base, $dir); + ($base, $dir, undef) = fileparse ($qunkref->filename); + + if ((! (defined ($dir))) # this first case is sheer paranoia + or ($dir eq '') + or ($dir eq "./") + or ($dir eq ".\\")) + { + $common_dir = ''; + } + else + { + $common_dir = $dir; + } + } + elsif ($common_dir ne '') + { + # Already have a common dir prefix, so how much of it can we preserve? + $common_dir = &main::common_path_prefix ($qunkref->filename, $common_dir); + } + } + else # only one file in this entry anyway, so common dir not an issue + { + $common_dir = ''; + } + + if (defined ($qunkref->branch)) { + $all_branches->{$qunkref->branch} = 1; + } + if (defined ($qunkref->tags)) { + foreach my $tag (@{$qunkref->tags}) { + $non_unanimous_tags->{$tag} = 1; + } + } + } + + # Any tag held by all qunks will be printed specially... but only if + # there are multiple qunks in the first place! + if ((scalar (@qunkrefs)) > 1) { + foreach my $tag (keys (%$non_unanimous_tags)) { + my $everyone_has_this_tag = 1; + foreach my $qunkref (@qunkrefs) { + if ((! (defined ($qunkref->tags))) + or (! (grep ($_ eq $tag, @{$qunkref->tags})))) { + $everyone_has_this_tag = 0; + } + } + if ($everyone_has_this_tag) { + $unanimous_tags->{$tag} = 1; + delete $non_unanimous_tags->{$tag}; + } + } + } + + return $common_dir, \@qunkrefs; } -sub maybe_read_user_map_file () -{ +# ------------------------------------- + +sub fdatetime { + my $self = shift; + + my ($year, $mday, $mon, $wday, $hour, $min); + + if ( @_ > 1 ) { + ($year, $mday, $mon, $wday, $hour, $min) = @_; + } else { + my ($time) = @_; + (undef, $min, $hour, $mday, $mon, $year, $wday) = + $UTC_Times ? gmtime($time) : localtime($time); + + $year += 1900; + $mon += 1; + $wday = $self->wday($wday); + } + + my $fdate = $self->fdate($year, $mon, $mday, $wday); + + if ($Show_Times) { + my $ftime = $self->ftime($hour, $min); + return "$fdate $ftime"; + } else { + return $fdate; + } +} + +# ------------------------------------- + +sub fdate { + my $self = shift; + + my ($year, $mday, $mon, $wday); + + if ( @_ > 1 ) { + ($year, $mon, $mday, $wday) = @_; + } else { + my ($time) = @_; + (undef, undef, undef, $mday, $mon, $year, $wday) = + $UTC_Times ? gmtime($time) : localtime($time); + + $year += 1900; + $mon += 1; + $wday = $self->wday($wday); + } + + return sprintf '%4u-%02u-%02u%s', $year, $mon, $mday, $wday; +} + +# ------------------------------------- + +sub ftime { + my $self = shift; + + my ($hour, $min); + + if ( @_ > 1 ) { + ($hour, $min) = @_; + } else { + my ($time) = @_; + (undef, $min, $hour) = $UTC_Times ? gmtime($time) : localtime($time); + } + + return sprintf '%02u:%02u', $hour, $min; +} + +# ---------------------------------------------------------------------------- + +package CVS::Utils::ChangeLog::Message; + +sub new { + my $class = shift; + my ($msg) = @_; + + my %self = (msg => $msg, files => []); + + bless \%self, $class; +} + +sub add_fileentry { + my $self = shift; + my ($fileentry) = @_; + + die "Not a fileentry: $fileentry" + unless $fileentry->isa('CVS::Utils::ChangeLog::FileEntry'); + + push @{$self->{files}}, $fileentry; +} + +sub files { wantarray ? @{$_[0]->{files}} : $_[0]->{files} } + +# ---------------------------------------------------------------------------- + +package CVS::Utils::ChangeLog::FileEntry; + +# Each revision of a file has a little data structure (a `qunk') +# associated with it. That data structure holds not only the +# file's name, but any additional information about the file +# that might be needed in the output, such as the revision +# number, tags, branches, etc. The reason to have these things +# arranged in a data structure, instead of just appending them +# textually to the file's name, is that we may want to do a +# little rearranging later as we write the output. For example, +# all the files on a given tag/branch will go together, followed +# by the tag in parentheses (so trunk or otherwise non-tagged +# files would go at the end of the file list for a given log +# message). This rearrangement is a lot easier to do if we +# don't have to reparse the text. +# +# A qunk looks like this: +# +# { +# filename => "hello.c", +# revision => "1.4.3.2", +# time => a timegm() return value (moment of commit) +# tags => [ "tag1", "tag2", ... ], +# branch => "branchname" # There should be only one, right? +# roots => [ "branchtag1", "branchtag2", ... ] +# } + +# Single top-level ChangeLog, or one per subdirectory? +my $distributed; +sub distributed { $#_ ? ($distributed = $_[1]) : $distributed; } + +sub new { + my $class = shift; + my ($path, $time, $revision, $state, $lines, + $branch_names, $branch_roots, $symbolic_names) = @_; + + my %self = (time => $time, + revision => $revision, + state => $state, + lines => $lines, + ); + + if ( $distributed ) { + @self{qw(filename dir_key)} = fileparse($path); + } else { + @self{qw(filename dir_key)} = ($path, './'); + } + + # Grab the branch, even though we may or may not need it: + (my ($branch_prefix) = ($revision =~ /((?:\d+\.)+)\d+/)); + $branch_prefix =~ s/\.$//; + $self{branch} = $branch_names->{$branch_prefix} + if $branch_names->{$branch_prefix}; + + # If there's anything in the @branch_roots array, then this + # revision is the root of at least one branch. We'll display + # them as branch names instead of revision numbers, the + # substitution for which is done directly in the array: + $self{'roots'} = [ map { $branch_names->{$_} } @$branch_roots ] + if @$branch_roots; + + if ( exists $symbolic_names->{$revision} ) { + $self{tags} = delete $symbolic_names->{$revision}; + &main::delta_check($time, $self{tags}); + } + + bless \%self, $class; +} + +sub filename { $_[0]->{filename} } +sub dir_key { $_[0]->{dir_key} } +sub revision { $_[0]->{revision} } +sub branch { $_[0]->{branch} } +sub state { $_[0]->{state} } +sub lines { $_[0]->{lines} } +sub roots { $_[0]->{roots} } + +sub tags { $_[0]->{tags} } +sub tags_exists { + exists $_[0]->{tags}; +} + +# This may someday be used in a more sophisticated calculation of what other +# files are involved in this commit. For now, we don't use it much except for +# delta mode, because the common-commit-detection algorithm is hypothesized to +# be "good enough" as it stands. +sub time { $_[0]->{time} } + +package main; + +# Subrs ---------------------------------------------------------------------- + +sub delta_check { + my ($time, $tags) = @_; + + # If we're in 'delta' mode, update the latest observed times for the + # beginning and ending tags, and when we get around to printing output, we + # will simply restrict ourselves to that timeframe... + return + unless $Delta_Mode; + + $Delta_StartTime = $time + if $time > $Delta_StartTime and grep { $_ eq $Delta_From } @$tags; + + $Delta_EndTime = $time + if $time > $Delta_EndTime and grep { $_ eq $Delta_To } @$tags; +} + +sub run_ext { + my ($cmd) = @_; + $cmd = [$cmd] + unless ref $cmd; + local $" = ' '; + my $out = qx"@$cmd 2>&1"; + my $rv = $?; + my ($sig, $core, $exit) = ($? & 127, $? & 128, $? >> 8); + return $out, $exit, $sig, $core; +} + +# ------------------------------------- + +# If accumulating, grab the boundary date from pre-existing ChangeLog. +sub maybe_grab_accumulation_date { + if (! $Cumulative || $Update) { + return ''; + } + + # else + + open (LOG, "$Log_File_Name") + or die ("trouble opening $Log_File_Name for reading ($!)"); + + my $boundary_date; + while () + { + if (/^(\d\d\d\d-\d\d-\d\d\s+\d\d:\d\d)/) + { + $boundary_date = "$1"; + last; + } + } + + close (LOG); + + # convert time from utc to local timezone if the ChangeLog has + # dates/times in utc + if ($UTC_Times && $boundary_date) + { + # convert the utc time to a time value + my ($year,$mon,$mday,$hour,$min) = $boundary_date =~ + m#(\d+)-(\d+)-(\d+)\s+(\d+):(\d+)#; + my $time = timegm(0,$min,$hour,$mday,$mon-1,$year-1900); + # print the timevalue in the local timezone + my ($ignore,$wday); + ($ignore,$min,$hour,$mday,$mon,$year,$wday) = localtime($time); + $boundary_date=sprintf ("%4u-%02u-%02u %02u:%02u", + $year+1900,$mon+1,$mday,$hour,$min); + } + + return $boundary_date; +} + +# ------------------------------------- + +sub maybe_read_user_map_file { my %expansions; my $User_Map_Input; @@ -1921,8 +1882,506 @@ sub maybe_read_user_map_file () return %expansions; } -sub parse_options () -{ +# ------------------------------------- + +sub read_file_path { + my ($line) = @_; + + my $path; + + if ( $line =~ /^Working file: (.*)/ ) { + $path = $1; + } elsif ( defined $RCS_Root + and + $line =~ m|^RCS file: $RCS_Root[/\\](.*),v$| ) { + $path = $1; + $path =~ s!Attic/!!; + } else { + return; + } + + if ( @Ignore_Files ) { + my $base; + ($base, undef, undef) = fileparse($path); + + my $xpath = $Case_Insensitive ? lc($path) : $path; + if ( grep index($path, $_) > -1, @Ignore_Files ) { + return; + } + } + + return $path; +} + +# ------------------------------------- + +sub read_symbolic_name { + my ($line, $branch_names, $branch_numbers, $symbolic_names) = @_; + + # All tag names are listed with whitespace in front in cvs log + # output; so if see non-whitespace, then we're done collecting. + if ( /^\S/ ) { + return 0; + } else { + # we're looking at a tag name, so parse & store it + + # According to the Cederqvist manual, in node "Tags", tag names must start + # with an uppercase or lowercase letter and can contain uppercase and + # lowercase letters, digits, `-', and `_'. However, it's not our place to + # enforce that, so we'll allow anything CVS hands us to be a tag: + my ($tag_name, $tag_rev) = ($line =~ /^\s+([^:]+): ([\d.]+)$/); + + # A branch number either has an odd number of digit sections + # (and hence an even number of dots), or has ".0." as the + # second-to-last digit section. Test for these conditions. + my $real_branch_rev = ''; + if ( $tag_rev =~ /^(\d+\.\d+\.)+\d+$/ # Even number of dots... + and + $tag_rev !~ /^(1\.)+1$/ ) { # ...but not "1.[1.]1" + $real_branch_rev = $tag_rev; + } elsif ($tag_rev =~ /(\d+\.(\d+\.)+)0.(\d+)/) { # Has ".0." + $real_branch_rev = $1 . $3; + } + + # If we got a branch, record its number. + if ( $real_branch_rev ) { + $branch_names->{$real_branch_rev} = $tag_name; + if ( @Follow_Branches ) { + if ( grep $_ eq $tag_name, @Follow_Branches ) { + $branch_numbers->{$tag_name} = $real_branch_rev; + } + } + } else { + # Else it's just a regular (non-branch) tag. + push @{$symbolic_names->{$tag_rev}}, $tag_name; + } + } + + return 1; +} + +# ------------------------------------- + +sub read_revision { + my ($line, $branch_numbers) = @_; + + my ($revision) = ( $line =~ /^revision (\d+\.[\d.]+)/ ); + + return + unless $revision; + + return $revision + unless @Follow_Branches; + + foreach my $branch (@Follow_Branches) { + # Special case for following trunk revisions + return $revision + if $branch =~ /^trunk$/i and $revision =~ /^[0-9]+\.[0-9]+$/; + + if ( my $branch_number = $branch_numbers->{$branch} ) { + # Are we on one of the follow branches or an ancestor of same? + + # If this revision is a prefix of the branch number, or possibly is less + # in the minormost number, OR if this branch number is a prefix of the + # revision, then yes. Otherwise, no. + + # So below, we determine if any of those conditions are met. + + # Trivial case: is this revision on the branch? (Compare this way to + # avoid regexps that screw up Emacs indentation, argh.) + if ( substr($revision, 0, (length($branch_number) + 1)) + eq + ($branch_number . ".") ) { + return $revision; + } elsif ( length($branch_number) > length($revision) + and + $No_Ancestors ) { + # Non-trivial case: check if rev is ancestral to branch + + # r_left still has the trailing "." + my ($r_left, $r_end) = ($revision =~ /^((?:\d+\.)+)(\d+)$/); + + # b_left still has trailing "." + # b_mid has no trailing "." + my ($b_left, $b_mid) = ($branch_number =~ /^((?:\d+\.)+)(\d+)\.\d+$/); + + return $revision + if $r_left eq $b_left and $r_end <= $b_mid; + } + } + } + + # Else we are following branches, but this revision isn't on the + # path. So skip it. + return; +} + +# ------------------------------------- + +{ # Closure over %gecos_warned +my %gecos_warned; +sub read_date_author_and_state { + my ($line, $usermap) = @_; + + my ($time, $author, $state, $lines) = parse_date_author_and_state($line); + + if ( defined($usermap->{$author}) and $usermap->{$author} ) { + $author = $usermap->{$author}; + } elsif ( defined $Domain or $Gecos == 1 ) { + my $email = $author; + $email = $author."@".$Domain + if defined $Domain && $Domain ne ''; + + my $pw = getpwnam($author); + my ($fullname, $office, $workphone, $homephone); + if ( defined $pw ) { + ($fullname, $office, $workphone, $homephone) = + split /\s*,\s*/, $pw->gecos; + } else { + warn "Couldn't find gecos info for author '$author'\n" + unless $gecos_warned{$author}++; + $fullname = ''; + } + for (grep defined, $fullname, $office, $workphone, $homephone) { + s/&/ucfirst(lc($pw->name))/ge; + } + $author = $fullname . " <" . $email . ">" + if $fullname ne ''; + } + + return $time, $author, $state, $lines; +} +} + +# ------------------------------------- + +sub read_branches { + my ($line) = @_; + + if ( $Show_Branches ) { + my $lst = $1; + $lst =~ s/(1\.)+1;|(1\.)+1$//; # ignore the trivial branch 1.1.1 + if ( $lst ) { + return split (/;\s+/, $lst); + } else { + return; + } + } else { + # Ugh. This really bothers me. Suppose we see a log entry + # like this: + # + # ---------------------------- + # revision 1.1 + # date: 1999/10/17 03:07:38; author: jrandom; state: Exp; + # branches: 1.1.2; + # Intended first line of log message begins here. + # ---------------------------- + # + # The question is, how we can tell the difference between that + # log message and a *two*-line log message whose first line is + # + # "branches: 1.1.2;" + # + # See the problem? The output of "cvs log" is inherently + # ambiguous. + # + # For now, we punt: we liberally assume that people don't + # write log messages like that, and just toss a "branches:" + # line if we see it but are not showing branches. I hope no + # one ever loses real log data because of this. + return; + } +} + +# ------------------------------------- + +sub read_changelog { + my ($command) = @_; + + my $grand_poobah = CVS::Utils::ChangeLog::EntrySet->new; + + my $file_full_path; + my $detected_file_separator; + my $author; + my $revision; + my $time; + my $state; + my $lines; + my $msg_txt; + + # We might be expanding usernames + my %usermap = maybe_read_user_map_file; + + # In general, it's probably not very maintainable to use state + # variables like this to tell the loop what it's doing at any given + # moment, but this is only the first one, and if we never have more + # than a few of these, it's okay. + my $collecting_symbolic_names = 0; + my %symbolic_names; # Where tag names get stored. + my %branch_names; # We'll grab branch names while we're at it. + my %branch_numbers; # Save some revisions for @Follow_Branches + my @branch_roots; # For showing which files are branch ancestors. + + if (! $Input_From_Stdin) { + my $Log_Source_Command = join(' ', @$command); + &debug ("(run \"${Log_Source_Command}\")\n"); + open (LOG_SOURCE, "$Log_Source_Command |") + or die "unable to run \"${Log_Source_Command}\""; + } + else { + open (LOG_SOURCE, "-") or die "unable to open stdin for reading"; + } + + binmode LOG_SOURCE; + + XX_Log_Source: + while () { + # Canonicalize line endings + s/\r$//; + + # If on a new file and don't see filename, skip until we find it, and + # when we find it, grab it. + if ( ! defined $file_full_path ) { + $file_full_path = read_file_path($_); + next XX_Log_Source; + } elsif ( /^symbolic names:$/ ) { + # Collect tag names in case we're asked to print them in the output. + $collecting_symbolic_names = 1; + next XX_Log_Source; # There's no more info on this line, so skip to next + } elsif ($collecting_symbolic_names) { + $collecting_symbolic_names = + read_symbolic_name($_, + \(%branch_names, %branch_numbers, %symbolic_names)); + next XX_Log_Source; + } + + # If have file name, but not revision, and see revision, then grab + # it. (We collect unconditionally, even though we may or may not + # ever use it.) + if ( ( ! defined $revision) ) { + $revision = read_revision($_, \%branch_numbers); + # This breaks, because files with no messages don't get to call clear + # and so the file picks up messages from the next file in sequence + # next XX_Log_Source; + } + + # If we don't have a revision right now, we couldn't possibly + # be looking at anything useful. + if (! (defined ($revision))) { + $detected_file_separator = /^$file_separator$/o; + if ($detected_file_separator) { + # No revisions for this file; can happen, e.g. "cvs log -d DATE" + goto XX_Clear; + } + else { + next XX_Log_Source; + } + } + + # If have file name but not date and author, and see date or + # author, then grab them: + unless (defined $time) { + if (/^date: .*/) { + ($time, $author, $state, $lines) = + read_date_author_and_state($_, \%usermap); + } else { + $detected_file_separator = /^$file_separator$/o; + goto XX_Clear + # No revisions for this file; can happen, e.g. "cvs log -d DATE" + if $detected_file_separator; + } + + # If the date/time/author hasn't been found yet, we couldn't + # possibly care about anything we see. So skip: + next XX_Log_Source; + } + + # A "branches: ..." line here indicates that one or more branches + # are rooted at this revision. If we're showing branches, then we + # want to show that fact as well, so we collect all the branches + # that this is the latest ancestor of and store them in + # @branch_roots. Just for reference, the format of the line we're + # seeing at this point is: + # + # branches: 1.5.2; 1.5.4; ...; + # + # Okay, here goes: + if ( /^branches:\s+(.*);$/ ) { + @branch_roots = read_branches($_); + next XX_Log_Source; + } + + # If have file name, time, and author, then we're just grabbing + # log message texts: + $detected_file_separator = /^$file_separator$/o; + if ($detected_file_separator && ! (defined $revision)) { + # No revisions for this file; can happen, e.g. "cvs log -d DATE" + goto XX_Clear; + } + unless ($detected_file_separator || /^$logmsg_separator$/o) + { + $msg_txt .= $_; # Normally, just accumulate the message... + next XX_Log_Source; + } + # ... until a msg separator is encountered: + # Ensure the message contains something: + if ((! $msg_txt) + || ($msg_txt =~ /^\s*\.\s*$|^\s*$/) + || ($msg_txt =~ /\*\*\* empty log message \*\*\*/)) + { + if ($Prune_Empty_Msgs) { + goto XX_Clear; + } + # else + $msg_txt = "[no log message]\n"; + } + + ### Store it all in the Grand Poobah: + { + my $qunk = CVS::Utils::ChangeLog::FileEntry->new($file_full_path, $time, $revision, + $state, $lines, + \%branch_names, \@branch_roots, + \%symbolic_names); + + # We might be including revision numbers and/or tags and/or + # branch names in the output. Most of the code from here to + # loop-end deals with organizing these in qunk. + + unless ( $Hide_Branch_Additions + and + $msg_txt =~ /file .+ was initially added on branch \S+./ ) { + # Add this file to the list + # (We use many spoonfuls of autovivication magic. Hashes and arrays + # will spring into existence if they aren't there already.) + + &debug ("(pushing log msg for ". $qunk->dir_key . $qunk->filename . ")\n"); + + # Store with the files in this commit. Later we'll loop through + # again, making sure that revisions with the same log message + # and nearby commit times are grouped together as one commit. + $grand_poobah->{$qunk->dir_key}{$author}{$time}{$msg_txt} = + CVS::Utils::ChangeLog::Message->new($msg_txt) + unless exists $grand_poobah->{$qunk->dir_key}{$author}{$time}{$msg_txt}; + $grand_poobah->{$qunk->dir_key}{$author}{$time}{$msg_txt}->add_fileentry($qunk); + } + } + + XX_Clear: + # Make way for the next message + undef $msg_txt; + undef $time; + undef $revision; + undef $author; + undef @branch_roots; + + # Maybe even make way for the next file: + if ($detected_file_separator) { + undef $file_full_path; + undef %branch_names; + undef %branch_numbers; + undef %symbolic_names; + } + } + + close LOG_SOURCE + or die sprintf("Problem reading log input (exit/signal/core: %d/%d/%d)\n", + $? >> 8, $? & 127, $? & 128); + + return $grand_poobah; +} + +# ------------------------------------- + +# Fills up a ChangeLog structure in the current directory. +sub derive_changelog { + my ($command) = @_; + + # See "The Plan" above for a full explanation. + + # Might be adding to an existing ChangeLog + my $accumulation_date = maybe_grab_accumulation_date; + if ($accumulation_date) { + # Insert -d immediately after 'cvs log' + my $Log_Date_Command = "-d\'>${accumulation_date}\'"; + + my ($log_index) = grep $command->[$_] eq 'log', 0..$#$command; + splice @$command, $log_index+1, 0, $Log_Date_Command; + &debug ("(adding log msg starting from $accumulation_date)\n"); + } + +# output_changelog(read_changelog($command)); + read_changelog($command)->output_changelog; +} + +# ------------------------------------- + +sub parse_date_author_and_state { + # Parses the date/time and author out of a line like: + # + # date: 1999/02/19 23:29:05; author: apharris; state: Exp; + + my $line = shift; + + my ($year, $mon, $mday, $hours, $min, $secs, $author, $state, $rest) = + $line =~ + m#(\d+)/(\d+)/(\d+)\s+(\d+):(\d+):(\d+);\s+author:\s+([^;]+);\s+state:\s+([^;]+);(.*)# + or die "Couldn't parse date ``$line''"; + die "Bad date or Y2K issues" unless ($year > 1969 and $year < 2258); + # Kinda arbitrary, but useful as a sanity check + my $time = timegm($secs,$min,$hours,$mday,$mon-1,$year-1900); + my $lines; + if ( $rest =~ m#\s+lines:\s+(.*)# ) + { + $lines =$1; + } + return ($time, $author, $state, $lines); +} + +# ------------------------------------- + +sub min { $_[0] < $_[1] ? $_[0] : $_[1] } + +# ------------------------------------- + +sub common_path_prefix { + my ($path1, $path2) = @_; + + # For compatibility (with older versions of cvs2cl.pl), we think in UN*X + # terms, and mould windoze filenames to match. Is this really appropriate? + # If a file is checked in under UN*X, and cvs log run on windoze, which way + # do the path separators slope? Can we use fileparse as per the local + # conventions? If so, we should probably have a user option to specify an + # OS to emulate to handle stdin-fed logs. If we did this, we could avoid + # the nasty \-/ transmogrification below. + + my ($dir1, $dir2) = map +(fileparse($_))[1], $path1, $path2; + + # Transmogrify Windows filenames to look like Unix. + # (It is far more likely that someone is running cvs2cl.pl under + # Windows than that they would genuinely have backslashes in their + # filenames.) + tr!\\!/! + for $dir1, $dir2; + + my ($accum1, $accum2, $last_common_prefix) = ('') x 3; + + my @path1 = grep length($_), split qr!/!, $dir1; + my @path2 = grep length($_), split qr!/!, $dir2; + + my @common_path; + for (0..min($#path1,$#path2)) { + if ( $path1[$_] eq $path2[$_]) { + push @common_path, $path1[$_]; + } else { + last; + } + } + + return join '', map "$_/", @common_path; +} + +# ------------------------------------- +sub parse_options { # Check this internally before setting the global variable. my $output_file; @@ -1930,209 +2389,150 @@ sub parse_options () # the end of this subroutine. my $exit_with_admonishment = 0; + # command to generate the log + my @log_source_command = qw( cvs log ); + my (@Global_Opts, @Local_Opts); - while (my $arg = shift (@ARGV)) - { - if ($arg =~ /^-h$|^-help$|^--help$|^--usage$|^-?$/) { - $Print_Usage = 1; - } - elsif ($arg =~ /^--delta$/) { - my $narg = shift(@ARGV) || die "$arg needs argument.\n"; - if ($narg =~ /^([A-Za-z][A-Za-z0-9_\-]*):([A-Za-z][A-Za-z0-9_\-]*)$/) { - $Delta_From = $1; - $Delta_To = $2; - $Delta_Mode = 1; - } else { - die "--delta FROM_TAG:TO_TAG is what you meant to say.\n"; - } - } - elsif ($arg =~ /^--debug$/) { # unadvertised option, heh - $Debug = 1; - } - elsif ($arg =~ /^--version$/) { - $Print_Version = 1; - } - elsif ($arg =~ /^-g$|^--global-opts$/) { - my $narg = shift (@ARGV) || die "$arg needs argument.\n"; - # Don't assume CVS is called "cvs" on the user's system: - push @Global_Opts, $narg; - $Log_Source_Command =~ s/(^\S*)/$1 $narg/; - } - elsif ($arg =~ /^-l$|^--log-opts$/) { - my $narg = shift (@ARGV) || die "$arg needs argument.\n"; - push @Local_Opts, $narg; - $Log_Source_Command .= " $narg"; - } - elsif ($arg =~ /^-f$|^--file$/) { - my $narg = shift (@ARGV) || die "$arg needs argument.\n"; - $output_file = $narg; - } - elsif ($arg =~ /^--accum$/) { - $Cumulative = 1; - } - elsif ($arg =~ /^--update$/) { - $Update = 1; - } - elsif ($arg =~ /^--fsf$/) { - $FSF_Style = 1; - } - elsif ($arg =~ /^--FSF$/) { - $Show_Times = 0; - $Common_Dir = 0; - } - elsif ($arg =~ /^--rcs/) { - my $narg = shift (@ARGV) || die "$arg needs argument.\n"; - $RCS_Root = $narg; - $RCS_Mode = 1; - } - elsif ($arg =~ /^-U$|^--usermap$/) { - my $narg = shift (@ARGV) || die "$arg needs argument.\n"; - $User_Map_File = $narg; - } - elsif ($arg =~ /^--gecos$/) { - $Gecos = 1; - } - elsif ($arg =~ /^--domain$/) { - my $narg = shift (@ARGV) || die "$arg needs argument.\n"; - $Domain = $narg; - } - elsif ($arg =~ /^--passwd$/) { - my $narg = shift (@ARGV) || die "$arg needs argument.\n"; - $User_Passwd_File = $narg; - } - elsif ($arg =~ /^--mailname$/) { - my $narg = shift (@ARGV) || die "$arg needs argument.\n"; - warn "--mailname is deprecated; please use --domain instead\n"; - $Domain = $narg; - } - elsif ($arg =~ /^-W$|^--window$/) { - defined(my $narg = shift (@ARGV)) || die "$arg needs argument.\n"; - $Max_Checkin_Duration = $narg; - } - elsif ($arg =~ /^--chrono$/) { - $Chronological_Order = 1; - } - elsif ($arg =~ /^-I$|^--ignore$/) { - my $narg = shift (@ARGV) || die "$arg needs argument.\n"; - push (@Ignore_Files, $narg); - } - elsif ($arg =~ /^-C$|^--case-insensitive$/) { - $Case_Insensitive = 1; - } - elsif ($arg =~ /^-R$|^--regexp$/) { - my $narg = shift (@ARGV) || die "$arg needs argument.\n"; - $Regexp_Gate = $narg; - } - elsif ($arg =~ /^--stdout$/) { - $Output_To_Stdout = 1; - } - elsif ($arg =~ /^--version$/) { - $Print_Version = 1; - } - elsif ($arg =~ /^-d$|^--distributed$/) { - $Distributed = 1; - } - elsif ($arg =~ /^-P$|^--prune$/) { - $Prune_Empty_Msgs = 1; - } - elsif ($arg =~ /^-S$|^--separate-header$/) { - $After_Header = "\n\n"; - } - elsif ($arg =~ /^--no-wrap$/) { - $No_Wrap = 1; - } - elsif ($arg =~ /^--summary$/) { - $Summary = 1; - $After_Header = "\n\n"; # Summary implies --separate-header - } - elsif ($arg =~ /^--gmt$|^--utc$/) { - $UTC_Times = 1; - } - elsif ($arg =~ /^-w$|^--day-of-week$/) { - $Show_Day_Of_Week = 1; - } - elsif ($arg =~ /^--no-times$/) { - $Show_Times = 0; - } - elsif ($arg =~ /^-r$|^--revisions$/) { - $Show_Revisions = 1; - } - elsif ($arg =~ /^--show-dead$/) { - $Show_Dead = 1; - } - elsif ($arg =~ /^--no-hide-branch-additions$/) { - $Hide_Branch_Additions = 0; - } - elsif ($arg =~ /^-t$|^--tags$/) { - $Show_Tags = 1; - } - elsif ($arg =~ /^-T$|^--tagdates$/) { - $Show_Tag_Dates = 1; - } - elsif ($arg =~ /^-b$|^--branches$/) { - $Show_Branches = 1; - } - elsif ($arg =~ /^-F$|^--follow$/) { - my $narg = shift (@ARGV) || die "$arg needs argument.\n"; - push (@Follow_Branches, $narg); - } - elsif ($arg =~ /^--stdin$/) { - $Input_From_Stdin = 1; - } - elsif ($arg =~ /^--header$/) { - my $narg = shift (@ARGV) || die "$arg needs argument.\n"; - $ChangeLog_Header = &slurp_file ($narg); - if (! defined ($ChangeLog_Header)) { - $ChangeLog_Header = ""; - } - } - elsif ($arg =~ /^--xml-encoding$/) { - my $narg = shift (@ARGV) || die "$arg needs argument.\n"; - $XML_Encoding = $narg ; - } - elsif ($arg =~ /^--xml$/) { - $XML_Output = 1; - } - elsif ($arg =~ /^--noxmlns$/) { - $No_XML_Namespace = 1; - } - elsif ($arg =~ /^--hide-filenames$/) { - $Hide_Filenames = 1; - $After_Header = ""; - } - elsif ($arg =~ /^--no-common-dir$/) { - $Common_Dir = 0; - } - elsif ($arg =~ /^--ignore-tag$/ ) { - die "$arg needs argument.\n" - unless @ARGV; - $ignore_tags{shift @ARGV} = 1; - } - elsif ($arg =~ /^--show-tag$/ ) { - die "$arg needs argument.\n" - unless @ARGV; - $show_tags{shift @ARGV} = 1; - } - elsif ( lc ($arg) eq '--test-code' ) { - # Deliberately undocumented. This is not a public interface, - # and may change/disappear at any time. - die "$arg needs argument.\n" - unless @ARGV; - $TestCode = shift @ARGV; - } - elsif ($arg =~ /^--no-ancestors$/) { - $No_Ancestors = 1; - } - else { - # Just add a filename as argument to the log command - $Log_Source_Command .= " '$arg'"; - } - } + Getopt::Long::Configure(qw( bundling permute no_getopt_compat + pass_through no_ignore_case )); + GetOptions('help|usage|h' => \$Print_Usage, + 'debug' => \$Debug, # unadvertised option, heh + 'version' => \$Print_Version, + + 'file|f=s' => \$output_file, + 'accum' => \$Cumulative, + 'update' => \$Update, + 'fsf' => \$FSF_Style, + 'rcs=s' => \$RCS_Root, + 'usermap|U=s' => \$User_Map_File, + 'gecos' => \$Gecos, + 'domain=s' => \$Domain, + 'passwd=s' => \$User_Passwd_File, + 'window|W=i' => \$Max_Checkin_Duration, + 'chrono' => \$Chronological_Order, + 'ignore|I=s' => \@Ignore_Files, + 'case-insensitive|C' => \$Case_Insensitive, + 'regexp|R=s' => \$Regexp_Gate, + 'stdin' => \$Input_From_Stdin, + 'stdout' => \$Output_To_Stdout, + 'distributed|d' => sub { CVS::Utils::ChangeLog::FileEntry->distributed(1) }, + 'prune|P' => \$Prune_Empty_Msgs, + 'no-wrap' => \$No_Wrap, + 'gmt|utc' => \$UTC_Times, + 'day-of-week|w' => \$Show_Day_Of_Week, + 'revisions|r' => \$Show_Revisions, + 'show-dead' => \$Show_Dead, + 'tags|t' => \$Show_Tags, + 'tagdates|T' => \$Show_Tag_Dates, + 'branches|b' => \$Show_Branches, + 'follow|F=s' => \@Follow_Branches, + 'xml-encoding=s' => \$XML_Encoding, + 'xml' => \$XML_Output, + 'noxmlns' => \$No_XML_Namespace, + 'no-xml-iso-date' => \$No_XML_ISO_Date, + 'no-ancestors' => \$No_Ancestors, + + 'no-indent' => sub { + $Indent = ''; + }, + + 'summary' => sub { + $Summary = 1; + $After_Header = "\n\n"; # Summary implies --separate-header + }, + + 'no-times' => sub { + $Show_Times = 0; + }, + + 'no-hide-branch-additions' => sub { + $Hide_Branch_Additions = 0; + }, + + 'no-common-dir' => sub { + $Common_Dir = 0; + }, + + 'ignore-tag=s' => sub { + $ignore_tags{$_[1]} = 1; + }, + + 'show-tag=s' => sub { + $show_tags{$_[1]} = 1; + }, + + # Deliberately undocumented. This is not a public interface, and + # may change/disappear at any time. + 'test-code=s' => \$TestCode, + + 'delta=s' => sub { + my $arg = $_[1]; + if ( $arg =~ + /^([A-Za-z][A-Za-z0-9_\-]*):([A-Za-z][A-Za-z0-9_\-]*)$/ ) { + $Delta_From = $1; + $Delta_To = $2; + $Delta_Mode = 1; + } else { + die "--delta FROM_TAG:TO_TAG is what you meant to say.\n"; + } + }, + + 'FSF' => sub { + $Show_Times = 0; + $Common_Dir = 0; + $No_Extra_Indent = 1; + $Indent = "\t"; + }, + + 'header=s' => sub { + my $narg = $_[1]; + $ChangeLog_Header = &slurp_file ($narg); + if (! defined ($ChangeLog_Header)) { + $ChangeLog_Header = ''; + } + }, + + 'global-opts|g=s' => sub { + my $narg = $_[1]; + push @Global_Opts, $narg; + splice @log_source_command, 1, 0, $narg; + }, + + 'log-opts|l=s' => sub { + my $narg = $_[1]; + push @Local_Opts, $narg; + push @log_source_command, $narg; + }, + + 'mailname=s' => sub { + my $narg = $_[1]; + warn "--mailname is deprecated; please use --domain instead\n"; + $Domain = $narg; + }, + + 'separate-header|S' => sub { + $After_Header = "\n\n"; + $No_Extra_Indent = 1; + }, + + 'group-within-date' => sub { + $GroupWithinDate = 1; + $Show_Times = 0; + }, + + 'hide-filenames' => sub { + $Hide_Filenames = 1; + $After_Header = ''; + }, + ) + or die "options parsing failed\n"; + + push @log_source_command, map "'$_'", @ARGV; ## Check for contradictions... - if ($Output_To_Stdout && $Distributed) { + if ($Output_To_Stdout && CVS::Utils::ChangeLog::FileEntry->distributed) { print STDERR "cannot pass both --stdout and --distributed\n"; $exit_with_admonishment = 1; } @@ -2157,6 +2557,16 @@ sub parse_options () $exit_with_admonishment = 1; } + # Other consistency checks and option-driven logic + + # Bleargh. Compensate for a deficiency of custom wrapping. + if ( ($After_Header ne " ") and $FSF_Style ) { + $After_Header .= "\t"; + } + + @Ignore_Files = map lc, @Ignore_Files + if $Case_Insensitive; + # Or if any other error message has already been printed out, we # just leave now: if ($exit_with_admonishment) { @@ -2177,10 +2587,13 @@ sub parse_options () if ($output_file) { $Log_File_Name = $output_file; } + + return \@log_source_command; } -sub slurp_file () -{ +# ------------------------------------- + +sub slurp_file { my $filename = shift || die ("no filename passed to slurp_file()"); my $retstr; @@ -2193,237 +2606,481 @@ sub slurp_file () return $retstr; } -sub debug () -{ +# ------------------------------------- + +sub debug { if ($Debug) { my $msg = shift; print STDERR $msg; } } -sub version () -{ +# ------------------------------------- + +sub version { print "cvs2cl.pl version ${VERSION}; distributed under the GNU GPL.\n"; } -sub usage () -{ +# ------------------------------------- + +sub usage { &version (); - print <<'END_OF_INFO'; -Generate GNU-style ChangeLogs in CVS working copies. -Notes about the output format(s): + eval "use Pod::Usage qw( pod2usage )"; - The default output of cvs2cl.pl is designed to be compact, formally - unambiguous, but still easy for humans to read. It is largely - self-explanatory, I hope; the one abbreviation that might not be - obvious is "utags". That stands for "universal tags" -- a - universal tag is one held by all the files in a given change entry. + if ( $@ ) { + print <<'END'; - If you need output that's easy for a program to parse, use the - --xml option. Note that with XML output, just about all available - information is included with each change entry, whether you asked - for it or not, on the theory that your parser can ignore anything - it's not looking for. +* Pod::Usage was not found. The formatting may be suboptimal. Consider + upgrading your Perl --- Pod::Usage is standard from 5.6 onwards, and + versions of perl prior to 5.6 are getting rather rusty, now. Alternatively, + install Pod::Usage direct from CPAN. +END -Notes about the options and arguments (the actual options are listed -last in this usage message): + local $/ = undef; + my $message = ; + $message =~ s/^=(head1|item) //gm; + $message =~ s/^=(over|back).*\n//gm; + $message =~ s/\n{3,}/\n\n/g; + print $message; + } else { + print "\n"; + pod2usage( -exitval => 'NOEXIT', + -verbose => 1, + -output => \*STDOUT, + ); + } - * The -I and -F options may appear multiple times. - - * To follow trunk revisions, use "-F trunk" ("-F TRUNK" also works). - This is okay because no would ever, ever be crazy enough to name a - branch "trunk", right? Right. - - * For the -U option, the UFILE should be formatted like - CVSROOT/users. That is, each line of UFILE looks like this - jrandom:jrandom@red-bean.com - or maybe even like this - jrandom:'Jesse Q. Random ' - Don't forget to quote the portion after the colon if necessary. - - * Many people want to filter by date. To do so, invoke cvs2cl.pl - like this: - cvs2cl.pl -l "-d'DATESPEC'" - where DATESPEC is any date specification valid for "cvs log -d". - (Note that CVS 1.10.7 and below requires there be no space between - -d and its argument). - -Options/Arguments: - - -h, -help, --help, or -? Show this usage and exit - --version Show version and exit - -r, --revisions Show revision numbers in output - -b, --branches Show branch names in revisions when possible - -t, --tags Show tags (symbolic names) in output - -T, --tagdates Show tags in output on their first occurance - --show-dead Show dead files - --stdin Read from stdin, don't run cvs log - --stdout Output to stdout not to ChangeLog - -d, --distributed Put ChangeLogs in subdirs - -f FILE, --file FILE Write to FILE instead of "ChangeLog" - --fsf Use this if log data is in FSF ChangeLog style - --FSF Attempt strict FSF-standard compatible output - -W SECS, --window SECS Window of time within which log entries unify - -U UFILE, --usermap UFILE Expand usernames to email addresses from UFILE - --passwd PASSWORDFILE Use system passwd file for user name expansion. - If no mail domain is provided (via --domain), - it tries to read one from /etc/mailname else - output of - hostname -d / dnsdomainname / domainname. Dies - if none successful. Use a domain of '' to - prevent the addition of a mail domain. - --domain DOMAIN Domain to build email addresses from - --gecos Get user information from GECOS data - -R REGEXP, --regexp REGEXP Include only entries that match REGEXP - -I REGEXP, --ignore REGEXP Ignore files whose names match REGEXP - -C, --case-insensitive Any regexp matching is done case-insensitively - -F BRANCH, --follow BRANCH Show only revisions on or ancestral to BRANCH - --no-ancestors When using -F, only track changes since the - BRANCH started - --no-hide-branch-additions By default, entries generated by cvs for a file - added on a branch (a dead 1.1 entry) are not - shown. This flag reverses that action. - -S, --separate-header Blank line between each header and log message - --summary Add CVS change summary information - --no-wrap Don't auto-wrap log message (recommend -S also) - --gmt, --utc Show times in GMT/UTC instead of local time - --accum Add to an existing ChangeLog (incompat w/ --xml) - -w, --day-of-week Show day of week - --no-times Don't show times in output - --chrono Output log in chronological order - (default is reverse chronological order) - --header FILE Get ChangeLog header from FILE ("-" means stdin) - --xml Output XML instead of ChangeLog format - --xml-encoding ENCODING Insert encoding clause in XML header - --noxmlns Don't include xmlns= attribute in root element - --hide-filenames Don't show filenames (ignored for XML output) - --no-common-dir Don't shorten directory names from filenames. - --rcs CVSROOT Handle filenames from raw RCS, for instance - those produced by "cvs rlog" output, stripping - the prefix CVSROOT. - -P, --prune Don't show empty log messages - --ignore-tag TAG Ignore individual changes that are associated - with a given tag. May be repeated, if so, - changes that are associated with any of the - given tags are ignored. - --show-tag TAG Log only individual changes that are associated - with a given tag. May be repeated, if so, - changes that are associated with any of the - given tags are logged. - --delta FROM_TAG:TO_TAG Attempt a delta between two tags (since FROM_TAG - up to & including TO_TAG). The algorithm is a - simple date-based one (this is a *hard* problem) - so results are imperfect - -g OPTS, --global-opts OPTS Invoke like this "cvs OPTS log ..." - -l OPTS, --log-opts OPTS Invoke like this "cvs ... log OPTS" - FILE1 [FILE2 ...] Show only log information for the named FILE(s) - -See http://www.red-bean.com/cvs2cl for maintenance and bug info. -END_OF_INFO + return; } -__END__ +# Main ----------------------------------------------------------------------- + +my $log_source_command = parse_options; +if ( defined $TestCode ) { + eval $TestCode; + die "Eval failed: '$@'\n" + if $@; +} else { + derive_changelog($log_source_command); +} + +__DATA__ =head1 NAME -cvs2cl.pl - produces GNU-style ChangeLogs in CVS working copies, by - running "cvs log" and parsing the output. Shared log entries are - unified in an intuitive way. +cvs2cl.pl - convert cvs log messages to changelogs + +=head1 SYNOPSIS + +B [I] [I [I ...]] =head1 DESCRIPTION -This script generates GNU-style ChangeLog files from CVS log -information. Basic usage: just run it inside a working copy and a -ChangeLog will appear. It requires repository access (i.e., 'cvs log' -must work). Run "cvs2cl.pl --help" to see more advanced options. +cvs2cl produces a GNU-style ChangeLog for CVS-controlled sources by +running "cvs log" and parsing the output. Duplicate log messages get +unified in the Right Way. -See http://www.red-bean.com/cvs2cl for updates, and for instructions -on getting anonymous CVS access to this script. +The default output of cvs2cl is designed to be compact, formally unambiguous, +but still easy for humans to read. It should be largely self-explanatory; the +one abbreviation that might not be obvious is "utags". That stands for +"universal tags" -- a universal tag is one held by all the files in a given +change entry. -Maintainer: Karl Fogel -Please report bugs to . +If you need output that's easy for a program to parse, use the B<--xml> option. +Note that with XML output, just about all available information is included +with each change entry, whether you asked for it or not, on the theory that +your parser can ignore anything it's not looking for. -=head1 README +If filenames are given as arguments cvs2cl only shows log information for the +named files. -This script generates GNU-style ChangeLog files from CVS log -information. Basic usage: just run it inside a working copy and a -ChangeLog will appear. It requires repository access (i.e., 'cvs log' -must work). Run "cvs2cl.pl --help" to see more advanced options. +=head1 OPTIONS -See http://www.red-bean.com/cvs2cl for updates, and for instructions -on getting anonymous CVS access to this script. +=over 4 -Maintainer: Karl Fogel -Please report bugs to . +=item B<-h>, B<-help>, B<--help>, B<-?> + +Show a short help and exit. + +=item B<--version> + +Show version and exit. + +=item B<-r>, B<--revisions> + +Show revision numbers in output. + +=item B<-b>, B<--branches> + +Show branch names in revisions when possible. + +=item B<-t>, B<--tags> + +Show tags (symbolic names) in output. + +=item B<-T>, B<--tagdates> + +Show tags in output on their first occurance. + +=item B<--show-dead> + +Show dead files. + +=item B<--stdin> + +Read from stdin, don't run cvs log. + +=item B<--stdout> + +Output to stdout not to ChangeLog. + +=item B<-d>, B<--distributed> + +Put ChangeLogs in subdirs. + +=item B<-f> I, B<--file> I + +Write to I instead of ChangeLog. + +=item B<--fsf> + +Use this if log data is in FSF ChangeLog style. + +=item B<--FSF> + +Attempt strict FSF-standard compatible output. + +=item B<-W> I, B<--window> I + +Window of time within which log entries unify. + +=item -B I, B<--usermap> I + +Expand usernames to email addresses from I. + +=item B<--passwd> I + +Use system passwd file for user name expansion. If no mail domain is provided +(via B<--domain>), it tries to read one from B, output of B, B, or B. cvs2cl exits with an error if none of +those options is successful. Use a domain of '' to prevent the addition of a +mail domain. + +=item B<--domain> I + +Domain to build email addresses from. + +=item B<--gecos> + +Get user information from GECOS data. + +=item B<-R> I, B<--regexp> I + +Include only entries that match I. This option may be used multiple +times. + +=item B<-I> I, B<--ignore> I + +Ignore files whose names match I. This option may be used multiple +times. + +=item B<-C>, B<--case-insensitive> + +Any regexp matching is done case-insensitively. + +=item B<-F> I, B<--follow> I + +Show only revisions on or ancestral to I. + +=item B<--no-ancestors> + +When using B<-F>, only track changes since the I started. + +=item B<--no-hide-branch-additions> + +By default, entries generated by cvs for a file added on a branch (a dead 1.1 +entry) are not shown. This flag reverses that action. + +=item B<-S>, B<--separate-header> + +Blank line between each header and log message. + +=item B<--summary> + +Add CVS change summary information. + +=item B<--no-wrap> + +Don't auto-wrap log message (recommend B<-S> also). + +=item B<--no-indent> + +Don't indent log message + +=item B<--gmt>, B<--utc> + +Show times in GMT/UTC instead of local time. + +=item B<--accum> + +Add to an existing ChangeLog (incompatible with B<--xml>). + +=item B<-w>, B<--day-of-week> + +Show day of week. + +=item B<--no-times> + +Don't show times in output. + +=item B<--chrono> + +Output log in chronological order (default is reverse chronological order). + +=item B<--header> I + +Get ChangeLog header from I ("B<->" means stdin). + +=item B<--xml> + +Output XML instead of ChangeLog format. + +=item B<--xml-encoding> I + +Insert encoding clause in XML header. + +=item B<--noxmlns> + +Don't include xmlns= attribute in root element. + +=item B<--hide-filenames> + +Don't show filenames (ignored for XML output). + +=item B<--no-common-dir> + +Don't shorten directory names from filenames. + +=item B<--rcs> I + +Handle filenames from raw RCS, for instance those produced by "cvs rlog" +output, stripping the prefix I. + +=item B<-P>, B<--prune> + +Don't show empty log messages. + +=item B<--ignore-tag> I + +Ignore individual changes that are associated with a given tag. +May be repeated, if so, changes that are associated with any of +the given tags are ignored. + +=item B<--show-tag> I + +Log only individual changes that are associated with a given +tag. May be repeated, if so, changes that are associated with +any of the given tags are logged. + +=item B<--delta> IB<:>I + +Attempt a delta between two tags (since I up to and +including I). The algorithm is a simple date-based one +(this is a hard problem) so results are imperfect. + +=item B<-g> I, B<--global-opts> I + +Pass I to cvs like in "cvs I log ...". + +=item B<-l> I, B<--log-opts> I + +Pass I to cvs log like in "cvs ... log I". + +=back + +Notes about the options and arguments: + +=over 4 + +=item * + +The B<-I> and B<-F> options may appear multiple times. + +=item * + +To follow trunk revisions, use "B<-F trunk>" ("B<-F TRUNK>" also works). This is +okay because no would ever, ever be crazy enough to name a branch "trunk", +right? Right. + +=item * + +For the B<-U> option, the I should be formatted like CVSROOT/users. That is, +each line of I looks like this: + + jrandom:jrandom@red-bean.com + +or maybe even like this + + jrandom:'Jesse Q. Random ' + +Don't forget to quote the portion after the colon if necessary. + +=item * + +Many people want to filter by date. To do so, invoke cvs2cl.pl like this: + + cvs2cl.pl -l "-d'DATESPEC'" + +where DATESPEC is any date specification valid for "cvs log -d". (Note that +CVS 1.10.7 and below requires there be no space between -d and its argument). + +=item * + +Dates/times are interpreted in the local time zone. + +=item * + +Remember to quote the argument to `B<-l>' so that your shell doesn't interpret +spaces as argument separators. + +=item * + +See the 'Common Options' section of the cvs manual ('info cvs' on UNIX-like +systems) for more information. + +=item * + +Note that the rules for quoting under windows shells are different. + +=back + +=head1 EXAMPLES + +Some examples (working on UNIX shells): + + # logs after 6th March, 2003 (inclusive) + cvs2cl.pl -l "-d'>2003-03-06'" + # logs after 4:34PM 6th March, 2003 (inclusive) + cvs2cl.pl -l "-d'>2003-03-06 16:34'" + # logs between 4:46PM 6th March, 2003 (exclusive) and + # 4:34PM 6th March, 2003 (inclusive) + cvs2cl.pl -l "-d'2003-03-06 16:46>2003-03-06 16:34'" + +Some examples (on non-UNIX shells): + + # Reported to work on windows xp/2000 + cvs2cl.pl -l "-d"">2003-10-18;today<""" + +=head1 AUTHORS + +=over 4 + +=item Karl Fogel + +=item Melissa O'Neal + +=item Martyn J. Pearce + +=back + +Contributions from + +=over 4 + +=item Mike Ayers + +=item Tim Bradshaw + +=item Richard Broberg + +=item Nathan Bryant + +=item Oswald Buddenhagen + +=item Arthur de Jong + +=item Mark W. Eichin + +=item Dave Elcock + +=item Reid Ellis + +=item Simon Josefsson + +=item Robin Hugh Johnson + +=item Terry Kane + +=item Akos Kiss + +=item Claus Klein + +=item Eddie Kohler + +=item Richard Laager + +=item Kevin Lilly + +=item Karl-Heinz Marbaise + +=item Mitsuaki Masuhara + +=item Henrik Nordstrom + +=item Joe Orton + +=item Peter Palfrader + +=item Thomas Parmelan + +=item Johanne Stezenbach + +=item Joseph Walton + +=item Ernie Zapata + +=back + +=head1 BUGS + +Please report bugs to C. =head1 PREREQUISITES -This script requires C, C, and -C. -It also seems to require C or higher. +This script requires C, C, and C. It +also seems to require C or higher. -=pod OSNAMES +=head1 OPERATING SYSTEM COMPATIBILITY -any +Should work on any OS. -=pod SCRIPT CATEGORIES +=head1 SCRIPT CATEGORIES Version_Control/CVS -=cut +=head1 COPYRIGHT --*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- -*- +(C) 2001,2002,2003,2004 Martyn J. Pearce , under the GNU GPL. -Note about a bug-slash-opportunity: ------------------------------------ +(C) 1999 Karl Fogel , under the GNU GPL. -There's a bug in Text::Wrap, which affects cvs2cl. This script -reveals it: +cvs2cl.pl is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. - #!/usr/bin/perl -w +cvs2cl.pl is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - use Text::Wrap; +You may have received a copy of the GNU General Public License +along with cvs2cl.pl; see the file COPYING. If not, write to the +Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. - my $test_text = - "This script demonstrates a bug in Text::Wrap. The very long line - following this paragraph will be relocated relative to the surrounding - text: +=head1 SEE ALSO - ==================================================================== - - See? When the bug happens, we'll get the line of equal signs below - this paragraph, even though it should be above."; - - # Print out the test text with no wrapping: - print "$test_text"; - print "\n"; - print "\n"; - - # Now print it out wrapped, and see the bug: - print wrap ("\t", " ", "$test_text"); - print "\n"; - print "\n"; - -If the line of equal signs were one shorter, then the bug doesn't -happen. Interesting. - -Anyway, rather than fix this in Text::Wrap, we might as well write a -new wrap() which has the following much-needed features: - -* initial indentation, like current Text::Wrap() -* subsequent line indentation, like current Text::Wrap() -* user chooses among: force-break long words, leave them alone, or die()? -* preserve existing indentation: chopped chunks from an indented line - are indented by same (like this line, not counting the asterisk!) -* optional list of things to preserve on line starts, default ">" - -Note that the last two are essentially the same concept, so unify in -implementation and give a good interface to controlling them. - -And how about: - -Optionally, when encounter a line pre-indented by same as previous -line, then strip the newline and refill, but indent by the same. -Yeah... +cvs(1)