diff --git a/.gitignore b/.gitignore index 030dc25..63af3c3 100644 --- a/.gitignore +++ b/.gitignore @@ -4,3 +4,9 @@ update*.svn extdata admin-docs/*Bioc2017* .gitignore +admin-docs/presentations/.ipynb_checkpoints/ +admin-docs/presentations/Demo-bioc2017.ipynb +admin-docs/presentations/core-team-transition.ipynb +hooks/repo-specific/test_proj* +hooks/repo-specific/tests/* +hooks/repo-specific/BiocGenerics_test* diff --git a/R/authz_to_conf.R b/R/authz_to_conf.R index 4d109a9..744cb4a 100644 --- a/R/authz_to_conf.R +++ b/R/authz_to_conf.R @@ -13,13 +13,13 @@ fout <- "gitolite-admin/conf/packages.conf" stopifnot(file.exists(fin), file.exists(fin2), !file.exists(fout)) remap_re <- "[']" # re-map userid characters to '_' -group_re <- "^[-[:alnum:]]+ *= (.*)" +group_re <- "^[-.[:alnum:]]+ *= (.*)" software_template <- paste( "repo packages/%s", " RW master = %s", " RW RELEASE_3_5 = %s", - " option hook.pre-receive = disable-large-commits", + " option hook.pre-receive = pre-receive-hook", "", sep="\n" ) @@ -95,14 +95,14 @@ group_merge <- ## bioconductor.authz -repos_re <- "^\\[/trunk/madman/Rpacks/([[:alnum:]]+)]" +repos_re <- "^\\[/trunk/madman/Rpacks/([.[:alnum:]]+)]" reader_id <- "bioconductor-readers" writer_id <- "bioconductor-write0" svn_path <- "/bioconductor/trunk/madman/Rpacks" bioconductor_authz <- process_authz(fin, repos_re, reader_id, writer_id, svn_path) -repos_re <- "^\\[/trunk/experiment/pkgs/([[:alnum:]]+)]" +repos_re <- "^\\[/trunk/experiment/pkgs/([.[:alnum:]]+)]" reader_id <- "bioc-data_readers" writer_id <- "bioc-data-writers" svn_path <- "/bioc-data/trunk/experiment/pkgs" diff --git a/admin-docs/Core_team_transition.Rmd b/admin-docs/Core_team_transition.Rmd new file mode 100644 index 0000000..35f0130 --- /dev/null +++ b/admin-docs/Core_team_transition.Rmd @@ -0,0 +1,66 @@ +--- +title: "Bioconductor Standard Repository Specification" +author: "Nitesh Turaga" +date: "8/15/2017" +output: html_document +--- + +## Maintain on Github + +1. 
All packages maintained by the Bioconductor core team will be hosted on + Github under the organization account. + +1. Maintainers within the core-team are responsible for keeping their Github and + Bioc-git repo in sync. This allows usage of many Github features, like issues, + pull requests, continuous integration and testing. + +1. Follow the scenarios to keep your package in sync, and push only to `master` + and the most recent release i.e. RELEASE_3_5 (as of August 15th 2017). + +## Package Structure + +Every Bioconductor repository being maintained should look like: + +``` +git branch -a +``` + +NOTE: `master` is equivalent to SVN `devel`. Release branches should be named + `RELEASE_X_Y`, case-sensitive. + +``` +* master + remotes/origin/HEAD -> origin/master + remotes/origin/master + remotes/origin/RELEASE_3_5 + remotes/upstream/RELEASE_2_12 + remotes/upstream/RELEASE_2_13 + remotes/upstream/RELEASE_2_14 + remotes/upstream/RELEASE_3_0 + remotes/upstream/RELEASE_3_1 + remotes/upstream/RELEASE_3_2 + remotes/upstream/RELEASE_3_3 + remotes/upstream/RELEASE_3_4 + remotes/upstream/RELEASE_3_5 + remotes/upstream/master +``` + +Every Bioconductor repository should have the remotes configured properly, + +e.g., the BiocParallel package + +``` +git remote -v +``` + +``` +origin git@github.com:Bioconductor/BiocParallel (fetch) +origin git@github.com:Bioconductor/BiocParallel (push) +upstream git@git.bioconductor.org:packages/BiocParallel (fetch) +upstream git@git.bioconductor.org:packages/BiocParallel (push) +``` + +## Note + +- Avoid `git rebase` if you can help it. It is easier to `git fetch` and + then `git merge`. 
\ No newline at end of file diff --git a/admin-docs/configure.md b/admin-docs/configure.md index c256290..859649e 100644 --- a/admin-docs/configure.md +++ b/admin-docs/configure.md @@ -7,6 +7,8 @@ - [Smart http](#smarthttp) - [Dumb http](#dumbhttp) - [Push / Pull access via SSH](#ssh) + - [svn 'authz' to gitolite 'conf'](#sshgitolite) + - [SSH locale](#sshlocale) ## Server Specs @@ -200,17 +202,18 @@ out-of-the-box Apache configuration to limit what users can see. user: read, write, execute group: read, execute other: none -- All files under /home/git/repositories should have the following permissions: - user: read, write - group: read - other: none -- Testing: - -- Paste https://git.bioconductor.org/packages/ in a browser and confirm all packages are visible. - -- Download a package with `git clone https://git.bioconductor.org/packages/BiocGenerics.git` +- All files under /home/git/repositories should have the following permissions: + user: read, write + group: read + other: none +- Testing: + -- Paste https://git.bioconductor.org/packages/ in a browser and confirm all packages are visible. + -- Download a package with `git clone https://git.bioconductor.org/packages/BiocGenerics.git` -## Push / pull access via ssh +## Push / pull access via SSH + ### svn 'authz' to gitolite 'conf' The gitolite configuration involves @@ -240,3 +243,28 @@ The gitolite configuration involves neaGUI.git netReg.git pairseqsim.git pgUtils.git prism.git spade.git stam.git virtualArray.git wiggleplotr.git xcmsGUI.git xmapcore.git + +### SSH locale + +When a user ran 'git pull' with a non-C and non-US locale, the remote +server (i.e., git.bioconductor.org) issued a perl warning that +setting the locale failed and the fallback locale ("en_US.UTF-8") would +be used. See this issue for full details: + +https://github.com/Bioconductor/bioc_git_transition/issues/34 + +To prevent this, the git server was modified to prevent clients +from propagating their locale variables via SSH. 
+ +There are 2 SSH config files, one is for clients connecting to the host +(/etc/ssh/ssh_config) and another for the ssh daemon running on the host +(/etc/ssh/sshd_config). + +Modify the config file for the daemon, /etc/ssh/sshd_config, by commenting out +this line + + AcceptEnv LANG LC_* + +then restart the service + + sudo service sshd restart diff --git a/admin-docs/find_unknowns.sh b/admin-docs/find_unknowns.sh new file mode 100644 index 0000000..bd8fc26 --- /dev/null +++ b/admin-docs/find_unknowns.sh @@ -0,0 +1,9 @@ +for pkg in /home/git/repositories/packages/*git; +do { + cd $pkg + unknown=`git log --all --oneline --committer="unknown"` + latest=`git log --all --since="2017-08-15"` + if [ ! -z "$unknown" ] && [ -z "$latest" ]; then + echo `basename $pkg`; + fi +} done diff --git a/doc/README.md b/doc/README.md new file mode 100644 index 0000000..ff884a3 --- /dev/null +++ b/doc/README.md @@ -0,0 +1,6 @@ +The contents of the documentation are most up to date at + + http://bioconductor.org/developers/how-to/git/ + + + diff --git a/hooks/repo-specific/README.md b/hooks/repo-specific/README.md new file mode 100644 index 0000000..ac9cbc8 --- /dev/null +++ b/hooks/repo-specific/README.md @@ -0,0 +1,58 @@ +Git hooks for Bioconductor +========================= + +This document describes the hooks on the Bioconductor git server. There +are two types of hooks on the Bioconductor git server, + +1. Pre-receive hooks : These hooks intercept the push from the author + of the package and show an error if their commit does not pass the + "check" the hook performs. + + There are three pre-receive hooks on the system, + + 1. Prevent Large files: This hook prevents large files from + entering the git repository, where each file can have a max + size of 5MB. + + 1. Prevent bad version numbers: This hook prevents bad version + numbers according to the documentation given in + http://bioconductor.org/developers/how-to/version-numbering/. + + 1. 
Prevent duplicate commits: This hook checks the last 30 commits + to see if there are any duplicate commits. + +1. Post-receive hooks: This hook takes the commit after it is accepted + into the Bioconductor git server, and processes it for other needs. + + There is currently only one post-receive hook on the system, + + 1. RSS feed: Once a commit is accepted into the system, the + post-receive hook takes the commit information, e.g. the + message, the date and the author information, and publishes + it to the GIT log on the Bioconductor website. It also + builds the RSS file (XML format) for the feed. + + +The hooks are applied differently to both software and +workflow/data-experiment packages. + +Hooks applied to Software packages: + +* Prevent large files + +* Prevent bad version numbers + +* Prevent duplicate commits + +* RSS feed + +Hooks applied to Workflow/Data-Experiment packages: + +* Prevent bad version numbers + +* Prevent duplicate commits + +* RSS feed + + + diff --git a/hooks/repo-specific/disable-large-commits b/hooks/repo-specific/disable-large-commits deleted file mode 100755 index d752dad..0000000 --- a/hooks/repo-specific/disable-large-commits +++ /dev/null @@ -1,42 +0,0 @@ -#!/usr/bin/env bash - -# -# Pre-receive hook that will block any new commits that contain files larger than 5Mb in size. -# - -GITCMD="/usr/bin/git" -zero_commit="0000000000000000000000000000000000000000" -MAXSIZE="5000000" # 5MB limit on file size - -# Read stdin for ref information -while read oldrev newrev refname; do - # Skip branch deletions - if [ "${newrev}" = "${zero_commit}" ]; then - continue; - fi - - # Set oldrev properly if this is branch creation. 
- if [ "${oldrev}" = "${zero_commit}" ]; then - oldrev="HEAD" - fi - - # Get list of files to look at using git diff - for file in $($GITCMD diff --stat --name-only --diff-filter=ACMRT ${oldrev}..${newrev}); do - # Get the size of this file - size=$($GITCMD cat-file -s ${newrev}:${file}) - # Check to see if for some reason we didn't get a size - if [ ! -z ${size} ]; then - # Compare filesize to MAXSIZE - if [ "${size}" -gt "${MAXSIZE}" ]; then - # Send output back to the user about oversized files. - echo "Error: ${file} larger than 5Mb. Please see Biocondcutor guidelines" - echo " https://bioconductor.org/developers/package-guidelines/" - # Fail here - exit 1 - fi # End size comparison - fi # End check for empty size - done # End list of files -done # End reading stdin - -# If successful, pass -exit 0 diff --git a/hooks/repo-specific/post-receive-hook b/hooks/repo-specific/post-receive-hook new file mode 100755 index 0000000..e69eb50 --- /dev/null +++ b/hooks/repo-specific/post-receive-hook @@ -0,0 +1,126 @@ +#!/usr/bin/env python + +import fileinput +from rss_feed import rss_feed +from xml.etree.ElementTree import parse, fromstring +import subprocess +import fcntl +import sys +import logging +logging.basicConfig(filename='/tmp/post-recieve.log', level=logging.DEBUG) + +ZERO_COMMIT = "0000000000000000000000000000000000000000" +BASE_PATH = "/home/git/rss/" + + +def indent(elem, level=0): + i = "\n" + level*" " + if len(elem): + if not elem.text or not elem.text.strip(): + elem.text = i + " " + if not elem.tail or not elem.tail.strip(): + elem.tail = i + for elem in elem: + indent(elem, level+1) + if not elem.tail or not elem.tail.strip(): + elem.tail = i + else: + if level and (not elem.tail or not elem.tail.strip()): + elem.tail = i + + +def write_and_limit_feed(entry_list, length, feed): + doc = parse(feed) + root = doc.getroot() + + # Get items + channel_root = root.find("channel") + items = channel_root.findall("item") + # Write feed + for entry in entry_list: 
+ # 5 is the entry position in the feed + channel_root.insert(5, entry) + # Remove extra elements + if len(items) > length: + extra_items = items[length:] + for extra_item in extra_items: + channel_root.remove(extra_item) + indent(channel_root) + feed.seek(0) + feed.truncate() + doc.write(feed) + feed.write("\n") + feed.flush() + return feed + + +if False: + fh = "/tmp/gitlog.xml" + feed = open(fh, "r+") + refname = None + revs = subprocess.check_output([ + "git", "log", "-2", "--format=%H" + ]).splitlines() + newrev = revs[0].strip() + oldrev = revs[1].strip() + rss_feed(oldrev, newrev, refname, 5) + sample_entry = """ + + 2309fc133512c4e25d8942c3d0ae6fc198bf9ba9 + https://www.bioconductor.org + + Nitesh + 2017-12-08 17:26:18 + + """ + entry = fromstring(sample_entry) + write_and_limit_feed([entry], 5, fh) + fh.close() + sys.exit(0) + + +if __name__ == "__main__": + # Path to feed.xml + fpath = BASE_PATH + "gitlog.xml" + fpath_release = BASE_PATH + "gitlog.release.xml" + length = 499 + + # Run function for RSS feed + feed = open(fpath, "r+") + feed_release = open(fpath_release, 'r+') + + # Obtain a lock + fcntl.lockf(feed, fcntl.LOCK_EX) + + for line in fileinput.input(): + std_input = line.split(" ") + oldrev, newrev, refname = [item.strip() for item in std_input] + # Check for zero commit, check branch deletions + # also, avoid new package additions + if (oldrev == ZERO_COMMIT or newrev == ZERO_COMMIT): + continue + # Split feed into correct files + try: + if ("RELEASE" in refname): + # RSS-feed post-receive hook + entry = rss_feed(oldrev, newrev, refname, length) + write_and_limit_feed(entry, length, feed_release) + else: + entry = rss_feed(oldrev, newrev, refname, length) + write_and_limit_feed(entry, length, feed) + except Exception as e: + print("Note: failed to update RSS feed; git repository updated successfully.") + logging.error(e) + cmd = ['scp', 'gitlog.xml', 'gitlog.release.xml', + 
'biocadmin@staging.bioconductor.org:/home/biocadmin/bioc-test-web/bioconductor.org/assets/developers/rss-feeds/.'] + subprocess.check_call(cmd, cwd=BASE_PATH) + + # Release the lock + fcntl.lockf(feed, fcntl.LOCK_UN) + feed.close() + feed_release.close() + + logging.info("Exit after removing locks") + diff --git a/hooks/repo-specific/pre-receive-hook-dataexp-workflow b/hooks/repo-specific/pre-receive-hook-dataexp-workflow new file mode 100755 index 0000000..6479715 --- /dev/null +++ b/hooks/repo-specific/pre-receive-hook-dataexp-workflow @@ -0,0 +1,99 @@ +#!/usr/bin/env python + +import subprocess +from os import path, getcwd +import fileinput +from prevent_duplicate_commits import prevent_duplicate_commits +from prevent_bad_version_numbers import prevent_bad_version_numbers + +ZERO_COMMIT = "0000000000000000000000000000000000000000" +HOOKS_CONF = "file:///home/git/repositories/admin/hook_maintainer.git" +LOCAL_HOOKS_CONF = "file:////Users/ni41435_ca/Documents/hook_maintainer.git" + + +def get_hooks_conf(): + """This function does a simple 'git archive' clone process of + hooks.conf. + + It clones the file in the /tmp directory. This function ignores + the '#' characters in the file. + + """ + # FIXME: Change LOCAL_HOOKS_CONF to HOOKS_CONF + cmd = "git archive --remote=" + HOOKS_CONF + " HEAD hooks.conf | tar -x" + subprocess.check_output(cmd, shell=True, cwd="/tmp") + if path.exists("/tmp/hooks.conf"): + with open("/tmp/hooks.conf") as f: + txt = f.read() + txt = txt.splitlines() + # Ignore '#' in the file + conf = "\n".join([line for line in txt + if not line.startswith("#")]) + return conf + + +def read_bioc_conf(conf): + """ Read the bioc hooks configuration file. + + This code is run within the 'hooks' folder inside a bare git repo. + + This function reads the hooks.conf file and returns a three tuple + of boolean values, one for each hook if it is toggled False or True. 
+ + Default is (True, True, True) + """ + # Make dictionary with package name as key, values are [list of hooks] + d = {} + res = [pack.strip().split("\n") for pack in conf.split("\n\n")] + for item in res: + d[item[0]] = item[1:] + # Get package name, it works because the script is run inside the package. + package_name = path.basename(getcwd()).replace(".git", "") + package = "Package: " + package_name + # Default values for hooks is (True, True) + hooks_dict = {"pre-receive-hook-version-numbers": True, + "pre-receive-hook-duplicate-commits": True} + # Change values for specific hooks mentioned in hooks.conf + if package in d.keys(): + for hook in d[package]: + [hook, val] = hook.split(": ") + hooks_dict[hook] = (val != "False") + return hooks_dict + + +def apply_hooks(hooks_dict): + """Apply hooks to each package in the category data-experiement or + workflow. + + This function takes in a boolean list of arguments, one for each hook, + 1. prevent_bad_version_numbers, + 2. prevent_duplicate_commits in that order. + + The boolean values toggle True/False to indicate which hook has to be + applied to the package. 
+ """ + newestrev = ZERO_COMMIT + oldestrev = ZERO_COMMIT + for line in fileinput.input(): + std_input = line.split(" ") + oldrev, newrev, refname = [elt.strip() for elt in std_input] + # Check for zero commit, check branch deletions + if newrev == ZERO_COMMIT: + continue + # prevent duplicate commits + if hooks_dict["pre-receive-hook-duplicate-commits"]: # enable hook + prevent_duplicate_commits(oldrev, newrev, refname) + # prevent bad version numbers + if newestrev == ZERO_COMMIT: + newestrev = newrev + oldestrev = oldrev + if (hooks_dict["pre-receive-hook-version-numbers"] and + newestrev != ZERO_COMMIT): + prevent_bad_version_numbers(oldestrev, newestrev, refname) + return + + +if __name__ == "__main__": + conf = get_hooks_conf() + hooks_dict = read_bioc_conf(conf) + apply_hooks(hooks_dict) diff --git a/hooks/repo-specific/pre-receive-hook-software b/hooks/repo-specific/pre-receive-hook-software new file mode 100755 index 0000000..60c4994 --- /dev/null +++ b/hooks/repo-specific/pre-receive-hook-software @@ -0,0 +1,110 @@ +#!/usr/bin/env python + +import subprocess +from os import path, getcwd +import fileinput +from prevent_large_files import prevent_large_files +from prevent_duplicate_commits import prevent_duplicate_commits +from prevent_bad_version_numbers import prevent_bad_version_numbers +from prevent_merge_markers import prevent_merge_markers + +ZERO_COMMIT = "0000000000000000000000000000000000000000" +HOOKS_CONF = "file:///home/git/repositories/admin/hook_maintainer.git" +LOCAL_HOOKS_CONF = "file:////Users/ni41435_ca/Documents/hook_maintainer.git" + + +def get_hooks_conf(): + """This function does a simple 'git archive' clone process of + hooks.conf. + + It clones the file in the /tmp directory. This function ignores + the '#' characters in the file. 
+ + """ + # FIXME: Change LOCAL_HOOKS_CONF to HOOKS_CONF + cmd = "git archive --remote=" + HOOKS_CONF + " HEAD hooks.conf | tar -x" + subprocess.check_output(cmd, shell=True, cwd="/tmp") + if path.exists("/tmp/hooks.conf"): + with open("/tmp/hooks.conf") as f: + txt = f.read() + txt = txt.splitlines() + # Ignore '#' in the file + conf = "\n".join([line for line in txt + if not line.startswith("#")]) + return conf + + +def read_bioc_conf(conf): + """ Read the bioc hooks configuration file. + + This code is run within the 'hooks' folder inside a bare git repo. + + This function reads the hooks.conf file and returns a three tuple + of boolean values, one for each hook if it is toggled False or True. + + Default is (True, True, True, True) + """ + # Make dictionary with package name as key, values are [list of hooks] + d = {} + res = [pack.strip().split("\n") for pack in conf.split("\n\n") ] + for item in res: + d[item[0]] = item[1:] + # Get package name, it works because the script is run inside the package. + package_name = path.basename(getcwd()).replace(".git", "") + package = "Package: " + package_name + # Default values for hooks is (True, True, True) + hooks_dict = {"pre-receive-hook-merge-markers": True, + "pre-receive-hook-large-files": True, + "pre-receive-hook-version-numbers": True, + "pre-receive-hook-duplicate-commits": True} + # Change values for specific hooks mentioned in hooks.conf + if package in d.keys(): + for hook in d[package]: + [hook, val] = hook.split(": ") + hooks_dict[hook] = (val != "False") + return hooks_dict + + +def apply_hooks(hooks_dict): + """Apply hooks to each package. + + This function takes in a boolean list of arguments, one for each hook, + 1. prevent_merge_markers + 2. prevent_large_files + 3. prevent_bad_version_numbers, + 4. prevent_duplicate_commits in that order. + + The boolean values toggle True/False to indicate which hook has to be + applied to the package. 
+ """ + newestrev = ZERO_COMMIT + oldestrev = ZERO_COMMIT + for line in fileinput.input(): + std_input = line.split(" ") + oldrev, newrev, refname = [elt.strip() for elt in std_input] + # Check for zero commit, check branch deletions + if newrev == ZERO_COMMIT: + continue + # prevent merge conflict markers + if hooks_dict["pre-receive-hook-merge-markers"]: # enable hook + prevent_merge_markers(oldrev, newrev, refname) + # prevent large files + if hooks_dict["pre-receive-hook-large-files"]: # enable hook + prevent_large_files(oldrev, newrev, refname) + # prevent duplicate commits + if hooks_dict["pre-receive-hook-duplicate-commits"]: # enable hook + prevent_duplicate_commits(oldrev, newrev, refname) + if newestrev == ZERO_COMMIT: + newestrev = newrev + oldestrev = oldrev + # prevent bad version numbers (enable hook) + if (hooks_dict["pre-receive-hook-version-numbers"] and + newestrev != ZERO_COMMIT): + prevent_bad_version_numbers(oldestrev, newestrev, refname) + return + + +if __name__ == "__main__": + conf = get_hooks_conf() + hooks_dict = read_bioc_conf(conf) + apply_hooks(hooks_dict) diff --git a/hooks/repo-specific/prepare_test.sh b/hooks/repo-specific/prepare_test.sh new file mode 100644 index 0000000..e032d60 --- /dev/null +++ b/hooks/repo-specific/prepare_test.sh @@ -0,0 +1,98 @@ +# INTEGRATION TEST + + +test_repo() { + ## Create bare repo + gittestpath=/tmp/test_bad_version_numbers.git + hooks=/Users/ni41435_ca/Documents/bioc_git_transition/hooks/repo-specific + gittestrepopath=/tmp/test_bad_version_numbers + + ## Clean up + if [ -d "$gittestpath" ]; then + rm -rf $gittestpath + fi + + if [ -d "$gittestrepopath" ]; then + rm -rf $gittestrepopath + fi + + mkdir $gittestpath + cd $gittestpath + + ## Make bare clone + git init --bare + + ## Copy hooks + cp $hooks/prevent_bad_version_numbers.py $hooks/prevent_duplicate_commits.py $hooks/prevent_large_files.py hooks/ + cp $hooks/pre-receive-hook-software hooks/pre-receive + + cp 
$hooks/test_prevent_bad_version_numbers.py hooks/ + touch hooks/__init__.py + ## Make clone of bare repo + cd /tmp + git clone $gittestpath +} + +## add tests here +################################################################### +## TEST 1: Check the files between multiple commits in the git diff + +## Initiate test repo +test_repo +cd $gittestrepopath + +## 1. Add DESCRIPTION file +cp /tmp/DESCRIPTION . +git add DESCRIPTION +git commit -m "Add DESCRIPTION file" + +## 2. Add dummy file + +touch dummy1 +git add dummy1 +git commit -m "Add dummy1 file" + +## 2. Add dummy file 2 + +touch dummy2 +git add dummy2 +git commit -m "Add dummy2 file" + +## Git push to test + +git push + +################################################################### + +## Test 2: Check bad version bumps + +## Initiate test repo +test_repo +cd $gittestrepopath + +## 1. Add dummy file + +touch dummy1 +git add dummy1 +git commit -m "Add dummy1 file" + +## 2. Add DESCRIPTION file +cp /tmp/DESCRIPTION . +git add DESCRIPTION +git commit -m "Add DESCRIPTION file" + +## 3. Add dummy file 2 + +touch dummy2 +git add dummy2 +git commit -m "Add dummy2 file" + +## 4. Add dummy file 3 + +touch dummy3 +git add dummy3 +git commit -m "Add dummy2 file" + +## Git push to test + +git push diff --git a/hooks/repo-specific/prevent_bad_version_numbers.py b/hooks/repo-specific/prevent_bad_version_numbers.py new file mode 100644 index 0000000..082af5e --- /dev/null +++ b/hooks/repo-specific/prevent_bad_version_numbers.py @@ -0,0 +1,153 @@ +#!/usr/bin/env python +"""Pre-receive hook to check legality of version bumps. + +This version check follows the guidelines of the Bioconductor +project. The guidelines are given at this link, +http://bioconductor.org/developers/how-to/version-numbering/. 
+""" + +from __future__ import print_function +import subprocess +import sys +import re + + +ZERO_COMMIT = "0000000000000000000000000000000000000000" + + +def eprint(*args, **kwargs): + """Helper function to print to std err.""" + print(*args, file=sys.stderr, **kwargs) + + +def throw_error(prev_version, new_version): + """Throw error message for every version bump failure.""" + message = ("Error: Illegal version bump from '%s' to '%s'. Check \n" + "http://bioconductor.org/developers/how-to/version-numbering/ \n" + "for details" % (prev_version, new_version)) + sys.exit(message) + return + + +def git_diff(oldrev, newrev, fname): + """Git diff between two commits.""" + diff = subprocess.check_output(["git", + "diff", + oldrev + ".." + newrev, + "--", fname]) + return diff.splitlines() + + +def git_diff_pre_commit(fname): + """Git diff for a pre-commit hook.""" + diff = subprocess.check_output(["git", + "diff", + "--cached", fname]) + return diff.splitlines() + + +def git_diff_files(oldrev, newrev): + """Get list of files in diff.""" + files_modified = subprocess.check_output(["git", + "diff", + "--name-only", + oldrev + ".." 
+ newrev]) + return files_modified.splitlines() + + +def get_version_bump(diff): + """Get the version bumps in DESCRIPTION file.""" + prev_version = [line.replace("-Version:", "") + for line in diff + if line.startswith("-Version")] + new_version = [line.replace("+Version:", "") + for line in diff + if line.startswith("+Version")] + ## If versions are equal, no version change + if prev_version == new_version: + return None, None + ## No change in DESCRIPTION file from new package push + if not prev_version or not new_version: + return None, None + return prev_version[0].strip(), new_version[0].strip() + + +def check_version_format(prev_version, new_version): + """Check format of version.""" + regex = re.compile(r'\d+\.\d+\.\d+$') + if not regex.match(new_version): + throw_error(prev_version, new_version) + try: + x0, y0, z0 = map(int, prev_version.split(".")) + x, y, z = map(int, new_version.split(".")) + except ValueError as e: + print('format of version number is wrong', e) + throw_error(prev_version, new_version) + return prev_version, new_version + + +def check_version_in_release(prev_version, new_version): + """Check version in RELEASE_branch.""" + x0, y0, z0 = map(int, prev_version.split(".")) + x, y, z = map(int, new_version.split(".")) + # x should never change, y should be even, y should not be 99 i.e + # no major version change + if (x != x0) or (y % 2 != 0) or (y!=y0): + throw_error(prev_version, new_version) + # z should be incremented + if not z - z0 >= 0: + throw_error(prev_version, new_version) + return + + +def check_version_in_master(prev_version, new_version): + """Check version in master branch.""" + x0, y0, z0 = map(int, prev_version.split(".")) + x, y, z = map(int, new_version.split(".")) + # x should never change + if x != x0: + throw_error(prev_version, new_version) + # y should be odd + if y % 2 == 0: + throw_error(prev_version, new_version) + # y should be the same, and can be 99 + if (y != y0) and (y != 99): + throw_error(prev_version, 
new_version) + # z should be incremented and cannot be 99 + # to indicate major version change + if not (z - z0 >= 0) and (y != 99): + throw_error(prev_version, new_version) + return + +def check_version_bump(prev_version, new_version, refname): + """Check the version bump for legality.""" + # Check format of version + prev_version, new_version = check_version_format(prev_version, new_version) + if "RELEASE" in refname: + check_version_in_release(prev_version, new_version) + + if "master" in refname: + check_version_in_master(prev_version, new_version) + return 0 + + +def prevent_bad_version_numbers(oldrev, newrev, refname): + """Prevent bad version numbers in DESCRIPTION file. + + This function acts as the wrapper for all the helper functions. + """ + if oldrev == ZERO_COMMIT: + ## https://stackoverflow.com/questions/40883798/how-to-get-git-diff-of-the-first-commit + ## 4b825dc642cb6eb9a060e54bf8d69288fbee4904 is the + ## id of the "empty tree" in Git and it's always + ## available in every repository. + oldrev = "4b825dc642cb6eb9a060e54bf8d69288fbee4904" + files_modified = git_diff_files(oldrev, newrev) + for fname in files_modified: + if "DESCRIPTION" in fname: + diff = git_diff(oldrev, newrev, fname) + prev_version, new_version = get_version_bump(diff) + if (prev_version is None) and (new_version is None): + continue + check_version_bump(prev_version, new_version, refname) + return diff --git a/hooks/repo-specific/prevent_duplicate_commits.py b/hooks/repo-specific/prevent_duplicate_commits.py new file mode 100644 index 0000000..3b300b4 --- /dev/null +++ b/hooks/repo-specific/prevent_duplicate_commits.py @@ -0,0 +1,68 @@ +#!/usr/bin/env python + +import subprocess +import sys +import re + +# Global variables used by pre-recieve hook + +GIT_COMMIT_LIST_LENGTH = "30" +SVN_COMMIT_REGEX = re.compile(".*git-svn-id: .*@([0-9]{6})") +ZERO_COMMIT = "0000000000000000000000000000000000000000" +ERROR_DUPLICATE_COMMITS = """Error: duplicate commits. 
+ +There are duplicate commits in your commit history, These cannot be +pushed to the Bioconductor git server. Please make sure that this is +resolved. + +Take a look at the documentation to fix this, +https://bioconductor.org/developers/how-to/git/sync-existing-repositories/, +particularly, point #8 (force Bioconductor master to Github master). + +For more information, or help resolving this issue, contact +. Provide the error, the package name and +any other details we might need. + +Use + + git show %s + git show %s + +to see body of commits. +""" + +def get_svn_revision(commit): + body = subprocess.check_output([ "git", "show", "--format=%b", commit ]) + revision = SVN_COMMIT_REGEX.match(body) + if revision != None: + revision = revision.group(1) + return revision + + +def prevent_duplicate_commits(oldrev, newrev, refname): + """Pre-receive hook to check for duplicate SVN commits.""" + try: + commit_list = subprocess.check_output([ + "git", "rev-list", newrev, "-n", GIT_COMMIT_LIST_LENGTH + ]) + except Exception as e: + print("Exception: %s" % e) + pass + commit_list = commit_list.split("\n") + commit_list = [item for item in commit_list if len(item)>0] + + # For each of the first GIT_COMMIT_LIST_LENGTH pairs, check diff + for i in xrange(len(commit_list) - 1): + first = commit_list[i] + second = commit_list[i+1] + + rev1 = get_svn_revision(first) + rev2 = get_svn_revision(second) + if rev1 and (rev1 == rev2): + diff = subprocess.check_output(["git", "diff", first, second]) + # If the diff of two commits is empty, means they are the same. 
+ # i.e duplicate + if not diff: + print(ERROR_DUPLICATE_COMMITS % (first, second)) + sys.exit(1) + return diff --git a/hooks/repo-specific/prevent_large_files.py b/hooks/repo-specific/prevent_large_files.py new file mode 100644 index 0000000..03fd918 --- /dev/null +++ b/hooks/repo-specific/prevent_large_files.py @@ -0,0 +1,43 @@ +import subprocess +import sys +# Global variables used by pre-recieve hook + +ZERO_COMMIT = "0000000000000000000000000000000000000000" +MAXSIZE = int(5000000) # 5MB limit on file size +ERROR_MSG = """Error: file larger than %.0f Mb. + + File name: '%s' + File size: %.1f Mb + +Please see Biocondcutor guidelines +https://bioconductor.org/developers/package-guidelines/ +""" + +def prevent_large_files(oldrev, newrev, refname): + """Pre-receive hook to check for large files.""" + + # set oldrev properly if this is branch creation + if oldrev == ZERO_COMMIT: + if refname == "refs/heads/master": + oldrev = subprocess.check_output([ + "git", "rev-list", "--max-parents=0", newrev + ]).split().pop().strip() + else: + oldrev = "HEAD" + + list_files = subprocess.check_output(["git", "diff", + "--name-only", "--diff-filter=ACMRT", + oldrev + ".." + newrev]) + for fl in list_files.splitlines(): + + size = subprocess.check_output(["git", "cat-file", "-s", + newrev + ":" + fl]) + # Check to see if for some reason we didn't get a size + size = int(size.strip()) + if size: + # Compare filesize to MAXSIZE + mb = 1024.0 * 1024.0 + if size > MAXSIZE: + print(ERROR_MSG % (MAXSIZE / mb, fl, size / mb) ) + sys.exit(1) + return diff --git a/hooks/repo-specific/prevent_merge_markers.py b/hooks/repo-specific/prevent_merge_markers.py new file mode 100644 index 0000000..4ab4982 --- /dev/null +++ b/hooks/repo-specific/prevent_merge_markers.py @@ -0,0 +1,40 @@ +#!/usr/bin/env python +"""Pre-receive hook to check for merge markers in commits. 
+ +This merge marker and merge conflict check pre-receive hook +tries to prevent maintainers from commiting files with <<<, +>>>, === merge markers in them. This keeps the commit history +clean. +""" + +from __future__ import print_function +import subprocess +import sys + + +ZERO_COMMIT = "0000000000000000000000000000000000000000" + + +def git_diff_files_with_conflicts(oldrev, newrev): + """Get list of files in diff.""" + files_modified = subprocess.check_output(['git', + 'diff', + '--name-only', + '-G"<<<<<|=====|>>>>>"', + oldrev + ".." + newrev]) + return files_modified.splitlines() + + +def prevent_merge_markers(oldrev, newrev, refname): + """Prevent merge markers in files. + + This function prevents merge markers in commits. + """ + conflicts = git_diff_files_with_conflicts(oldrev, newrev) + # If number of files with conflicts is > 0 + if conflicts: + message = ("Error: You cannot commit without resolving merge conflicts.\n" + "Unresolved merge conlicts in these files: \n" + + ", ".join(conflicts)) + sys.exit(message) + return diff --git a/hooks/repo-specific/rss_feed.py b/hooks/repo-specific/rss_feed.py new file mode 100644 index 0000000..f914d09 --- /dev/null +++ b/hooks/repo-specific/rss_feed.py @@ -0,0 +1,59 @@ +import subprocess +import datetime +# import re +from os.path import basename, abspath +from xml.etree.ElementTree import fromstring +import logging + + +ENTRY=""" + + %s + https://bioconductor.org/packages/%s/ + + + %s + %s + +""" + + +def rss_feed(oldrev, newrev, refname, length): + """Post receive hook to check start Git RSS feed""" + entry_list = [] + try: + latest_commit = subprocess.check_output([ + "git", "log", oldrev + ".." 
+ newrev, + "--pretty=format:%H|%an|%ae|%ai" + ]) + # Get package name + package_path = subprocess.check_output([ + "git", "rev-parse", "--show-toplevel"]).strip() + package_name = basename(abspath(package_path)).replace(".git", "") + except Exception as e: + logging.error("Exception: %s" % e) + pass + if latest_commit: + # If more than one commit to unpack + latest_commit = latest_commit.split("\n") + # Reverse if there are multiple commits + for commit in latest_commit[::-1]: + commit_id, author, email, timestamp = commit.split("|") + #pubDate = datetime.datetime.fromtimestamp( + # float(timestamp)).strftime('%Y-%m-%d %H:%M:%S') + commit_msg = subprocess.check_output(["git", "log" , + "--pretty=format:%B", + "-n", "1", commit_id]) + if "RELEASE" in refname: + link = package_name + else: + link = "devel/" + package_name + entry = ENTRY % (package_name, + link, + commit_msg, + author + " <" + email + ">", + timestamp, + commit_id) + # Add entry as element in xml.etree + entry_list.append(fromstring(entry)) + return entry_list diff --git a/hooks/repo-specific/test_prevent_bad_version_numbers.py b/hooks/repo-specific/test_prevent_bad_version_numbers.py new file mode 100644 index 0000000..debb097 --- /dev/null +++ b/hooks/repo-specific/test_prevent_bad_version_numbers.py @@ -0,0 +1,256 @@ +"""Tests for the pre-receive hook to check version numbers.""" + +import subprocess +import re +import os +import pytest +from prevent_bad_version_numbers import check_version_bump + +CWD = "/Users/ni41435_ca/Documents/bioc_git_transition/hooks/repo-specific/test_proj" +DESC = "DESCRIPTION" + + +def change_version(new_version, cwd=CWD): + filename = DESC + path = os.path.join(cwd, filename) + s = open(path).read() + x = re.sub(r"Version: .+\n", "Version: " + new_version + "\n", s) + f = open(path, 'w') + f.write(x) + f.close() + return + + +def git_add(path, cwd=CWD): + cmd = ['git', 'add', path] + subprocess.check_call(cmd, cwd=cwd) + return + + +def git_checkout(branch, cwd=CWD): 
+ cmd = ['git', 'checkout', branch] + subprocess.check_call(cmd, cwd=cwd) + return + + +def git_commit(message, cwd=CWD): + cmd = ['git', 'commit', '-m', message] + subprocess.check_call(cmd, cwd=cwd) + return + + +def git_push(cwd=CWD): + cmd = ['git', 'push'] + out = subprocess.check_output(cmd, stderr=subprocess.STDOUT) + return out + + +def test_master_check_version_bump(): + # Master + refname = "master" + # y should be odd + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("0.25.1", "0.26.1", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + # x should not change + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("0.25.1", "1.25.1", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + # z should change by increment only + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("0.25.5", "0.25.4", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + # z can be 99 + res = check_version_bump("0.25.4", "0.99.0", refname) + assert res == 0 + + return + + +def test_release_check_version_bump(): + refname = "RELEASE_3_6" + # y should be even + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("0.26.1", "0.27.1", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + # x should not change + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("0.26.1", "1.26.1", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + # x should not change, even if y changes, it should + # throw the same error. 
+ with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("0.25.1", "1.25.1", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + # z should not decrement + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("0.26.4", "0.25.3", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + # z can be 99 + with pytest.raises(SystemExit) as pytest_wrapped_e: + res = check_version_bump("0.26.4", "0.99.0", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + return + + +def test_version_bumps_martin(): + refname = "RELEASE_3_6" + # Tests with bad version number format + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.2.2", "2.2.2-1", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.2.2", "2.2", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.2.2", "2.2.2.2", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.2.2", "2.2-1.2-1", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.2.2", "2.2.a", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + # x0 != x1 + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.2.2", "1.2.2", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + res = check_version_bump("2.2.2", "2.2.2", refname) + assert res == 0 + + with 
pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.2.2", "0.2.2", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + return + + +def test_release_version_bumps_martin(): + refname = "RELEASE_3_6" + + res = check_version_bump("2.2.2", "2.2.2", refname) + assert res == 0 + + res = check_version_bump("2.2.2", "2.2.3", refname) + assert res == 0 + + res = check_version_bump("2.2.2", "2.2.10", refname) + assert res == 0 + + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.2.2", "2.1.2", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.2.2", "2.3.2", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.2.2", "2.99.0", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.3.2", "2.3.2", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.3.2", "2.3.3", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + return + + +def test_devel_version_bumps_martin(): + refname = "master" + + res = check_version_bump("2.3.2", "2.3.2", refname) + assert res == 0 + + res = check_version_bump("2.3.2", "2.3.3", refname) + assert res == 0 + + res = check_version_bump("2.3.2", "2.3.10", refname) + assert res == 0 + + res = check_version_bump("2.3.2", "2.99.0", refname) + assert res == 0 + + res = check_version_bump("2.3.2", "2.99.2", refname) + assert res == 0 + + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.3.2", "2.4.0", refname) + 
assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.3.2", "2.4.2", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.3.2", "2.2.2", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.3.2", "2.3.1", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("2.3.2", "2.3.0", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + return + + +def test_devel_version_bumps_sep2019(): + refname = "master" + + res = check_version_bump("2.3.2", "2.3.999", refname) + assert res == 0 + + res = check_version_bump("1.7.999", "1.7.1000", refname) + assert res == 0 + + res = check_version_bump("1.7.999", "1.7.9991", refname) + assert res == 0 + + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("1.7.999", "1.7.10", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 + + with pytest.raises(SystemExit) as pytest_wrapped_e: + check_version_bump("a1.7.999", "a1.7.1000", refname) + assert pytest_wrapped_e.type == SystemExit + assert pytest_wrapped_e.value.code != 0 diff --git a/misc/detect_bad_version.py b/misc/detect_bad_version.py new file mode 100644 index 0000000..6c0dc3d --- /dev/null +++ b/misc/detect_bad_version.py @@ -0,0 +1,63 @@ +""" +Usage: + + python detect_bad_version.py + + Passing 'even' as the second argument results in a success if the + version is even. Packages with odd versions will be output. 
+""" + +import os +import sys + + +def find_description(directory): + description_files = [] + # Walk directories only; skip those without DESCRIPTION files + for f in os.walk(directory).next()[1]: + dfile = os.path.join(directory, f, "DESCRIPTION") + if os.path.exists(dfile): + description_files.append(dfile) + return description_files + + +def check_version(version, parity): + version_number = version.replace("Version :","").split(".") + y = int(version_number[1]) + ## Add rules here + if parity == "odd": + if y % 2 == 0: + return False + elif parity == "even": + if y % 2 != 0: + return False + if y > 99: + return False + else: + return True + + +def read_description(DESCRIPTION_path): + with open(DESCRIPTION_path) as f: + txt = f.read() + lines = txt.splitlines() + version = [line for line in lines if line.startswith("Version")][0] + package_name = DESCRIPTION_path.replace("/DESCRIPTION","").replace("packages/","") + return (package_name, version) + + +def run(directory, parity): + descriptions = find_description(directory) + counter = [] + for description in descriptions: + package_name, version = read_description(description) + if not check_version(version, parity): + print(package_name, version) + counter.append(package_name) + return counter + + +if __name__ == "__main__": + print("Directory passed: ", sys.argv[1], sys.argv[2]) + counter = run(str(sys.argv[1]), str(sys.argv[2])) + print(len(counter), " packages have version which is not ", sys.argv[2]) diff --git a/misc/detect_duplicate_commits.py b/misc/detect_duplicate_commits.py new file mode 100644 index 0000000..a5f9acb --- /dev/null +++ b/misc/detect_duplicate_commits.py @@ -0,0 +1,100 @@ +#!/usr/bin/env python3 + +import subprocess +import sys +import re +import os + +# Global variables used by pre-recieve hook + + +SVN_COMMIT_REGEX = re.compile(".*git-svn-id: .*@([0-9]{6})") +ZERO_COMMIT = "0000000000000000000000000000000000000000" +ERROR_DUPLICATE_COMMITS = """Error: duplicate commits. 
+ +There are duplicate commits in your commit history, These cannot be +pushed to the Bioconductor git server. Please make sure that this is +resolved. + +Take a look at the documentation to fix this, +https://bioconductor.org/developers/how-to/git/sync-existing-repositories/, +particularly, point #8 (force Bioconductor master to Github master). + +For more information, or help resolving this issue, contact +. Provide the error, the package name and +any other details we might need. + +Use + + git show %s + git show %s + +to see body of commits. +""" + +def bytes2str(line): + if isinstance(line, str): + return line + try: + line = line.decode() # decode() uses utf-8 encoding by default + except UnicodeDecodeError: + line = line.decode("iso8859") # typical Windows encoding + return line + +def get_svn_revision(commit): + body = subprocess.check_output([ "git", "show", "--format=%b", commit ]) + body = bytes2str(body) + revision = SVN_COMMIT_REGEX.match(body) + if revision != None: + revision = revision.group(1) + return revision + +def prevent_duplicate_commits(newrev): + """Pre-receive hook to check for duplicate SVN commits.""" + try: + commit_list = subprocess.check_output([ + "git", "rev-list", newrev, "-n", GIT_COMMIT_LIST_LENGTH + ]) + except Exception as e: + print("Exception: %s" % e) + pass + commit_list = bytes2str(commit_list) + commit_list = commit_list.split("\n") + commit_list = [item for item in commit_list if len(item)>0] + + # For each of the first GIT_COMMIT_LIST_LENGTH pairs, check diff + for i in range(len(commit_list) - 1): + first = commit_list[i] + second = commit_list[i+1] + + rev1 = get_svn_revision(first) + rev2 = get_svn_revision(second) + if rev1 and (rev1 == rev2): + diff = subprocess.check_output(["git", "diff", first, second]) + # If the diff of two commits is empty, means they are the same. 
+ # i.e duplicate + if not diff: + print(ERROR_DUPLICATE_COMMITS % (first, second)) + sys.exit(1) + return + + +if __name__ == "__main__": + print("""Usage: + python detect_duplicate_commits.py + + example: + + 'python detect_duplicate_commits.py /mypath/BiocGenerics 100' + + NOTE: this script will stop at the first instance of a duplicate commit. + """) + package_path = sys.argv[1] + GIT_COMMIT_LIST_LENGTH = sys.argv[2] + os.chdir(package_path) + + revs = subprocess.check_output([ + "git", "log", "-2", "--format=%H" + ]).splitlines() + newrev = revs[0].strip() + prevent_duplicate_commits(newrev) diff --git a/src/git_experiment_repository.py b/src/git_experiment_repository.py index 96bbdbe..702dd4d 100644 --- a/src/git_experiment_repository.py +++ b/src/git_experiment_repository.py @@ -15,6 +15,8 @@ import subprocess from git_api.git_api import git_add from git_api.git_api import git_commit +from git_api.git_api import git_branch_exists +from git_api.git_api import git_checkout import logging @@ -45,7 +47,7 @@ def list_files(self, path): for f in files] return [item[len(path) + 1:] for item in ans] - def add_data(self, package): + def add_data(self, package, release_3_5=False): """Add data from SVN data source to each package.""" package_dir = os.path.join(self.temp_git_repo, package) before_files = self.list_files(package_dir) @@ -60,6 +62,11 @@ def add_data(self, package): for ref in refs: src = (self.svn_root + self.trunk + self.data_store_path + "/" + package + "/" + ref) + if release_3_5: + src = (self.svn_root + "/" + "branches" + "/" + + "RELEASE_3_5" + + self.data_store_path + "/" + + package + "/" + ref) dest = "/".join([package_dir, ref]) try: cmd = ['svn', 'export', '--force', '--username', 'readonly', @@ -105,6 +112,7 @@ def run_data_transition(self, temp_git_repo): """Run data transition on all package.""" for package in os.listdir(os.path.abspath(temp_git_repo)): try: + # Skip manifest files, by checking "if" if "bioc-data-experiment" not in package: 
logging.info("Experiment data: Add data to package %s" % package) @@ -120,4 +128,24 @@ def run_data_transition(self, temp_git_repo): % package) logging.error(e) pass + # Checkout RELEASE_3_5 and add_data + try: + if "bioc-data-experiment" not in package: + package_dir = os.path.join(self.temp_git_repo, package) + if git_branch_exists("RELEASE_3_5", cwd=package_dir): + # checkout RELEASE_3_5 in package dir + git_checkout("RELEASE_3_5", cwd=package_dir) + # Add data from branch release_3_5 + logging.info("Add data from RELEASE_3_5 %s" % package) + self.add_data(package, release_3_5=True) + logging.info("git add data to %s" % package) + self.add_data_as_git_objects(package) + logging.info("git commit data to %s" % package) + self.commit_data_as_git_objects(package) + # checkout master in package dir + git_checkout("master", cwd=package_dir) + except Exception as e: + logging.error("Experiment data: Error in add data to " + + "RELEASE_3_5 branch in " + package) + logging.error(e) return diff --git a/src/helper/helper.py b/src/helper/helper.py index a98299d..b9536f9 100644 --- a/src/helper/helper.py +++ b/src/helper/helper.py @@ -95,6 +95,28 @@ def get_union(svn_root, package_path, manifest_dictionary): return list(set(release_3_5 + release_3_6)) +def union_of_data_manifest(): +# svn_root = "file:///home/git/bioc-data.hedgehog.fhcrc.org/" + svn_root = "https://hedgehog.fhcrc.org/bioc-data/" + release_3_5 = (svn_root + "branches/" + + "RELEASE_3_5/experiment/pkgs/" + + "bioc-data-experiment.3.5.manifest") + trunk = (svn_root + + "trunk/experiment/pkgs/" + + "bioc-data-experiment.3.6.manifest") + + def get_list(manifest): + cmd = ['svn', 'cat', manifest] + out = subprocess.check_output(cmd) + doc = out.split("\n") + package_list = [line.replace("Package: ","").strip() + for line in doc if line.startswith("Package")] + return package_list + release_3_6 = get_list(trunk) + release_3_5 = get_list(release_3_5) + return list(set(release_3_6 + release_3_5)) + + def 
setup_logger(logger_name, log_file): l = logging.getLogger(logger_name) formatter = logging.Formatter('%(levelname)s : %(asctime)s : %(message)s') diff --git a/src/run_transition.py b/src/run_transition.py index ae83b61..7b3d34d 100644 --- a/src/run_transition.py +++ b/src/run_transition.py @@ -17,6 +17,7 @@ from src.helper.helper import get_branch_list from src.helper.helper import get_union from src.helper.helper import populate_manifest_dictionary +from src.helper.helper import union_of_data_manifest import os import shutil import logging @@ -81,7 +82,6 @@ def run_software_transition(configfile, new_svn_dump=False): # Step 1: Initial set up, get list of packs from trunk dump = LocalSvnDump(svn_root, temp_git_repo, users_db, remote_svn_server, package_path) - # packs = dump.get_pack_list(branch="trunk") manifest_dictionary = populate_manifest_dictionary(svn_root, package_path) packs = get_union(svn_root, package_path, manifest_dictionary) ################################################## @@ -138,7 +138,10 @@ def run_experiment_data_transition(configfile, new_svn_dump=False): # Step 1: Initial set up, get list of packs from trunk dump = LocalSvnDump(svn_root, temp_git_repo, users_db, remote_svn_server, package_path) - packs = dump.get_pack_list(branch="trunk") + + # packs = dump.get_pack_list(branch="trunk") + # TODO: replace this hack + packs = union_of_data_manifest() ################################################### # Create a local dump of SVN packages in a location if new_svn_dump: @@ -156,7 +159,7 @@ def run_experiment_data_transition(configfile, new_svn_dump=False): lfs = Lfs(svn_root, trunk, data_store_path, ref_file, temp_git_repo) # Run make_git_repo, with new LFS object make_git_repo(svn_root, temp_git_repo, bare_git_repo, - remote_url, package_path,lfs_object=lfs) + remote_url, package_path, lfs_object=lfs) # EOF message logging.info("Completed bare git repo for experiment data packages") # FIXME: delete singleton instances @@ -279,6 +282,7 @@ def 
run_workflow_transition(configfile, new_svn_dump=False): ###################################### dump = LocalSvnDump(svn_root, temp_git_repo, users_db, remote_svn_server, package_path) + ## TODO: Use union of manifest files for workflow packages. packs = dump.get_pack_list(branch="trunk") # Git svn clone workflow packages if new_svn_dump: