From d3044d8e61fe5698a9e1daec9401af37da03eda4 Mon Sep 17 00:00:00 2001 From: furkanonder Date: Mon, 18 Aug 2025 03:25:53 +0300 Subject: [PATCH 1/6] Add command-line interface for dbm module --- Doc/library/dbm.rst | 67 ++++++++++++++++++++++++++++ Lib/dbm/__init__.py | 6 --- Lib/dbm/__main__.py | 89 ++++++++++++++++++++++++++++++++++++++ Lib/test/test_dbm.py | 101 ++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 255 insertions(+), 8 deletions(-) create mode 100644 Lib/dbm/__main__.py diff --git a/Doc/library/dbm.rst b/Doc/library/dbm.rst index 39e287b15214e4..8c1a3f7754467f 100644 --- a/Doc/library/dbm.rst +++ b/Doc/library/dbm.rst @@ -501,3 +501,70 @@ The :mod:`!dbm.dumb` module defines the following: that this factor changes for each :mod:`dbm` submodule. .. versionadded:: next + + +.. _dbm-commandline: +.. program:: dbm + +Command-line interface +---------------------- + +.. module:: dbm.__main__ + :synopsis: A command-line interface for DBM database operations. + +**Source code:** :source:`Lib/dbm/__main__.py` + +-------------- + +The :mod:`dbm` module can be invoked as a script via ``python -m dbm`` +to identify, examine, and reorganize DBM database files. + +Command-line options +^^^^^^^^^^^^^^^^^^^^ + +.. option:: --whichdb file [file ...] + + Identify the database type for one or more database files: + + .. code-block:: shell-session + + $ python -m dbm --whichdb *.db + dbm.gnu - database1.db + dbm.sqlite3 - database2.db + UNKNOWN - corrupted.db + + This command uses the :func:`whichdb` function to determine the type + of each database file. Files that cannot be identified are marked as + ``UNKNOWN``. + +.. option:: --dump file + + Display the contents of a database file: + + .. code-block:: shell-session + + $ python -m dbm --dump mydb.db + username: john_doe + email: john@example.com + last_login: 2024-01-15 + + Keys and values are displayed in ``key: value`` format. 
Binary data + is decoded using UTF-8 with error replacement for display purposes. + +.. option:: --reorganize file + + Reorganize and compact a database file to reduce disk space: + + .. code-block:: shell-session + + $ python -m dbm --reorganize mydb.db + Reorganized database 'mydb.db' + + This operation uses the database's native :meth:`!reorganize` method + when available (:mod:`dbm.sqlite3`, :mod:`dbm.gnu`, :mod:`dbm.dumb`). + For database types that don't support reorganization, an error message + is displayed. + +.. option:: -h, --help + + Show the help message. diff --git a/Lib/dbm/__init__.py b/Lib/dbm/__init__.py index 4fdbc54e74cfb6..7bb78b846fbef3 100644 --- a/Lib/dbm/__init__.py +++ b/Lib/dbm/__init__.py @@ -32,7 +32,6 @@ import io import os import struct -import sys class error(Exception): @@ -187,8 +186,3 @@ def whichdb(filename): # Unknown return "" - - -if __name__ == "__main__": - for filename in sys.argv[1:]: - print(whichdb(filename) or "UNKNOWN", filename) diff --git a/Lib/dbm/__main__.py b/Lib/dbm/__main__.py new file mode 100644 index 00000000000000..b96de478e10404 --- /dev/null +++ b/Lib/dbm/__main__.py @@ -0,0 +1,89 @@ +import argparse +import os +import sys + +from . 
import open as dbm_open, whichdb, error + + +def _whichdb_command(filenames): + exit_code = 0 + + for filename in filenames: + if os.path.exists(filename): + db_type = whichdb(filename) + print(f"{db_type or 'UNKNOWN'} - {filename}") + else: + print(f"Error: File '{filename}' not found", file=sys.stderr) + exit_code = 1 + + return exit_code + + +def _dump_command(filename): + try: + with dbm_open(filename, "r") as db: + for key in db.keys(): + key_str = key.decode("utf-8", errors="replace") + value_str = db[key].decode("utf-8", errors="replace") + print(f"{key_str}: {value_str}") + return 0 + except error: + print(f"Error: Database '{filename}' not found", file=sys.stderr) + return 1 + + +def _reorganize_command(filename): + try: + with dbm_open(filename, "c") as db: + if whichdb(filename) in ["dbm.sqlite3", "dbm.gnu", "dbm.dumb"]: + db.reorganize() + print(f"Reorganized database '{filename}'") + else: + print( + f"Database type doesn't support reorganize method", + file=sys.stderr, + ) + return 1 + return 0 + except error: + print( + f"Error: Database '{filename}' not found or cannot be opened", + file=sys.stderr, + ) + return 1 + + +def main(): + parser = argparse.ArgumentParser( + prog="python -m dbm", description="DBM toolkit" + ) + group = parser.add_mutually_exclusive_group(required=True) + group.add_argument( + "--whichdb", + nargs="+", + metavar="file", + help="Identify database type for one or more files", + ) + group.add_argument( + "--dump", metavar="file", help="Display database contents" + ) + group.add_argument( + "--reorganize", + metavar="file", + help="Reorganize the database", + ) + options = parser.parse_args() + + try: + if options.whichdb: + return _whichdb_command(options.whichdb) + elif options.dump: + return _dump_command(options.dump) + elif options.reorganize: + return _reorganize_command(options.reorganize) + except KeyboardInterrupt: + return 1 + + +if __name__ == "__main__": + sys.exit(main()) diff --git a/Lib/test/test_dbm.py 
b/Lib/test/test_dbm.py index ae9faabd536a6c..34cabf544a71c9 100644 --- a/Lib/test/test_dbm.py +++ b/Lib/test/test_dbm.py @@ -1,11 +1,15 @@ -"""Test script for the dbm.open function based on testdumbdbm.py""" - import unittest import dbm import os +import contextlib +import io +import sys + from test.support import import_helper from test.support import os_helper +from test.support.script_helper import assert_python_ok, assert_python_failure +from dbm.__main__ import main as dbm_main try: from dbm import sqlite3 as dbm_sqlite3 @@ -309,6 +313,99 @@ def setUp(self): self.dbm = import_helper.import_fresh_module('dbm') +class DBMCommandLineTestCase(unittest.TestCase): + + def setUp(self): + self.addCleanup(cleaunup_test_dir) + setup_test_dir() + self.test_db = os.path.join(dirname, 'test.db') + with dbm.open(self.test_db, 'c') as db: + db[b'key1'] = b'value1' + db[b'key2'] = b'value2' + self.empty_db = os.path.join(dirname, 'empty.db') + with dbm.open(self.empty_db, 'c'): + pass + self.dbm = import_helper.import_fresh_module('dbm') + + def run_cmd_ok(self, *args): + return assert_python_ok('-m', 'dbm', *args).out + + def run_cmd_error(self, *args): + return assert_python_failure('-m', 'dbm', *args) + + def test_help(self): + output = self.run_cmd_ok('-h') + self.assertIn(b'usage:', output) + self.assertIn(b'python -m dbm', output) + self.assertIn(b'--help', output) + self.assertIn(b'whichdb', output) + self.assertIn(b'dump', output) + self.assertIn(b'reorganize', output) + + def test_whichdb_command(self): + output = self.run_cmd_ok('--whichdb', self.test_db) + self.assertIn(self.test_db.encode(), output) + output = self.run_cmd_ok('--whichdb', self.test_db, self.empty_db) + self.assertIn(self.test_db.encode(), output) + self.assertIn(self.empty_db.encode(), output) + + def test_whichdb_nonexistent_file(self): + rc, _, stderr = self.run_cmd_error('--whichdb', "nonexistent_db") + self.assertEqual(rc, 1) + self.assertIn(b'not found', stderr) + + def 
test_whichdb_unknown_format(self): + text_file = os.path.join(dirname, 'text.txt') + with open(text_file, 'w') as f: + f.write('This is not a database file') + output = self.run_cmd_ok('--whichdb', text_file) + self.assertIn(b'UNKNOWN', output) + self.assertIn(text_file.encode(), output) + + def test_whichdb_output_format(self): + output = self.run_cmd_ok('--whichdb', self.test_db) + output_str = output.decode('utf-8', errors='replace').strip() + # Should be "TYPE - FILENAME" format + self.assertIn(' - ', output_str) + parts = output_str.split(' - ', 1) + self.assertEqual(len(parts), 2) + self.assertEqual(parts[1], self.test_db) + + def test_dump_command(self): + output = self.run_cmd_ok('--dump', self.test_db) + self.assertIn(b'key1: value1', output) + self.assertIn(b'key2: value2', output) + + def test_dump_empty_database(self): + output = self.run_cmd_ok('--dump', self.empty_db) + self.assertEqual(output.strip(), b'') + + def test_dump_nonexistent_database(self): + rc, _, stderr = self.run_cmd_error('--dump', "nonexistent_db") + self.assertEqual(rc, 1) + self.assertIn(b'not found', stderr) + + def test_reorganize_command(self): + self.addCleanup(setattr, dbm, '_defaultmod', dbm._defaultmod) + for module in dbm_iterator(): + setup_test_dir() + dbm._defaultmod = module + with module.open(_fname, 'c') as f: + f[b"1"] = b"1" + if hasattr(module, 'reorganize'): + with module.open(_fname, 'c') as db: + output = self.run_cmd_ok('--reorganize', db) + self.assertIn(b'Reorganized', output) + + def test_output_format_consistency(self): + output = self.run_cmd_ok('--dump', self.test_db) + lines = output.decode('utf-8', errors='replace').strip().split('\n') + for line in lines: + if line.strip(): # Skip empty lines + self.assertIn(':', line) + parts = line.split(':', 1) + self.assertEqual(len(parts), 2) + for mod in dbm_iterator(): assert mod.__name__.startswith('dbm.') suffix = mod.__name__[4:] From ddfa563421572996c3bbfb86f664a4483c604c6e Mon Sep 17 00:00:00 2001 From: 
furkanonder Date: Mon, 18 Aug 2025 03:32:52 +0300 Subject: [PATCH 2/6] Remove unused imports from test_dbm --- Lib/test/test_dbm.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/Lib/test/test_dbm.py b/Lib/test/test_dbm.py index 34cabf544a71c9..5e92778659d0f7 100644 --- a/Lib/test/test_dbm.py +++ b/Lib/test/test_dbm.py @@ -1,15 +1,11 @@ import unittest import dbm import os -import contextlib -import io -import sys from test.support import import_helper from test.support import os_helper from test.support.script_helper import assert_python_ok, assert_python_failure -from dbm.__main__ import main as dbm_main try: from dbm import sqlite3 as dbm_sqlite3 From 31ad2c41a8fee6460bb5e8f0c4de850f087bfd68 Mon Sep 17 00:00:00 2001 From: furkanonder Date: Mon, 18 Aug 2025 23:23:28 +0300 Subject: [PATCH 3/6] Change output format separator in whichdb command --- Lib/dbm/__main__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/dbm/__main__.py b/Lib/dbm/__main__.py index b96de478e10404..edc6fccad6ba9a 100644 --- a/Lib/dbm/__main__.py +++ b/Lib/dbm/__main__.py @@ -11,7 +11,7 @@ def _whichdb_command(filenames): for filename in filenames: if os.path.exists(filename): db_type = whichdb(filename) - print(f"{db_type or 'UNKNOWN'} - {filename}") + print(f"{db_type or 'UNKNOWN'} {filename}") else: print(f"Error: File '{filename}' not found", file=sys.stderr) exit_code = 1 From 7059a0c5217d99d521e32cac452b820ce30293d7 Mon Sep 17 00:00:00 2001 From: furkanonder Date: Mon, 18 Aug 2025 23:29:41 +0300 Subject: [PATCH 4/6] update whichdb output format test --- Lib/test/test_dbm.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_dbm.py b/Lib/test/test_dbm.py index 5e92778659d0f7..cc3b717909d0b7 100644 --- a/Lib/test/test_dbm.py +++ b/Lib/test/test_dbm.py @@ -359,13 +359,8 @@ def test_whichdb_unknown_format(self): self.assertIn(text_file.encode(), output) def test_whichdb_output_format(self): - output = 
self.run_cmd_ok('--whichdb', self.test_db) - output_str = output.decode('utf-8', errors='replace').strip() - # Should be "TYPE - FILENAME" format - self.assertIn(' - ', output_str) - parts = output_str.split(' - ', 1) - self.assertEqual(len(parts), 2) - self.assertEqual(parts[1], self.test_db) + output = self.run_cmd_ok('--whichdb', self.test_db).decode() + self.assertIn(self.test_db, output) def test_dump_command(self): output = self.run_cmd_ok('--dump', self.test_db) @@ -395,7 +390,7 @@ def test_reorganize_command(self): def test_output_format_consistency(self): output = self.run_cmd_ok('--dump', self.test_db) - lines = output.decode('utf-8', errors='replace').strip().split('\n') + lines = output.decode().strip().split('\n') for line in lines: if line.strip(): # Skip empty lines self.assertIn(':', line) From a547468dcf5f234d5ba30d8b24171665500660bf Mon Sep 17 00:00:00 2001 From: furkanonder Date: Mon, 18 Aug 2025 23:37:30 +0300 Subject: [PATCH 5/6] simplify reorganize detection --- Lib/dbm/__main__.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/Lib/dbm/__main__.py b/Lib/dbm/__main__.py index edc6fccad6ba9a..50ecb5b6cc9833 100644 --- a/Lib/dbm/__main__.py +++ b/Lib/dbm/__main__.py @@ -35,21 +35,17 @@ def _dump_command(filename): def _reorganize_command(filename): try: with dbm_open(filename, "c") as db: - if whichdb(filename) in ["dbm.sqlite3", "dbm.gnu", "dbm.dumb"]: + if hasattr(db, "reorganize"): db.reorganize() - print(f"Reorganized database '{filename}'") + print(f"Reorganized database: '{filename}'", file=sys.stderr) else: - print( - f"Database type doesn't support reorganize method", - file=sys.stderr, - ) + print("Database type doesn't support reorganize method", + file=sys.stderr) return 1 return 0 except error: - print( - f"Error: Database '{filename}' not found or cannot be opened", - file=sys.stderr, - ) + print(f"Error: Database '{filename}' not found or cannot be opened", + file=sys.stderr) return 1 From 
740f5bc77230dea00a3c315fb8c8faa526aac3e1 Mon Sep 17 00:00:00 2001 From: furkanonder Date: Mon, 18 Aug 2025 23:47:07 +0300 Subject: [PATCH 6/6] Fix _dump_command to use repr() and avoid data loss --- Lib/dbm/__main__.py | 6 ++---- Lib/test/test_dbm.py | 4 ++-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Lib/dbm/__main__.py b/Lib/dbm/__main__.py index 50ecb5b6cc9833..79e8bb1a0fdd62 100644 --- a/Lib/dbm/__main__.py +++ b/Lib/dbm/__main__.py @@ -22,10 +22,8 @@ def _whichdb_command(filenames): def _dump_command(filename): try: with dbm_open(filename, "r") as db: - for key in db.keys(): - key_str = key.decode("utf-8", errors="replace") - value_str = db[key].decode("utf-8", errors="replace") - print(f"{key_str}: {value_str}") + for key in db: + print(f"{key!r}: {db[key]!r}") return 0 except error: print(f"Error: Database '{filename}' not found", file=sys.stderr) diff --git a/Lib/test/test_dbm.py b/Lib/test/test_dbm.py index cc3b717909d0b7..c6c51d9c624a49 100644 --- a/Lib/test/test_dbm.py +++ b/Lib/test/test_dbm.py @@ -364,8 +364,8 @@ def test_whichdb_output_format(self): def test_dump_command(self): output = self.run_cmd_ok('--dump', self.test_db) - self.assertIn(b'key1: value1', output) - self.assertIn(b'key2: value2', output) + self.assertIn(b"b'key1': b'value1'", output) + self.assertIn(b"b'key2': b'value2'", output) def test_dump_empty_database(self): output = self.run_cmd_ok('--dump', self.empty_db)