From b36972d7834ba954eaef4c390abcd78f401e96ee Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Sat, 16 Aug 2025 16:25:54 -0400 Subject: [PATCH 01/23] first steps: don't show help output for dunders we've grabbed from builtins --- Lib/pydoc.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index d508fb70ea429e..50a9b7a6d27cd1 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1706,6 +1706,11 @@ def describe(thing): def locate(path, forceload=0): """Locate an object by name or dotted path, importing as necessary.""" + if re.match(r"^__\w+__$", path): + # if we're looking up a special variable, don't grab the result from + # the builtins module, because it's probably not what the user wanted + # (if it is, they can look up builtins.whatever) + return None parts = [part for part in path.split('.') if part] module, n = None, 0 while n < len(parts): From 93ac0fa32db9f4e91d1dd0f7d31c93d0474d2eb5 Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Sat, 16 Aug 2025 17:16:29 -0400 Subject: [PATCH 02/23] better help for built-in help function --- Lib/pydoc.py | 39 +++++++++++++++++++++++++++++++++------ 1 file changed, 33 insertions(+), 6 deletions(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 50a9b7a6d27cd1..736c7f41d7ea36 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -78,6 +78,8 @@ class or function within a module or module in a package. If the from reprlib import Repr from traceback import format_exception_only +import _sitebuiltins + from _pyrepl.pager import (get_pager, pipe_pager, plain_pager, tempfile_pager, tty_pager) @@ -1832,10 +1834,10 @@ def _introdoc(): Python, you should definitely check out the tutorial at https://docs.python.org/{ver}/tutorial/. - Enter the name of any module, keyword, or topic to get help on writing - Python programs and using Python modules. To get a list of available - modules, keywords, symbols, or topics, enter "modules", "keywords", - "symbols", or "topics". 
+ Enter the name of any module, keyword, symbol, or topic to get help on + writing Python programs and using Python modules. To get a list of + available modules, keywords, symbols, or topics, enter "modules", + "keywords", "symbols", or "topics". {pyrepl_keys} Each module also comes with a one-line summary of what it does; to list the modules whose name or summary contain a given string such as "spam", @@ -2092,7 +2094,9 @@ def getline(self, prompt): def help(self, request, is_cli=False): if isinstance(request, str): request = request.strip() - if request == 'keywords': self.listkeywords() + if request == 'help': + self.helphelp() + elif request == 'keywords': self.listkeywords() elif request == 'symbols': self.listsymbols() elif request == 'topics': self.listtopics() elif request == 'modules': self.listmodules() @@ -2106,10 +2110,33 @@ def help(self, request, is_cli=False): elif request in self.topics: self.showtopic(request) elif request: doc(request, 'Help on %s:', output=self._output, is_cli=is_cli) else: doc(str, 'Help on %s:', output=self._output, is_cli=is_cli) - elif isinstance(request, Helper): self() + elif request is builtins.help: + self.helphelp() else: doc(request, 'Help on %s:', output=self._output, is_cli=is_cli) self.output.write('\n') + def helphelp(self): + pager(textwrap.dedent("""\ + help - Interactive Help + ======================= + + The built-in help function implements an interactive help utility. You + can make use of it in a few different ways: + + * Calling help() with no arguments starts an interactive help session. + + * Calling help(x) will have one of two behaviors depending on the type + of the argument: + + * If x is a string, help(x) provides information about the given + topic. For example, help("class") will provide information about + the "class" keyword, and help("math.sqrt") will provide + information about the "math.sqrt" function. + + * If x is not a string, help(x) prints information about x or its type.
+ For example, help(20) will provide information about the int type. + """)) + def intro(self): self.output.write(_introdoc()) From 0c0f04c14f86453e401ddb9811f2573c5119b457 Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Sat, 16 Aug 2025 17:45:53 -0400 Subject: [PATCH 03/23] add __main__ and DUNDERMETHODS to help --- Doc/library/__main__.rst | 2 + Doc/tools/extensions/pydoc_topics.py | 1 + Lib/pydoc.py | 7 +- Lib/pydoc_data/topics.py | 2000 +++++++++++++++++++++----- 4 files changed, 1676 insertions(+), 334 deletions(-) diff --git a/Doc/library/__main__.rst b/Doc/library/__main__.rst index 4407ba2f7714dd..36b884f975804f 100644 --- a/Doc/library/__main__.rst +++ b/Doc/library/__main__.rst @@ -1,3 +1,5 @@ +.. _`__main__`: + :mod:`!__main__` --- Top-level code environment =============================================== diff --git a/Doc/tools/extensions/pydoc_topics.py b/Doc/tools/extensions/pydoc_topics.py index 01efbba628324f..9b5aaf87bb2a57 100644 --- a/Doc/tools/extensions/pydoc_topics.py +++ b/Doc/tools/extensions/pydoc_topics.py @@ -100,6 +100,7 @@ "while", "with", "yield", + "__main__", }) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 736c7f41d7ea36..3e5d5c058e9cbb 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -78,8 +78,6 @@ class or function within a module or module in a package. If the from reprlib import Repr from traceback import format_exception_only -import _sitebuiltins - from _pyrepl.pager import (get_pager, pipe_pager, plain_pager, tempfile_pager, tty_pager) @@ -2023,6 +2021,9 @@ class Helper: 'TRUTHVALUE': ('truth', 'if while and or not BASICMETHODS'), 'DEBUGGING': ('debugger', 'pdb'), 'CONTEXTMANAGERS': ('context-managers', 'with'), + 'DUNDERMETHODS': 'SPECIALMETHODS', + 'MAINMODULE': '__main__', + '__main__': ('__main__', ''), } def __init__(self, input=None, output=None): @@ -2173,7 +2174,7 @@ def listtopics(self): Here is a list of available topics. Enter any topic name to get more help. 
''') - self.list(self.topics.keys(), columns=3) + self.list([k for k in self.topics.keys() if k.isupper()], columns=3) def showtopic(self, topic, more_xrefs=''): try: diff --git a/Lib/pydoc_data/topics.py b/Lib/pydoc_data/topics.py index 5f7e14a79d3356..12e85a3f6993c2 100644 --- a/Lib/pydoc_data/topics.py +++ b/Lib/pydoc_data/topics.py @@ -1,7 +1,357 @@ -# Autogenerated by Sphinx on Tue May 6 18:33:44 2025 +# Autogenerated by Sphinx on Sat Aug 16 17:38:04 2025 # as part of the release process. topics = { + '__main__': r'''"__main__" — Top-level code environment +*************************************** + +====================================================================== + +In Python, the special name "__main__" is used for two important +constructs: + +1. the name of the top-level environment of the program, which can be + checked using the "__name__ == '__main__'" expression; and + +2. the "__main__.py" file in Python packages. + +Both of these mechanisms are related to Python modules; how users +interact with them and how they interact with each other. They are +explained in detail below. If you’re new to Python modules, see the +tutorial section Modules for an introduction. + + +"__name__ == '__main__'" +======================== + +When a Python module or package is imported, "__name__" is set to the +module’s name. Usually, this is the name of the Python file itself +without the ".py" extension: + + >>> import configparser + >>> configparser.__name__ + 'configparser' + +If the file is part of a package, "__name__" will also include the +parent package’s path: + + >>> from concurrent.futures import process + >>> process.__name__ + 'concurrent.futures.process' + +However, if the module is executed in the top-level code environment, +its "__name__" is set to the string "'__main__'". + + +What is the “top-level code environment”? +----------------------------------------- + +"__main__" is the name of the environment where top-level code is run. 
+“Top-level code” is the first user-specified Python module that starts +running. It’s “top-level” because it imports all other modules that +the program needs. Sometimes “top-level code” is called an *entry +point* to the application. + +The top-level code environment can be: + +* the scope of an interactive prompt: + + >>> __name__ + '__main__' + +* the Python module passed to the Python interpreter as a file + argument: + + $ python helloworld.py + Hello, world! + +* the Python module or package passed to the Python interpreter with + the "-m" argument: + + $ python -m tarfile + usage: tarfile.py [-h] [-v] (...) + +* Python code read by the Python interpreter from standard input: + + $ echo "import this" | python + The Zen of Python, by Tim Peters + + Beautiful is better than ugly. + Explicit is better than implicit. + ... + +* Python code passed to the Python interpreter with the "-c" argument: + + $ python -c "import this" + The Zen of Python, by Tim Peters + + Beautiful is better than ugly. + Explicit is better than implicit. + ... + +In each of these situations, the top-level module’s "__name__" is set +to "'__main__'". + +As a result, a module can discover whether or not it is running in the +top-level environment by checking its own "__name__", which allows a +common idiom for conditionally executing code when the module is not +initialized from an import statement: + + if __name__ == '__main__': + # Execute when the module is not initialized from an import statement. + ... + +See also: + + For a more detailed look at how "__name__" is set in all situations, + see the tutorial section Modules. + + +Idiomatic Usage +--------------- + +Some modules contain code that is intended for script use only, like +parsing command-line arguments or fetching data from standard input. +If a module like this was imported from a different module, for +example to unit test it, the script code would unintentionally execute +as well. 
+ +This is where using the "if __name__ == '__main__'" code block comes +in handy. Code within this block won’t run unless the module is +executed in the top-level environment. + +Putting as few statements as possible in the block below "if __name__ +== '__main__'" can improve code clarity and correctness. Most often, a +function named "main" encapsulates the program’s primary behavior: + + # echo.py + + import shlex + import sys + + def echo(phrase: str) -> None: + """A dummy wrapper around print.""" + # for demonstration purposes, you can imagine that there is some + # valuable and reusable logic inside this function + print(phrase) + + def main() -> int: + """Echo the input arguments to standard output""" + phrase = shlex.join(sys.argv) + echo(phrase) + return 0 + + if __name__ == '__main__': + sys.exit(main()) # next section explains the use of sys.exit + +Note that if the module didn’t encapsulate code inside the "main" +function but instead put it directly within the "if __name__ == +'__main__'" block, the "phrase" variable would be global to the entire +module. This is error-prone as other functions within the module +could be unintentionally using the global variable instead of a local +name. A "main" function solves this problem. + +Using a "main" function has the added benefit of the "echo" function +itself being isolated and importable elsewhere. When "echo.py" is +imported, the "echo" and "main" functions will be defined, but neither +of them will be called, because "__name__ != '__main__'". + + +Packaging Considerations +------------------------ + +"main" functions are often used to create command-line tools by +specifying them as entry points for console scripts. When this is +done, pip inserts the function call into a template script, where the +return value of "main" is passed into "sys.exit()". 
For example: + + sys.exit(main()) + +Since the call to "main" is wrapped in "sys.exit()", the expectation +is that your function will return some value acceptable as an input to +"sys.exit()"; typically, an integer or "None" (which is implicitly +returned if your function does not have a return statement). + +By proactively following this convention ourselves, our module will +have the same behavior when run directly (i.e. "python echo.py") as it +will have if we later package it as a console script entry-point in a +pip-installable package. + +In particular, be careful about returning strings from your "main" +function. "sys.exit()" will interpret a string argument as a failure +message, so your program will have an exit code of "1", indicating +failure, and the string will be written to "sys.stderr". The +"echo.py" example from earlier exemplifies using the +"sys.exit(main())" convention. + +See also: + + Python Packaging User Guide contains a collection of tutorials and + references on how to distribute and install Python packages with + modern tools. + + +"__main__.py" in Python Packages +================================ + +If you are not familiar with Python packages, see section Packages of +the tutorial. Most commonly, the "__main__.py" file is used to +provide a command-line interface for a package. Consider the following +hypothetical package, “bandclass”: + + bandclass + ├── __init__.py + ├── __main__.py + └── student.py + +"__main__.py" will be executed when the package itself is invoked +directly from the command line using the "-m" flag. For example: + + $ python -m bandclass + +This command will cause "__main__.py" to run. 
How you utilize this +mechanism will depend on the nature of the package you are writing, +but in this hypothetical case, it might make sense to allow the +teacher to search for students: + + # bandclass/__main__.py + + import sys + from .student import search_students + + student_name = sys.argv[1] if len(sys.argv) >= 2 else '' + print(f'Found student: {search_students(student_name)}') + +Note that "from .student import search_students" is an example of a +relative import. This import style can be used when referencing +modules within a package. For more details, see Intra-package +References in the Modules section of the tutorial. + + +Idiomatic Usage +--------------- + +The content of "__main__.py" typically isn’t fenced with an "if +__name__ == '__main__'" block. Instead, those files are kept short +and import functions to execute from other modules. Those other +modules can then be easily unit-tested and are properly reusable. + +If used, an "if __name__ == '__main__'" block will still work as +expected for a "__main__.py" file within a package, because its +"__name__" attribute will include the package’s path if imported: + + >>> import asyncio.__main__ + >>> asyncio.__main__.__name__ + 'asyncio.__main__' + +This won’t work for "__main__.py" files in the root directory of a +".zip" file though. Hence, for consistency, a minimal "__main__.py" +without a "__name__" check is preferred. + +See also: + + See "venv" for an example of a package with a minimal "__main__.py" + in the standard library. It doesn’t contain a "if __name__ == + '__main__'" block. You can invoke it with "python -m venv + [directory]". + + See "runpy" for more details on the "-m" flag to the interpreter + executable. + + See "zipapp" for how to run applications packaged as *.zip* files. + In this case Python looks for a "__main__.py" file in the root + directory of the archive. 
+ + +"import __main__" +================= + +Regardless of which module a Python program was started with, other +modules running within that same program can import the top-level +environment’s scope (*namespace*) by importing the "__main__" module. +This doesn’t import a "__main__.py" file but rather whichever module +that received the special name "'__main__'". + +Here is an example module that consumes the "__main__" namespace: + + # namely.py + + import __main__ + + def did_user_define_their_name(): + return 'my_name' in dir(__main__) + + def print_user_name(): + if not did_user_define_their_name(): + raise ValueError('Define the variable `my_name`!') + + print(__main__.my_name) + +Example usage of this module could be as follows: + + # start.py + + import sys + + from namely import print_user_name + + # my_name = "Dinsdale" + + def main(): + try: + print_user_name() + except ValueError as ve: + return str(ve) + + if __name__ == "__main__": + sys.exit(main()) + +Now, if we started our program, the result would look like this: + + $ python start.py + Define the variable `my_name`! + +The exit code of the program would be 1, indicating an error. +Uncommenting the line with "my_name = "Dinsdale"" fixes the program +and now it exits with status code 0, indicating success: + + $ python start.py + Dinsdale + +Note that importing "__main__" doesn’t cause any issues with +unintentionally running top-level code meant for script use which is +put in the "if __name__ == "__main__"" block of the "start" module. +Why does this work? + +Python inserts an empty "__main__" module in "sys.modules" at +interpreter startup, and populates it by running top-level code. In +our example this is the "start" module which runs line by line and +imports "namely". In turn, "namely" imports "__main__" (which is +really "start"). That’s an import cycle! Fortunately, since the +partially populated "__main__" module is present in "sys.modules", +Python passes that to "namely". 
See Special considerations for +__main__ in the import system’s reference for details on how this +works. + +The Python REPL is another example of a “top-level environment”, so +anything defined in the REPL becomes part of the "__main__" scope: + + >>> import namely + >>> namely.did_user_define_their_name() + False + >>> namely.print_user_name() + Traceback (most recent call last): + ... + ValueError: Define the variable `my_name`! + >>> my_name = 'Jabberwocky' + >>> namely.did_user_define_their_name() + True + >>> namely.print_user_name() + Jabberwocky + +The "__main__" scope is used in the implementation of "pdb" and +"rlcompleter". +''', 'assert': r'''The "assert" statement ********************** @@ -435,9 +785,9 @@ async def func(param1, param2): 'atom-identifiers': r'''Identifiers (Names) ******************* -An identifier occurring as an atom is a name. See section Identifiers -and keywords for lexical definition and section Naming and binding for -documentation of naming and binding. +An identifier occurring as an atom is a name. See section Names +(identifiers and keywords) for lexical definition and section Naming +and binding for documentation of naming and binding. When the name is bound to an object, evaluation of the atom yields that object. When a name is not bound, an attempt to evaluate it @@ -492,19 +842,65 @@ async def func(param1, param2): Python supports string and bytes literals and various numeric literals: - literal: stringliteral | bytesliteral - | integer | floatnumber | imagnumber + literal: strings | NUMBER Evaluation of a literal yields an object of the given type (string, bytes, integer, floating-point number, complex number) with the given value. The value may be approximated in the case of floating-point -and imaginary (complex) literals. See section Literals for details. +and imaginary (complex) literals. See section Literals for details. +See section String literal concatenation for details on "strings". 
All literals correspond to immutable data types, and hence the object’s identity is less important than its value. Multiple evaluations of literals with the same value (either the same occurrence in the program text or a different occurrence) may obtain the same object or a different object with the same value. + + +String literal concatenation +============================ + +Multiple adjacent string or bytes literals (delimited by whitespace), +possibly using different quoting conventions, are allowed, and their +meaning is the same as their concatenation: + + >>> "hello" 'world' + "helloworld" + +Formally: + + strings: ( STRING | fstring)+ | tstring+ + +This feature is defined at the syntactical level, so it only works +with literals. To concatenate string expressions at run time, the ‘+’ +operator may be used: + + >>> greeting = "Hello" + >>> space = " " + >>> name = "Blaise" + >>> print(greeting + space + name) # not: print(greeting space name) + Hello Blaise + +Literal concatenation can freely mix raw strings, triple-quoted +strings, and formatted string literals. For example: + + >>> "Hello" r', ' f"{name}!" + "Hello, Blaise!" + +This feature can be used to reduce the number of backslashes needed, +to split long strings conveniently across long lines, or even to add +comments to parts of strings. For example: + + re.compile("[A-Za-z_]" # letter or underscore + "[A-Za-z0-9_]*" # letter, digit or underscore + ) + +However, bytes literals may only be combined with other byte literals; +not with string literals of any kind. Also, template string literals +may only be combined with other template string literals: + + >>> t"Hello" t"{name}!" + Template(strings=('Hello', '!'), interpolations=(...)) ''', 'attribute-access': r'''Customizing attribute access **************************** @@ -1314,6 +1710,9 @@ class Foo: class Foo(object): pass +There may be one or more base classes; see Multiple inheritance below +for more information. 
+ The class’s suite is then executed in a new execution frame (see Naming and binding), using a newly created local namespace and the original global namespace. (Usually, the suite contains mostly @@ -1377,6 +1776,115 @@ class attributes; they are shared by instances. Instance attributes **PEP 3129** - Class Decorators The proposal that added class decorators. Function and method decorators were introduced in **PEP 318**. + + +Multiple inheritance +==================== + +Python classes may have multiple base classes, a technique known as +*multiple inheritance*. The base classes are specified in the class +definition by listing them in parentheses after the class name, +separated by commas. For example, the following class definition: + + >>> class A: pass + >>> class B: pass + >>> class C(A, B): pass + +defines a class "C" that inherits from classes "A" and "B". + +The *method resolution order* (MRO) is the order in which base classes +are searched when looking up an attribute on a class. See The Python +2.3 Method Resolution Order for a description of how Python determines +the MRO for a class. + +Multiple inheritance is not always allowed. Attempting to define a +class with multiple inheritance will raise an error if one of the +bases does not allow subclassing, if a consistent MRO cannot be +created, if no valid metaclass can be determined, or if there is an +instance layout conflict. We’ll discuss each of these in turn. + +First, all base classes must allow subclassing. While most classes +allow subclassing, some built-in classes do not, such as "bool": + + >>> class SubBool(bool): # TypeError + ... pass + Traceback (most recent call last): + ... + TypeError: type 'bool' is not an acceptable base type + +In the resolved MRO of a class, the class’s bases appear in the order +they were specified in the class’s bases list. Additionally, the MRO +always lists a child class before any of its bases. 
A class definition +will fail if it is impossible to resolve a consistent MRO that +satisfies these rules from the list of bases provided: + + >>> class Base: pass + >>> class Child(Base): pass + >>> class Grandchild(Base, Child): pass # TypeError + Traceback (most recent call last): + ... + TypeError: Cannot create a consistent method resolution order (MRO) for bases Base, Child + +In the MRO of "Grandchild", "Base" must appear before "Child" because +it is first in the base class list, but it must also appear after +"Child" because it is a parent of "Child". This is a contradiction, so +the class cannot be defined. + +If some of the bases have a custom *metaclass*, the metaclass of the +resulting class is chosen among the metaclasses of the bases and the +explicitly specified metaclass of the child class. It must be a +metaclass that is a subclass of all other candidate metaclasses. If no +such metaclass exists among the candidates, the class cannot be +created, as explained in Determining the appropriate metaclass. + +Finally, the instance layouts of the bases must be compatible. This +means that it must be possible to compute a *solid base* for the +class. Exactly which classes are solid bases depends on the Python +implementation. + +**CPython implementation detail:** In CPython, a class is a solid base +if it has a nonempty "__slots__" definition. Many but not all classes +defined in C are also solid bases, including most builtins (such as +"int" or "BaseException") but excluding most concrete "Exception" +classes. Generally, a C class is a solid base if its underlying struct +is different in size from its base class. + +Every class has a solid base. "object", the base class, has itself as +its solid base. If there is a single base, the child class’s solid +base is that class if it is a solid base, or else the base class’s +solid base. If there are multiple bases, we first find the solid base +for each base class to produce a list of candidate solid bases. 
If +there is a unique solid base that is a subclass of all others, then +that class is the solid base. Otherwise, class creation fails. + +Example: + + >>> class Solid1: + ... __slots__ = ("solid1",) + >>> + >>> class Solid2: + ... __slots__ = ("solid2",) + >>> + >>> class SolidChild(Solid1): + ... __slots__ = ("solid_child",) + >>> + >>> class C1: # solid base is `object` + ... pass + >>> + >>> # OK: solid bases are `Solid1` and `object`, and `Solid1` is a subclass of `object`. + >>> class C2(Solid1, C1): # solid base is `Solid1` + ... pass + >>> + >>> # OK: solid bases are `SolidChild` and `Solid1`, and `SolidChild` is a subclass of `Solid1`. + >>> class C3(SolidChild, Solid1): # solid base is `SolidChild` + ... pass + >>> + >>> # Error: solid bases are `Solid1` and `Solid2`, but neither is a subclass of the other. + >>> class C4(Solid1, Solid2): # error: no single solid base + ... pass + Traceback (most recent call last): + ... + TypeError: multiple bases have instance lay-out conflict ''', 'comparisons': r'''Comparisons *********** @@ -1724,16 +2232,16 @@ class attributes; they are shared by instances. Instance attributes The "for" statement is used to iterate over the elements of a sequence (such as a string, tuple or list) or other iterable object: - for_stmt: "for" target_list "in" starred_list ":" suite + for_stmt: "for" target_list "in" starred_expression_list ":" suite ["else" ":" suite] -The "starred_list" expression is evaluated once; it should yield an -*iterable* object. An *iterator* is created for that iterable. The -first item provided by the iterator is then assigned to the target -list using the standard rules for assignments (see Assignment -statements), and the suite is executed. This repeats for each item -provided by the iterator. When the iterator is exhausted, the suite -in the "else" clause, if present, is executed, and the loop +The "starred_expression_list" expression is evaluated once; it should +yield an *iterable* object. 
An *iterator* is created for that +iterable. The first item provided by the iterator is then assigned to +the target list using the standard rules for assignments (see +Assignment statements), and the suite is executed. This repeats for +each item provided by the iterator. When the iterator is exhausted, +the suite in the "else" clause, if present, is executed, and the loop terminates. A "break" statement executed in the first suite terminates the loop @@ -1922,7 +2430,8 @@ class attributes; they are shared by instances. Instance attributes group types, because that would have ambiguous semantics. It is not possible to mix "except" and "except*" in the same "try". -"break", "continue" and "return" cannot appear in an "except*" clause. +The "break", "continue", and "return" statements cannot appear in an +"except*" clause. "else" clause @@ -2329,7 +2838,8 @@ def foo(): The rule "strings" and the token "NUMBER" are defined in the standard Python grammar. Triple-quoted strings are supported. Raw strings and -byte strings are supported. f-strings are not supported. +byte strings are supported. f-strings and t-strings are not +supported. The forms "signed_number '+' NUMBER" and "signed_number '-' NUMBER" are for expressing complex numbers; they require a real number on the @@ -2866,6 +3376,9 @@ class Foo: class Foo(object): pass +There may be one or more base classes; see Multiple inheritance below +for more information. + The class’s suite is then executed in a new execution frame (see Naming and binding), using a newly created local namespace and the original global namespace. (Usually, the suite contains mostly @@ -2931,6 +3444,115 @@ class attributes; they are shared by instances. Instance attributes decorators were introduced in **PEP 318**. +Multiple inheritance +-------------------- + +Python classes may have multiple base classes, a technique known as +*multiple inheritance*. 
The base classes are specified in the class +definition by listing them in parentheses after the class name, +separated by commas. For example, the following class definition: + + >>> class A: pass + >>> class B: pass + >>> class C(A, B): pass + +defines a class "C" that inherits from classes "A" and "B". + +The *method resolution order* (MRO) is the order in which base classes +are searched when looking up an attribute on a class. See The Python +2.3 Method Resolution Order for a description of how Python determines +the MRO for a class. + +Multiple inheritance is not always allowed. Attempting to define a +class with multiple inheritance will raise an error if one of the +bases does not allow subclassing, if a consistent MRO cannot be +created, if no valid metaclass can be determined, or if there is an +instance layout conflict. We’ll discuss each of these in turn. + +First, all base classes must allow subclassing. While most classes +allow subclassing, some built-in classes do not, such as "bool": + + >>> class SubBool(bool): # TypeError + ... pass + Traceback (most recent call last): + ... + TypeError: type 'bool' is not an acceptable base type + +In the resolved MRO of a class, the class’s bases appear in the order +they were specified in the class’s bases list. Additionally, the MRO +always lists a child class before any of its bases. A class definition +will fail if it is impossible to resolve a consistent MRO that +satisfies these rules from the list of bases provided: + + >>> class Base: pass + >>> class Child(Base): pass + >>> class Grandchild(Base, Child): pass # TypeError + Traceback (most recent call last): + ... + TypeError: Cannot create a consistent method resolution order (MRO) for bases Base, Child + +In the MRO of "Grandchild", "Base" must appear before "Child" because +it is first in the base class list, but it must also appear after +"Child" because it is a parent of "Child". This is a contradiction, so +the class cannot be defined. 
+ +If some of the bases have a custom *metaclass*, the metaclass of the +resulting class is chosen among the metaclasses of the bases and the +explicitly specified metaclass of the child class. It must be a +metaclass that is a subclass of all other candidate metaclasses. If no +such metaclass exists among the candidates, the class cannot be +created, as explained in Determining the appropriate metaclass. + +Finally, the instance layouts of the bases must be compatible. This +means that it must be possible to compute a *solid base* for the +class. Exactly which classes are solid bases depends on the Python +implementation. + +**CPython implementation detail:** In CPython, a class is a solid base +if it has a nonempty "__slots__" definition. Many but not all classes +defined in C are also solid bases, including most builtins (such as +"int" or "BaseException") but excluding most concrete "Exception" +classes. Generally, a C class is a solid base if its underlying struct +is different in size from its base class. + +Every class has a solid base. "object", the base class, has itself as +its solid base. If there is a single base, the child class’s solid +base is that class if it is a solid base, or else the base class’s +solid base. If there are multiple bases, we first find the solid base +for each base class to produce a list of candidate solid bases. If +there is a unique solid base that is a subclass of all others, then +that class is the solid base. Otherwise, class creation fails. + +Example: + + >>> class Solid1: + ... __slots__ = ("solid1",) + >>> + >>> class Solid2: + ... __slots__ = ("solid2",) + >>> + >>> class SolidChild(Solid1): + ... __slots__ = ("solid_child",) + >>> + >>> class C1: # solid base is `object` + ... pass + >>> + >>> # OK: solid bases are `Solid1` and `object`, and `Solid1` is a subclass of `object`. + >>> class C2(Solid1, C1): # solid base is `Solid1` + ... 
pass + >>> + >>> # OK: solid bases are `SolidChild` and `Solid1`, and `SolidChild` is a subclass of `Solid1`. + >>> class C3(SolidChild, Solid1): # solid base is `SolidChild` + ... pass + >>> + >>> # Error: solid bases are `Solid1` and `Solid2`, but neither is a subclass of the other. + >>> class C4(Solid1, Solid2): # error: no single solid base + ... pass + Traceback (most recent call last): + ... + TypeError: multiple bases have instance lay-out conflict + + Coroutines ========== @@ -3304,7 +3926,7 @@ def f() -> annotation: ... introspects and uses the annotations (such as "dataclasses" or "functools.singledispatch()"). -By default, annotations are lazily evaluated in a annotation scope. +By default, annotations are lazily evaluated in an annotation scope. This means that they are not evaluated when the code containing the annotation is evaluated. Instead, the interpreter saves information that can be used to evaluate the annotation later if requested. The @@ -3318,6 +3940,12 @@ def f() -> annotation: ... >>> f.__annotations__ {'param': 'annotation'} +This future statement will be deprecated and removed in a future +version of Python, but not before Python 3.13 reaches its end of life +(see **PEP 749**). When it is used, introspection tools like +"annotationlib.get_annotations()" and "typing.get_type_hints()" are +less likely to be able to resolve annotations at runtime. + -[ Footnotes ]- [1] The exception is propagated to the invocation stack unless there @@ -3832,7 +4460,7 @@ def double(x): You can also invoke "pdb" from the command line to debug other scripts. For example: - python -m pdb [-c command] (-m module | pyfile) [args ...] + python -m pdb [-c command] (-m module | -p pid | pyfile) [args ...] When invoked as a module, pdb will automatically enter post-mortem debugging if the program being debugged exits abnormally. After post- @@ -3856,6 +4484,23 @@ def double(x): Changed in version 3.7: Added the "-m" option. 
+-p, --pid + + Attach to the process with the specified PID. + + Added in version 3.14. + +To attach to a running Python process for remote debugging, use the +"-p" or "--pid" option with the target process’s PID: + + python -m pdb -p 1234 + +Note: + + Attaching to a process that is blocked in a system call or waiting + for I/O will only work once the next bytecode instruction is + executed or when the process receives a signal. + Typical usage to execute a statement under control of the debugger is: >>> import pdb @@ -5077,7 +5722,7 @@ class of the instance or a *non-virtual base class* thereof. The 'exprlists': r'''Expression lists **************** - starred_expression: ["*"] or_expr + starred_expression: "*" or_expr | expression flexible_expression: assignment_expression | starred_expression flexible_expression_list: flexible_expression ("," flexible_expression)* [","] starred_expression_list: starred_expression ("," starred_expression)* [","] @@ -5109,25 +5754,54 @@ class of the instance or a *non-virtual base class* thereof. The 'floating': r'''Floating-point literals *********************** -Floating-point literals are described by the following lexical -definitions: +Floating-point (float) literals, such as "3.14" or "1.5", denote +approximations of real numbers. - floatnumber: pointfloat | exponentfloat - pointfloat: [digitpart] fraction | digitpart "." - exponentfloat: (digitpart | pointfloat) exponent - digitpart: digit (["_"] digit)* - fraction: "." digitpart - exponent: ("e" | "E") ["+" | "-"] digitpart +They consist of *integer* and *fraction* parts, each composed of +decimal digits. The parts are separated by a decimal point, ".": -Note that the integer and exponent parts are always interpreted using -radix 10. For example, "077e010" is legal, and denotes the same number -as "77e10". The allowed range of floating-point literals is -implementation-dependent. As in integer literals, underscores are -supported for digit grouping. 
+ 2.71828 + 4.0 -Some examples of floating-point literals: +Unlike in integer literals, leading zeros are allowed in the numeric +parts. For example, "077.010" is legal, and denotes the same number as +"77.10". - 3.14 10. .001 1e100 3.14e-10 0e0 3.14_15_93 +As in integer literals, single underscores may occur between digits to +help readability: + + 96_485.332_123 + 3.14_15_93 + +Either of these parts, but not both, can be empty. For example: + + 10. # (equivalent to 10.0) + .001 # (equivalent to 0.001) + +Optionally, the integer and fraction may be followed by an *exponent*: +the letter "e" or "E", followed by an optional sign, "+" or "-", and a +number in the same format as the integer and fraction parts. The "e" +or "E" represents “times ten raised to the power of”: + + 1.0e3 # (represents 1.0×10³, or 1000.0) + 1.166e-5 # (represents 1.166×10⁻⁵, or 0.00001166) + 6.02214076e+23 # (represents 6.02214076×10²³, or 602214076000000000000000.) + +In floats with only integer and exponent parts, the decimal point may +be omitted: + + 1e3 # (equivalent to 1.e3 and 1.0e3) + 0e0 # (equivalent to 0.) + +Formally, floating-point literals are described by the following +lexical definitions: + + floatnumber: + | digitpart "." [digitpart] [exponent] + | "." digitpart [exponent] + | digitpart exponent + digitpart: digit (["_"] digit)* + exponent: ("e" | "E") ["+" | "-"] digitpart Changed in version 3.6: Underscores are now allowed for grouping purposes in literals. @@ -5138,16 +5812,16 @@ class of the instance or a *non-virtual base class* thereof. The The "for" statement is used to iterate over the elements of a sequence (such as a string, tuple or list) or other iterable object: - for_stmt: "for" target_list "in" starred_list ":" suite + for_stmt: "for" target_list "in" starred_expression_list ":" suite ["else" ":" suite] -The "starred_list" expression is evaluated once; it should yield an -*iterable* object. An *iterator* is created for that iterable. 
The -first item provided by the iterator is then assigned to the target -list using the standard rules for assignments (see Assignment -statements), and the suite is executed. This repeats for each item -provided by the iterator. When the iterator is exhausted, the suite -in the "else" clause, if present, is executed, and the loop +The "starred_expression_list" expression is evaluated once; it should +yield an *iterable* object. An *iterator* is created for that +iterable. The first item provided by the iterator is then assigned to +the target list using the standard rules for assignments (see +Assignment statements), and the suite is executed. This repeats for +each item provided by the iterator. When the iterator is exhausted, +the suite in the "else" clause, if present, is executed, and the loop terminates. A "break" statement executed in the first suite terminates the loop @@ -5181,9 +5855,9 @@ class of the instance or a *non-virtual base class* thereof. The The "str.format()" method and the "Formatter" class share the same syntax for format strings (although in the case of "Formatter", subclasses can define their own format string syntax). The syntax is -related to that of formatted string literals, but it is less -sophisticated and, in particular, does not support arbitrary -expressions. +related to that of formatted string literals and template string +literals, but it is less sophisticated and, in particular, does not +support arbitrary expressions in interpolations. Format strings contain “replacement fields” surrounded by curly braces "{}". Anything that is not contained in braces is considered literal @@ -5283,9 +5957,9 @@ class of the instance or a *non-virtual base class* thereof. The “Format specifications” are used within replacement fields contained within a format string to define how individual values are presented -(see Format String Syntax and f-strings). They can also be passed -directly to the built-in "format()" function. 
Each formattable type -may define how the format specification is to be interpreted. +(see Format String Syntax, f-strings, and t-strings). They can also be +passed directly to the built-in "format()" function. Each formattable +type may define how the format specification is to be interpreted. Most built-in types implement the following options for format specifications, although some of the formatting options are only @@ -5304,7 +5978,7 @@ class of the instance or a *non-virtual base class* thereof. The sign: "+" | "-" | " " width_and_precision: [width_with_grouping][precision_with_grouping] width_with_grouping: [width][grouping] - precision_with_grouping: "." [precision][grouping] + precision_with_grouping: "." [precision][grouping] | "." grouping width: digit+ precision: digit+ grouping: "," | "_" @@ -5942,73 +6616,92 @@ class body. A "SyntaxError" is raised if a variable is used or to help avoid name clashes between “private” attributes of base and derived classes. See section Identifiers (Names). ''', - 'identifiers': r'''Identifiers and keywords -************************ + 'identifiers': r'''Names (identifiers and keywords) +******************************** -Identifiers (also referred to as *names*) are described by the -following lexical definitions. - -The syntax of identifiers in Python is based on the Unicode standard -annex UAX-31, with elaboration and changes as defined below; see also -**PEP 3131** for further details. +"NAME" tokens represent *identifiers*, *keywords*, and *soft +keywords*. Within the ASCII range (U+0001..U+007F), the valid characters for -identifiers include the uppercase and lowercase letters "A" through -"Z", the underscore "_" and, except for the first character, the -digits "0" through "9". Python 3.0 introduced additional characters -from outside the ASCII range (see **PEP 3131**). For these -characters, the classification uses the version of the Unicode -Character Database as included in the "unicodedata" module. 
+names include the uppercase and lowercase letters ("A-Z" and "a-z"), +the underscore "_" and, except for the first character, the digits "0" +through "9". + +Names must contain at least one character, but have no upper length +limit. Case is significant. + +Besides "A-Z", "a-z", "_" and "0-9", names can also use “letter-like” +and “number-like” characters from outside the ASCII range, as detailed +below. + +All identifiers are converted into the normalization form NFKC while +parsing; comparison of identifiers is based on NFKC. + +Formally, the first character of a normalized identifier must belong +to the set "id_start", which is the union of: -Identifiers are unlimited in length. Case is significant. +* Unicode category "<Lu>" - uppercase letters (includes "A" to "Z") - identifier: xid_start xid_continue* - id_start: - id_continue: - xid_start: - xid_continue: +* Unicode category "<Ll>" - lowercase letters (includes "a" to "z") -The Unicode category codes mentioned above stand for: +* Unicode category "<Lt>" - titlecase letters -* *Lu* - uppercase letters +* Unicode category "<Lm>" - modifier letters -* *Ll* - lowercase letters +* Unicode category "<Lo>" - other letters -* *Lt* - titlecase letters +* Unicode category "<Nl>" - letter numbers -* *Lm* - modifier letters +* {""_""} - the underscore -* *Lo* - other letters +* "<Other_ID_Start>" - an explicit set of characters in PropList.txt + to support backwards compatibility -* *Nl* - letter numbers +The remaining characters must belong to the set "id_continue", which +is the union of: -* *Mn* - nonspacing marks +* all characters in "id_start" -* *Mc* - spacing combining marks +* Unicode category "<Nd>" - decimal numbers (includes "0" to "9") -* *Nd* - decimal numbers +* Unicode category "<Pc>" - connector punctuations -* *Pc* - connector punctuations +* Unicode category "<Mn>" - nonspacing marks -* *Other_ID_Start* - explicit list of characters in PropList.txt to - support backwards compatibility +* Unicode category "<Mc>" - spacing combining marks -*
*Other_ID_Continue* - likewise +* "<Other_ID_Continue>" - another explicit set of characters in + PropList.txt to support backwards compatibility -All identifiers are converted into the normal form NFKC while parsing; -comparison of identifiers is based on NFKC. +Unicode categories use the version of the Unicode Character Database +as included in the "unicodedata" module. -A non-normative HTML file listing all valid identifier characters for -Unicode 16.0.0 can be found at -https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt +These sets are based on the Unicode standard annex UAX-31. See also +**PEP 3131** for further details. + +Even more formally, names are described by the following lexical +definitions: + + NAME: xid_start xid_continue* + id_start: <Lu> | <Ll> | <Lt> | <Lm> | <Lo> | <Nl> | "_" | <Other_ID_Start> + id_continue: id_start | <Nd> | <Pc> | <Mn> | <Mc> | <Other_ID_Continue> + xid_start: <all characters in id_start whose NFKC normalization is in (id_start xid_continue*)> + xid_continue: <all characters in id_continue whose NFKC normalization is in (id_continue*)> + identifier: <NAME, except keywords> + +A non-normative listing of all valid identifier characters as defined +by Unicode is available in the DerivedCoreProperties.txt file in the +Unicode Character Database. Keywords ======== -The following identifiers are used as reserved words, or *keywords* of -the language, and cannot be used as ordinary identifiers. They must -be spelled exactly as written here: +The following names are used as reserved words, or *keywords* of the +language, and cannot be used as ordinary identifiers. They must be +spelled exactly as written here: False await else import pass None break except in raise @@ -6024,18 +6717,20 @@ class body. A "SyntaxError" is raised if a variable is used or Added in version 3.10. -Some identifiers are only reserved under specific contexts. These are -known as *soft keywords*. The identifiers "match", "case", "type" and -"_" can syntactically act as keywords in certain contexts, but this -distinction is done at the parser level, not when tokenizing. +Some names are only reserved under specific contexts. These are known +as *soft keywords*: + +* "match", "case", and "_", when used in the "match" statement.
+ +* "type", when used in the "type" statement. + +These syntactically act as keywords in their specific contexts, but +this distinction is done at the parser level, not when tokenizing. As soft keywords, their use in the grammar is possible while still preserving compatibility with existing code that uses these names as identifier names. -"match", "case", and "_" are used in the "match" statement. "type" is -used in the "type" statement. - Changed in version 3.12: "type" is now a soft keyword. @@ -6101,17 +6796,53 @@ class body. A "SyntaxError" is raised if a variable is used or 'imaginary': r'''Imaginary literals ****************** -Imaginary literals are described by the following lexical definitions: +Python has complex number objects, but no complex literals. Instead, +*imaginary literals* denote complex numbers with a zero real part. - imagnumber: (floatnumber | digitpart) ("j" | "J") +For example, in math, the complex number 3+4.2*i* is written as the +real number 3 added to the imaginary number 4.2*i*. Python uses a +similar syntax, except the imaginary unit is written as "j" rather +than *i*: + + 3+4.2j + +This is an expression composed of the integer literal "3", the +operator ‘"+"’, and the imaginary literal "4.2j". Since these are +three separate tokens, whitespace is allowed between them: + + 3 + 4.2j + +No whitespace is allowed *within* each token. In particular, the "j" +suffix may not be separated from the number before it. + +The number before the "j" has the same syntax as a floating-point +literal. Thus, the following are valid imaginary literals: + + 4.2j + 3.14j + 10.j + .001j + 1e100j + 3.14e-10j + 3.14_15_93j -An imaginary literal yields a complex number with a real part of 0.0. -Complex numbers are represented as a pair of floating-point numbers -and have the same restrictions on their range. To create a complex -number with a nonzero real part, add a floating-point number to it, -e.g., "(3+4j)".
Some examples of imaginary literals: +Unlike in a floating-point literal, the decimal point can be omitted if +the imaginary number only has an integer part. The number is still +evaluated as a floating-point number, not an integer: - 3.14j 10.j 10j .001j 1e100j 3.14e-10j 3.14_15_93j + 10j + 0j + 1000000000000000000000000j # equivalent to 1e+24j + +The "j" suffix is case-insensitive. That means you can use "J" +instead: + + 3.14J # equivalent to 3.14j + +Formally, imaginary literals are described by the following lexical +definition: + + imagnumber: (floatnumber | digitpart) ("j" | "J") ''', 'import': r'''The "import" statement ********************** @@ -6353,39 +7084,64 @@ class body. A "SyntaxError" is raised if a variable is used or 'integers': r'''Integer literals **************** -Integer literals are described by the following lexical definitions: +Integer literals denote whole numbers. For example: - integer: decinteger | bininteger | octinteger | hexinteger - decinteger: nonzerodigit (["_"] digit)* | "0"+ (["_"] "0")* - bininteger: "0" ("b" | "B") (["_"] bindigit)+ - octinteger: "0" ("o" | "O") (["_"] octdigit)+ - hexinteger: "0" ("x" | "X") (["_"] hexdigit)+ - nonzerodigit: "1"..."9" - digit: "0"..."9" - bindigit: "0" | "1" - octdigit: "0"..."7" - hexdigit: digit | "a"..."f" | "A"..."F" + 7 + 3 + 2147483647 There is no limit for the length of integer literals apart from what -can be stored in available memory. +can be stored in available memory: -Underscores are ignored for determining the numeric value of the -literal. They can be used to group digits for enhanced readability. -One underscore can occur between digits, and after base specifiers -like "0x". + 7922816251426433759354395033679228162514264337593543950336 -Note that leading zeros in a non-zero decimal number are not allowed. -This is for disambiguation with C-style octal literals, which Python -used before version 3.0.
+Underscores can be used to group digits for enhanced readability, and +are ignored for determining the numeric value of the literal. For +example, the following literals are equivalent: -Some examples of integer literals: + 100_000_000_000 + 100000000000 + 1_00_00_00_00_000 - 7 2147483647 0o177 0b100110111 - 3 79228162514264337593543950336 0o377 0xdeadbeef - 100_000_000_000 0b_1110_0101 +Underscores can only occur between digits. For example, "_123", +"321_", and "123__321" are *not* valid literals. -Changed in version 3.6: Underscores are now allowed for grouping -purposes in literals. +Integers can be specified in binary (base 2), octal (base 8), or +hexadecimal (base 16) using the prefixes "0b", "0o" and "0x", +respectively. Hexadecimal digits 10 through 15 are represented by +letters "A"-"F", case-insensitive. For example: + + 0b100110111 + 0b_1110_0101 + 0o177 + 0o377 + 0xdeadbeef + 0xDead_Beef + +An underscore can follow the base specifier. For example, "0x_1f" is a +valid literal, but "0_x1f" and "0x__1f" are not. + +Leading zeros in a non-zero decimal number are not allowed. For +example, "0123" is not a valid literal. This is for disambiguation +with C-style octal literals, which Python used before version 3.0. + +Formally, integer literals are described by the following lexical +definitions: + + integer: decinteger | bininteger | octinteger | hexinteger | zerointeger + decinteger: nonzerodigit (["_"] digit)* + bininteger: "0" ("b" | "B") (["_"] bindigit)+ + octinteger: "0" ("o" | "O") (["_"] octdigit)+ + hexinteger: "0" ("x" | "X") (["_"] hexdigit)+ + zerointeger: "0"+ (["_"] "0")* + nonzerodigit: "1"..."9" + digit: "0"..."9" + bindigit: "0" | "1" + octdigit: "0"..."7" + hexdigit: digit | "a"..."f" | "A"..."F" + +Changed in version 3.6: Underscores are now allowed for grouping +purposes in literals. ''', 'lambda': r'''Lambdas ******* @@ -6730,14 +7486,190 @@ class body. 
A "SyntaxError" is raised if a variable is used or 'numbers': r'''Numeric literals **************** -There are three types of numeric literals: integers, floating-point -numbers, and imaginary numbers. There are no complex literals -(complex numbers can be formed by adding a real number and an -imaginary number). +"NUMBER" tokens represent numeric literals, of which there are three +types: integers, floating-point numbers, and imaginary numbers. + + NUMBER: integer | floatnumber | imagnumber + +The numeric value of a numeric literal is the same as if it were +passed as a string to the "int", "float" or "complex" class +constructor, respectively. Note that not all valid inputs for those +constructors are also valid literals. + +Numeric literals do not include a sign; a phrase like "-1" is actually +an expression composed of the unary operator ‘"-"’ and the literal +"1". + + +Integer literals +================ + +Integer literals denote whole numbers. For example: + + 7 + 3 + 2147483647 + +There is no limit for the length of integer literals apart from what +can be stored in available memory: + + 7922816251426433759354395033679228162514264337593543950336 + +Underscores can be used to group digits for enhanced readability, and +are ignored for determining the numeric value of the literal. For +example, the following literals are equivalent: + + 100_000_000_000 + 100000000000 + 1_00_00_00_00_000 + +Underscores can only occur between digits. For example, "_123", +"321_", and "123__321" are *not* valid literals. + +Integers can be specified in binary (base 2), octal (base 8), or +hexadecimal (base 16) using the prefixes "0b", "0o" and "0x", +respectively. Hexadecimal digits 10 through 15 are represented by +letters "A"-"F", case-insensitive. For example: + + 0b100110111 + 0b_1110_0101 + 0o177 + 0o377 + 0xdeadbeef + 0xDead_Beef + +An underscore can follow the base specifier. For example, "0x_1f" is a +valid literal, but "0_x1f" and "0x__1f" are not. 
+ +Leading zeros in a non-zero decimal number are not allowed. For +example, "0123" is not a valid literal. This is for disambiguation +with C-style octal literals, which Python used before version 3.0. + +Formally, integer literals are described by the following lexical +definitions: + + integer: decinteger | bininteger | octinteger | hexinteger | zerointeger + decinteger: nonzerodigit (["_"] digit)* + bininteger: "0" ("b" | "B") (["_"] bindigit)+ + octinteger: "0" ("o" | "O") (["_"] octdigit)+ + hexinteger: "0" ("x" | "X") (["_"] hexdigit)+ + zerointeger: "0"+ (["_"] "0")* + nonzerodigit: "1"..."9" + digit: "0"..."9" + bindigit: "0" | "1" + octdigit: "0"..."7" + hexdigit: digit | "a"..."f" | "A"..."F" + +Changed in version 3.6: Underscores are now allowed for grouping +purposes in literals. + + +Floating-point literals +======================= + +Floating-point (float) literals, such as "3.14" or "1.5", denote +approximations of real numbers. + +They consist of *integer* and *fraction* parts, each composed of +decimal digits. The parts are separated by a decimal point, ".": + + 2.71828 + 4.0 + +Unlike in integer literals, leading zeros are allowed in the numeric +parts. For example, "077.010" is legal, and denotes the same number as +"77.10". + +As in integer literals, single underscores may occur between digits to +help readability: + + 96_485.332_123 + 3.14_15_93 + +Either of these parts, but not both, can be empty. For example: + + 10. # (equivalent to 10.0) + .001 # (equivalent to 0.001) + +Optionally, the integer and fraction may be followed by an *exponent*: +the letter "e" or "E", followed by an optional sign, "+" or "-", and a +number in the same format as the integer and fraction parts. The "e" +or "E" represents “times ten raised to the power of”: + + 1.0e3 # (represents 1.0×10³, or 1000.0) + 1.166e-5 # (represents 1.166×10⁻⁵, or 0.00001166) + 6.02214076e+23 # (represents 6.02214076×10²³, or 602214076000000000000000.) 
+ +In floats with only integer and exponent parts, the decimal point may +be omitted: + + 1e3 # (equivalent to 1.e3 and 1.0e3) + 0e0 # (equivalent to 0.) + +Formally, floating-point literals are described by the following +lexical definitions: + + floatnumber: + | digitpart "." [digitpart] [exponent] + | "." digitpart [exponent] + | digitpart exponent + digitpart: digit (["_"] digit)* + exponent: ("e" | "E") ["+" | "-"] digitpart + +Changed in version 3.6: Underscores are now allowed for grouping +purposes in literals. + + +Imaginary literals +================== + +Python has complex number objects, but no complex literals. Instead, +*imaginary literals* denote complex numbers with a zero real part. + +For example, in math, the complex number 3+4.2*i* is written as the +real number 3 added to the imaginary number 4.2*i*. Python uses a +similar syntax, except the imaginary unit is written as "j" rather +than *i*: + + 3+4.2j + +This is an expression composed of the integer literal "3", the +operator ‘"+"’, and the imaginary literal "4.2j". Since these are +three separate tokens, whitespace is allowed between them: + + 3 + 4.2j -Note that numeric literals do not include a sign; a phrase like "-1" -is actually an expression composed of the unary operator ‘"-"’ and the -literal "1". +No whitespace is allowed *within* each token. In particular, the "j" +suffix may not be separated from the number before it. + +The number before the "j" has the same syntax as a floating-point +literal. Thus, the following are valid imaginary literals: + + 4.2j + 3.14j + 10.j + .001j + 1e100j + 3.14e-10j + 3.14_15_93j + +Unlike in a floating-point literal, the decimal point can be omitted if +the imaginary number only has an integer part. The number is still +evaluated as a floating-point number, not an integer: + + 10j + 0j + 1000000000000000000000000j # equivalent to 1e+24j + +The "j" suffix is case-insensitive.
That means you can use "J" +instead: + + 3.14J # equivalent to 3.14j + +Formally, imaginary literals are described by the following lexical +definition: + + imagnumber: (floatnumber | digitpart) ("j" | "J") ''', 'numeric-types': r'''Emulating numeric types *********************** @@ -6807,9 +7739,9 @@ class that has an "__rsub__()" method, "type(y).__rsub__(y, x)" is third argument if the three-argument version of the built-in "pow()" function is to be supported. - Changed in version 3.14.0a7 (unreleased): Three-argument "pow()" - now try calling "__rpow__()" if necessary. Previously it was only - called in two-argument "pow()" and the binary power operator. + Changed in version 3.14: Three-argument "pow()" now tries calling + "__rpow__()" if necessary. Previously it was only called in two- + argument "pow()" and the binary power operator. Note: @@ -8845,9 +9777,9 @@ class that has an "__rsub__()" method, "type(y).__rsub__(y, x)" is third argument if the three-argument version of the built-in "pow()" function is to be supported. - Changed in version 3.14.0a7 (unreleased): Three-argument "pow()" - now try calling "__rpow__()" if necessary. Previously it was only - called in two-argument "pow()" and the binary power operator. + Changed in version 3.14: Three-argument "pow()" now tries calling + "__rpow__()" if necessary. Previously it was only called in two- + argument "pow()" and the binary power operator. Note: @@ -9215,7 +10147,14 @@ class is used in a class pattern with positional arguments, each Return centered in a string of length *width*. Padding is done using the specified *fillchar* (default is an ASCII space). The original string is returned if *width* is less than or equal to - "len(s)". + "len(s)".
For example: + + >>> 'Python'.center(10) + '  Python  ' + >>> 'Python'.center(10, '-') + '--Python--' + >>> 'Python'.center(4) + 'Python' str.count(sub[, start[, end]]) @@ -9224,7 +10163,18 @@ class is used in a class pattern with positional arguments, each *end* are interpreted as in slice notation. If *sub* is empty, returns the number of empty strings between - characters which is the length of the string plus one. + characters which is the length of the string plus one. For example: + + >>> 'spam, spam, spam'.count('spam') + 3 + >>> 'spam, spam, spam'.count('spam', 5) + 2 + >>> 'spam, spam, spam'.count('spam', 5, 10) + 1 + >>> 'spam, spam, spam'.count('eggs') + 0 + >>> 'spam, spam, spam'.count('') + 17 str.encode(encoding='utf-8', errors='strict') @@ -9241,7 +10191,13 @@ class is used in a class pattern with positional arguments, each For performance reasons, the value of *errors* is not checked for validity unless an encoding error actually occurs, Python - Development Mode is enabled or a debug build is used. + Development Mode is enabled or a debug build is used. For example: + + >>> encoded_str_to_bytes = 'Python'.encode() + >>> type(encoded_str_to_bytes) + <class 'bytes'> + >>> encoded_str_to_bytes + b'Python' Changed in version 3.1: Added support for keyword arguments. @@ -9254,6 +10210,19 @@ class is used in a class pattern with positional arguments, each otherwise return "False". *suffix* can also be a tuple of suffixes to look for. With optional *start*, test beginning at that position. With optional *end*, stop comparing at that position. + Using *start* and *end* is equivalent to + "str[start:end].endswith(suffix)". For example: + + >>> 'Python'.endswith('on') + True + >>> 'a tuple of suffixes'.endswith(('at', 'in')) + False + >>> 'a tuple of suffixes'.endswith(('at', 'es')) + True + >>> 'Python is amazing'.endswith('is', 0, 9) + True + + See also "startswith()" and "removesuffix()".
str.expandtabs(tabsize=8) @@ -9269,12 +10238,15 @@ class is used in a class pattern with positional arguments, each ("\n") or return ("\r"), it is copied and the current column is reset to zero. Any other character is copied unchanged and the current column is incremented by one regardless of how the - character is represented when printed. + character is represented when printed. For example: - >>> '01\t012\t0123\t01234'.expandtabs() - '01 012 0123 01234' - >>> '01\t012\t0123\t01234'.expandtabs(4) - '01 012 0123 01234' + >>> '01\t012\t0123\t01234'.expandtabs() + '01 012 0123 01234' + >>> '01\t012\t0123\t01234'.expandtabs(4) + '01 012 0123 01234' + >>> print('01\t012\n0123\t01234'.expandtabs(4)) + 01 012 + 0123 01234 str.find(sub[, start[, end]]) @@ -9389,7 +10361,7 @@ class is used in a class pattern with positional arguments, each str.isidentifier() Return "True" if the string is a valid identifier according to the - language definition, section Identifiers and keywords. + language definition, section Names (identifiers and keywords). "keyword.iskeyword()" can be used to test whether string "s" is a reserved identifier, such as "def" and "class". @@ -9421,8 +10393,8 @@ class is used in a class pattern with positional arguments, each str.isprintable() - Return true if all characters in the string are printable, false if - it contains at least one non-printable character. + Return "True" if all characters in the string are printable, + "False" if it contains at least one non-printable character. Here “printable” means the character is suitable for "repr()" to use in its output; “non-printable” means that "repr()" on built-in @@ -9669,6 +10641,18 @@ class is used in a class pattern with positional arguments, each >>> ' 1 2 3 '.split() ['1', '2', '3'] + If *sep* is not specified or is "None" and *maxsplit* is "0", only + leading runs of consecutive whitespace are considered. 
+ + For example: + + >>> "".split(None, 0) + [] + >>> " ".split(None, 0) + [] + >>> " foo ".split(maxsplit=0) + ['foo '] + str.splitlines(keepends=False) Return a list of the lines in the string, breaking at line @@ -9847,174 +10831,313 @@ class is used in a class pattern with positional arguments, each 'strings': '''String and Bytes literals ************************* -String literals are described by the following lexical definitions: - - stringliteral: [stringprefix](shortstring | longstring) - stringprefix: "r" | "u" | "R" | "U" | "f" | "F" - | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF" - shortstring: "'" shortstringitem* "'" | '"' shortstringitem* '"' - longstring: "\'\'\'" longstringitem* "\'\'\'" | '"""' longstringitem* '"""' - shortstringitem: shortstringchar | stringescapeseq - longstringitem: longstringchar | stringescapeseq - shortstringchar: - longstringchar: - stringescapeseq: "\\" - - bytesliteral: bytesprefix(shortbytes | longbytes) - bytesprefix: "b" | "B" | "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB" - shortbytes: "'" shortbytesitem* "'" | '"' shortbytesitem* '"' - longbytes: "\'\'\'" longbytesitem* "\'\'\'" | '"""' longbytesitem* '"""' - shortbytesitem: shortbyteschar | bytesescapeseq - longbytesitem: longbyteschar | bytesescapeseq - shortbyteschar: - longbyteschar: - bytesescapeseq: "\\" - -One syntactic restriction not indicated by these productions is that -whitespace is not allowed between the "stringprefix" or "bytesprefix" -and the rest of the literal. The source character set is defined by -the encoding declaration; it is UTF-8 if no encoding declaration is -given in the source file; see section Encoding declarations. - -In plain English: Both types of literals can be enclosed in matching -single quotes ("'") or double quotes ("""). They can also be enclosed -in matching groups of three single or double quotes (these are -generally referred to as *triple-quoted strings*). 
The backslash ("\\") -character is used to give special meaning to otherwise ordinary -characters like "n", which means ‘newline’ when escaped ("\\n"). It can -also be used to escape characters that otherwise have a special -meaning, such as newline, backslash itself, or the quote character. -See escape sequences below for examples. - -Bytes literals are always prefixed with "'b'" or "'B'"; they produce -an instance of the "bytes" type instead of the "str" type. They may -only contain ASCII characters; bytes with a numeric value of 128 or -greater must be expressed with escapes. +String literals are text enclosed in single quotes ("'") or double +quotes ("""). For example: -Both string and bytes literals may optionally be prefixed with a -letter "'r'" or "'R'"; such constructs are called *raw string -literals* and *raw bytes literals* respectively and treat backslashes -as literal characters. As a result, in raw string literals, "'\\U'" -and "'\\u'" escapes are not treated specially. + "spam" + 'eggs' + +The quote used to start the literal also terminates it, so a string +literal can only contain the other quote (except with escape +sequences, see below). For example: + + 'Say "Hello", please.' + "Don't do that!" + +Except for this limitation, the choice of quote character ("'" or """) +does not affect how the literal is parsed. + +Inside a string literal, the backslash ("\\") character introduces an +*escape sequence*, which has special meaning depending on the +character after the backslash. For example, "\\"" denotes the double +quote character, and does *not* end the string: + + >>> print("Say \\"Hello\\" to everyone!") + Say "Hello" to everyone! + +See escape sequences below for a full list of such sequences, and more +details. + + +Triple-quoted strings +===================== + +Strings can also be enclosed in matching groups of three single or +double quotes. 
These are generally referred to as *triple-quoted +strings*: + + """This is a triple-quoted string.""" + +In triple-quoted literals, unescaped quotes are allowed (and are +retained), except that three unescaped quotes in a row terminate the +literal, if they are of the same kind ("'" or """) used at the start: + + """This string has "quotes" inside.""" + +Unescaped newlines are also allowed and retained: + + \'\'\'This triple-quoted string + continues on the next line.\'\'\' + + +String prefixes +=============== + +String literals can have an optional *prefix* that influences how the +content of the literal is parsed, for example: + + b"data" + f'{result=}' + +The allowed prefixes are: + +* "b": Bytes literal + +* "r": Raw string + +* "f": Formatted string literal (“f-string”) + +* "t": Template string literal (“t-string”) + +* "u": No effect (allowed for backwards compatibility) + +See the linked sections for details on each type. + +Prefixes are case-insensitive (for example, ‘"B"’ works the same as +‘"b"’). The ‘"r"’ prefix can be combined with ‘"f"’, ‘"t"’ or ‘"b"’, +so ‘"fr"’, ‘"rf"’, ‘"tr"’, ‘"rt"’, ‘"br"’, and ‘"rb"’ are also valid +prefixes. Added in version 3.3: The "'rb'" prefix of raw bytes literals has been added as a synonym of "'br'".Support for the unicode legacy literal ("u'value'") was reintroduced to simplify the maintenance of dual Python 2.x and 3.x codebases. See **PEP 414** for more information. -A string literal with "'f'" or "'F'" in its prefix is a *formatted -string literal*; see f-strings. The "'f'" may be combined with "'r'", -but not with "'b'" or "'u'", therefore raw formatted strings are -possible, but formatted bytes literals are not. -In triple-quoted literals, unescaped newlines and quotes are allowed -(and are retained), except that three unescaped quotes in a row -terminate the literal. (A “quote” is the character used to open the -literal, i.e. either "'" or """.) 
+Formal grammar +============== + +String literals, except “f-strings” and “t-strings”, are described by +the following lexical definitions. + +These definitions use negative lookaheads ("!") to indicate that an +ending quote ends the literal. + + STRING: [stringprefix] (stringcontent) + stringprefix: <("r" | "u" | "b" | "br" | "rb"), case-insensitive> + stringcontent: + | "'" ( !"'" stringitem)* "'" + | '"' ( !'"' stringitem)* '"' + | "\'\'\'" ( !"\'\'\'" longstringitem)* "\'\'\'" + | '"""' ( !'"""' longstringitem)* '"""' + stringitem: stringchar | stringescapeseq + stringchar: + longstringitem: stringitem | newline + stringescapeseq: "\\" + +Note that as in all lexical definitions, whitespace is significant. In +particular, the prefix (if any) must be immediately followed by the +starting quote. Escape sequences ================ -Unless an "'r'" or "'R'" prefix is present, escape sequences in string +Unless an ‘"r"’ or ‘"R"’ prefix is present, escape sequences in string and bytes literals are interpreted according to rules similar to those used by Standard C. 
The recognized escape sequences are: -+---------------------------+-----------------------------------+---------+ -| Escape Sequence | Meaning | Notes | -|===========================|===================================|=========| -| "\\" | Backslash and newline ignored | (1) | -+---------------------------+-----------------------------------+---------+ -| "\\\\" | Backslash ("\\") | | -+---------------------------+-----------------------------------+---------+ -| "\\'" | Single quote ("'") | | -+---------------------------+-----------------------------------+---------+ -| "\\"" | Double quote (""") | | -+---------------------------+-----------------------------------+---------+ -| "\\a" | ASCII Bell (BEL) | | -+---------------------------+-----------------------------------+---------+ -| "\\b" | ASCII Backspace (BS) | | -+---------------------------+-----------------------------------+---------+ -| "\\f" | ASCII Formfeed (FF) | | -+---------------------------+-----------------------------------+---------+ -| "\\n" | ASCII Linefeed (LF) | | -+---------------------------+-----------------------------------+---------+ -| "\\r" | ASCII Carriage Return (CR) | | -+---------------------------+-----------------------------------+---------+ -| "\\t" | ASCII Horizontal Tab (TAB) | | -+---------------------------+-----------------------------------+---------+ -| "\\v" | ASCII Vertical Tab (VT) | | -+---------------------------+-----------------------------------+---------+ -| "\\*ooo*" | Character with octal value *ooo* | (2,4) | -+---------------------------+-----------------------------------+---------+ -| "\\x*hh*" | Character with hex value *hh* | (3,4) | -+---------------------------+-----------------------------------+---------+ - -Escape sequences only recognized in string literals are: - -+---------------------------+-----------------------------------+---------+ -| Escape Sequence | Meaning | Notes | 
-|===========================|===================================|=========| -| "\\N{*name*}" | Character named *name* in the | (5) | -| | Unicode database | | -+---------------------------+-----------------------------------+---------+ -| "\\u*xxxx*" | Character with 16-bit hex value | (6) | -| | *xxxx* | | -+---------------------------+-----------------------------------+---------+ -| "\\U*xxxxxxxx*" | Character with 32-bit hex value | (7) | -| | *xxxxxxxx* | | -+---------------------------+-----------------------------------+---------+ ++----------------------------------------------------+----------------------------------------------------+ +| Escape Sequence | Meaning | +|====================================================|====================================================| +| "\\" | Ignored end of line | ++----------------------------------------------------+----------------------------------------------------+ +| "\\\\" | Backslash | ++----------------------------------------------------+----------------------------------------------------+ +| "\\'" | Single quote | ++----------------------------------------------------+----------------------------------------------------+ +| "\\"" | Double quote | ++----------------------------------------------------+----------------------------------------------------+ +| "\\a" | ASCII Bell (BEL) | ++----------------------------------------------------+----------------------------------------------------+ +| "\\b" | ASCII Backspace (BS) | ++----------------------------------------------------+----------------------------------------------------+ +| "\\f" | ASCII Formfeed (FF) | ++----------------------------------------------------+----------------------------------------------------+ +| "\\n" | ASCII Linefeed (LF) | ++----------------------------------------------------+----------------------------------------------------+ +| "\\r" | ASCII Carriage Return (CR) | 
++----------------------------------------------------+----------------------------------------------------+ +| "\\t" | ASCII Horizontal Tab (TAB) | ++----------------------------------------------------+----------------------------------------------------+ +| "\\v" | ASCII Vertical Tab (VT) | ++----------------------------------------------------+----------------------------------------------------+ +| "\\*ooo*" | Octal character | ++----------------------------------------------------+----------------------------------------------------+ +| "\\x*hh*" | Hexadecimal character | ++----------------------------------------------------+----------------------------------------------------+ +| "\\N{*name*}" | Named Unicode character | ++----------------------------------------------------+----------------------------------------------------+ +| "\\u*xxxx*" | Hexadecimal Unicode character | ++----------------------------------------------------+----------------------------------------------------+ +| "\\U*xxxxxxxx*" | Hexadecimal Unicode character | ++----------------------------------------------------+----------------------------------------------------+ -Notes: -1. A backslash can be added at the end of a line to ignore the - newline: +Ignored end of line +------------------- - >>> 'This string will not include \\ - ... backslashes or newline characters.' - 'This string will not include backslashes or newline characters.' +A backslash can be added at the end of a line to ignore the newline: - The same result can be achieved using triple-quoted strings, or - parentheses and string literal concatenation. + >>> 'This string will not include \\ + ... backslashes or newline characters.' + 'This string will not include backslashes or newline characters.' -2. As in Standard C, up to three octal digits are accepted. +The same result can be achieved using triple-quoted strings, or +parentheses and string literal concatenation. 
- Changed in version 3.11: Octal escapes with value larger than - "0o377" produce a "DeprecationWarning". - Changed in version 3.12: Octal escapes with value larger than - "0o377" produce a "SyntaxWarning". In a future Python version they - will be eventually a "SyntaxError". +Escaped characters +------------------ + +To include a backslash in a non-raw Python string literal, it must be +doubled. The "\\\\" escape sequence denotes a single backslash +character: + + >>> print('C:\\\\Program Files') + C:\\Program Files + +Similarly, the "\\'" and "\\"" sequences denote the single and double +quote character, respectively: + + >>> print('\\' and \\"') + ' and " + + +Octal character +--------------- + +The sequence "\\*ooo*" denotes a *character* with the octal (base 8) +value *ooo*: -3. Unlike in Standard C, exactly two hex digits are required. + >>> '\\120' + 'P' -4. In a bytes literal, hexadecimal and octal escapes denote the byte - with the given value. In a string literal, these escapes denote a - Unicode character with the given value. +Up to three octal digits (0 through 7) are accepted. -5. Changed in version 3.3: Support for name aliases [1] has been - added. +In a bytes literal, *character* means a *byte* with the given value. +In a string literal, it means a Unicode character with the given +value. -6. Exactly four hex digits are required. +Changed in version 3.11: Octal escapes with value larger than "0o377" +(255) produce a "DeprecationWarning". -7. Any Unicode character can be encoded this way. Exactly eight hex - digits are required. +Changed in version 3.12: Octal escapes with value larger than "0o377" +(255) produce a "SyntaxWarning". In a future Python version they will +raise a "SyntaxError". -Unlike Standard C, all unrecognized escape sequences are left in the -string unchanged, i.e., *the backslash is left in the result*. 
(This -behavior is useful when debugging: if an escape sequence is mistyped, -the resulting output is more easily recognized as broken.) It is also -important to note that the escape sequences only recognized in string -literals fall into the category of unrecognized escapes for bytes -literals. + +Hexadecimal character +--------------------- + +The sequence "\\x*hh*" denotes a *character* with the hex (base 16) +value *hh*: + + >>> '\\x50' + 'P' + +Unlike in Standard C, exactly two hex digits are required. + +In a bytes literal, *character* means a *byte* with the given value. +In a string literal, it means a Unicode character with the given +value. + + +Named Unicode character +----------------------- + +The sequence "\\N{*name*}" denotes a Unicode character with the given +*name*: + + >>> '\\N{LATIN CAPITAL LETTER P}' + 'P' + >>> '\\N{SNAKE}' + '🐍' + +This sequence cannot appear in bytes literals. + +Changed in version 3.3: Support for name aliases has been added. + + +Hexadecimal Unicode characters +------------------------------ + +These sequences "\\u*xxxx*" and "\\U*xxxxxxxx*" denote the Unicode +character with the given hex (base 16) value. Exactly four digits are +required for "\\u"; exactly eight digits are required for "\\U". The +latter can encode any Unicode character. + + >>> '\\u1234' + 'ሴ' + >>> '\\U0001f40d' + '🐍' + +These sequences cannot appear in bytes literals. + + +Unrecognized escape sequences +----------------------------- + +Unlike in Standard C, all unrecognized escape sequences are left in +the string unchanged, that is, *the backslash is left in the result*: + + >>> print('\\q') + \\q + >>> list('\\q') + ['\\\\', 'q'] + +Note that for bytes literals, the escape sequences only recognized in +string literals ("\\N...", "\\u...", "\\U...") fall into the category of +unrecognized escapes. Changed in version 3.6: Unrecognized escape sequences produce a "DeprecationWarning". 
Changed in version 3.12: Unrecognized escape sequences produce a -"SyntaxWarning". In a future Python version they will be eventually a +"SyntaxWarning". In a future Python version they will raise a "SyntaxError". + +Bytes literals +============== + +*Bytes literals* are always prefixed with ‘"b"’ or ‘"B"’; they produce +an instance of the "bytes" type instead of the "str" type. They may +only contain ASCII characters; bytes with a numeric value of 128 or +greater must be expressed with escape sequences (typically Hexadecimal +character or Octal character): + + >>> b'\\x89PNG\\r\\n\\x1a\\n' + b'\\x89PNG\\r\\n\\x1a\\n' + >>> list(b'\\x89PNG\\r\\n\\x1a\\n') + [137, 80, 78, 71, 13, 10, 26, 10] + +Similarly, a zero byte must be expressed using an escape sequence +(typically "\\0" or "\\x00"). + + +Raw string literals +=================== + +Both string and bytes literals may optionally be prefixed with a +letter ‘"r"’ or ‘"R"’; such constructs are called *raw string +literals* and *raw bytes literals* respectively and treat backslashes +as literal characters. As a result, in raw string literals, escape +sequences are not treated specially: + + >>> r'\\d{4}-\\d{2}-\\d{2}' + '\\\\d{4}-\\\\d{2}-\\\\d{2}' + Even in a raw literal, quotes can be escaped with a backslash, but the backslash remains in the result; for example, "r"\\""" is a valid string literal consisting of two characters: a backslash and a double @@ -10024,6 +11147,199 @@ class is used in a class pattern with positional arguments, each the following quote character). Note also that a single backslash followed by a newline is interpreted as those two characters as part of the literal, *not* as a line continuation. + + +f-strings +========= + +Added in version 3.6. + +A *formatted string literal* or *f-string* is a string literal that is +prefixed with ‘"f"’ or ‘"F"’. These strings may contain replacement +fields, which are expressions delimited by curly braces "{}". 
While +other string literals always have a constant value, formatted strings +are really expressions evaluated at run time. + +Escape sequences are decoded like in ordinary string literals (except +when a literal is also marked as a raw string). After decoding, the +grammar for the contents of the string is: + + f_string: (literal_char | "{{" | "}}" | replacement_field)* + replacement_field: "{" f_expression ["="] ["!" conversion] [":" format_spec] "}" + f_expression: (conditional_expression | "*" or_expr) + ("," conditional_expression | "," "*" or_expr)* [","] + | yield_expression + conversion: "s" | "r" | "a" + format_spec: (literal_char | replacement_field)* + literal_char: + +The parts of the string outside curly braces are treated literally, +except that any doubled curly braces "'{{'" or "'}}'" are replaced +with the corresponding single curly brace. A single opening curly +bracket "'{'" marks a replacement field, which starts with a Python +expression. To display both the expression text and its value after +evaluation, (useful in debugging), an equal sign "'='" may be added +after the expression. A conversion field, introduced by an exclamation +point "'!'" may follow. A format specifier may also be appended, +introduced by a colon "':'". A replacement field ends with a closing +curly bracket "'}'". + +Expressions in formatted string literals are treated like regular +Python expressions surrounded by parentheses, with a few exceptions. +An empty expression is not allowed, and both "lambda" and assignment +expressions ":=" must be surrounded by explicit parentheses. Each +expression is evaluated in the context where the formatted string +literal appears, in order from left to right. Replacement expressions +can contain newlines in both single-quoted and triple-quoted f-strings +and they can contain comments. Everything that comes after a "#" +inside a replacement field is a comment (even closing braces and +quotes). 
In that case, replacement fields must be closed in a +different line. + + >>> f"abc{a # This is a comment }" + ... + 3}" + 'abc5' + +Changed in version 3.7: Prior to Python 3.7, an "await" expression and +comprehensions containing an "async for" clause were illegal in the +expressions in formatted string literals due to a problem with the +implementation. + +Changed in version 3.12: Prior to Python 3.12, comments were not +allowed inside f-string replacement fields. + +When the equal sign "'='" is provided, the output will have the +expression text, the "'='" and the evaluated value. Spaces after the +opening brace "'{'", within the expression and after the "'='" are all +retained in the output. By default, the "'='" causes the "repr()" of +the expression to be provided, unless there is a format specified. +When a format is specified it defaults to the "str()" of the +expression unless a conversion "'!r'" is declared. + +Added in version 3.8: The equal sign "'='". + +If a conversion is specified, the result of evaluating the expression +is converted before formatting. Conversion "'!s'" calls "str()" on +the result, "'!r'" calls "repr()", and "'!a'" calls "ascii()". + +The result is then formatted using the "format()" protocol. The +format specifier is passed to the "__format__()" method of the +expression or conversion result. An empty string is passed when the +format specifier is omitted. The formatted result is then included in +the final value of the whole string. + +Top-level format specifiers may include nested replacement fields. +These nested fields may include their own conversion fields and format +specifiers, but may not include more deeply nested replacement fields. +The format specifier mini-language is the same as that used by the +"str.format()" method. + +Formatted string literals may be concatenated, but replacement fields +cannot be split across literals. 
+ +Some examples of formatted string literals: + + >>> name = "Fred" + >>> f"He said his name is {name!r}." + "He said his name is 'Fred'." + >>> f"He said his name is {repr(name)}." # repr() is equivalent to !r + "He said his name is 'Fred'." + >>> width = 10 + >>> precision = 4 + >>> value = decimal.Decimal("12.34567") + >>> f"result: {value:{width}.{precision}}" # nested fields + 'result: 12.35' + >>> today = datetime(year=2017, month=1, day=27) + >>> f"{today:%B %d, %Y}" # using date format specifier + 'January 27, 2017' + >>> f"{today=:%B %d, %Y}" # using date format specifier and debugging + 'today=January 27, 2017' + >>> number = 1024 + >>> f"{number:#0x}" # using integer format specifier + '0x400' + >>> foo = "bar" + >>> f"{ foo = }" # preserves whitespace + " foo = 'bar'" + >>> line = "The mill's closed" + >>> f"{line = }" + 'line = "The mill\\'s closed"' + >>> f"{line = :20}" + "line = The mill's closed " + >>> f"{line = !r:20}" + 'line = "The mill\\'s closed" ' + +Reusing the outer f-string quoting type inside a replacement field is +permitted: + + >>> a = dict(x=2) + >>> f"abc {a["x"]} def" + 'abc 2 def' + +Changed in version 3.12: Prior to Python 3.12, reuse of the same +quoting type of the outer f-string inside a replacement field was not +possible. + +Backslashes are also allowed in replacement fields and are evaluated +the same way as in any other context: + + >>> a = ["a", "b", "c"] + >>> print(f"List a contains:\\n{"\\n".join(a)}") + List a contains: + a + b + c + +Changed in version 3.12: Prior to Python 3.12, backslashes were not +permitted inside an f-string replacement field. + +Formatted string literals cannot be used as docstrings, even if they +do not include expressions. + + >>> def foo(): + ... f"Not a docstring" + ... + >>> foo.__doc__ is None + True + +See also **PEP 498** for the proposal that added formatted string +literals, and "str.format()", which uses a related format string +mechanism. 
+ + +t-strings +========= + +Added in version 3.14. + +A *template string literal* or *t-string* is a string literal that is +prefixed with ‘"t"’ or ‘"T"’. These strings follow the same syntax and +evaluation rules as formatted string literals, with the following +differences: + +* Rather than evaluating to a "str" object, template string literals + evaluate to a "string.templatelib.Template" object. + +* The "format()" protocol is not used. Instead, the format specifier + and conversions (if any) are passed to a new "Interpolation" object + that is created for each evaluated expression. It is up to code that + processes the resulting "Template" object to decide how to handle + format specifiers and conversions. + +* Format specifiers containing nested replacement fields are evaluated + eagerly, prior to being passed to the "Interpolation" object. For + instance, an interpolation of the form "{amount:.{precision}f}" will + evaluate the inner expression "{precision}" to determine the value + of the "format_spec" attribute. If "precision" were to be "2", the + resulting format specifier would be "'.2f'". + +* When the equals sign "'='" is provided in an interpolation + expression, the text of the expression is appended to the literal + string that precedes the relevant interpolation. This includes the + equals sign and any surrounding whitespace. The "Interpolation" + instance for the expression will be created as normal, except that + "conversion" will be set to ‘"r"’ ("repr()") by default. If an + explicit conversion or format specifier is provided, this will + override the default behaviour. ''', 'subscriptions': r'''Subscriptions ************* @@ -10266,7 +11582,8 @@ class is used in a class pattern with positional arguments, each group types, because that would have ambiguous semantics. It is not possible to mix "except" and "except*" in the same "try". -"break", "continue" and "return" cannot appear in an "except*" clause.
+The "break", "continue", and "return" statements cannot appear in an +"except*" clause. "else" clause @@ -10979,7 +12296,7 @@ def foo(): "ImportWarning" when falling back to "__package__" during import resolution. - Deprecated since version 3.13, will be removed in version 3.15: + Deprecated since version 3.13, removed in version 3.15: "__package__" will cease to be set or taken into consideration by the import system or standard library. @@ -11053,11 +12370,10 @@ def foo(): It is **strongly** recommended that you use "module.__spec__.cached" instead of "module.__cached__". - Deprecated since version 3.13, will be removed in version 3.15: - Setting "__cached__" on a module while failing to set - "__spec__.cached" is deprecated. In Python 3.15, "__cached__" will - cease to be set or taken into consideration by the import system or - standard library. + Deprecated since version 3.13, removed in version 3.15: Setting + "__cached__" on a module while failing to set "__spec__.cached" is + deprecated. In Python 3.15, "__cached__" will cease to be set or + taken into consideration by the import system or standard library. Other writable attributes on module objects @@ -11168,11 +12484,20 @@ class method object, it is transformed into an instance method object | | collected during class body execution. See also: | | | "__annotations__ attributes". For best practices | | | on working with "__annotations__", please see | -| | "annotationlib". Where possible, use | +| | "annotationlib". Use | | | "annotationlib.get_annotations()" instead of | -| | accessing this attribute directly. Changed in | -| | version 3.14: Annotations are now lazily | -| | evaluated. See **PEP 649**. | +| | accessing this attribute directly. 
Warning: | +| | Accessing the "__annotations__" attribute directly | +| | on a class object may return annotations for the | +| | wrong class, specifically in certain cases where | +| | the class, its base class, or a metaclass is | +| | defined under "from __future__ import | +| | annotations". See **PEP 749** for details. This | +| | attribute does not exist on certain builtin | +| | classes. On user-defined classes without | +| | "__annotations__", it is an empty dictionary. | +| | Changed in version 3.14: Annotations are now | +| | lazily evaluated. See **PEP 649**. | +----------------------------------------------------+----------------------------------------------------+ | type.__annotate__() | The *annotate function* for this class, or "None" | | | if the class has no annotations. See also: | @@ -11724,8 +13049,15 @@ class dict(iterable, **kwargs) the keyword argument replaces the value from the positional argument. - To illustrate, the following examples all return a dictionary equal - to "{"one": 1, "two": 2, "three": 3}": + Providing keyword arguments as in the first example only works for + keys that are valid Python identifiers. Otherwise, any valid keys + can be used. + + Dictionaries compare equal if and only if they have the same "(key, + value)" pairs (regardless of ordering). Order comparisons (‘<’, + ‘<=’, ‘>=’, ‘>’) raise "TypeError". To illustrate dictionary + creation and equality, the following examples all return a + dictionary equal to "{"one": 1, "two": 2, "three": 3}": >>> a = dict(one=1, two=2, three=3) >>> b = {'one': 1, 'two': 2, 'three': 3} @@ -11740,6 +13072,29 @@ class dict(iterable, **kwargs) keys that are valid Python identifiers. Otherwise, any valid keys can be used. + Dictionaries preserve insertion order. Note that updating a key + does not affect the order. Keys added after deletion are inserted + at the end.
+ + >>> d = {"one": 1, "two": 2, "three": 3, "four": 4} + >>> d + {'one': 1, 'two': 2, 'three': 3, 'four': 4} + >>> list(d) + ['one', 'two', 'three', 'four'] + >>> list(d.values()) + [1, 2, 3, 4] + >>> d["one"] = 42 + >>> d + {'one': 42, 'two': 2, 'three': 3, 'four': 4} + >>> del d["two"] + >>> d["two"] = None + >>> d + {'one': 42, 'three': 3, 'four': 4, 'two': None} + + Changed in version 3.7: Dictionary order is guaranteed to be + insertion order. This behavior was an implementation detail of + CPython from 3.6. + These are the operations that dictionaries support (and therefore, custom mapping types should support too): @@ -11910,33 +13265,6 @@ class dict(iterable, **kwargs) Added in version 3.9. - Dictionaries compare equal if and only if they have the same "(key, - value)" pairs (regardless of ordering). Order comparisons (‘<’, - ‘<=’, ‘>=’, ‘>’) raise "TypeError". - - Dictionaries preserve insertion order. Note that updating a key - does not affect the order. Keys added after deletion are inserted - at the end. - - >>> d = {"one": 1, "two": 2, "three": 3, "four": 4} - >>> d - {'one': 1, 'two': 2, 'three': 3, 'four': 4} - >>> list(d) - ['one', 'two', 'three', 'four'] - >>> list(d.values()) - [1, 2, 3, 4] - >>> d["one"] = 42 - >>> d - {'one': 42, 'two': 2, 'three': 3, 'four': 4} - >>> del d["two"] - >>> d["two"] = None - >>> d - {'one': 42, 'three': 3, 'four': 4, 'two': None} - - Changed in version 3.7: Dictionary order is guaranteed to be - insertion order. This behavior was an implementation detail of - CPython from 3.6. - Dictionaries and dictionary views are reversible. 
>>> d = {"one": 1, "two": 2, "three": 3, "four": 4} @@ -12163,7 +13491,7 @@ class dict(iterable, **kwargs) | "s * n" or "n * s" | equivalent to adding *s* to | (2)(7) | | | itself *n* times | | +----------------------------+----------------------------------+------------+ -| "s[i]" | *i*th item of *s*, origin 0 | (3) | +| "s[i]" | *i*th item of *s*, origin 0 | (3)(9) | +----------------------------+----------------------------------+------------+ | "s[i:j]" | slice of *s* from *i* to *j* | (3)(4) | +----------------------------+----------------------------------+------------+ @@ -12287,6 +13615,8 @@ class dict(iterable, **kwargs) returned index being relative to the start of the sequence rather than the start of the slice. +9. An "IndexError" is raised if *i* is outside the sequence range. + Immutable Sequence Types ======================== @@ -12321,11 +13651,15 @@ class dict(iterable, **kwargs) | "s[i] = x" | item *i* of *s* is replaced by | | | | *x* | | +--------------------------------+----------------------------------+-----------------------+ +| "del s[i]" | removes item *i* of *s* | | ++--------------------------------+----------------------------------+-----------------------+ | "s[i:j] = t" | slice of *s* from *i* to *j* is | | | | replaced by the contents of the | | | | iterable *t* | | +--------------------------------+----------------------------------+-----------------------+ -| "del s[i:j]" | same as "s[i:j] = []" | | +| "del s[i:j]" | removes the elements of "s[i:j]" | | +| | from the list (same as "s[i:j] = | | +| | []") | | +--------------------------------+----------------------------------+-----------------------+ | "s[i:j:k] = t" | the elements of "s[i:j:k]" are | (1) | | | replaced by those of *t* | | @@ -12649,11 +13983,15 @@ class range(start, stop[, step]) | "s[i] = x" | item *i* of *s* is replaced by | | | | *x* | | +--------------------------------+----------------------------------+-----------------------+ +| "del s[i]" | removes item 
*i* of *s* | | ++--------------------------------+----------------------------------+-----------------------+ | "s[i:j] = t" | slice of *s* from *i* to *j* is | | | | replaced by the contents of the | | | | iterable *t* | | +--------------------------------+----------------------------------+-----------------------+ -| "del s[i:j]" | same as "s[i:j] = []" | | +| "del s[i:j]" | removes the elements of "s[i:j]" | | +| | from the list (same as "s[i:j] = | | +| | []") | | +--------------------------------+----------------------------------+-----------------------+ | "s[i:j:k] = t" | the elements of "s[i:j:k]" are | (1) | | | replaced by those of *t* | | From 255b603c3215824013872844192805a2b70c8c7a Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Sat, 16 Aug 2025 19:11:09 -0400 Subject: [PATCH 04/23] try to associate all dunder methods with relevant pages from datamodel.rst --- Doc/reference/datamodel.rst | 2 + Doc/tools/extensions/pydoc_topics.py | 1 + Lib/pydoc.py | 109 +++++++++++++++++++++++++-- Lib/pydoc_data/topics.py | 39 +++++++++- 4 files changed, 145 insertions(+), 6 deletions(-) diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index 7af3457070b84a..c0222b4ad8cf66 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -2911,6 +2911,8 @@ automatic property creation, proxies, frameworks, and automatic resource locking/synchronization. +.. 
_custom-instance-subclass: + Customizing instance and subclass checks ---------------------------------------- diff --git a/Doc/tools/extensions/pydoc_topics.py b/Doc/tools/extensions/pydoc_topics.py index 9b5aaf87bb2a57..c321ab2b2339c9 100644 --- a/Doc/tools/extensions/pydoc_topics.py +++ b/Doc/tools/extensions/pydoc_topics.py @@ -47,6 +47,7 @@ "continue", "conversions", "customization", + "custom-instance-subclass", "debugger", "del", "dict", diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 3e5d5c058e9cbb..b36308cd054047 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1739,10 +1739,14 @@ def resolve(thing, forceload=0): if isinstance(thing, str): object = locate(thing, forceload) if object is None: + if re.match(r'^__\w+__$', thing): + special = "Use help('specialnames') for a list of special names for which help is available.\n" + else: + special = "" raise ImportError('''\ No Python documentation found for %r. -Use help() to get the interactive help utility. -Use help(str) for help on the str class.''' % thing) +%sUse help() to get the interactive help utility. +Use help(str) for help on the str class.''' % (thing, special)) return object, thing else: name = getattr(thing, '__name__', None) @@ -1845,6 +1849,87 @@ def _introdoc(): enter "q", "quit" or "exit". 
''') +def collect_dunders(symbols): + dunders = { + '__main__': ('__main__', ''), + '__call__': ('callable-types', 'SPECIALMETHODS'), + } + + basic_dunders = [ + '__new__', '__init__', '__del__', '__repr__', '__str__', '__bytes__', + '__format__', '__hash__', '__bool__', + ] + for bd in basic_dunders: + dunders[bd] = ('customization', 'SPECIALNAMES') + + attribute_dunders = [ + '__getattr__', '__getattribute__', '__setattr__', '__delattr__', + '__dir__', '__get__', '__set__', '__delete__', '__objclass__', + ] + for ad in attribute_dunders: + dunders[ad] = ('attribute-access', 'SPECIALNAMES') + + class_dunders = [ + '__init_subclass__', '__set_names__', '__mro_entries__', + ] + for cd in class_dunders: + dunders[cd] = ('class-customization', 'SPECIALNAMES') + + instance_dunders = [ + '__instancecheck__', '__subclasscheck__' + ] + for d in instance_dunders: + dunders[d] = ('custom-instance-subclass', 'SPECIALNAMES') + + sequence_dunders = [ + '__len__', '__length_hint__', '__getitem__', '__setitem__', + '__delitem__', '__missing__', '__iter__', '__reversed__', + '__contains__', + ] + for sd in sequence_dunders: + dunders[sd] = ('SEQUENCEMETHODS', 'SPECIALMETHODS') + + comparison_dunders = { + '__lt__': '<', + '__le__': '<=', + '__eq__': '==', + '__ne__': '!=', + '__gt__': '>', + '__ge__': '>=', + } + for dunder, symbol in comparison_dunders.items(): + dunders[dunder] = ('customization', f'{symbol} SPECIALMETHODS') + if symbol in symbols: + symbols[symbol] += f' {dunder}' + + arithmetic_dunders = { + '__add__': '+', + '__sub__': '-', + '__mul__': '*', + '__matmul__': '@', + '__truediv__': '/', + '__floordiv__': '//', + '__mod__': '%', + '__pow__': '**', + '__lshift__': '<<', + '__rshift__': '>>', + '__and__': '&', + '__or__': '|', + '__xor__': '^', + } + for dunder, symbol in arithmetic_dunders.items(): + rname = "__r" + dunder[2:] + iname = "__i" + dunder[2:] + dunders[dunder] = ('numeric-types', f'{symbol} {rname} {iname} SPECIALMETHODS') + dunders[rname] = 
('numeric-types', f'{symbol} {dunder} SPECIALMETHODS') + dunders[iname] = ('numeric-types', f'{symbol} {dunder} SPECIALMETHODS') + if symbol in symbols: + symbols[symbol] += f' {dunder}' + + dunders['__divmod__'] = ('numeric-types', 'divmod') + + return dunders + class Helper: # These dictionaries map a topic name to either an alias, or a tuple @@ -1925,7 +2010,8 @@ class Helper: '(': 'TUPLES FUNCTIONS CALLS', ')': 'TUPLES FUNCTIONS CALLS', '[': 'LISTS SUBSCRIPTS SLICINGS', - ']': 'LISTS SUBSCRIPTS SLICINGS' + ']': 'LISTS SUBSCRIPTS SLICINGS', + } for topic, symbols_ in _symbols_inverse.items(): for symbol in symbols_: @@ -2023,9 +2109,13 @@ class Helper: 'CONTEXTMANAGERS': ('context-managers', 'with'), 'DUNDERMETHODS': 'SPECIALMETHODS', 'MAINMODULE': '__main__', - '__main__': ('__main__', ''), } + # add dunder methods + dunders = collect_dunders(symbols) + topics |= dunders + + def __init__(self, input=None, output=None): self._input = input self._output = output @@ -2100,6 +2190,8 @@ def help(self, request, is_cli=False): elif request == 'keywords': self.listkeywords() elif request == 'symbols': self.listsymbols() elif request == 'topics': self.listtopics() + elif request in {'specialnames', 'dunders'}: + self.listdunders() elif request == 'modules': self.listmodules() elif request[:8] == 'modules ': self.listmodules(request.split()[1]) @@ -2174,7 +2266,14 @@ def listtopics(self): Here is a list of available topics. Enter any topic name to get more help. ''') - self.list([k for k in self.topics.keys() if k.isupper()], columns=3) + self.list([k for k in self.topics if k not in self.dunders], columns=3) + + def listdunders(self): + self.output.write(''' +Here is a list of special names for which help is available. Enter any one to get more help. 
+ +''') + self.list(self.dunders.keys(), columns=3) def showtopic(self, topic, more_xrefs=''): try: diff --git a/Lib/pydoc_data/topics.py b/Lib/pydoc_data/topics.py index 12e85a3f6993c2..82592a07e9bd01 100644 --- a/Lib/pydoc_data/topics.py +++ b/Lib/pydoc_data/topics.py @@ -1,4 +1,4 @@ -# Autogenerated by Sphinx on Sat Aug 16 17:38:04 2025 +# Autogenerated by Sphinx on Sat Aug 16 20:05:29 2025 # as part of the release process. topics = { @@ -4083,6 +4083,43 @@ def f() -> annotation: ... Some additional rules apply for certain operators (e.g., a string as a left argument to the ‘%’ operator). Extensions must define their own conversion behavior. +''', + 'custom-instance-subclass': r'''Customizing instance and subclass checks +**************************************** + +The following methods are used to override the default behavior of the +"isinstance()" and "issubclass()" built-in functions. + +In particular, the metaclass "abc.ABCMeta" implements these methods in +order to allow the addition of Abstract Base Classes (ABCs) as +“virtual base classes” to any class or type (including built-in +types), including other ABCs. + +type.__instancecheck__(self, instance) + + Return true if *instance* should be considered a (direct or + indirect) instance of *class*. If defined, called to implement + "isinstance(instance, class)". + +type.__subclasscheck__(self, subclass) + + Return true if *subclass* should be considered a (direct or + indirect) subclass of *class*. If defined, called to implement + "issubclass(subclass, class)". + +Note that these methods are looked up on the type (metaclass) of a +class. They cannot be defined as class methods in the actual class. +This is consistent with the lookup of special methods that are called +on instances, only in this case the instance is itself a class. 
+ +See also: + + **PEP 3119** - Introducing Abstract Base Classes + Includes the specification for customizing "isinstance()" and + "issubclass()" behavior through "__instancecheck__()" and + "__subclasscheck__()", with motivation for this functionality in + the context of adding Abstract Base Classes (see the "abc" + module) to the language. ''', 'customization': r'''Basic customization ******************* From da7f55ca743ba62815bdf23c1cd677bd5951e20d Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Sat, 16 Aug 2025 20:26:18 -0400 Subject: [PATCH 05/23] show help for, e.g., __import__ --- Lib/pydoc.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index b36308cd054047..7fcf8bcdd65750 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1706,11 +1706,6 @@ def describe(thing): def locate(path, forceload=0): """Locate an object by name or dotted path, importing as necessary.""" - if re.match(r"^__\w+__$", path): - # if we're looking up a special variable, don't grab the result from - # the builtins module, because it's probably not what the user wanted - # (if it is, they can look up builtins.whatever) - return None parts = [part for part in path.split('.') if part] module, n = None, 0 while n < len(parts): @@ -1726,6 +1721,11 @@ def locate(path, forceload=0): object = getattr(object, part) except AttributeError: return None + if re.match(r"^__\w+__$", path) and not isinstance(object, (type, type(__import__))): + # if we're looking up a special variable, don't grab the result from + # the builtins module, because it's probably not what the user wanted + # (if it is, they can look up builtins.whatever) + return None return object # --------------------------------------- interactive interpreter interface From 3294f924e2576da5f87befae67a525e508ee85ef Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Sat, 16 Aug 2025 20:35:59 -0400 Subject: [PATCH 06/23] bugfix for sequence-related dunders --- Lib/pydoc.py | 5 ++--- 
1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 7fcf8bcdd65750..7cee8ddbfa39af 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1887,7 +1887,7 @@ def collect_dunders(symbols): '__contains__', ] for sd in sequence_dunders: - dunders[sd] = ('SEQUENCEMETHODS', 'SPECIALMETHODS') + dunders[sd] = ('sequence-types', 'SPECIALMETHODS') comparison_dunders = { '__lt__': '<', @@ -2061,8 +2061,7 @@ class Helper: 'BASICMETHODS': ('customization', 'hash repr str SPECIALMETHODS'), 'ATTRIBUTEMETHODS': ('attribute-access', 'ATTRIBUTES SPECIALMETHODS'), 'CALLABLEMETHODS': ('callable-types', 'CALLS SPECIALMETHODS'), - 'SEQUENCEMETHODS': ('sequence-types', 'SEQUENCES SEQUENCEMETHODS ' - 'SPECIALMETHODS'), + 'SEQUENCEMETHODS': ('sequence-types', 'SEQUENCES SPECIALMETHODS'), 'MAPPINGMETHODS': ('sequence-types', 'MAPPINGS SPECIALMETHODS'), 'NUMBERMETHODS': ('numeric-types', 'NUMBERS AUGMENTEDASSIGNMENT ' 'SPECIALMETHODS'), From 2aee131c528a21e15835e6f1eb8fff1aacfee3a3 Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Sat, 16 Aug 2025 20:38:20 -0400 Subject: [PATCH 07/23] list 'specialnames' as a special topic --- Lib/pydoc.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 7cee8ddbfa39af..1da482cbfb721e 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1838,8 +1838,8 @@ def _introdoc(): Enter the name of any module, keyword, symbol, or topic to get help on writing Python programs and using Python modules. To get a list of - available modules, keywords, symbols, or topics, enter "modules", - "keywords", "symbols", or "topics". + available modules, keywords, symbols, special names, or topics, enter + "modules", "keywords", "symbols", "specialnames", or "topics". 
{pyrepl_keys} Each module also comes with a one-line summary of what it does; to list the modules whose name or summary contain a given string such as "spam", @@ -2189,7 +2189,7 @@ def help(self, request, is_cli=False): elif request == 'keywords': self.listkeywords() elif request == 'symbols': self.listsymbols() elif request == 'topics': self.listtopics() - elif request in {'specialnames', 'dunders'}: + elif request == 'specialnames': self.listdunders() elif request == 'modules': self.listmodules() elif request[:8] == 'modules ': @@ -2976,7 +2976,8 @@ class BadUsage(Exception): pass reference to a class or function within a module or module in a package. If contains a '{sep}', it is used as the path to a Python source file to document. If name is 'keywords', 'topics', - or 'modules', a listing of these things is displayed. + 'symbols', 'specialnames', or 'modules', a listing of these things is + displayed. {cmd} -k Search for a keyword in the synopsis lines of all available modules. From bff3522197149c552ae8772147ea8b790c568386 Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Mon, 18 Aug 2025 00:57:37 -0400 Subject: [PATCH 08/23] add help for __name__ --- Doc/tools/extensions/pydoc_topics.py | 1 + Lib/pydoc.py | 1 + Lib/pydoc_data/topics.py | 171 ++++++++++++++++++++++++++- 3 files changed, 172 insertions(+), 1 deletion(-) diff --git a/Doc/tools/extensions/pydoc_topics.py b/Doc/tools/extensions/pydoc_topics.py index c321ab2b2339c9..995c5e989f08ea 100644 --- a/Doc/tools/extensions/pydoc_topics.py +++ b/Doc/tools/extensions/pydoc_topics.py @@ -70,6 +70,7 @@ "integers", "lambda", "lists", + "name_equals_main", "naming", "nonlocal", "numbers", diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 1da482cbfb721e..14e0156c5ded2b 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1851,6 +1851,7 @@ def _introdoc(): def collect_dunders(symbols): dunders = { + '__name__': ('name_equals_main', ''), '__main__': ('__main__', ''), '__call__': ('callable-types', 'SPECIALMETHODS'), } diff 
--git a/Lib/pydoc_data/topics.py b/Lib/pydoc_data/topics.py index 82592a07e9bd01..cefc1e35451558 100644 --- a/Lib/pydoc_data/topics.py +++ b/Lib/pydoc_data/topics.py @@ -1,4 +1,4 @@ -# Autogenerated by Sphinx on Sat Aug 16 20:05:29 2025 +# Autogenerated by Sphinx on Mon Aug 18 00:55:48 2025 # as part of the release process. topics = { @@ -7211,6 +7211,175 @@ def (parameters): from left to right and placed into the list object in that order. When a comprehension is supplied, the list is constructed from the elements resulting from the comprehension. +''', + 'name_equals_main': r'''"__name__ == '__main__'" +************************ + +When a Python module or package is imported, "__name__" is set to the +module’s name. Usually, this is the name of the Python file itself +without the ".py" extension: + + >>> import configparser + >>> configparser.__name__ + 'configparser' + +If the file is part of a package, "__name__" will also include the +parent package’s path: + + >>> from concurrent.futures import process + >>> process.__name__ + 'concurrent.futures.process' + +However, if the module is executed in the top-level code environment, +its "__name__" is set to the string "'__main__'". + + +What is the “top-level code environment”? +========================================= + +"__main__" is the name of the environment where top-level code is run. +“Top-level code” is the first user-specified Python module that starts +running. It’s “top-level” because it imports all other modules that +the program needs. Sometimes “top-level code” is called an *entry +point* to the application. + +The top-level code environment can be: + +* the scope of an interactive prompt: + + >>> __name__ + '__main__' + +* the Python module passed to the Python interpreter as a file + argument: + + $ python helloworld.py + Hello, world! + +* the Python module or package passed to the Python interpreter with + the "-m" argument: + + $ python -m tarfile + usage: tarfile.py [-h] [-v] (...) 
+ +* Python code read by the Python interpreter from standard input: + + $ echo "import this" | python + The Zen of Python, by Tim Peters + + Beautiful is better than ugly. + Explicit is better than implicit. + ... + +* Python code passed to the Python interpreter with the "-c" argument: + + $ python -c "import this" + The Zen of Python, by Tim Peters + + Beautiful is better than ugly. + Explicit is better than implicit. + ... + +In each of these situations, the top-level module’s "__name__" is set +to "'__main__'". + +As a result, a module can discover whether or not it is running in the +top-level environment by checking its own "__name__", which allows a +common idiom for conditionally executing code when the module is not +initialized from an import statement: + + if __name__ == '__main__': + # Execute when the module is not initialized from an import statement. + ... + +See also: + + For a more detailed look at how "__name__" is set in all situations, + see the tutorial section Modules. + + +Idiomatic Usage +=============== + +Some modules contain code that is intended for script use only, like +parsing command-line arguments or fetching data from standard input. +If a module like this was imported from a different module, for +example to unit test it, the script code would unintentionally execute +as well. + +This is where using the "if __name__ == '__main__'" code block comes +in handy. Code within this block won’t run unless the module is +executed in the top-level environment. + +Putting as few statements as possible in the block below "if __name__ +== '__main__'" can improve code clarity and correctness. 
Most often, a +function named "main" encapsulates the program’s primary behavior: + + # echo.py + + import shlex + import sys + + def echo(phrase: str) -> None: + """A dummy wrapper around print.""" + # for demonstration purposes, you can imagine that there is some + # valuable and reusable logic inside this function + print(phrase) + + def main() -> int: + """Echo the input arguments to standard output""" + phrase = shlex.join(sys.argv) + echo(phrase) + return 0 + + if __name__ == '__main__': + sys.exit(main()) # next section explains the use of sys.exit + +Note that if the module didn’t encapsulate code inside the "main" +function but instead put it directly within the "if __name__ == +'__main__'" block, the "phrase" variable would be global to the entire +module. This is error-prone as other functions within the module +could be unintentionally using the global variable instead of a local +name. A "main" function solves this problem. + +Using a "main" function has the added benefit of the "echo" function +itself being isolated and importable elsewhere. When "echo.py" is +imported, the "echo" and "main" functions will be defined, but neither +of them will be called, because "__name__ != '__main__'". + + +Packaging Considerations +======================== + +"main" functions are often used to create command-line tools by +specifying them as entry points for console scripts. When this is +done, pip inserts the function call into a template script, where the +return value of "main" is passed into "sys.exit()". For example: + + sys.exit(main()) + +Since the call to "main" is wrapped in "sys.exit()", the expectation +is that your function will return some value acceptable as an input to +"sys.exit()"; typically, an integer or "None" (which is implicitly +returned if your function does not have a return statement). + +By proactively following this convention ourselves, our module will +have the same behavior when run directly (i.e. 
"python echo.py") as it +will have if we later package it as a console script entry-point in a +pip-installable package. + +In particular, be careful about returning strings from your "main" +function. "sys.exit()" will interpret a string argument as a failure +message, so your program will have an exit code of "1", indicating +failure, and the string will be written to "sys.stderr". The +"echo.py" example from earlier exemplifies using the +"sys.exit(main())" convention. + +See also: + + Python Packaging User Guide contains a collection of tutorials and + references on how to distribute and install Python packages with + modern tools. ''', 'naming': r'''Naming and binding ****************** From d1959d2d5edfd0ffe0fe6d6af350366ce317eb22 Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Sun, 17 Aug 2025 13:21:08 -0400 Subject: [PATCH 09/23] change title of added reference --- Doc/reference/datamodel.rst | 2 +- Doc/tools/extensions/pydoc_topics.py | 2 +- Lib/pydoc.py | 2 +- Lib/pydoc_data/topics.py | 76 ++++++++++++++-------------- 4 files changed, 41 insertions(+), 41 deletions(-) diff --git a/Doc/reference/datamodel.rst b/Doc/reference/datamodel.rst index c0222b4ad8cf66..fb40f16c1d76c2 100644 --- a/Doc/reference/datamodel.rst +++ b/Doc/reference/datamodel.rst @@ -2911,7 +2911,7 @@ automatic property creation, proxies, frameworks, and automatic resource locking/synchronization. -.. _custom-instance-subclass: +.. 
_customize-instance-subclass-checks: Customizing instance and subclass checks ---------------------------------------- diff --git a/Doc/tools/extensions/pydoc_topics.py b/Doc/tools/extensions/pydoc_topics.py index 995c5e989f08ea..df5d426db07b26 100644 --- a/Doc/tools/extensions/pydoc_topics.py +++ b/Doc/tools/extensions/pydoc_topics.py @@ -47,7 +47,7 @@ "continue", "conversions", "customization", - "custom-instance-subclass", + "customize-instance-subclass-checks", "debugger", "del", "dict", diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 14e0156c5ded2b..9f196c230a1af2 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1880,7 +1880,7 @@ def collect_dunders(symbols): '__instancecheck__', '__subclasscheck__' ] for d in instance_dunders: - dunders[d] = ('custom-instance-subclass', 'SPECIALNAMES') + dunders[d] = ('customize-instance-subclass-checks', 'SPECIALNAMES') sequence_dunders = [ '__len__', '__length_hint__', '__getitem__', '__setitem__', diff --git a/Lib/pydoc_data/topics.py b/Lib/pydoc_data/topics.py index cefc1e35451558..c75c61840daaa2 100644 --- a/Lib/pydoc_data/topics.py +++ b/Lib/pydoc_data/topics.py @@ -1,4 +1,4 @@ -# Autogenerated by Sphinx on Mon Aug 18 00:55:48 2025 +# Autogenerated by Sphinx on Mon Aug 18 01:09:13 2025 # as part of the release process. topics = { @@ -4083,43 +4083,6 @@ def f() -> annotation: ... Some additional rules apply for certain operators (e.g., a string as a left argument to the ‘%’ operator). Extensions must define their own conversion behavior. -''', - 'custom-instance-subclass': r'''Customizing instance and subclass checks -**************************************** - -The following methods are used to override the default behavior of the -"isinstance()" and "issubclass()" built-in functions. - -In particular, the metaclass "abc.ABCMeta" implements these methods in -order to allow the addition of Abstract Base Classes (ABCs) as -“virtual base classes” to any class or type (including built-in -types), including other ABCs. 
- -type.__instancecheck__(self, instance) - - Return true if *instance* should be considered a (direct or - indirect) instance of *class*. If defined, called to implement - "isinstance(instance, class)". - -type.__subclasscheck__(self, subclass) - - Return true if *subclass* should be considered a (direct or - indirect) subclass of *class*. If defined, called to implement - "issubclass(subclass, class)". - -Note that these methods are looked up on the type (metaclass) of a -class. They cannot be defined as class methods in the actual class. -This is consistent with the lookup of special methods that are called -on instances, only in this case the instance is itself a class. - -See also: - - **PEP 3119** - Introducing Abstract Base Classes - Includes the specification for customizing "isinstance()" and - "issubclass()" behavior through "__instancecheck__()" and - "__subclasscheck__()", with motivation for this functionality in - the context of adding Abstract Base Classes (see the "abc" - module) to the language. ''', 'customization': r'''Basic customization ******************* @@ -4429,6 +4392,43 @@ def __hash__(self): considered true if its result is nonzero. If a class defines neither "__len__()" nor "__bool__()" (which is true of the "object" class itself), all its instances are considered true. +''', + 'customize-instance-subclass-checks': r'''Customizing instance and subclass checks +**************************************** + +The following methods are used to override the default behavior of the +"isinstance()" and "issubclass()" built-in functions. + +In particular, the metaclass "abc.ABCMeta" implements these methods in +order to allow the addition of Abstract Base Classes (ABCs) as +“virtual base classes” to any class or type (including built-in +types), including other ABCs. + +type.__instancecheck__(self, instance) + + Return true if *instance* should be considered a (direct or + indirect) instance of *class*. 
If defined, called to implement + "isinstance(instance, class)". + +type.__subclasscheck__(self, subclass) + + Return true if *subclass* should be considered a (direct or + indirect) subclass of *class*. If defined, called to implement + "issubclass(subclass, class)". + +Note that these methods are looked up on the type (metaclass) of a +class. They cannot be defined as class methods in the actual class. +This is consistent with the lookup of special methods that are called +on instances, only in this case the instance is itself a class. + +See also: + + **PEP 3119** - Introducing Abstract Base Classes + Includes the specification for customizing "isinstance()" and + "issubclass()" behavior through "__instancecheck__()" and + "__subclasscheck__()", with motivation for this functionality in + the context of adding Abstract Base Classes (see the "abc" + module) to the language. ''', 'debugger': r'''"pdb" — The Python Debugger *************************** From 1bc022765c721938a10c7e4180fb2df717616e49 Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Mon, 18 Aug 2025 01:09:51 -0400 Subject: [PATCH 10/23] faster check for dunder name --- Lib/pydoc.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 9f196c230a1af2..d3fdd04580b867 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1721,10 +1721,10 @@ def locate(path, forceload=0): object = getattr(object, part) except AttributeError: return None - if re.match(r"^__\w+__$", path) and not isinstance(object, (type, type(__import__))): - # if we're looking up a special variable, don't grab the result from - # the builtins module, because it's probably not what the user wanted - # (if it is, they can look up builtins.whatever) + if _is_dunder_name(path) and not isinstance(object, (type, type(__import__))): + # if we're looking up a special variable and we don't find a class or a + # function, it's probably not what the user wanted (if it is, they can + # look up 
builtins.whatever) return None return object @@ -1739,7 +1739,7 @@ def resolve(thing, forceload=0): if isinstance(thing, str): object = locate(thing, forceload) if object is None: - if re.match(r'^__\w+__$', thing): + if _is_dunder_name(thing): special = "Use help('specialnames') for a list of special names for which help is available.\n" else: special = "" @@ -1849,6 +1849,9 @@ def _introdoc(): enter "q", "quit" or "exit". ''') +def _is_dunder_name(x): + return isinstance(x, str) and len(x) > 4 and x[:2] == x[-2:] == '__' + def collect_dunders(symbols): dunders = { '__name__': ('name_equals_main', ''), From 4ed0e72b2ee7c5f9a48fd2507020188750103d92 Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Mon, 18 Aug 2025 01:16:21 -0400 Subject: [PATCH 11/23] suggest online help as well if we can't find something --- Lib/pydoc.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index d3fdd04580b867..c85f6d2a859ed5 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1746,7 +1746,8 @@ def resolve(thing, forceload=0): raise ImportError('''\ No Python documentation found for %r. %sUse help() to get the interactive help utility. -Use help(str) for help on the str class.''' % (thing, special)) +Use help(str) for help on the str class. 
+Additional documentation is available online at https://docs.python.org/%s.%s/''' % (thing, special, *sys.version_info[:2])) return object, thing else: name = getattr(thing, '__name__', None) From 5800535ad989c8a60e88f729bbc212d1d5ecd497 Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Mon, 18 Aug 2025 01:21:22 -0400 Subject: [PATCH 12/23] fix bad reference (SPECIALNAMES doesn't exist) --- Lib/pydoc.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index c85f6d2a859ed5..9e43074fe4a1ba 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1865,26 +1865,26 @@ def collect_dunders(symbols): '__format__', '__hash__', '__bool__', ] for bd in basic_dunders: - dunders[bd] = ('customization', 'SPECIALNAMES') + dunders[bd] = ('customization', 'SPECIALMETHODS') attribute_dunders = [ '__getattr__', '__getattribute__', '__setattr__', '__delattr__', '__dir__', '__get__', '__set__', '__delete__', '__objclass__', ] for ad in attribute_dunders: - dunders[ad] = ('attribute-access', 'SPECIALNAMES') + dunders[ad] = ('attribute-access', 'SPECIALMETHODS') class_dunders = [ '__init_subclass__', '__set_names__', '__mro_entries__', ] for cd in class_dunders: - dunders[cd] = ('class-customization', 'SPECIALNAMES') + dunders[cd] = ('class-customization', 'SPECIALMETHODS') instance_dunders = [ '__instancecheck__', '__subclasscheck__' ] for d in instance_dunders: - dunders[d] = ('customize-instance-subclass-checks', 'SPECIALNAMES') + dunders[d] = ('customize-instance-subclass-checks', 'SPECIALMETHODS') sequence_dunders = [ '__len__', '__length_hint__', '__getitem__', '__setitem__', From 587740b82ffa98af134e33df6642efa163a73ea4 Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Mon, 18 Aug 2025 01:54:14 -0400 Subject: [PATCH 13/23] change help(help) output --- Lib/pydoc.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 9e43074fe4a1ba..dcacc0f0f97968 100644 --- a/Lib/pydoc.py +++ 
b/Lib/pydoc.py @@ -2222,15 +2222,18 @@ def helphelp(self): * Calling help() with no arguments starts an interactive help session. - * Calling help(x) will have one of two behaviors depending on the type - of the argument: + * The behavior of help(x) depends on x's type: * If x is a string, help(x) provides information about the given topic. For example, help("class") will provide information about the "class" keyword, and help("math.sqrt") will provide information about the "math.sqrt" function. - * If x is not a string, help(x) prints information about x's type. + * If x is a class or a built-in type, help(x) provides information + about that type. For example, help(str) will provide information + about the str type. + + * For all other objects, help(x) prints information about x's type. For example, help(20) will provide information about the int type. """)) From 69265aa009593ee37b7895b8ce16e2be03441312 Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Mon, 18 Aug 2025 06:49:44 -0400 Subject: [PATCH 14/23] clarify message when an entry is not found --- Lib/pydoc.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index dcacc0f0f97968..7523c131a9fef2 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1744,7 +1744,7 @@ def resolve(thing, forceload=0): else: special = "" raise ImportError('''\ -No Python documentation found for %r. +No interactive help entry found for %r. %sUse help() to get the interactive help utility. Use help(str) for help on the str class. 
Additional documentation is available online at https://docs.python.org/%s.%s/''' % (thing, special, *sys.version_info[:2])) From 101fff37440e05c2bb45505c66083a1f06c2b906 Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Mon, 18 Aug 2025 06:54:36 -0400 Subject: [PATCH 15/23] update test cases for new message --- Lib/pydoc.py | 2 +- Lib/test/test_pydoc/test_pydoc.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 7523c131a9fef2..6b477b18114469 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1744,7 +1744,7 @@ def resolve(thing, forceload=0): else: special = "" raise ImportError('''\ -No interactive help entry found for %r. +No help entry found for %r. %sUse help() to get the interactive help utility. Use help(str) for help on the str class. Additional documentation is available online at https://docs.python.org/%s.%s/''' % (thing, special, *sys.version_info[:2])) diff --git a/Lib/test/test_pydoc/test_pydoc.py b/Lib/test/test_pydoc/test_pydoc.py index 3b50ead00bdd31..63aee311169028 100644 --- a/Lib/test/test_pydoc/test_pydoc.py +++ b/Lib/test/test_pydoc/test_pydoc.py @@ -229,10 +229,11 @@ class C(builtins.object) for s in expected_data_docstrings) # output pattern for missing module -missing_pattern = '''\ -No Python documentation found for %r. +missing_pattern = ('''\ +No help entry found for %%r. Use help() to get the interactive help utility. -Use help(str) for help on the str class.'''.replace('\n', os.linesep) +Use help(str) for help on the str class. 
+Additional documentation is available online at https://docs.python.org/%s.%s/''' % sys.version_info[:2]).replace('\n', os.linesep) # output pattern for module with bad imports badimport_pattern = "problem in %s - ModuleNotFoundError: No module named %r" @@ -667,7 +668,7 @@ def test_fail_help_cli(self): elines = (missing_pattern % 'abd').splitlines() with spawn_python("-c" "help()") as proc: out, _ = proc.communicate(b"abd") - olines = out.decode().splitlines()[-9:-6] + olines = out.decode().splitlines()[-10:-6] olines[0] = olines[0].removeprefix('help> ') self.assertEqual(elines, olines) From 35b44708c4bcf414c0d3447fa183b1de53208c39 Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Tue, 19 Aug 2025 10:20:37 -0400 Subject: [PATCH 16/23] revert changes to help(help); those should probably be a separate PR --- Lib/pydoc.py | 32 ++------------------------------ 1 file changed, 2 insertions(+), 30 deletions(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index 6b477b18114469..a97924c2818b58 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -2189,9 +2189,7 @@ def getline(self, prompt): def help(self, request, is_cli=False): if isinstance(request, str): request = request.strip() - if request == 'help': - self.helphelp() - elif request == 'keywords': self.listkeywords() + if request == 'keywords': self.listkeywords() elif request == 'symbols': self.listsymbols() elif request == 'topics': self.listtopics() elif request == 'specialnames': @@ -2207,36 +2205,10 @@ def help(self, request, is_cli=False): elif request in self.topics: self.showtopic(request) elif request: doc(request, 'Help on %s:', output=self._output, is_cli=is_cli) else: doc(str, 'Help on %s:', output=self._output, is_cli=is_cli) - elif request is builtins.help: - self.helphelp() + elif isinstance(request, Helper): self() else: doc(request, 'Help on %s:', output=self._output, is_cli=is_cli) self.output.write('\n') - def helphelp(self): - pager(textwrap.dedent("""\ - help - Interactive Help - ======================= 
- - The built-in help function implements an interactive help utility. You - can make use of it in a few different ways: - - * Calling help() with no arguments starts an interactive help session. - - * The behavior of help(x) depends on x's type: - - * If x is a string, help(x) provides information about the given - topic. For example, help("class") will provide information about - the "class" keyword, and help("math.sqrt") will provide - information about the "math.sqrt" function. - - * If x is a class or a built-in type, help(x) provides information - about that type. For example, help(str) will provide information - about the str type. - - * For all other objects, help(x) prints information about x's type. - For example, help(20) will provide information about the int type. - """)) - def intro(self): self.output.write(_introdoc()) From e1968187df7bd09e76736527f66baf00ffd1d83f Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Tue, 19 Aug 2025 12:50:56 -0400 Subject: [PATCH 17/23] new test cases for help() about dunder methods --- Lib/test/test_pydoc/test_pydoc.py | 45 ++++++++++++++++++++++++++----- 1 file changed, 38 insertions(+), 7 deletions(-) diff --git a/Lib/test/test_pydoc/test_pydoc.py b/Lib/test/test_pydoc/test_pydoc.py index 63aee311169028..c3f8da9f754e0c 100644 --- a/Lib/test/test_pydoc/test_pydoc.py +++ b/Lib/test/test_pydoc/test_pydoc.py @@ -229,11 +229,12 @@ class C(builtins.object) for s in expected_data_docstrings) # output pattern for missing module -missing_pattern = ('''\ +def missing_pattern(name, dunder=False): + return ('''\ No help entry found for %%r. -Use help() to get the interactive help utility. +%%sUse help() to get the interactive help utility. Use help(str) for help on the str class. 
-Additional documentation is available online at https://docs.python.org/%s.%s/''' % sys.version_info[:2]).replace('\n', os.linesep) +Additional documentation is available online at https://docs.python.org/%s.%s/''' % sys.version_info[:2]).replace('\n', os.linesep) % (name, "Use help('specialnames') for a list of special names for which help is available.\n" if dunder else "") # output pattern for module with bad imports badimport_pattern = "problem in %s - ModuleNotFoundError: No module named %r" @@ -498,7 +499,33 @@ class B: def test_not_here(self): missing_module = "test.i_am_not_here" result = str(run_pydoc_fail(missing_module), 'ascii') - expected = missing_pattern % missing_module + expected = missing_pattern(missing_module) + self.assertEqual(expected, result, + "documentation for missing module found") + + def test_dunder_help(self): + def get_main_output(topic): + return run_pydoc(topic).split(b'Related help topics:')[0].strip() + + # check that each dunder method maps to a help topic that includes its + # name + for name in pydoc.Helper.dunders: + self.assertIn(name.encode('ascii'), get_main_output(name)) + + # check that right-hand and in-place versions match + self.assertEqual(get_main_output('__add__'), get_main_output('__radd__')) + self.assertEqual(get_main_output('__add__'), get_main_output('__iadd__')) + + def test_dunder_main(self): + self.assertEqual(run_pydoc('__main__').splitlines(False)[0], + '"__main__" — Top-level code environment'.encode('utf-8')) + self.assertEqual(run_pydoc('__name__').splitlines(False)[0], + '''"__name__ == '__main__'"'''.encode('utf-8')) + + def test_dunder_not_here(self): + missing_module = "__dict__" + result = str(run_pydoc_fail(missing_module), 'ascii') + expected = missing_pattern(missing_module, dunder=True) self.assertEqual(expected, result, "documentation for missing module found") @@ -511,7 +538,7 @@ def test_not_ascii(self): def test_input_strip(self): missing_module = " test.i_am_not_here " result = 
str(run_pydoc_fail(missing_module), 'ascii') - expected = missing_pattern % missing_module.strip() + expected = missing_pattern(missing_module.strip()) self.assertEqual(expected, result) def test_stripid(self): @@ -653,6 +680,8 @@ class ZeroDivisionError(ArithmeticError) # Testing that the subclasses section does not appear self.assertNotIn('Built-in subclasses', text) + + def test_builtin_on_metaclasses(self): """Tests help on metaclasses. @@ -665,7 +694,7 @@ def test_builtin_on_metaclasses(self): self.assertNotIn('Built-in subclasses', text) def test_fail_help_cli(self): - elines = (missing_pattern % 'abd').splitlines() + elines = (missing_pattern("abd")).splitlines() with spawn_python("-c" "help()") as proc: out, _ = proc.communicate(b"abd") olines = out.decode().splitlines()[-10:-6] @@ -676,7 +705,7 @@ def test_fail_help_output_redirect(self): with StringIO() as buf: helper = pydoc.Helper(output=buf) helper.help("abd") - expected = missing_pattern % "abd" + expected = missing_pattern("abd") self.assertEqual(expected, buf.getvalue().strip().replace('\n', os.linesep)) @unittest.skipIf(hasattr(sys, 'gettrace') and sys.gettrace(), @@ -738,6 +767,8 @@ def run_pydoc_for_request(request, expected_text_part): run_pydoc_for_request('keywords', 'Here is a list of the Python keywords.') # test for "symbols" run_pydoc_for_request('symbols', 'Here is a list of the punctuation symbols') + # test for "specialnames" + run_pydoc_for_request('specialnames', 'Here is a list of special names') # test for "topics" run_pydoc_for_request('topics', 'Here is a list of available topics.') # test for "modules" skipped, see test_modules() From d11fb771dc0e73d2df24f0f7e576b9e82073ac26 Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Tue, 19 Aug 2025 13:03:56 -0400 Subject: [PATCH 18/23] remove extra whitespace --- Lib/test/test_pydoc/test_pydoc.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/Lib/test/test_pydoc/test_pydoc.py b/Lib/test/test_pydoc/test_pydoc.py index 
c3f8da9f754e0c..ff3d29c7183d75 100644 --- a/Lib/test/test_pydoc/test_pydoc.py +++ b/Lib/test/test_pydoc/test_pydoc.py @@ -680,8 +680,6 @@ class ZeroDivisionError(ArithmeticError) # Testing that the subclasses section does not appear self.assertNotIn('Built-in subclasses', text) - - def test_builtin_on_metaclasses(self): """Tests help on metaclasses. From 2c34fba662b2040d454ba36a399c09964b512a6f Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Tue, 19 Aug 2025 13:12:57 -0400 Subject: [PATCH 19/23] __matmul__ shouldn't reference @ because of where help('@') currently goes --- Lib/pydoc.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Lib/pydoc.py b/Lib/pydoc.py index a97924c2818b58..a2d0882c4bbf50 100644 --- a/Lib/pydoc.py +++ b/Lib/pydoc.py @@ -1911,7 +1911,6 @@ def collect_dunders(symbols): '__add__': '+', '__sub__': '-', '__mul__': '*', - '__matmul__': '@', '__truediv__': '/', '__floordiv__': '//', '__mod__': '%', @@ -1931,6 +1930,12 @@ def collect_dunders(symbols): if symbol in symbols: symbols[symbol] += f' {dunder}' + # __matmul__ isn't included above because help('@') doesn't talk about + # matrix multiplication, so we shouldn't list it here as a related topic. 
+ dunders['__matmul__'] = ('numeric-types', f'__rmatmul__ __imatmul__ SPECIALMETHODS') + dunders['__rmatmul__'] = ('numeric-types', f'__matmul__ SPECIALMETHODS') + dunders['__imatmul__'] = ('numeric-types', f'__matmul__ SPECIALMETHODS') + dunders['__divmod__'] = ('numeric-types', 'divmod') return dunders From 595b95c6359745970b85d57413ebff7cbe74f2db Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Tue, 19 Aug 2025 13:52:48 -0400 Subject: [PATCH 20/23] fixes for missing dunder doctest --- Lib/test/test_pydoc/test_pydoc.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Lib/test/test_pydoc/test_pydoc.py b/Lib/test/test_pydoc/test_pydoc.py index ff3d29c7183d75..7e3e122799c8df 100644 --- a/Lib/test/test_pydoc/test_pydoc.py +++ b/Lib/test/test_pydoc/test_pydoc.py @@ -523,11 +523,9 @@ def test_dunder_main(self): '''"__name__ == '__main__'"'''.encode('utf-8')) def test_dunder_not_here(self): - missing_module = "__dict__" - result = str(run_pydoc_fail(missing_module), 'ascii') - expected = missing_pattern(missing_module, dunder=True) - self.assertEqual(expected, result, - "documentation for missing module found") + result = str(run_pydoc_fail("__dict__"), 'ascii') + expected = missing_pattern("__dict__", dunder=True) + self.assertEqual(expected, result) @requires_docstrings def test_not_ascii(self): From 0a49cfb5d4b920b2629b7ac79ac7a94e5e6e71ac Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Tue, 19 Aug 2025 14:32:03 -0400 Subject: [PATCH 21/23] attempt to fix failing test case on windows --- Lib/test/test_pydoc/test_pydoc.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/Lib/test/test_pydoc/test_pydoc.py b/Lib/test/test_pydoc/test_pydoc.py index 7e3e122799c8df..dc4710eba009bc 100644 --- a/Lib/test/test_pydoc/test_pydoc.py +++ b/Lib/test/test_pydoc/test_pydoc.py @@ -230,11 +230,12 @@ class C(builtins.object) # output pattern for missing module def missing_pattern(name, dunder=False): + dunderhelp = "Use 
help('specialnames') for a list of special names for which help is available.\n" if dunder else "" return ('''\ -No help entry found for %%r. -%%sUse help() to get the interactive help utility. +No help entry found for %r. +%sUse help() to get the interactive help utility. Use help(str) for help on the str class. -Additional documentation is available online at https://docs.python.org/%s.%s/''' % sys.version_info[:2]).replace('\n', os.linesep) % (name, "Use help('specialnames') for a list of special names for which help is available.\n" if dunder else "") +Additional documentation is available online at https://docs.python.org/%s.%s/''' % (name, dunderhelp, *sys.version_info[:2])).replace('\n', os.linesep) # output pattern for module with bad imports badimport_pattern = "problem in %s - ModuleNotFoundError: No module named %r" From 394e401298aaeb1ccd843f9b23356f183550ef96 Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Tue, 19 Aug 2025 16:12:22 -0400 Subject: [PATCH 22/23] add news blurb --- .../2025-08-19-16-10-26.gh-issue-137966.nEgFAt.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Core_and_Builtins/2025-08-19-16-10-26.gh-issue-137966.nEgFAt.rst diff --git a/Misc/NEWS.d/next/Core_and_Builtins/2025-08-19-16-10-26.gh-issue-137966.nEgFAt.rst b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-19-16-10-26.gh-issue-137966.nEgFAt.rst new file mode 100644 index 00000000000000..0d37c6b2992c12 --- /dev/null +++ b/Misc/NEWS.d/next/Core_and_Builtins/2025-08-19-16-10-26.gh-issue-137966.nEgFAt.rst @@ -0,0 +1,2 @@ +Add support for additional special names to the built-in ``help`` function. +Patch by Adam Hartz. 
From 685b627544860888f0365ae4108105f97113c3f0 Mon Sep 17 00:00:00 2001 From: adam j hartz Date: Wed, 20 Aug 2025 02:16:27 -0400 Subject: [PATCH 23/23] roll back changes to pydoc_data not relevant to the new topics --- Lib/pydoc_data/topics.py | 1642 ++++++++------------------------------ 1 file changed, 327 insertions(+), 1315 deletions(-) diff --git a/Lib/pydoc_data/topics.py b/Lib/pydoc_data/topics.py index c75c61840daaa2..a6cd8d814bcf32 100644 --- a/Lib/pydoc_data/topics.py +++ b/Lib/pydoc_data/topics.py @@ -1,4 +1,4 @@ -# Autogenerated by Sphinx on Mon Aug 18 01:09:13 2025 +# Autogenerated by Sphinx on Wed Aug 20 02:20:40 2025 # as part of the release process. topics = { @@ -785,9 +785,9 @@ async def func(param1, param2): 'atom-identifiers': r'''Identifiers (Names) ******************* -An identifier occurring as an atom is a name. See section Names -(identifiers and keywords) for lexical definition and section Naming -and binding for documentation of naming and binding. +An identifier occurring as an atom is a name. See section Identifiers +and keywords for lexical definition and section Naming and binding for +documentation of naming and binding. When the name is bound to an object, evaluation of the atom yields that object. When a name is not bound, an attempt to evaluate it @@ -842,65 +842,19 @@ async def func(param1, param2): Python supports string and bytes literals and various numeric literals: - literal: strings | NUMBER + literal: stringliteral | bytesliteral + | integer | floatnumber | imagnumber Evaluation of a literal yields an object of the given type (string, bytes, integer, floating-point number, complex number) with the given value. The value may be approximated in the case of floating-point -and imaginary (complex) literals. See section Literals for details. -See section String literal concatenation for details on "strings". +and imaginary (complex) literals. See section Literals for details. 
All literals correspond to immutable data types, and hence the object’s identity is less important than its value. Multiple evaluations of literals with the same value (either the same occurrence in the program text or a different occurrence) may obtain the same object or a different object with the same value. - - -String literal concatenation -============================ - -Multiple adjacent string or bytes literals (delimited by whitespace), -possibly using different quoting conventions, are allowed, and their -meaning is the same as their concatenation: - - >>> "hello" 'world' - "helloworld" - -Formally: - - strings: ( STRING | fstring)+ | tstring+ - -This feature is defined at the syntactical level, so it only works -with literals. To concatenate string expressions at run time, the ‘+’ -operator may be used: - - >>> greeting = "Hello" - >>> space = " " - >>> name = "Blaise" - >>> print(greeting + space + name) # not: print(greeting space name) - Hello Blaise - -Literal concatenation can freely mix raw strings, triple-quoted -strings, and formatted string literals. For example: - - >>> "Hello" r', ' f"{name}!" - "Hello, Blaise!" - -This feature can be used to reduce the number of backslashes needed, -to split long strings conveniently across long lines, or even to add -comments to parts of strings. For example: - - re.compile("[A-Za-z_]" # letter or underscore - "[A-Za-z0-9_]*" # letter, digit or underscore - ) - -However, bytes literals may only be combined with other byte literals; -not with string literals of any kind. Also, template string literals -may only be combined with other template string literals: - - >>> t"Hello" t"{name}!" - Template(strings=('Hello', '!'), interpolations=(...)) ''', 'attribute-access': r'''Customizing attribute access **************************** @@ -1710,9 +1664,6 @@ class Foo: class Foo(object): pass -There may be one or more base classes; see Multiple inheritance below -for more information. 
- The class’s suite is then executed in a new execution frame (see Naming and binding), using a newly created local namespace and the original global namespace. (Usually, the suite contains mostly @@ -1776,115 +1727,6 @@ class attributes; they are shared by instances. Instance attributes **PEP 3129** - Class Decorators The proposal that added class decorators. Function and method decorators were introduced in **PEP 318**. - - -Multiple inheritance -==================== - -Python classes may have multiple base classes, a technique known as -*multiple inheritance*. The base classes are specified in the class -definition by listing them in parentheses after the class name, -separated by commas. For example, the following class definition: - - >>> class A: pass - >>> class B: pass - >>> class C(A, B): pass - -defines a class "C" that inherits from classes "A" and "B". - -The *method resolution order* (MRO) is the order in which base classes -are searched when looking up an attribute on a class. See The Python -2.3 Method Resolution Order for a description of how Python determines -the MRO for a class. - -Multiple inheritance is not always allowed. Attempting to define a -class with multiple inheritance will raise an error if one of the -bases does not allow subclassing, if a consistent MRO cannot be -created, if no valid metaclass can be determined, or if there is an -instance layout conflict. We’ll discuss each of these in turn. - -First, all base classes must allow subclassing. While most classes -allow subclassing, some built-in classes do not, such as "bool": - - >>> class SubBool(bool): # TypeError - ... pass - Traceback (most recent call last): - ... - TypeError: type 'bool' is not an acceptable base type - -In the resolved MRO of a class, the class’s bases appear in the order -they were specified in the class’s bases list. Additionally, the MRO -always lists a child class before any of its bases. 
A class definition -will fail if it is impossible to resolve a consistent MRO that -satisfies these rules from the list of bases provided: - - >>> class Base: pass - >>> class Child(Base): pass - >>> class Grandchild(Base, Child): pass # TypeError - Traceback (most recent call last): - ... - TypeError: Cannot create a consistent method resolution order (MRO) for bases Base, Child - -In the MRO of "Grandchild", "Base" must appear before "Child" because -it is first in the base class list, but it must also appear after -"Child" because it is a parent of "Child". This is a contradiction, so -the class cannot be defined. - -If some of the bases have a custom *metaclass*, the metaclass of the -resulting class is chosen among the metaclasses of the bases and the -explicitly specified metaclass of the child class. It must be a -metaclass that is a subclass of all other candidate metaclasses. If no -such metaclass exists among the candidates, the class cannot be -created, as explained in Determining the appropriate metaclass. - -Finally, the instance layouts of the bases must be compatible. This -means that it must be possible to compute a *solid base* for the -class. Exactly which classes are solid bases depends on the Python -implementation. - -**CPython implementation detail:** In CPython, a class is a solid base -if it has a nonempty "__slots__" definition. Many but not all classes -defined in C are also solid bases, including most builtins (such as -"int" or "BaseException") but excluding most concrete "Exception" -classes. Generally, a C class is a solid base if its underlying struct -is different in size from its base class. - -Every class has a solid base. "object", the base class, has itself as -its solid base. If there is a single base, the child class’s solid -base is that class if it is a solid base, or else the base class’s -solid base. If there are multiple bases, we first find the solid base -for each base class to produce a list of candidate solid bases. 
If -there is a unique solid base that is a subclass of all others, then -that class is the solid base. Otherwise, class creation fails. - -Example: - - >>> class Solid1: - ... __slots__ = ("solid1",) - >>> - >>> class Solid2: - ... __slots__ = ("solid2",) - >>> - >>> class SolidChild(Solid1): - ... __slots__ = ("solid_child",) - >>> - >>> class C1: # solid base is `object` - ... pass - >>> - >>> # OK: solid bases are `Solid1` and `object`, and `Solid1` is a subclass of `object`. - >>> class C2(Solid1, C1): # solid base is `Solid1` - ... pass - >>> - >>> # OK: solid bases are `SolidChild` and `Solid1`, and `SolidChild` is a subclass of `Solid1`. - >>> class C3(SolidChild, Solid1): # solid base is `SolidChild` - ... pass - >>> - >>> # Error: solid bases are `Solid1` and `Solid2`, but neither is a subclass of the other. - >>> class C4(Solid1, Solid2): # error: no single solid base - ... pass - Traceback (most recent call last): - ... - TypeError: multiple bases have instance lay-out conflict ''', 'comparisons': r'''Comparisons *********** @@ -2232,16 +2074,16 @@ class with multiple inheritance will raise an error if one of the The "for" statement is used to iterate over the elements of a sequence (such as a string, tuple or list) or other iterable object: - for_stmt: "for" target_list "in" starred_expression_list ":" suite + for_stmt: "for" target_list "in" starred_list ":" suite ["else" ":" suite] -The "starred_expression_list" expression is evaluated once; it should -yield an *iterable* object. An *iterator* is created for that -iterable. The first item provided by the iterator is then assigned to -the target list using the standard rules for assignments (see -Assignment statements), and the suite is executed. This repeats for -each item provided by the iterator. When the iterator is exhausted, -the suite in the "else" clause, if present, is executed, and the loop +The "starred_list" expression is evaluated once; it should yield an +*iterable* object. 
An *iterator* is created for that iterable. The +first item provided by the iterator is then assigned to the target +list using the standard rules for assignments (see Assignment +statements), and the suite is executed. This repeats for each item +provided by the iterator. When the iterator is exhausted, the suite +in the "else" clause, if present, is executed, and the loop terminates. A "break" statement executed in the first suite terminates the loop @@ -2430,8 +2272,7 @@ class with multiple inheritance will raise an error if one of the group types, because that would have ambiguous semantics. It is not possible to mix "except" and "except*" in the same "try". -The "break", "continue", and "return" statements cannot appear in an -"except*" clause. +"break", "continue" and "return" cannot appear in an "except*" clause. "else" clause @@ -2838,8 +2679,7 @@ def foo(): The rule "strings" and the token "NUMBER" are defined in the standard Python grammar. Triple-quoted strings are supported. Raw strings and -byte strings are supported. f-strings and t-strings are not -supported. +byte strings are supported. f-strings are not supported. The forms "signed_number '+' NUMBER" and "signed_number '-' NUMBER" are for expressing complex numbers; they require a real number on the @@ -3376,9 +3216,6 @@ class Foo: class Foo(object): pass -There may be one or more base classes; see Multiple inheritance below -for more information. - The class’s suite is then executed in a new execution frame (see Naming and binding), using a newly created local namespace and the original global namespace. (Usually, the suite contains mostly @@ -3444,115 +3281,6 @@ class attributes; they are shared by instances. Instance attributes decorators were introduced in **PEP 318**. -Multiple inheritance --------------------- - -Python classes may have multiple base classes, a technique known as -*multiple inheritance*. 
The base classes are specified in the class -definition by listing them in parentheses after the class name, -separated by commas. For example, the following class definition: - - >>> class A: pass - >>> class B: pass - >>> class C(A, B): pass - -defines a class "C" that inherits from classes "A" and "B". - -The *method resolution order* (MRO) is the order in which base classes -are searched when looking up an attribute on a class. See The Python -2.3 Method Resolution Order for a description of how Python determines -the MRO for a class. - -Multiple inheritance is not always allowed. Attempting to define a -class with multiple inheritance will raise an error if one of the -bases does not allow subclassing, if a consistent MRO cannot be -created, if no valid metaclass can be determined, or if there is an -instance layout conflict. We’ll discuss each of these in turn. - -First, all base classes must allow subclassing. While most classes -allow subclassing, some built-in classes do not, such as "bool": - - >>> class SubBool(bool): # TypeError - ... pass - Traceback (most recent call last): - ... - TypeError: type 'bool' is not an acceptable base type - -In the resolved MRO of a class, the class’s bases appear in the order -they were specified in the class’s bases list. Additionally, the MRO -always lists a child class before any of its bases. A class definition -will fail if it is impossible to resolve a consistent MRO that -satisfies these rules from the list of bases provided: - - >>> class Base: pass - >>> class Child(Base): pass - >>> class Grandchild(Base, Child): pass # TypeError - Traceback (most recent call last): - ... - TypeError: Cannot create a consistent method resolution order (MRO) for bases Base, Child - -In the MRO of "Grandchild", "Base" must appear before "Child" because -it is first in the base class list, but it must also appear after -"Child" because it is a parent of "Child". This is a contradiction, so -the class cannot be defined. 
- -If some of the bases have a custom *metaclass*, the metaclass of the -resulting class is chosen among the metaclasses of the bases and the -explicitly specified metaclass of the child class. It must be a -metaclass that is a subclass of all other candidate metaclasses. If no -such metaclass exists among the candidates, the class cannot be -created, as explained in Determining the appropriate metaclass. - -Finally, the instance layouts of the bases must be compatible. This -means that it must be possible to compute a *solid base* for the -class. Exactly which classes are solid bases depends on the Python -implementation. - -**CPython implementation detail:** In CPython, a class is a solid base -if it has a nonempty "__slots__" definition. Many but not all classes -defined in C are also solid bases, including most builtins (such as -"int" or "BaseException") but excluding most concrete "Exception" -classes. Generally, a C class is a solid base if its underlying struct -is different in size from its base class. - -Every class has a solid base. "object", the base class, has itself as -its solid base. If there is a single base, the child class’s solid -base is that class if it is a solid base, or else the base class’s -solid base. If there are multiple bases, we first find the solid base -for each base class to produce a list of candidate solid bases. If -there is a unique solid base that is a subclass of all others, then -that class is the solid base. Otherwise, class creation fails. - -Example: - - >>> class Solid1: - ... __slots__ = ("solid1",) - >>> - >>> class Solid2: - ... __slots__ = ("solid2",) - >>> - >>> class SolidChild(Solid1): - ... __slots__ = ("solid_child",) - >>> - >>> class C1: # solid base is `object` - ... pass - >>> - >>> # OK: solid bases are `Solid1` and `object`, and `Solid1` is a subclass of `object`. - >>> class C2(Solid1, C1): # solid base is `Solid1` - ... 
pass - >>> - >>> # OK: solid bases are `SolidChild` and `Solid1`, and `SolidChild` is a subclass of `Solid1`. - >>> class C3(SolidChild, Solid1): # solid base is `SolidChild` - ... pass - >>> - >>> # Error: solid bases are `Solid1` and `Solid2`, but neither is a subclass of the other. - >>> class C4(Solid1, Solid2): # error: no single solid base - ... pass - Traceback (most recent call last): - ... - TypeError: multiple bases have instance lay-out conflict - - Coroutines ========== @@ -3926,7 +3654,7 @@ def f() -> annotation: ... introspects and uses the annotations (such as "dataclasses" or "functools.singledispatch()"). -By default, annotations are lazily evaluated in an annotation scope. +By default, annotations are lazily evaluated in a annotation scope. This means that they are not evaluated when the code containing the annotation is evaluated. Instead, the interpreter saves information that can be used to evaluate the annotation later if requested. The @@ -3940,12 +3668,6 @@ def f() -> annotation: ... >>> f.__annotations__ {'param': 'annotation'} -This future statement will be deprecated and removed in a future -version of Python, but not before Python 3.13 reaches its end of life -(see **PEP 749**). When it is used, introspection tools like -"annotationlib.get_annotations()" and "typing.get_type_hints()" are -less likely to be able to resolve annotations at runtime. - -[ Footnotes ]- [1] The exception is propagated to the invocation stack unless there @@ -4497,7 +4219,7 @@ def double(x): You can also invoke "pdb" from the command line to debug other scripts. For example: - python -m pdb [-c command] (-m module | -p pid | pyfile) [args ...] + python -m pdb [-c command] (-m module | pyfile) [args ...] When invoked as a module, pdb will automatically enter post-mortem debugging if the program being debugged exits abnormally. After post- @@ -4521,23 +4243,6 @@ def double(x): Changed in version 3.7: Added the "-m" option. 
--p, --pid - - Attach to the process with the specified PID. - - Added in version 3.14. - -To attach to a running Python process for remote debugging, use the -"-p" or "--pid" option with the target process’s PID: - - python -m pdb -p 1234 - -Note: - - Attaching to a process that is blocked in a system call or waiting - for I/O will only work once the next bytecode instruction is - executed or when the process receives a signal. - Typical usage to execute a statement under control of the debugger is: >>> import pdb @@ -5759,7 +5464,7 @@ class of the instance or a *non-virtual base class* thereof. The 'exprlists': r'''Expression lists **************** - starred_expression: "*" or_expr | expression + starred_expression: ["*"] or_expr flexible_expression: assignment_expression | starred_expression flexible_expression_list: flexible_expression ("," flexible_expression)* [","] starred_expression_list: starred_expression ("," starred_expression)* [","] @@ -5791,54 +5496,25 @@ class of the instance or a *non-virtual base class* thereof. The 'floating': r'''Floating-point literals *********************** -Floating-point (float) literals, such as "3.14" or "1.5", denote -approximations of real numbers. - -They consist of *integer* and *fraction* parts, each composed of -decimal digits. The parts are separated by a decimal point, ".": - - 2.71828 - 4.0 - -Unlike in integer literals, leading zeros are allowed in the numeric -parts. For example, "077.010" is legal, and denotes the same number as -"77.10". - -As in integer literals, single underscores may occur between digits to -help readability: - - 96_485.332_123 - 3.14_15_93 - -Either of these parts, but not both, can be empty. For example: - - 10. # (equivalent to 10.0) - .001 # (equivalent to 0.001) - -Optionally, the integer and fraction may be followed by an *exponent*: -the letter "e" or "E", followed by an optional sign, "+" or "-", and a -number in the same format as the integer and fraction parts. 
The "e" -or "E" represents “times ten raised to the power of”: - - 1.0e3 # (represents 1.0×10³, or 1000.0) - 1.166e-5 # (represents 1.166×10⁻⁵, or 0.00001166) - 6.02214076e+23 # (represents 6.02214076×10²³, or 602214076000000000000000.) +Floating-point literals are described by the following lexical +definitions: -In floats with only integer and exponent parts, the decimal point may -be omitted: + floatnumber: pointfloat | exponentfloat + pointfloat: [digitpart] fraction | digitpart "." + exponentfloat: (digitpart | pointfloat) exponent + digitpart: digit (["_"] digit)* + fraction: "." digitpart + exponent: ("e" | "E") ["+" | "-"] digitpart - 1e3 # (equivalent to 1.e3 and 1.0e3) - 0e0 # (equivalent to 0.) +Note that the integer and exponent parts are always interpreted using +radix 10. For example, "077e010" is legal, and denotes the same number +as "77e10". The allowed range of floating-point literals is +implementation-dependent. As in integer literals, underscores are +supported for digit grouping. -Formally, floating-point literals are described by the following -lexical definitions: +Some examples of floating-point literals: - floatnumber: - | digitpart "." [digitpart] [exponent] - | "." digitpart [exponent] - | digitpart exponent - digitpart: digit (["_"] digit)* - exponent: ("e" | "E") ["+" | "-"] digitpart + 3.14 10. .001 1e100 3.14e-10 0e0 3.14_15_93 Changed in version 3.6: Underscores are now allowed for grouping purposes in literals. @@ -5849,16 +5525,16 @@ class of the instance or a *non-virtual base class* thereof. The The "for" statement is used to iterate over the elements of a sequence (such as a string, tuple or list) or other iterable object: - for_stmt: "for" target_list "in" starred_expression_list ":" suite + for_stmt: "for" target_list "in" starred_list ":" suite ["else" ":" suite] -The "starred_expression_list" expression is evaluated once; it should -yield an *iterable* object. An *iterator* is created for that -iterable. 
The first item provided by the iterator is then assigned to -the target list using the standard rules for assignments (see -Assignment statements), and the suite is executed. This repeats for -each item provided by the iterator. When the iterator is exhausted, -the suite in the "else" clause, if present, is executed, and the loop +The "starred_list" expression is evaluated once; it should yield an +*iterable* object. An *iterator* is created for that iterable. The +first item provided by the iterator is then assigned to the target +list using the standard rules for assignments (see Assignment +statements), and the suite is executed. This repeats for each item +provided by the iterator. When the iterator is exhausted, the suite +in the "else" clause, if present, is executed, and the loop terminates. A "break" statement executed in the first suite terminates the loop @@ -5892,9 +5568,9 @@ class of the instance or a *non-virtual base class* thereof. The The "str.format()" method and the "Formatter" class share the same syntax for format strings (although in the case of "Formatter", subclasses can define their own format string syntax). The syntax is -related to that of formatted string literals and template string -literals, but it is less sophisticated and, in particular, does not -support arbitrary expressions in interpolations. +related to that of formatted string literals, but it is less +sophisticated and, in particular, does not support arbitrary +expressions. Format strings contain “replacement fields” surrounded by curly braces "{}". Anything that is not contained in braces is considered literal @@ -5994,9 +5670,9 @@ class of the instance or a *non-virtual base class* thereof. The “Format specifications” are used within replacement fields contained within a format string to define how individual values are presented -(see Format String Syntax, f-strings, and t-strings). They can also be -passed directly to the built-in "format()" function. 
Each formattable -type may define how the format specification is to be interpreted. +(see Format String Syntax and f-strings). They can also be passed +directly to the built-in "format()" function. Each formattable type +may define how the format specification is to be interpreted. Most built-in types implement the following options for format specifications, although some of the formatting options are only @@ -6015,7 +5691,7 @@ class of the instance or a *non-virtual base class* thereof. The sign: "+" | "-" | " " width_and_precision: [width_with_grouping][precision_with_grouping] width_with_grouping: [width][grouping] - precision_with_grouping: "." [precision][grouping] | "." grouping + precision_with_grouping: "." [precision][grouping] width: digit+ precision: digit+ grouping: "," | "_" @@ -6653,92 +6329,73 @@ class body. A "SyntaxError" is raised if a variable is used or to help avoid name clashes between “private” attributes of base and derived classes. See section Identifiers (Names). ''', - 'identifiers': r'''Names (identifiers and keywords) -******************************** - -"NAME" tokens represent *identifiers*, *keywords*, and *soft -keywords*. - -Within the ASCII range (U+0001..U+007F), the valid characters for -names include the uppercase and lowercase letters ("A-Z" and "a-z"), -the underscore "_" and, except for the first character, the digits "0" -through "9". - -Names must contain at least one character, but have no upper length -limit. Case is significant. - -Besides "A-Z", "a-z", "_" and "0-9", names can also use “letter-like” -and “number-like” characters from outside the ASCII range, as detailed -below. - -All identifiers are converted into the normalization form NFKC while -parsing; comparison of identifiers is based on NFKC. 
+ 'identifiers': r'''Identifiers and keywords +************************ -Formally, the first character of a normalized identifier must belong -to the set "id_start", which is the union of: +Identifiers (also referred to as *names*) are described by the +following lexical definitions. -* Unicode category "<Lu>" - uppercase letters (includes "A" to "Z") +The syntax of identifiers in Python is based on the Unicode standard +annex UAX-31, with elaboration and changes as defined below; see also +**PEP 3131** for further details. -* Unicode category "<Ll>" - lowercase letters (includes "a" to "z") +Within the ASCII range (U+0001..U+007F), the valid characters for +identifiers include the uppercase and lowercase letters "A" through +"Z", the underscore "_" and, except for the first character, the +digits "0" through "9". Python 3.0 introduced additional characters +from outside the ASCII range (see **PEP 3131**). For these +characters, the classification uses the version of the Unicode +Character Database as included in the "unicodedata" module. -* Unicode category "<Lt>" - titlecase letters +Identifiers are unlimited in length. Case is significant.
-* Unicode category "<Lm>" - modifier letters + identifier: xid_start xid_continue* + id_start: <all characters in general categories Lu, Ll, Lt, Lm, Lo, Nl, the underscore, and characters with the Other_ID_Start property> + id_continue: <all characters in id_start, plus characters in the categories Mn, Mc, Nd, Pc and others with the Other_ID_Continue property> + xid_start: <all characters in id_start whose NFKC normalization is in "id_start xid_continue*"> + xid_continue: <all characters in id_continue whose NFKC normalization is in "id_continue*"> -* Unicode category "<Lo>" - other letters +The Unicode category codes mentioned above stand for: -* Unicode category "<Nl>" - letter numbers +* *Lu* - uppercase letters -* {""_""} - the underscore +* *Ll* - lowercase letters -* "<Other_ID_Start>" - an explicit set of characters in PropList.txt - to support backwards compatibility +* *Lt* - titlecase letters -The remaining characters must belong to the set "id_continue", which -is the union of: +* *Lm* - modifier letters -* all characters in "id_start" +* *Lo* - other letters -* Unicode category "<Nd>" - decimal numbers (includes "0" to "9") +* *Nl* - letter numbers -* Unicode category "<Pc>" - connector punctuations +* *Mn* - nonspacing marks -* Unicode category "<Mn>" - nonspacing marks +* *Mc* - spacing combining marks -* Unicode category "<Mc>" - spacing combining marks +* *Nd* - decimal numbers -* "<Other_ID_Continue>" - another explicit set of characters in - PropList.txt to support backwards compatibility +* *Pc* - connector punctuations -Unicode categories use the version of the Unicode Character Database -as included in the "unicodedata" module. +* *Other_ID_Start* - explicit list of characters in PropList.txt to + support backwards compatibility -These sets are based on the Unicode standard annex UAX-31. See also -**PEP 3131** for further details. +* *Other_ID_Continue* - likewise -Even more formally, names are described by the following lexical -definitions: - - NAME: xid_start xid_continue* - id_start: <Lu> | <Ll> | <Lt> | <Lm> | <Lo> | <Nl> | "_" | <Other_ID_Start> - id_continue: id_start | <Nd> | <Pc> | <Mn> | <Mc> | <Other_ID_Continue> - xid_start: <all characters in id_start whose NFKC normalization is in (id_start xid_continue*)> - xid_continue: <all characters in id_continue whose NFKC normalization is in (id_continue*)> - identifier: <NAME, except keywords> +All identifiers are converted into the normal form NFKC while parsing; +comparison of identifiers is based on NFKC.
+A non-normative HTML file listing all valid identifier characters for +Unicode 16.0.0 can be found at +https://www.unicode.org/Public/16.0.0/ucd/DerivedCoreProperties.txt Keywords ======== -The following names are used as reserved words, or *keywords* of the -language, and cannot be used as ordinary identifiers. They must be -spelled exactly as written here: +The following identifiers are used as reserved words, or *keywords* of +the language, and cannot be used as ordinary identifiers. They must +be spelled exactly as written here: False await else import pass None break except in raise @@ -6754,20 +6411,18 @@ class body. A "SyntaxError" is raised if a variable is used or Added in version 3.10. -Some names are only reserved under specific contexts. These are known -as *soft keywords*: - -* "match", "case", and "_", when used in the "match" statement. - -* "type", when used in the "type" statement. - -These syntactically act as keywords in their specific contexts, but -this distinction is done at the parser level, not when tokenizing. +Some identifiers are only reserved under specific contexts. These are +known as *soft keywords*. The identifiers "match", "case", "type" and +"_" can syntactically act as keywords in certain contexts, but this +distinction is done at the parser level, not when tokenizing. As soft keywords, their use in the grammar is possible while still preserving compatibility with existing code that uses these names as identifier names. +"match", "case", and "_" are used in the "match" statement. "type" is +used in the "type" statement. + Changed in version 3.12: "type" is now a soft keyword. @@ -6833,53 +6488,17 @@ class body. A "SyntaxError" is raised if a variable is used or 'imaginary': r'''Imaginary literals ****************** -Python has complex number objects, but no complex literals. Instead, -*imaginary literals* denote complex numbers with a zero real part. 
- -For example, in math, the complex number 3+4.2*i* is written as the -real number 3 added to the imaginary number 4.2*i*. Python uses a -similar syntax, except the imaginary unit is written as "j" rather -than *i*: - - 3+4.2j - -This is an expression composed of the integer literal "3", the -operator ‘"+"’, and the imaginary literal "4.2j". Since these are -three separate tokens, whitespace is allowed between them: +Imaginary literals are described by the following lexical definitions: - 3 + 4.2j - -No whitespace is allowed *within* each token. In particular, the "j" -suffix, may not be separated from the number before it. - -The number before the "j" has the same syntax as a floating-point -literal. Thus, the following are valid imaginary literals: - - 4.2j - 3.14j - 10.j - .001j - 1e100j - 3.14e-10j - 3.14_15_93j - -Unlike in a floating-point literal the decimal point can be omitted if -the imaginary number only has an integer part. The number is still -evaluated as a floating-point number, not an integer: - - 10j - 0j - 1000000000000000000000000j # equivalent to 1e+24j - -The "j" suffix is case-insensitive. That means you can use "J" -instead: - - 3.14J # equivalent to 3.14j + imagnumber: (floatnumber | digitpart) ("j" | "J") -Formally, imaginary literals are described by the following lexical -definition: +An imaginary literal yields a complex number with a real part of 0.0. +Complex numbers are represented as a pair of floating-point numbers +and have the same restrictions on their range. To create a complex +number with a nonzero real part, add a floating-point number to it, +e.g., "(3+4j)". Some examples of imaginary literals: - imagnumber: (floatnumber | digitpart) ("j" | "J") + 3.14j 10.j 10j .001j 1e100j 3.14e-10j 3.14_15_93j ''', 'import': r'''The "import" statement ********************** @@ -7121,62 +6740,37 @@ class body. 
A "SyntaxError" is raised if a variable is used or 'integers': r'''Integer literals **************** -Integer literals denote whole numbers. For example: - - 7 - 3 - 2147483647 - -There is no limit for the length of integer literals apart from what -can be stored in available memory: - - 7922816251426433759354395033679228162514264337593543950336 - -Underscores can be used to group digits for enhanced readability, and -are ignored for determining the numeric value of the literal. For -example, the following literals are equivalent: - - 100_000_000_000 - 100000000000 - 1_00_00_00_00_000 - -Underscores can only occur between digits. For example, "_123", -"321_", and "123__321" are *not* valid literals. - -Integers can be specified in binary (base 2), octal (base 8), or -hexadecimal (base 16) using the prefixes "0b", "0o" and "0x", -respectively. Hexadecimal digits 10 through 15 are represented by -letters "A"-"F", case-insensitive. For example: - - 0b100110111 - 0b_1110_0101 - 0o177 - 0o377 - 0xdeadbeef - 0xDead_Beef +Integer literals are described by the following lexical definitions: -An underscore can follow the base specifier. For example, "0x_1f" is a -valid literal, but "0_x1f" and "0x__1f" are not. - -Leading zeros in a non-zero decimal number are not allowed. For -example, "0123" is not a valid literal. This is for disambiguation -with C-style octal literals, which Python used before version 3.0. 
- -Formally, integer literals are described by the following lexical -definitions: - - integer: decinteger | bininteger | octinteger | hexinteger | zerointeger - decinteger: nonzerodigit (["_"] digit)* + integer: decinteger | bininteger | octinteger | hexinteger + decinteger: nonzerodigit (["_"] digit)* | "0"+ (["_"] "0")* bininteger: "0" ("b" | "B") (["_"] bindigit)+ octinteger: "0" ("o" | "O") (["_"] octdigit)+ hexinteger: "0" ("x" | "X") (["_"] hexdigit)+ - zerointeger: "0"+ (["_"] "0")* nonzerodigit: "1"..."9" digit: "0"..."9" bindigit: "0" | "1" octdigit: "0"..."7" hexdigit: digit | "a"..."f" | "A"..."F" +There is no limit for the length of integer literals apart from what +can be stored in available memory. + +Underscores are ignored for determining the numeric value of the +literal. They can be used to group digits for enhanced readability. +One underscore can occur between digits, and after base specifiers +like "0x". + +Note that leading zeros in a non-zero decimal number are not allowed. +This is for disambiguation with C-style octal literals, which Python +used before version 3.0. + +Some examples of integer literals: + + 7 2147483647 0o177 0b100110111 + 3 79228162514264337593543950336 0o377 0xdeadbeef + 100_000_000_000 0b_1110_0101 + Changed in version 3.6: Underscores are now allowed for grouping purposes in literals. ''', @@ -7692,190 +7286,14 @@ class body. A "SyntaxError" is raised if a variable is used or 'numbers': r'''Numeric literals **************** -"NUMBER" tokens represent numeric literals, of which there are three -types: integers, floating-point numbers, and imaginary numbers. - - NUMBER: integer | floatnumber | imagnumber - -The numeric value of a numeric literal is the same as if it were -passed as a string to the "int", "float" or "complex" class -constructor, respectively. Note that not all valid inputs for those -constructors are also valid literals. 
- -Numeric literals do not include a sign; a phrase like "-1" is actually -an expression composed of the unary operator ‘"-"’ and the literal -"1". - - -Integer literals -================ - -Integer literals denote whole numbers. For example: - - 7 - 3 - 2147483647 - -There is no limit for the length of integer literals apart from what -can be stored in available memory: - - 7922816251426433759354395033679228162514264337593543950336 - -Underscores can be used to group digits for enhanced readability, and -are ignored for determining the numeric value of the literal. For -example, the following literals are equivalent: - - 100_000_000_000 - 100000000000 - 1_00_00_00_00_000 - -Underscores can only occur between digits. For example, "_123", -"321_", and "123__321" are *not* valid literals. - -Integers can be specified in binary (base 2), octal (base 8), or -hexadecimal (base 16) using the prefixes "0b", "0o" and "0x", -respectively. Hexadecimal digits 10 through 15 are represented by -letters "A"-"F", case-insensitive. For example: - - 0b100110111 - 0b_1110_0101 - 0o177 - 0o377 - 0xdeadbeef - 0xDead_Beef - -An underscore can follow the base specifier. For example, "0x_1f" is a -valid literal, but "0_x1f" and "0x__1f" are not. - -Leading zeros in a non-zero decimal number are not allowed. For -example, "0123" is not a valid literal. This is for disambiguation -with C-style octal literals, which Python used before version 3.0. 
- -Formally, integer literals are described by the following lexical -definitions: - - integer: decinteger | bininteger | octinteger | hexinteger | zerointeger - decinteger: nonzerodigit (["_"] digit)* - bininteger: "0" ("b" | "B") (["_"] bindigit)+ - octinteger: "0" ("o" | "O") (["_"] octdigit)+ - hexinteger: "0" ("x" | "X") (["_"] hexdigit)+ - zerointeger: "0"+ (["_"] "0")* - nonzerodigit: "1"..."9" - digit: "0"..."9" - bindigit: "0" | "1" - octdigit: "0"..."7" - hexdigit: digit | "a"..."f" | "A"..."F" - -Changed in version 3.6: Underscores are now allowed for grouping -purposes in literals. - - -Floating-point literals -======================= - -Floating-point (float) literals, such as "3.14" or "1.5", denote -approximations of real numbers. - -They consist of *integer* and *fraction* parts, each composed of -decimal digits. The parts are separated by a decimal point, ".": - - 2.71828 - 4.0 - -Unlike in integer literals, leading zeros are allowed in the numeric -parts. For example, "077.010" is legal, and denotes the same number as -"77.10". - -As in integer literals, single underscores may occur between digits to -help readability: - - 96_485.332_123 - 3.14_15_93 - -Either of these parts, but not both, can be empty. For example: - - 10. # (equivalent to 10.0) - .001 # (equivalent to 0.001) - -Optionally, the integer and fraction may be followed by an *exponent*: -the letter "e" or "E", followed by an optional sign, "+" or "-", and a -number in the same format as the integer and fraction parts. The "e" -or "E" represents “times ten raised to the power of”: - - 1.0e3 # (represents 1.0×10³, or 1000.0) - 1.166e-5 # (represents 1.166×10⁻⁵, or 0.00001166) - 6.02214076e+23 # (represents 6.02214076×10²³, or 602214076000000000000000.) - -In floats with only integer and exponent parts, the decimal point may -be omitted: - - 1e3 # (equivalent to 1.e3 and 1.0e3) - 0e0 # (equivalent to 0.) 
- -Formally, floating-point literals are described by the following -lexical definitions: - - floatnumber: - | digitpart "." [digitpart] [exponent] - | "." digitpart [exponent] - | digitpart exponent - digitpart: digit (["_"] digit)* - exponent: ("e" | "E") ["+" | "-"] digitpart - -Changed in version 3.6: Underscores are now allowed for grouping -purposes in literals. - - -Imaginary literals -================== - -Python has complex number objects, but no complex literals. Instead, -*imaginary literals* denote complex numbers with a zero real part. - -For example, in math, the complex number 3+4.2*i* is written as the -real number 3 added to the imaginary number 4.2*i*. Python uses a -similar syntax, except the imaginary unit is written as "j" rather -than *i*: +There are three types of numeric literals: integers, floating-point +numbers, and imaginary numbers. There are no complex literals +(complex numbers can be formed by adding a real number and an +imaginary number). - 3+4.2j - -This is an expression composed of the integer literal "3", the -operator ‘"+"’, and the imaginary literal "4.2j". Since these are -three separate tokens, whitespace is allowed between them: - - 3 + 4.2j - -No whitespace is allowed *within* each token. In particular, the "j" -suffix, may not be separated from the number before it. - -The number before the "j" has the same syntax as a floating-point -literal. Thus, the following are valid imaginary literals: - - 4.2j - 3.14j - 10.j - .001j - 1e100j - 3.14e-10j - 3.14_15_93j - -Unlike in a floating-point literal the decimal point can be omitted if -the imaginary number only has an integer part. The number is still -evaluated as a floating-point number, not an integer: - - 10j - 0j - 1000000000000000000000000j # equivalent to 1e+24j - -The "j" suffix is case-insensitive. 
That means you can use "J" -instead: - - 3.14J # equivalent to 3.14j - -Formally, imaginary literals are described by the following lexical -definition: - - imagnumber: (floatnumber | digitpart) ("j" | "J") +Note that numeric literals do not include a sign; a phrase like "-1" +is actually an expression composed of the unary operator ‘"-"’ and the +literal "1". ''', 'numeric-types': r'''Emulating numeric types *********************** @@ -7945,9 +7363,9 @@ class that has an "__rsub__()" method, "type(y).__rsub__(y, x)" is third argument if the three-argument version of the built-in "pow()" function is to be supported. - Changed in version 3.14: Three-argument "pow()" now tries calling - "__rpow__()" if necessary. Previously it was only called in two- - argument "pow()" and the binary power operator. + Changed in version 3.14.0a7 (unreleased): Three-argument "pow()" + now tries calling "__rpow__()" if necessary. Previously it was only + called in two-argument "pow()" and the binary power operator. Note: @@ -9983,9 +9401,9 @@ class that has an "__rsub__()" method, "type(y).__rsub__(y, x)" is third argument if the three-argument version of the built-in "pow()" function is to be supported. - Changed in version 3.14: Three-argument "pow()" now tries calling - "__rpow__()" if necessary. Previously it was only called in two- - argument "pow()" and the binary power operator. + Changed in version 3.14.0a7 (unreleased): Three-argument "pow()" + now tries calling "__rpow__()" if necessary. Previously it was only + called in two-argument "pow()" and the binary power operator. Note: @@ -10353,14 +9771,7 @@ class is used in a class pattern with positional arguments, each Return centered in a string of length *width*. Padding is done using the specified *fillchar* (default is an ASCII space). The original string is returned if *width* is less than or equal to - "len(s)".
For example: - - >>> 'Python'.center(10) - ' Python ' - >>> 'Python'.center(10, '-') - '--Python--' - >>> 'Python'.center(4) - 'Python' + "len(s)". str.count(sub[, start[, end]]) @@ -10369,18 +9780,7 @@ class is used in a class pattern with positional arguments, each *end* are interpreted as in slice notation. If *sub* is empty, returns the number of empty strings between - characters which is the length of the string plus one. For example: - - >>> 'spam, spam, spam'.count('spam') - 3 - >>> 'spam, spam, spam'.count('spam', 5) - 2 - >>> 'spam, spam, spam'.count('spam', 5, 10) - 1 - >>> 'spam, spam, spam'.count('eggs') - 0 - >>> 'spam, spam, spam'.count('') - 17 + characters which is the length of the string plus one. str.encode(encoding='utf-8', errors='strict') @@ -10397,13 +9797,7 @@ class is used in a class pattern with positional arguments, each For performance reasons, the value of *errors* is not checked for validity unless an encoding error actually occurs, Python - Development Mode is enabled or a debug build is used. For example: - - >>> encoded_str_to_bytes = 'Python'.encode() - >>> type(encoded_str_to_bytes) - - >>> encoded_str_to_bytes - b'Python' + Development Mode is enabled or a debug build is used. Changed in version 3.1: Added support for keyword arguments. @@ -10416,19 +9810,6 @@ class is used in a class pattern with positional arguments, each otherwise return "False". *suffix* can also be a tuple of suffixes to look for. With optional *start*, test beginning at that position. With optional *end*, stop comparing at that position. - Using *start* and *end* is equivalent to - "str[start:end].endswith(suffix)". For example: - - >>> 'Python'.endswith('on') - True - >>> 'a tuple of suffixes'.endswith(('at', 'in')) - False - >>> 'a tuple of suffixes'.endswith(('at', 'es')) - True - >>> 'Python is amazing'.endswith('is', 0, 9) - True - - See also "startswith()" and "removesuffix()". 
str.expandtabs(tabsize=8) @@ -10444,15 +9825,12 @@ class is used in a class pattern with positional arguments, each ("\n") or return ("\r"), it is copied and the current column is reset to zero. Any other character is copied unchanged and the current column is incremented by one regardless of how the - character is represented when printed. For example: + character is represented when printed. - >>> '01\t012\t0123\t01234'.expandtabs() - '01 012 0123 01234' - >>> '01\t012\t0123\t01234'.expandtabs(4) - '01 012 0123 01234' - >>> print('01\t012\n0123\t01234'.expandtabs(4)) - 01 012 - 0123 01234 + >>> '01\t012\t0123\t01234'.expandtabs() + '01 012 0123 01234' + >>> '01\t012\t0123\t01234'.expandtabs(4) + '01 012 0123 01234' str.find(sub[, start[, end]]) @@ -10567,7 +9945,7 @@ class is used in a class pattern with positional arguments, each str.isidentifier() Return "True" if the string is a valid identifier according to the - language definition, section Names (identifiers and keywords). + language definition, section Identifiers and keywords. "keyword.iskeyword()" can be used to test whether string "s" is a reserved identifier, such as "def" and "class". @@ -10599,8 +9977,8 @@ class is used in a class pattern with positional arguments, each str.isprintable() - Return "True" if all characters in the string are printable, - "False" if it contains at least one non-printable character. + Return true if all characters in the string are printable, false if + it contains at least one non-printable character. Here “printable” means the character is suitable for "repr()" to use in its output; “non-printable” means that "repr()" on built-in @@ -10847,18 +10225,6 @@ class is used in a class pattern with positional arguments, each >>> ' 1 2 3 '.split() ['1', '2', '3'] - If *sep* is not specified or is "None" and *maxsplit* is "0", only - leading runs of consecutive whitespace are considered. 
- - For example: - - >>> "".split(None, 0) - [] - >>> " ".split(None, 0) - [] - >>> " foo ".split(maxsplit=0) - ['foo '] - str.splitlines(keepends=False) Return a list of the lines in the string, breaking at line @@ -11037,313 +10403,174 @@ class is used in a class pattern with positional arguments, each 'strings': '''String and Bytes literals ************************* -String literals are text enclosed in single quotes ("'") or double -quotes ("""). For example: - - "spam" - 'eggs' - -The quote used to start the literal also terminates it, so a string -literal can only contain the other quote (except with escape -sequences, see below). For example: - - 'Say "Hello", please.' - "Don't do that!" - -Except for this limitation, the choice of quote character ("'" or """) -does not affect how the literal is parsed. - -Inside a string literal, the backslash ("\\") character introduces an -*escape sequence*, which has special meaning depending on the -character after the backslash. For example, "\\"" denotes the double -quote character, and does *not* end the string: - - >>> print("Say \\"Hello\\" to everyone!") - Say "Hello" to everyone! - -See escape sequences below for a full list of such sequences, and more -details. - - -Triple-quoted strings -===================== - -Strings can also be enclosed in matching groups of three single or -double quotes. 
These are generally referred to as *triple-quoted -strings*: - - """This is a triple-quoted string.""" - -In triple-quoted literals, unescaped quotes are allowed (and are -retained), except that three unescaped quotes in a row terminate the -literal, if they are of the same kind ("'" or """) used at the start: - - """This string has "quotes" inside.""" - -Unescaped newlines are also allowed and retained: - - \'\'\'This triple-quoted string - continues on the next line.\'\'\' - - -String prefixes -=============== - -String literals can have an optional *prefix* that influences how the -content of the literal is parsed, for example: - - b"data" - f'{result=}' - -The allowed prefixes are: - -* "b": Bytes literal - -* "r": Raw string - -* "f": Formatted string literal (“f-string”) - -* "t": Template string literal (“t-string”) - -* "u": No effect (allowed for backwards compatibility) - -See the linked sections for details on each type. +String literals are described by the following lexical definitions: + + stringliteral: [stringprefix](shortstring | longstring) + stringprefix: "r" | "u" | "R" | "U" | "f" | "F" + | "fr" | "Fr" | "fR" | "FR" | "rf" | "rF" | "Rf" | "RF" + shortstring: "'" shortstringitem* "'" | '"' shortstringitem* '"' + longstring: "\'\'\'" longstringitem* "\'\'\'" | '"""' longstringitem* '"""' + shortstringitem: shortstringchar | stringescapeseq + longstringitem: longstringchar | stringescapeseq + shortstringchar: <any source character except "\\" or newline or the quote> + longstringchar: <any source character except "\\"> + stringescapeseq: "\\" <any source character> + + bytesliteral: bytesprefix(shortbytes | longbytes) + bytesprefix: "b" | "B" | "br" | "Br" | "bR" | "BR" | "rb" | "rB" | "Rb" | "RB" + shortbytes: "'" shortbytesitem* "'" | '"' shortbytesitem* '"' + longbytes: "\'\'\'" longbytesitem* "\'\'\'" | '"""' longbytesitem* '"""' + shortbytesitem: shortbyteschar | bytesescapeseq + longbytesitem: longbyteschar | bytesescapeseq + shortbyteschar: <any ASCII character except "\\" or newline or the quote> + longbyteschar: <any ASCII character except "\\"> + bytesescapeseq: "\\" <any ASCII character> + +One syntactic restriction not indicated by these productions is that
+whitespace is not allowed between the "stringprefix" or "bytesprefix" +and the rest of the literal. The source character set is defined by +the encoding declaration; it is UTF-8 if no encoding declaration is +given in the source file; see section Encoding declarations. + +In plain English: Both types of literals can be enclosed in matching +single quotes ("'") or double quotes ("""). They can also be enclosed +in matching groups of three single or double quotes (these are +generally referred to as *triple-quoted strings*). The backslash ("\\") +character is used to give special meaning to otherwise ordinary +characters like "n", which means ‘newline’ when escaped ("\\n"). It can +also be used to escape characters that otherwise have a special +meaning, such as newline, backslash itself, or the quote character. +See escape sequences below for examples. + +Bytes literals are always prefixed with "'b'" or "'B'"; they produce +an instance of the "bytes" type instead of the "str" type. They may +only contain ASCII characters; bytes with a numeric value of 128 or +greater must be expressed with escapes. -Prefixes are case-insensitive (for example, ‘"B"’ works the same as -‘"b"’). The ‘"r"’ prefix can be combined with ‘"f"’, ‘"t"’ or ‘"b"’, -so ‘"fr"’, ‘"rf"’, ‘"tr"’, ‘"rt"’, ‘"br"’, and ‘"rb"’ are also valid -prefixes. +Both string and bytes literals may optionally be prefixed with a +letter "'r'" or "'R'"; such constructs are called *raw string +literals* and *raw bytes literals* respectively and treat backslashes +as literal characters. As a result, in raw string literals, "'\\U'" +and "'\\u'" escapes are not treated specially. Added in version 3.3: The "'rb'" prefix of raw bytes literals has been added as a synonym of "'br'". Support for the unicode legacy literal ("u'value'") was reintroduced to simplify the maintenance of dual Python 2.x and 3.x codebases. See **PEP 414** for more information.
+A string literal with "'f'" or "'F'" in its prefix is a *formatted +string literal*; see f-strings. The "'f'" may be combined with "'r'", +but not with "'b'" or "'u'", therefore raw formatted strings are +possible, but formatted bytes literals are not. -Formal grammar -============== - -String literals, except “f-strings” and “t-strings”, are described by -the following lexical definitions. - -These definitions use negative lookaheads ("!") to indicate that an -ending quote ends the literal. - - STRING: [stringprefix] (stringcontent) - stringprefix: <("r" | "u" | "b" | "br" | "rb"), case-insensitive> - stringcontent: - | "'" ( !"'" stringitem)* "'" - | '"' ( !'"' stringitem)* '"' - | "\'\'\'" ( !"\'\'\'" longstringitem)* "\'\'\'" - | '"""' ( !'"""' longstringitem)* '"""' - stringitem: stringchar | stringescapeseq - stringchar: <any source_character, except backslash and newline> - longstringitem: stringitem | newline - stringescapeseq: "\\" <any source_character> - -Note that as in all lexical definitions, whitespace is significant. In -particular, the prefix (if any) must be immediately followed by the -starting quote. +In triple-quoted literals, unescaped newlines and quotes are allowed +(and are retained), except that three unescaped quotes in a row +terminate the literal. (A “quote” is the character used to open the +literal, i.e. either "'" or """.) Escape sequences ================ -Unless an ‘"r"’ or ‘"R"’ prefix is present, escape sequences in string +Unless an "'r'" or "'R'" prefix is present, escape sequences in string and bytes literals are interpreted according to rules similar to those used by Standard C.
The recognized escape sequences are: -+----------------------------------------------------+----------------------------------------------------+ -| Escape Sequence | Meaning | -|====================================================|====================================================| -| "\\" | Ignored end of line | -+----------------------------------------------------+----------------------------------------------------+ -| "\\\\" | Backslash | -+----------------------------------------------------+----------------------------------------------------+ -| "\\'" | Single quote | -+----------------------------------------------------+----------------------------------------------------+ -| "\\"" | Double quote | -+----------------------------------------------------+----------------------------------------------------+ -| "\\a" | ASCII Bell (BEL) | -+----------------------------------------------------+----------------------------------------------------+ -| "\\b" | ASCII Backspace (BS) | -+----------------------------------------------------+----------------------------------------------------+ -| "\\f" | ASCII Formfeed (FF) | -+----------------------------------------------------+----------------------------------------------------+ -| "\\n" | ASCII Linefeed (LF) | -+----------------------------------------------------+----------------------------------------------------+ -| "\\r" | ASCII Carriage Return (CR) | -+----------------------------------------------------+----------------------------------------------------+ -| "\\t" | ASCII Horizontal Tab (TAB) | -+----------------------------------------------------+----------------------------------------------------+ -| "\\v" | ASCII Vertical Tab (VT) | -+----------------------------------------------------+----------------------------------------------------+ -| "\\*ooo*" | Octal character | -+----------------------------------------------------+----------------------------------------------------+ -| "\\x*hh*" | 
Hexadecimal character | -+----------------------------------------------------+----------------------------------------------------+ -| "\\N{*name*}" | Named Unicode character | -+----------------------------------------------------+----------------------------------------------------+ -| "\\u*xxxx*" | Hexadecimal Unicode character | -+----------------------------------------------------+----------------------------------------------------+ -| "\\U*xxxxxxxx*" | Hexadecimal Unicode character | -+----------------------------------------------------+----------------------------------------------------+ - - -Ignored end of line -------------------- - -A backslash can be added at the end of a line to ignore the newline: - - >>> 'This string will not include \\ - ... backslashes or newline characters.' - 'This string will not include backslashes or newline characters.' - -The same result can be achieved using triple-quoted strings, or -parentheses and string literal concatenation. - - -Escaped characters ------------------- - -To include a backslash in a non-raw Python string literal, it must be -doubled. The "\\\\" escape sequence denotes a single backslash -character: - - >>> print('C:\\\\Program Files') - C:\\Program Files - -Similarly, the "\\'" and "\\"" sequences denote the single and double -quote character, respectively: - - >>> print('\\' and \\"') - ' and " - - -Octal character ---------------- - -The sequence "\\*ooo*" denotes a *character* with the octal (base 8) -value *ooo*: - - >>> '\\120' - 'P' - -Up to three octal digits (0 through 7) are accepted. - -In a bytes literal, *character* means a *byte* with the given value. -In a string literal, it means a Unicode character with the given -value. - -Changed in version 3.11: Octal escapes with value larger than "0o377" -(255) produce a "DeprecationWarning". 
++---------------------------+-----------------------------------+---------+ +| Escape Sequence | Meaning | Notes | +|===========================|===================================|=========| +| "\\" | Backslash and newline ignored | (1) | ++---------------------------+-----------------------------------+---------+ +| "\\\\" | Backslash ("\\") | | ++---------------------------+-----------------------------------+---------+ +| "\\'" | Single quote ("'") | | ++---------------------------+-----------------------------------+---------+ +| "\\"" | Double quote (""") | | ++---------------------------+-----------------------------------+---------+ +| "\\a" | ASCII Bell (BEL) | | ++---------------------------+-----------------------------------+---------+ +| "\\b" | ASCII Backspace (BS) | | ++---------------------------+-----------------------------------+---------+ +| "\\f" | ASCII Formfeed (FF) | | ++---------------------------+-----------------------------------+---------+ +| "\\n" | ASCII Linefeed (LF) | | ++---------------------------+-----------------------------------+---------+ +| "\\r" | ASCII Carriage Return (CR) | | ++---------------------------+-----------------------------------+---------+ +| "\\t" | ASCII Horizontal Tab (TAB) | | ++---------------------------+-----------------------------------+---------+ +| "\\v" | ASCII Vertical Tab (VT) | | ++---------------------------+-----------------------------------+---------+ +| "\\*ooo*" | Character with octal value *ooo* | (2,4) | ++---------------------------+-----------------------------------+---------+ +| "\\x*hh*" | Character with hex value *hh* | (3,4) | ++---------------------------+-----------------------------------+---------+ + +Escape sequences only recognized in string literals are: + ++---------------------------+-----------------------------------+---------+ +| Escape Sequence | Meaning | Notes | +|===========================|===================================|=========| +| "\\N{*name*}" | 
Character named *name* in the | (5) | +| | Unicode database | | ++---------------------------+-----------------------------------+---------+ +| "\\u*xxxx*" | Character with 16-bit hex value | (6) | +| | *xxxx* | | ++---------------------------+-----------------------------------+---------+ +| "\\U*xxxxxxxx*" | Character with 32-bit hex value | (7) | +| | *xxxxxxxx* | | ++---------------------------+-----------------------------------+---------+ -Changed in version 3.12: Octal escapes with value larger than "0o377" -(255) produce a "SyntaxWarning". In a future Python version they will -raise a "SyntaxError". - - -Hexadecimal character ---------------------- - -The sequence "\\x*hh*" denotes a *character* with the hex (base 16) -value *hh*: - - >>> '\\x50' - 'P' - -Unlike in Standard C, exactly two hex digits are required. - -In a bytes literal, *character* means a *byte* with the given value. -In a string literal, it means a Unicode character with the given -value. - - -Named Unicode character ------------------------ - -The sequence "\\N{*name*}" denotes a Unicode character with the given -*name*: +Notes: - >>> '\\N{LATIN CAPITAL LETTER P}' - 'P' - >>> '\\N{SNAKE}' - '🐍' +1. A backslash can be added at the end of a line to ignore the + newline: -This sequence cannot appear in bytes literals. + >>> 'This string will not include \\ + ... backslashes or newline characters.' + 'This string will not include backslashes or newline characters.' -Changed in version 3.3: Support for name aliases has been added. + The same result can be achieved using triple-quoted strings, or + parentheses and string literal concatenation. +2. As in Standard C, up to three octal digits are accepted. -Hexadecimal Unicode characters ------------------------------- + Changed in version 3.11: Octal escapes with value larger than + "0o377" produce a "DeprecationWarning". -These sequences "\\u*xxxx*" and "\\U*xxxxxxxx*" denote the Unicode -character with the given hex (base 16) value. 
Exactly four digits are -required for "\\u"; exactly eight digits are required for "\\U". The -latter can encode any Unicode character. + Changed in version 3.12: Octal escapes with value larger than + "0o377" produce a "SyntaxWarning". In a future Python version they + will eventually be a "SyntaxError". - >>> '\\u1234' - 'ሴ' - >>> '\\U0001f40d' - '🐍' +3. Unlike in Standard C, exactly two hex digits are required. -These sequences cannot appear in bytes literals. +4. In a bytes literal, hexadecimal and octal escapes denote the byte + with the given value. In a string literal, these escapes denote a + Unicode character with the given value. +5. Changed in version 3.3: Support for name aliases [1] has been + added. -Unrecognized escape sequences ------------------------------ +6. Exactly four hex digits are required. -Unlike in Standard C, all unrecognized escape sequences are left in -the string unchanged, that is, *the backslash is left in the result*: +7. Any Unicode character can be encoded this way. Exactly eight hex + digits are required. - >>> print('\\q') - \\q - >>> list('\\q') - ['\\\\', 'q'] - -Note that for bytes literals, the escape sequences only recognized in -string literals ("\\N...", "\\u...", "\\U...") fall into the category of -unrecognized escapes. +Unlike Standard C, all unrecognized escape sequences are left in the +string unchanged, i.e., *the backslash is left in the result*. (This +behavior is useful when debugging: if an escape sequence is mistyped, +the resulting output is more easily recognized as broken.) It is also +important to note that the escape sequences only recognized in string +literals fall into the category of unrecognized escapes for bytes +literals. Changed in version 3.6: Unrecognized escape sequences produce a "DeprecationWarning". Changed in version 3.12: Unrecognized escape sequences produce a -"SyntaxWarning". In a future Python version they will raise a +"SyntaxWarning".
In a future Python version they will eventually be a "SyntaxError". - -Bytes literals -============== - -*Bytes literals* are always prefixed with ‘"b"’ or ‘"B"’; they produce -an instance of the "bytes" type instead of the "str" type. They may -only contain ASCII characters; bytes with a numeric value of 128 or -greater must be expressed with escape sequences (typically Hexadecimal -character or Octal character): - - >>> b'\\x89PNG\\r\\n\\x1a\\n' - b'\\x89PNG\\r\\n\\x1a\\n' - >>> list(b'\\x89PNG\\r\\n\\x1a\\n') - [137, 80, 78, 71, 13, 10, 26, 10] - -Similarly, a zero byte must be expressed using an escape sequence -(typically "\\0" or "\\x00"). - - -Raw string literals -=================== - -Both string and bytes literals may optionally be prefixed with a -letter ‘"r"’ or ‘"R"’; such constructs are called *raw string -literals* and *raw bytes literals* respectively and treat backslashes -as literal characters. As a result, in raw string literals, escape -sequences are not treated specially: - - >>> r'\\d{4}-\\d{2}-\\d{2}' - '\\\\d{4}-\\\\d{2}-\\\\d{2}' - Even in a raw literal, quotes can be escaped with a backslash, but the backslash remains in the result; for example, "r"\\""" is a valid string literal consisting of two characters: a backslash and a double @@ -11353,199 +10580,6 @@ class is used in a class pattern with positional arguments, each the following quote character). Note also that a single backslash followed by a newline is interpreted as those two characters as part of the literal, *not* as a line continuation. - - -f-strings -========= - -Added in version 3.6. - -A *formatted string literal* or *f-string* is a string literal that is -prefixed with ‘"f"’ or ‘"F"’. These strings may contain replacement -fields, which are expressions delimited by curly braces "{}". While -other string literals always have a constant value, formatted strings -are really expressions evaluated at run time.
- -Escape sequences are decoded like in ordinary string literals (except -when a literal is also marked as a raw string). After decoding, the -grammar for the contents of the string is: - - f_string: (literal_char | "{{" | "}}" | replacement_field)* - replacement_field: "{" f_expression ["="] ["!" conversion] [":" format_spec] "}" - f_expression: (conditional_expression | "*" or_expr) - ("," conditional_expression | "," "*" or_expr)* [","] - | yield_expression - conversion: "s" | "r" | "a" - format_spec: (literal_char | replacement_field)* - literal_char: - -The parts of the string outside curly braces are treated literally, -except that any doubled curly braces "'{{'" or "'}}'" are replaced -with the corresponding single curly brace. A single opening curly -bracket "'{'" marks a replacement field, which starts with a Python -expression. To display both the expression text and its value after -evaluation, (useful in debugging), an equal sign "'='" may be added -after the expression. A conversion field, introduced by an exclamation -point "'!'" may follow. A format specifier may also be appended, -introduced by a colon "':'". A replacement field ends with a closing -curly bracket "'}'". - -Expressions in formatted string literals are treated like regular -Python expressions surrounded by parentheses, with a few exceptions. -An empty expression is not allowed, and both "lambda" and assignment -expressions ":=" must be surrounded by explicit parentheses. Each -expression is evaluated in the context where the formatted string -literal appears, in order from left to right. Replacement expressions -can contain newlines in both single-quoted and triple-quoted f-strings -and they can contain comments. Everything that comes after a "#" -inside a replacement field is a comment (even closing braces and -quotes). In that case, replacement fields must be closed in a -different line. - - >>> f"abc{a # This is a comment }" - ... 
+ 3}" - 'abc5' - -Changed in version 3.7: Prior to Python 3.7, an "await" expression and -comprehensions containing an "async for" clause were illegal in the -expressions in formatted string literals due to a problem with the -implementation. - -Changed in version 3.12: Prior to Python 3.12, comments were not -allowed inside f-string replacement fields. - -When the equal sign "'='" is provided, the output will have the -expression text, the "'='" and the evaluated value. Spaces after the -opening brace "'{'", within the expression and after the "'='" are all -retained in the output. By default, the "'='" causes the "repr()" of -the expression to be provided, unless there is a format specified. -When a format is specified it defaults to the "str()" of the -expression unless a conversion "'!r'" is declared. - -Added in version 3.8: The equal sign "'='". - -If a conversion is specified, the result of evaluating the expression -is converted before formatting. Conversion "'!s'" calls "str()" on -the result, "'!r'" calls "repr()", and "'!a'" calls "ascii()". - -The result is then formatted using the "format()" protocol. The -format specifier is passed to the "__format__()" method of the -expression or conversion result. An empty string is passed when the -format specifier is omitted. The formatted result is then included in -the final value of the whole string. - -Top-level format specifiers may include nested replacement fields. -These nested fields may include their own conversion fields and format -specifiers, but may not include more deeply nested replacement fields. -The format specifier mini-language is the same as that used by the -"str.format()" method. - -Formatted string literals may be concatenated, but replacement fields -cannot be split across literals. - -Some examples of formatted string literals: - - >>> name = "Fred" - >>> f"He said his name is {name!r}." - "He said his name is 'Fred'." - >>> f"He said his name is {repr(name)}." 
# repr() is equivalent to !r - "He said his name is 'Fred'." - >>> width = 10 - >>> precision = 4 - >>> value = decimal.Decimal("12.34567") - >>> f"result: {value:{width}.{precision}}" # nested fields - 'result: 12.35' - >>> today = datetime(year=2017, month=1, day=27) - >>> f"{today:%B %d, %Y}" # using date format specifier - 'January 27, 2017' - >>> f"{today=:%B %d, %Y}" # using date format specifier and debugging - 'today=January 27, 2017' - >>> number = 1024 - >>> f"{number:#0x}" # using integer format specifier - '0x400' - >>> foo = "bar" - >>> f"{ foo = }" # preserves whitespace - " foo = 'bar'" - >>> line = "The mill's closed" - >>> f"{line = }" - 'line = "The mill\\'s closed"' - >>> f"{line = :20}" - "line = The mill's closed " - >>> f"{line = !r:20}" - 'line = "The mill\\'s closed" ' - -Reusing the outer f-string quoting type inside a replacement field is -permitted: - - >>> a = dict(x=2) - >>> f"abc {a["x"]} def" - 'abc 2 def' - -Changed in version 3.12: Prior to Python 3.12, reuse of the same -quoting type of the outer f-string inside a replacement field was not -possible. - -Backslashes are also allowed in replacement fields and are evaluated -the same way as in any other context: - - >>> a = ["a", "b", "c"] - >>> print(f"List a contains:\\n{"\\n".join(a)}") - List a contains: - a - b - c - -Changed in version 3.12: Prior to Python 3.12, backslashes were not -permitted inside an f-string replacement field. - -Formatted string literals cannot be used as docstrings, even if they -do not include expressions. - - >>> def foo(): - ... f"Not a docstring" - ... - >>> foo.__doc__ is None - True - -See also **PEP 498** for the proposal that added formatted string -literals, and "str.format()", which uses a related format string -mechanism. - - -t-strings -========= - -Added in version 3.14. - -A *template string literal* or *t-string* is a string literal that is -prefixed with ‘"t"’ or ‘"T"’. 
These strings follow the same syntax and -evaluation rules as formatted string literals, with the following -differences: - -* Rather than evaluating to a "str" object, template string literals - evaluate to a "string.templatelib.Template" object. - -* The "format()" protocol is not used. Instead, the format specifier - and conversions (if any) are passed to a new "Interpolation" object - that is created for each evaluated expression. It is up to code that - processes the resulting "Template" object to decide how to handle - format specifiers and conversions. - -* Format specifiers containing nested replacement fields are evaluated - eagerly, prior to being passed to the "Interpolation" object. For - instance, an interpolation of the form "{amount:.{precision}f}" will - evaluate the inner expression "{precision}" to determine the value - of the "format_spec" attribute. If "precision" were to be "2", the - resulting format specifier would be "'.2f'". - -* When the equals sign "'='" is provided in an interpolation - expression, the text of the expression is appended to the literal - string that precedes the relevant interpolation. This includes the - equals sign and any surrounding whitespace. The "Interpolation" - instance for the expression will be created as normal, except that - "conversion" will be set to ‘"r"’ ("repr()") by default. If an - explicit conversion or format specifier are provided, this will - override the default behaviour. ''', 'subscriptions': r'''Subscriptions ************* @@ -11788,8 +10822,7 @@ class is used in a class pattern with positional arguments, each group types, because that would have ambiguous semantics. It is not possible to mix "except" and "except*" in the same "try". -The "break", "continue", and "return" statements cannot appear in an -"except*" clause. +"break", "continue" and "return" cannot appear in an "except*" clause. 
"else" clause @@ -12502,7 +11535,7 @@ def foo(): "ImportWarning" when falling back to "__package__" during import resolution. - Deprecated since version 3.13, removed in version 3.15: + Deprecated since version 3.13, will be removed in version 3.15: "__package__" will cease to be set or taken into consideration by the import system or standard library. @@ -12576,10 +11609,11 @@ def foo(): It is **strongly** recommended that you use "module.__spec__.cached" instead of "module.__cached__". - Deprecated since version 3.13, removed in version 3.15: Setting - "__cached__" on a module while failing to set "__spec__.cached" is - deprecated. In Python 3.15, "__cached__" will cease to be set or - taken into consideration by the import system or standard library. + Deprecated since version 3.13, will be removed in version 3.15: + Setting "__cached__" on a module while failing to set + "__spec__.cached" is deprecated. In Python 3.15, "__cached__" will + cease to be set or taken into consideration by the import system or + standard library. Other writable attributes on module objects @@ -12690,20 +11724,11 @@ class method object, it is transformed into an instance method object | | collected during class body execution. See also: | | | "__annotations__ attributes". For best practices | | | on working with "__annotations__", please see | -| | "annotationlib". Use | +| | "annotationlib". Where possible, use | | | "annotationlib.get_annotations()" instead of | -| | accessing this attribute directly. Warning: | -| | Accessing the "__annotations__" attribute directly | -| | on a class object may return annotations for the | -| | wrong class, specifically in certain cases where | -| | the class, its base class, or a metaclass is | -| | defined under "from __future__ import | -| | annotations". See **749** for details.This | -| | attribute does not exist on certain builtin | -| | classes. On user-defined classes without | -| | "__annotations__", it is an empty dictionary. 
| -| | Changed in version 3.14: Annotations are now | -| | lazily evaluated. See **PEP 649**. | +| | accessing this attribute directly. Changed in | +| | version 3.14: Annotations are now lazily | +| | evaluated. See **PEP 649**. | +----------------------------------------------------+----------------------------------------------------+ | type.__annotate__() | The *annotate function* for this class, or "None" | | | if the class has no annotations. See also: | @@ -13255,15 +12280,8 @@ class dict(iterable, **kwargs) the keyword argument replaces the value from the positional argument. - Providing keyword arguments as in the first example only works for - keys that are valid Python identifiers. Otherwise, any valid keys - can be used. - - Dictionaries compare equal if and only if they have the same "(key, - value)" pairs (regardless of ordering). Order comparisons (‘<’, - ‘<=’, ‘>=’, ‘>’) raise "TypeError". To illustrate dictionary - creation and equality, the following examples all return a - dictionary equal to "{"one": 1, "two": 2, "three": 3}": + To illustrate, the following examples all return a dictionary equal + to "{"one": 1, "two": 2, "three": 3}": >>> a = dict(one=1, two=2, three=3) >>> b = {'one': 1, 'two': 2, 'three': 3} @@ -13278,29 +12296,6 @@ class dict(iterable, **kwargs) keys that are valid Python identifiers. Otherwise, any valid keys can be used. - Dictionaries preserve insertion order. Note that updating a key - does not affect the order. Keys added after deletion are inserted - at the end. - - >>> d = {"one": 1, "two": 2, "three": 3, "four": 4} - >>> d - {'one': 1, 'two': 2, 'three': 3, 'four': 4} - >>> list(d) - ['one', 'two', 'three', 'four'] - >>> list(d.values()) - [1, 2, 3, 4] - >>> d["one"] = 42 - >>> d - {'one': 42, 'two': 2, 'three': 3, 'four': 4} - >>> del d["two"] - >>> d["two"] = None - >>> d - {'one': 42, 'three': 3, 'four': 4, 'two': None} - - Changed in version 3.7: Dictionary order is guaranteed to be - insertion order. 
This behavior was an implementation detail of - CPython from 3.6. - These are the operations that dictionaries support (and therefore, custom mapping types should support too): @@ -13471,6 +12466,33 @@ class dict(iterable, **kwargs) Added in version 3.9. + Dictionaries compare equal if and only if they have the same "(key, + value)" pairs (regardless of ordering). Order comparisons (‘<’, + ‘<=’, ‘>=’, ‘>’) raise "TypeError". + + Dictionaries preserve insertion order. Note that updating a key + does not affect the order. Keys added after deletion are inserted + at the end. + + >>> d = {"one": 1, "two": 2, "three": 3, "four": 4} + >>> d + {'one': 1, 'two': 2, 'three': 3, 'four': 4} + >>> list(d) + ['one', 'two', 'three', 'four'] + >>> list(d.values()) + [1, 2, 3, 4] + >>> d["one"] = 42 + >>> d + {'one': 42, 'two': 2, 'three': 3, 'four': 4} + >>> del d["two"] + >>> d["two"] = None + >>> d + {'one': 42, 'three': 3, 'four': 4, 'two': None} + + Changed in version 3.7: Dictionary order is guaranteed to be + insertion order. This behavior was an implementation detail of + CPython from 3.6. + Dictionaries and dictionary views are reversible. >>> d = {"one": 1, "two": 2, "three": 3, "four": 4} @@ -13697,7 +12719,7 @@ class dict(iterable, **kwargs) | "s * n" or "n * s" | equivalent to adding *s* to | (2)(7) | | | itself *n* times | | +----------------------------+----------------------------------+------------+ -| "s[i]" | *i*th item of *s*, origin 0 | (3)(9) | +| "s[i]" | *i*th item of *s*, origin 0 | (3) | +----------------------------+----------------------------------+------------+ | "s[i:j]" | slice of *s* from *i* to *j* | (3)(4) | +----------------------------+----------------------------------+------------+ @@ -13821,8 +12843,6 @@ class dict(iterable, **kwargs) returned index being relative to the start of the sequence rather than the start of the slice. -9. An "IndexError" is raised if *i* is outside the sequence range. 
- Immutable Sequence Types ======================== @@ -13857,15 +12877,11 @@ class dict(iterable, **kwargs) | "s[i] = x" | item *i* of *s* is replaced by | | | | *x* | | +--------------------------------+----------------------------------+-----------------------+ -| "del s[i]" | removes item *i* of *s* | | -+--------------------------------+----------------------------------+-----------------------+ | "s[i:j] = t" | slice of *s* from *i* to *j* is | | | | replaced by the contents of the | | | | iterable *t* | | +--------------------------------+----------------------------------+-----------------------+ -| "del s[i:j]" | removes the elements of "s[i:j]" | | -| | from the list (same as "s[i:j] = | | -| | []") | | +| "del s[i:j]" | same as "s[i:j] = []" | | +--------------------------------+----------------------------------+-----------------------+ | "s[i:j:k] = t" | the elements of "s[i:j:k]" are | (1) | | | replaced by those of *t* | | @@ -14189,15 +13205,11 @@ class range(start, stop[, step]) | "s[i] = x" | item *i* of *s* is replaced by | | | | *x* | | +--------------------------------+----------------------------------+-----------------------+ -| "del s[i]" | removes item *i* of *s* | | -+--------------------------------+----------------------------------+-----------------------+ | "s[i:j] = t" | slice of *s* from *i* to *j* is | | | | replaced by the contents of the | | | | iterable *t* | | +--------------------------------+----------------------------------+-----------------------+ -| "del s[i:j]" | removes the elements of "s[i:j]" | | -| | from the list (same as "s[i:j] = | | -| | []") | | +| "del s[i:j]" | same as "s[i:j] = []" | | +--------------------------------+----------------------------------+-----------------------+ | "s[i:j:k] = t" | the elements of "s[i:j:k]" are | (1) | | | replaced by those of *t* | |