Upload folder using huggingface_hub
This view is limited to 50 files because it contains too many changes.
- .gitattributes +2 -0
- .gitignore +39 -0
- LICENSE.md +167 -0
- README.md +94 -0
- assets/id02548.0pAkJZmlFqc.00001_id04570.0YMGn6BI9rg.00001.gif +3 -0
- assets/website_gif_v2.gif +3 -0
- audio/__init__.py +0 -0
- audio/audio.py +136 -0
- audio/hparams.py +66 -0
- checkpoints/checkpoint.pt +3 -0
- checkpoints/e7.15_model210000_notUsedInPaper.pt +3 -0
- d2l_gen/log.txt +2 -0
- dataset/LRW/lrw_fullpath.py +25 -0
- dataset/filelists/lrw_cross.txt +0 -0
- dataset/filelists/lrw_cross_relative_path.txt +0 -0
- dataset/filelists/lrw_reconstruction.txt +0 -0
- dataset/filelists/lrw_reconstruction_relative_path.txt +0 -0
- dataset/filelists/voxceleb2_test_n_5000_reconstruction_5k.txt +0 -0
- dataset/filelists/voxceleb2_test_n_5000_seed_797_cross_5K.txt +0 -0
- dataset/filelists/voxceleb2_test_n_500_reconstruction.txt +500 -0
- dataset/filelists/voxceleb2_test_n_500_seed_797_cross.txt +500 -0
- face_detection/README.md +1 -0
- face_detection/__init__.py +7 -0
- face_detection/api.py +98 -0
- face_detection/detection/__init__.py +1 -0
- face_detection/detection/core.py +130 -0
- face_detection/detection/sfd/__init__.py +1 -0
- face_detection/detection/sfd/bbox.py +129 -0
- face_detection/detection/sfd/detect.py +112 -0
- face_detection/detection/sfd/net_s3fd.py +129 -0
- face_detection/detection/sfd/sfd_detector.py +59 -0
- face_detection/models.py +261 -0
- face_detection/utils.py +313 -0
- generate.py +399 -0
- generate_dist.py +428 -0
- guided-diffusion/LICENSE +21 -0
- guided-diffusion/guided_diffusion.egg-info/PKG-INFO +7 -0
- guided-diffusion/guided_diffusion.egg-info/SOURCES.txt +7 -0
- guided-diffusion/guided_diffusion.egg-info/dependency_links.txt +1 -0
- guided-diffusion/guided_diffusion.egg-info/requires.txt +3 -0
- guided-diffusion/guided_diffusion.egg-info/top_level.txt +1 -0
- guided-diffusion/guided_diffusion/__init__.py +3 -0
- guided-diffusion/guided_diffusion/dist_util.py +94 -0
- guided-diffusion/guided_diffusion/fp16_util.py +237 -0
- guided-diffusion/guided_diffusion/gaussian_diffusion.py +843 -0
- guided-diffusion/guided_diffusion/image_datasets.py +167 -0
- guided-diffusion/guided_diffusion/logger.py +491 -0
- guided-diffusion/guided_diffusion/losses.py +77 -0
- guided-diffusion/guided_diffusion/lpips.py +20 -0
- guided-diffusion/guided_diffusion/nn.py +170 -0
.gitattributes
CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+assets/id02548.0pAkJZmlFqc.00001_id04570.0YMGn6BI9rg.00001.gif filter=lfs diff=lfs merge=lfs -text
+assets/website_gif_v2.gif filter=lfs diff=lfs merge=lfs -text
.gitignore
ADDED
@@ -0,0 +1,39 @@
#experiments folder
*.log
*.error
*slurm_logs*
*out.txt*

#preprocessing folder
preprocessing/dummy*


#scratch folder
scratch/dummy*
scratch/ffmpeg_out/
scratch/open_cv/


Wav2Lip/.git
Wav2Lip/.gitignore
# these are from Wav2Lip/.gitignore
*.pkl
*.jpg
*.mp4
*.pth
*.pyc
__pycache__
*.h5
*.avi
*.wav
Wav2Lip/filelists/*.txt
Wav2Lip/filelists/*.txt.bak
evaluation/test_filelists/lr*.txt
*.pyc
*.mkv
*.webm
*.mp3

#evaluation folder
evaluation/all_comparison/**/*.csv
evaluation/all_comparison/**/*.txt
LICENSE.md
ADDED
@@ -0,0 +1,167 @@
<a rel="license" href="https://creativecommons.org/licenses/by-nc/4.0/"><img alt="Creative Commons License" style="border-width:0" src="https://licensebuttons.net/l/by-nc/4.0/88x31.png" /></a><br>Copyright © soumik-kanad 2023

License: Attribution-NonCommercial 4.0 International (CC BY-NC 4.0)<br>
Human Readable License: https://creativecommons.org/licenses/by-nc/4.0/<br>
Complete Legal Terms: https://creativecommons.org/licenses/by-nc/4.0/legalcode<br>
Formatting: https://github.com/Gibberlings3/GitHub-Templates/blob/master/License-Templates/CC-BY-NC-4.0/LICENSE-CC-BY-NC-4.0.md<br>

# Attribution-NonCommercial 4.0 International

> *Creative Commons Corporation (“Creative Commons”) is not a law firm and does not provide legal services or legal advice. Distribution of Creative Commons public licenses does not create a lawyer-client or other relationship. Creative Commons makes its licenses and related information available on an “as-is” basis. Creative Commons gives no warranties regarding its licenses, any material licensed under their terms and conditions, or any related information. Creative Commons disclaims all liability for damages resulting from their use to the fullest extent possible.*
>
> ### Using Creative Commons Public Licenses
>
> Creative Commons public licenses provide a standard set of terms and conditions that creators and other rights holders may use to share original works of authorship and other material subject to copyright and certain other rights specified in the public license below. The following considerations are for informational purposes only, are not exhaustive, and do not form part of our licenses.
>
> * __Considerations for licensors:__ Our public licenses are intended for use by those authorized to give the public permission to use material in ways otherwise restricted by copyright and certain other rights. Our licenses are irrevocable. Licensors should read and understand the terms and conditions of the license they choose before applying it. Licensors should also secure all rights necessary before applying our licenses so that the public can reuse the material as expected. Licensors should clearly mark any material not subject to the license. This includes other CC-licensed material, or material used under an exception or limitation to copyright. [More considerations for licensors](http://wiki.creativecommons.org/Considerations_for_licensors_and_licensees#Considerations_for_licensors).
>
> * __Considerations for the public:__ By using one of our public licenses, a licensor grants the public permission to use the licensed material under specified terms and conditions. If the licensor’s permission is not necessary for any reason–for example, because of any applicable exception or limitation to copyright–then that use is not regulated by the license. Our licenses grant only permissions under copyright and certain other rights that a licensor has authority to grant. Use of the licensed material may still be restricted for other reasons, including because others have copyright or other rights in the material. A licensor may make special requests, such as asking that all changes be marked or described. Although not required by our licenses, you are encouraged to respect those requests where reasonable. [More considerations for the public](http://wiki.creativecommons.org/Considerations_for_licensors_and_licensees#Considerations_for_licensees).

## Creative Commons Attribution-NonCommercial 4.0 International Public License

By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-NonCommercial 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions.

### Section 1 – Definitions.

a. __Adapted Material__ means material subject to Copyright and Similar Rights that is derived from or based upon the Licensed Material and in which the Licensed Material is translated, altered, arranged, transformed, or otherwise modified in a manner requiring permission under the Copyright and Similar Rights held by the Licensor. For purposes of this Public License, where the Licensed Material is a musical work, performance, or sound recording, Adapted Material is always produced where the Licensed Material is synched in timed relation with a moving image.

b. __Adapter's License__ means the license You apply to Your Copyright and Similar Rights in Your contributions to Adapted Material in accordance with the terms and conditions of this Public License.

c. __Copyright and Similar Rights__ means copyright and/or similar rights closely related to copyright including, without limitation, performance, broadcast, sound recording, and Sui Generis Database Rights, without regard to how the rights are labeled or categorized. For purposes of this Public License, the rights specified in Section 2(b)(1)-(2) are not Copyright and Similar Rights.

d. __Effective Technological Measures__ means those measures that, in the absence of proper authority, may not be circumvented under laws fulfilling obligations under Article 11 of the WIPO Copyright Treaty adopted on December 20, 1996, and/or similar international agreements.

e. __Exceptions and Limitations__ means fair use, fair dealing, and/or any other exception or limitation to Copyright and Similar Rights that applies to Your use of the Licensed Material.

f. __Licensed Material__ means the artistic or literary work, database, or other material to which the Licensor applied this Public License.

g. __Licensed Rights__ means the rights granted to You subject to the terms and conditions of this Public License, which are limited to all Copyright and Similar Rights that apply to Your use of the Licensed Material and that the Licensor has authority to license.

h. __Licensor__ means the individual(s) or entity(ies) granting rights under this Public License.

i. __NonCommercial__ means not primarily intended for or directed towards commercial advantage or monetary compensation. For purposes of this Public License, the exchange of the Licensed Material for other material subject to Copyright and Similar Rights by digital file-sharing or similar means is NonCommercial provided there is no payment of monetary compensation in connection with the exchange.

j. __Share__ means to provide material to the public by any means or process that requires permission under the Licensed Rights, such as reproduction, public display, public performance, distribution, dissemination, communication, or importation, and to make material available to the public including in ways that members of the public may access the material from a place and at a time individually chosen by them.

k. __Sui Generis Database Rights__ means rights other than copyright resulting from Directive 96/9/EC of the European Parliament and of the Council of 11 March 1996 on the legal protection of databases, as amended and/or succeeded, as well as other essentially equivalent rights anywhere in the world.

l. __You__ means the individual or entity exercising the Licensed Rights under this Public License. Your has a corresponding meaning.

### Section 2 – Scope.

a. ___License grant.___

1. Subject to the terms and conditions of this Public License, the Licensor hereby grants You a worldwide, royalty-free, non-sublicensable, non-exclusive, irrevocable license to exercise the Licensed Rights in the Licensed Material to:

A. reproduce and Share the Licensed Material, in whole or in part, for NonCommercial purposes only; and

B. produce, reproduce, and Share Adapted Material for NonCommercial purposes only.

2. __Exceptions and Limitations.__ For the avoidance of doubt, where Exceptions and Limitations apply to Your use, this Public License does not apply, and You do not need to comply with its terms and conditions.

3. __Term.__ The term of this Public License is specified in Section 6(a).

4. __Media and formats; technical modifications allowed.__ The Licensor authorizes You to exercise the Licensed Rights in all media and formats whether now known or hereafter created, and to make technical modifications necessary to do so. The Licensor waives and/or agrees not to assert any right or authority to forbid You from making technical modifications necessary to exercise the Licensed Rights, including technical modifications necessary to circumvent Effective Technological Measures. For purposes of this Public License, simply making modifications authorized by this Section 2(a)(4) never produces Adapted Material.

5. __Downstream recipients.__

A. __Offer from the Licensor – Licensed Material.__ Every recipient of the Licensed Material automatically receives an offer from the Licensor to exercise the Licensed Rights under the terms and conditions of this Public License.

B. __No downstream restrictions.__ You may not offer or impose any additional or different terms or conditions on, or apply any Effective Technological Measures to, the Licensed Material if doing so restricts exercise of the Licensed Rights by any recipient of the Licensed Material.

6. __No endorsement.__ Nothing in this Public License constitutes or may be construed as permission to assert or imply that You are, or that Your use of the Licensed Material is, connected with, or sponsored, endorsed, or granted official status by, the Licensor or others designated to receive attribution as provided in Section 3(a)(1)(A)(i).

b. ___Other rights.___

1. Moral rights, such as the right of integrity, are not licensed under this Public License, nor are publicity, privacy, and/or other similar personality rights; however, to the extent possible, the Licensor waives and/or agrees not to assert any such rights held by the Licensor to the limited extent necessary to allow You to exercise the Licensed Rights, but not otherwise.

2. Patent and trademark rights are not licensed under this Public License.

3. To the extent possible, the Licensor waives any right to collect royalties from You for the exercise of the Licensed Rights, whether directly or through a collecting society under any voluntary or waivable statutory or compulsory licensing scheme. In all other cases the Licensor expressly reserves any right to collect such royalties, including when the Licensed Material is used other than for NonCommercial purposes.

### Section 3 – License Conditions.

Your exercise of the Licensed Rights is expressly made subject to the following conditions.

a. ___Attribution.___

1. If You Share the Licensed Material (including in modified form), You must:

A. retain the following if it is supplied by the Licensor with the Licensed Material:

i. identification of the creator(s) of the Licensed Material and any others designated to receive attribution, in any reasonable manner requested by the Licensor (including by pseudonym if designated);

ii. a copyright notice;

iii. a notice that refers to this Public License;

iv. a notice that refers to the disclaimer of warranties;

v. a URI or hyperlink to the Licensed Material to the extent reasonably practicable;

B. indicate if You modified the Licensed Material and retain an indication of any previous modifications; and

C. indicate the Licensed Material is licensed under this Public License, and include the text of, or the URI or hyperlink to, this Public License.

2. You may satisfy the conditions in Section 3(a)(1) in any reasonable manner based on the medium, means, and context in which You Share the Licensed Material. For example, it may be reasonable to satisfy the conditions by providing a URI or hyperlink to a resource that includes the required information.

3. If requested by the Licensor, You must remove any of the information required by Section 3(a)(1)(A) to the extent reasonably practicable.

4. If You Share Adapted Material You produce, the Adapter's License You apply must not prevent recipients of the Adapted Material from complying with this Public License.

### Section 4 – Sui Generis Database Rights.

Where the Licensed Rights include Sui Generis Database Rights that apply to Your use of the Licensed Material:

a. for the avoidance of doubt, Section 2(a)(1) grants You the right to extract, reuse, reproduce, and Share all or a substantial portion of the contents of the database for NonCommercial purposes only;

b. if You include all or a substantial portion of the database contents in a database in which You have Sui Generis Database Rights, then the database in which You have Sui Generis Database Rights (but not its individual contents) is Adapted Material; and

c. You must comply with the conditions in Section 3(a) if You Share all or a substantial portion of the contents of the database.

For the avoidance of doubt, this Section 4 supplements and does not replace Your obligations under this Public License where the Licensed Rights include other Copyright and Similar Rights.

### Section 5 – Disclaimer of Warranties and Limitation of Liability.

a. __Unless otherwise separately undertaken by the Licensor, to the extent possible, the Licensor offers the Licensed Material as-is and as-available, and makes no representations or warranties of any kind concerning the Licensed Material, whether express, implied, statutory, or other. This includes, without limitation, warranties of title, merchantability, fitness for a particular purpose, non-infringement, absence of latent or other defects, accuracy, or the presence or absence of errors, whether or not known or discoverable. Where disclaimers of warranties are not allowed in full or in part, this disclaimer may not apply to You.__

b. __To the extent possible, in no event will the Licensor be liable to You on any legal theory (including, without limitation, negligence) or otherwise for any direct, special, indirect, incidental, consequential, punitive, exemplary, or other losses, costs, expenses, or damages arising out of this Public License or use of the Licensed Material, even if the Licensor has been advised of the possibility of such losses, costs, expenses, or damages. Where a limitation of liability is not allowed in full or in part, this limitation may not apply to You.__

c. The disclaimer of warranties and limitation of liability provided above shall be interpreted in a manner that, to the extent possible, most closely approximates an absolute disclaimer and waiver of all liability.

### Section 6 – Term and Termination.

a. This Public License applies for the term of the Copyright and Similar Rights licensed here. However, if You fail to comply with this Public License, then Your rights under this Public License terminate automatically.

b. Where Your right to use the Licensed Material has terminated under Section 6(a), it reinstates:

1. automatically as of the date the violation is cured, provided it is cured within 30 days of Your discovery of the violation; or

2. upon express reinstatement by the Licensor.

For the avoidance of doubt, this Section 6(b) does not affect any right the Licensor may have to seek remedies for Your violations of this Public License.

c. For the avoidance of doubt, the Licensor may also offer the Licensed Material under separate terms or conditions or stop distributing the Licensed Material at any time; however, doing so will not terminate this Public License.

d. Sections 1, 5, 6, 7, and 8 survive termination of this Public License.

### Section 7 – Other Terms and Conditions.

a. The Licensor shall not be bound by any additional or different terms or conditions communicated by You unless expressly agreed.

b. Any arrangements, understandings, or agreements regarding the Licensed Material not stated herein are separate from and independent of the terms and conditions of this Public License.

### Section 8 – Interpretation.

a. For the avoidance of doubt, this Public License does not, and shall not be interpreted to, reduce, limit, restrict, or impose conditions on any use of the Licensed Material that could lawfully be made without permission under this Public License.

b. To the extent possible, if any provision of this Public License is deemed unenforceable, it shall be automatically reformed to the minimum extent necessary to make it enforceable. If the provision cannot be reformed, it shall be severed from this Public License without affecting the enforceability of the remaining terms and conditions.

c. No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor.

d. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority.

> Creative Commons is not a party to its public licenses. Notwithstanding, Creative Commons may elect to apply one of its public licenses to material it publishes and in those instances will be considered the “Licensor.” Except for the limited purpose of indicating that material is shared under a Creative Commons public license or as otherwise permitted by the Creative Commons policies published at [creativecommons.org/policies](http://creativecommons.org/policies), Creative Commons does not authorize the use of the trademark “Creative Commons” or any other trademark or logo of Creative Commons without its prior written consent including, without limitation, in connection with any unauthorized modifications to any of its public licenses or any other arrangements, understandings, or agreements concerning use of licensed material. For the avoidance of doubt, this paragraph does not form part of the public licenses.
>
> Creative Commons may be contacted at creativecommons.org
README.md
ADDED
@@ -0,0 +1,94 @@
# Diff2Lip: Audio Conditioned Diffusion Models for Lip-Synchronization

This is the official repository for [Diff2Lip: Audio Conditioned Diffusion Models for Lip-Synchronization](https://openaccess.thecvf.com/content/WACV2024/html/Mukhopadhyay_Diff2Lip_Audio_Conditioned_Diffusion_Models_for_Lip-Synchronization_WACV_2024_paper.html), accepted at WACV 2024. It includes the script to run lip-synchronization at inference time given a filelist of audio-video pairs.

|[Abstract](https://openaccess.thecvf.com/content/WACV2024/html/Mukhopadhyay_Diff2Lip_Audio_Conditioned_Diffusion_Models_for_Lip-Synchronization_WACV_2024_paper.html)|[ArXiv](https://arxiv.org/abs/2308.09716)|[PDF](https://arxiv.org/pdf/2308.09716.pdf)|[Website](https://soumik-kanad.github.io/diff2lip/)|
|:-:|:-:|:-:|:-:|

### tl;dr
**Diff2Lip**: arbitrary speech + face videos → high-quality lip-sync.

**Applications**: movies, education, virtual avatars, (eventually) video conferencing.

![](assets/website_gif_v2.gif)
<!-- https://soumik-kanad.github.io/diff2lip/static/website_videos/website_gif_v2.mp4 -->
<!-- <video id="teaser" autoplay muted loop playsinline height="100%" controls> <source src="https://soumik-kanad.github.io/diff2lip/static/website_videos/website_gif_v2.mp4" type="video/mp4"></video> -->

### Results

(a) Video Source (b) Wav2Lip (c) PC-AVS (d) Diff2Lip (ours)

![](assets/id02548.0pAkJZmlFqc.00001_id04570.0YMGn6BI9rg.00001.gif)
<!-- https://soumik-kanad.github.io/diff2lip/static/website_videos/id02548.0pAkJZmlFqc.00001_id04570.0YMGn6BI9rg.00001.mp4 -->
<!-- <video id="teaser" autoplay muted loop playsinline height="100%">
<source src="https://soumik-kanad.github.io/diff2lip/static/website_videos/id02548.0pAkJZmlFqc.00001_id04570.0YMGn6BI9rg.00001.mp4" type="video/mp4">
</video> -->

Please find more results on our website.

### Overview of our approach
- **Top**: Diff2Lip uses an audio-conditioned diffusion model to generate lip-synchronized videos.
- **Bottom**: Zooming in on the mouth region shows that our method generates high-quality video frames without suffering from identity loss.

<br>

## Setting up the environment
```
conda create -n diff2lip python=3.9
conda activate diff2lip
conda install -c conda-forge ffmpeg=5.0.1
pip install -r requirements.txt
```

## Inference
For inference on the [VoxCeleb2](https://www.robots.ox.ac.uk/~vgg/data/voxceleb/vox2.html) dataset we use the `scripts/inference.sh` script, which internally calls the python scripts `generate.py` or `generate_dist.py`. Set the following variables to run inference:

- `real_video_root`: set this to the base path of your directory containing the VoxCeleb2 dataset.
- `model_path`: first download the Diff2Lip checkpoint from [here](https://drive.google.com/drive/folders/1UMiHAhVf5M_CKzjVQFC5jkz-IXAAnFo5?usp=drive_link), place it in the `checkpoints` directory, and set this variable to the checkpoint's path.
- `sample_path`: set this to where you want to generate your output.
- `sample_mode`: set this to "cross" to drive a video source with a different/same audio source, or to "reconstruction" to drive the first frame of the video with the same/different audio source.
- `NUM_GPUS`: controls the number of GPUs to be used. If set to greater than 1, generation runs distributed across the GPUs.

After setting these variables in the script, inference can be run using the following command:
```
scripts/inference.sh
```

### Inference on other data
For example, if you want to run on the LRW dataset, apart from the above arguments you also need to set `--is_voxceleb2=False`, change the variable `filelist_recon` to `dataset/filelists/lrw_reconstruction_relative_path.txt`, and the variable `filelist_cross` to `dataset/filelists/lrw_cross_relative_path.txt`. Each line of these filelists contains the path of the audio source and the path of the video source separated by a space, relative to the `real_video_root` variable, as sketched below.
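
A minimal sketch of how such a filelist can be resolved against `real_video_root` (the helper name here is hypothetical; the actual parsing lives in `generate.py`/`generate_dist.py`):

```
import os

def load_pairs(filelist, real_video_root):
    # Each non-empty line: "<audio_relative_path> <video_relative_path>"
    pairs = []
    with open(filelist) as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            audio_rel, video_rel = line.split(' ')
            pairs.append((os.path.join(real_video_root, audio_rel),
                          os.path.join(real_video_root, video_rel)))
    return pairs

# e.g. load_pairs("dataset/filelists/lrw_cross_relative_path.txt", "/path/to/LRW")
```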

For inference on a single video, set `--is_voxceleb2=False` and then either (1) use a filelist with only one line, or (2) set `--generate_from_filelist=0` and specify the `--video_path`, `--audio_path`, and `--out_path` flags instead of `--test_video_dir`, `--sample_path`, and `--filelist` in the `scripts/inference.sh` script.

## License

Except where otherwise specified, the text/code in the <a href="https://github.com/soumik-kanad/diff2lip">Diff2Lip</a> repository by Soumik Mukhopadhyay ([soumik-kanad](https://github.com/soumik-kanad/)) is licensed under the <a href="https://creativecommons.org/licenses/by-nc/4.0/">Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0)</a> license. It can be shared and adapted provided that you credit us and don't use our work for commercial purposes.

## Citation

Please cite our paper if you find our work helpful and use our code.

```
@InProceedings{Mukhopadhyay_2024_WACV,
    author    = {Mukhopadhyay, Soumik and Suri, Saksham and Gadde, Ravi Teja and Shrivastava, Abhinav},
    title     = {Diff2Lip: Audio Conditioned Diffusion Models for Lip-Synchronization},
    booktitle = {Proceedings of the IEEE/CVF Winter Conference on Applications of Computer Vision (WACV)},
    month     = {January},
    year      = {2024},
    pages     = {5292-5302}
}
```
assets/id02548.0pAkJZmlFqc.00001_id04570.0YMGn6BI9rg.00001.gif
ADDED
Git LFS Details
assets/website_gif_v2.gif
ADDED
Git LFS Details
audio/__init__.py
ADDED
File without changes
audio/audio.py
ADDED
@@ -0,0 +1,136 @@
import librosa
import librosa.filters
import numpy as np
# import tensorflow as tf
from scipy import signal
from scipy.io import wavfile
from .hparams import hparams as hp

def load_wav(path, sr):
    return librosa.core.load(path, sr=sr)[0]

def save_wav(wav, path, sr):
    wav *= 32767 / max(0.01, np.max(np.abs(wav)))
    #proposed by @dsmiller
    wavfile.write(path, sr, wav.astype(np.int16))

def save_wavenet_wav(wav, path, sr):
    # note: librosa.output was removed in librosa >= 0.8; kept for legacy compatibility
    librosa.output.write_wav(path, wav, sr=sr)

def preemphasis(wav, k, preemphasize=True):
    if preemphasize:
        return signal.lfilter([1, -k], [1], wav)
    return wav

def inv_preemphasis(wav, k, inv_preemphasize=True):
    if inv_preemphasize:
        return signal.lfilter([1], [1, -k], wav)
    return wav

def get_hop_size():
    hop_size = hp.hop_size
    if hop_size is None:
        assert hp.frame_shift_ms is not None
        hop_size = int(hp.frame_shift_ms / 1000 * hp.sample_rate)
    return hop_size

def linearspectrogram(wav):
    D = _stft(preemphasis(wav, hp.preemphasis, hp.preemphasize))
    S = _amp_to_db(np.abs(D)) - hp.ref_level_db

    if hp.signal_normalization:
        return _normalize(S)
    return S

def melspectrogram(wav):
    D = _stft(preemphasis(wav, hp.preemphasis, hp.preemphasize))
    S = _amp_to_db(_linear_to_mel(np.abs(D))) - hp.ref_level_db

    if hp.signal_normalization:
        return _normalize(S)
    return S

def _lws_processor():
    import lws
    return lws.lws(hp.n_fft, get_hop_size(), fftsize=hp.win_size, mode="speech")

def _stft(y):
    if hp.use_lws:
        # fixed: _lws_processor takes no arguments (was called with hp)
        return _lws_processor().stft(y).T
    else:
        return librosa.stft(y=y, n_fft=hp.n_fft, hop_length=get_hop_size(), win_length=hp.win_size)

##########################################################
# These are only correct when using lws!!! (This was messing with Wavenet quality for a long time!)
def num_frames(length, fsize, fshift):
    """Compute number of time frames of spectrogram"""
    pad = (fsize - fshift)
    if length % fshift == 0:
        M = (length + pad * 2 - fsize) // fshift + 1
    else:
        M = (length + pad * 2 - fsize) // fshift + 2
    return M


def pad_lr(x, fsize, fshift):
    """Compute left and right padding"""
    M = num_frames(len(x), fsize, fshift)
    pad = (fsize - fshift)
    T = len(x) + 2 * pad
    r = (M - 1) * fshift + fsize - T
    return pad, pad + r
##########################################################
# Librosa correct padding
def librosa_pad_lr(x, fsize, fshift):
    return 0, (x.shape[0] // fshift + 1) * fshift - x.shape[0]

# Conversions
_mel_basis = None

def _linear_to_mel(spectrogram):
    global _mel_basis
    if _mel_basis is None:
        _mel_basis = _build_mel_basis()
    return np.dot(_mel_basis, spectrogram)

def _build_mel_basis():
    assert hp.fmax <= hp.sample_rate // 2
    return librosa.filters.mel(sr=hp.sample_rate, n_fft=hp.n_fft, n_mels=hp.num_mels,
                               fmin=hp.fmin, fmax=hp.fmax)

def _amp_to_db(x):
    min_level = np.exp(hp.min_level_db / 20 * np.log(10))
    return 20 * np.log10(np.maximum(min_level, x))

def _db_to_amp(x):
    return np.power(10.0, (x) * 0.05)

def _normalize(S):
    if hp.allow_clipping_in_normalization:
        if hp.symmetric_mels:
            return np.clip((2 * hp.max_abs_value) * ((S - hp.min_level_db) / (-hp.min_level_db)) - hp.max_abs_value,
                           -hp.max_abs_value, hp.max_abs_value)
        else:
            return np.clip(hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db)), 0, hp.max_abs_value)

    assert S.max() <= 0 and S.min() - hp.min_level_db >= 0
    if hp.symmetric_mels:
        return (2 * hp.max_abs_value) * ((S - hp.min_level_db) / (-hp.min_level_db)) - hp.max_abs_value
    else:
        return hp.max_abs_value * ((S - hp.min_level_db) / (-hp.min_level_db))

def _denormalize(D):
    if hp.allow_clipping_in_normalization:
        if hp.symmetric_mels:
            return (((np.clip(D, -hp.max_abs_value,
                              hp.max_abs_value) + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value))
                    + hp.min_level_db)
        else:
            return ((np.clip(D, 0, hp.max_abs_value) * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db)

    if hp.symmetric_mels:
        return (((D + hp.max_abs_value) * -hp.min_level_db / (2 * hp.max_abs_value)) + hp.min_level_db)
    else:
        return ((D * -hp.min_level_db / hp.max_abs_value) + hp.min_level_db)
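
To make the audio front end concrete, a minimal usage sketch of the helpers above (the wav filename is hypothetical; run from the repo root):

```
import numpy as np
from audio.audio import load_wav, melspectrogram
from audio.hparams import hparams as hp

# "speech.wav" is a hypothetical input; any mono speech file works,
# resampled to hp.sample_rate (16 kHz) by load_wav.
wav = load_wav("speech.wav", sr=hp.sample_rate)   # float waveform in [-1, 1]
mel = melspectrogram(wav)                          # shape: (num_mels, num_frames)
assert mel.shape[0] == hp.num_mels
# With the default symmetric normalization, values are clipped to [-4, 4].
print(mel.shape, mel.min(), mel.max())
```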
audio/hparams.py
ADDED
@@ -0,0 +1,66 @@
from glob import glob
import os


class HParams:
    def __init__(self, **kwargs):
        self.data = {}

        for key, value in kwargs.items():
            self.data[key] = value

    def __getattr__(self, key):
        if key not in self.data:
            raise AttributeError("'HParams' object has no attribute %s" % key)
        return self.data[key]

    def set_hparam(self, key, value):
        self.data[key] = value


# Default hyperparameters
hparams = HParams(
    num_mels=80,  # Number of mel-spectrogram channels and local conditioning dimensionality
    # network
    rescale=True,  # Whether to rescale audio prior to preprocessing
    rescaling_max=0.9,  # Rescaling value

    # Use LWS (https://github.com/Jonathan-LeRoux/lws) for STFT and phase reconstruction
    # It's preferred to set True to use with https://github.com/r9y9/wavenet_vocoder
    # Does not work if n_fft is not a multiple of hop_size!!
    use_lws=False,

    n_fft=800,  # Extra window size is filled with 0 paddings to match this parameter
    hop_size=200,  # For 16000Hz, 200 = 12.5 ms (0.0125 * sample_rate)
    win_size=800,  # For 16000Hz, 800 = 50 ms (If None, win_size = n_fft) (0.05 * sample_rate)
    sample_rate=16000,  # 16000Hz (corresponding to librispeech) (sox --i <filename>)

    frame_shift_ms=None,  # Can replace the hop_size parameter. (Recommended: 12.5)

    # Mel and Linear spectrograms normalization/scaling and clipping
    signal_normalization=True,
    # Whether to normalize mel spectrograms to some predefined range (following below parameters)
    allow_clipping_in_normalization=True,  # Only relevant if mel_normalization = True
    symmetric_mels=True,
    # Whether to scale the data to be symmetric around 0. (Also multiplies the output range by 2,
    # faster and cleaner convergence)
    max_abs_value=4.,
    # max absolute value of data. If symmetric, data will be [-max, max] else [0, max] (Must not
    # be too big to avoid gradient explosion,
    # not too small for fast convergence)
    # Contribution by @begeekmyfriend
    # Spectrogram Pre-Emphasis (Lfilter: Reduce spectrogram noise and helps model certitude
    # levels. Also allows for better G&L phase reconstruction)
    preemphasize=True,  # whether to apply filter
    preemphasis=0.97,  # filter coefficient.

    # Limits
    min_level_db=-100,
    ref_level_db=20,
    fmin=55,
    # Set this to 55 if your speaker is male! if female, 95 should help taking off noise. (To
    # test depending on dataset. Pitch info: male~[65, 260], female~[100, 525])
    fmax=7600,  # To be increased/reduced depending on data.
)
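
As a quick sanity check on how these defaults interact, a small illustrative sketch (run from the repo root):

```
from audio.hparams import hparams as hp

# 200 samples at 16 kHz = 12.5 ms per hop; 800 samples = 50 ms windows.
assert hp.hop_size == int(0.0125 * hp.sample_rate)
assert hp.win_size == int(0.05 * hp.sample_rate)

# Overriding a value, e.g. when frame_shift_ms is used to derive the hop size:
hp.set_hparam("frame_shift_ms", 12.5)
print(int(hp.frame_shift_ms / 1000 * hp.sample_rate))  # -> 200, matching hop_size
```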
checkpoints/checkpoint.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8c71166482d2b893f2f77450563a1bb31d805f3048c7213b974fd9201e9aa4b3
size 406815527
checkpoints/e7.15_model210000_notUsedInPaper.pt
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:9349ee01b1c2f30aec16408cd120b728c7e8e9559190598ca50365a94ac4686a
size 406815527
d2l_gen/log.txt
ADDED
@@ -0,0 +1,2 @@
Logging to d2l_gen
creating model...
dataset/LRW/lrw_fullpath.py
ADDED
@@ -0,0 +1,25 @@
'''Converts the LRW video names in filelists to LRW relative paths and dumps them into new filelists'''
import os

filelist = "../filelists/lrw_cross.txt"

filelist_split_path = filelist.replace(".txt", "_relative_path.txt")
with open(filelist, 'r') as f:
    lines = f.readlines()
with open(filelist_split_path, 'w') as f:
    for i in range(len(lines)):
        # each line is "<audio_name> <video_name>"; video_name keeps its trailing newline
        audio_name, video_name = lines[i].split(' ')
        audio_word = audio_name.split('_')[0]
        video_word = video_name.split('_')[0]
        f.write(os.path.join(audio_word, 'test', audio_name) + ' ' + os.path.join(video_word, 'test', video_name))

filelist = "../filelists/lrw_reconstruction.txt"

filelist_split_path = filelist.replace(".txt", "_relative_path.txt")
with open(filelist, 'r') as f:
    lines = f.readlines()
with open(filelist_split_path, 'w') as f:
    for i in range(len(lines)):
        audio_name, video_name = lines[i].split(' ')
        audio_word = audio_name.split('_')[0]
        video_word = video_name.split('_')[0]
        f.write(os.path.join(audio_word, 'test', audio_name) + ' ' + os.path.join(video_word, 'test', video_name))
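
Concretely, the script prepends each LRW clip's word and the `test` split directory to its name; a minimal sketch of the same mapping (the clip name below is hypothetical):

```
import os

def to_relative(name):
    # LRW names look like "<WORD>_<clip_id>"; clips live under "<WORD>/test/".
    word = name.split('_')[0]
    return os.path.join(word, 'test', name)

print(to_relative("ABOUT_00001"))  # -> ABOUT/test/ABOUT_00001
```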
dataset/filelists/lrw_cross.txt
ADDED
The diff for this file is too large to render.
dataset/filelists/lrw_cross_relative_path.txt
ADDED
The diff for this file is too large to render.
dataset/filelists/lrw_reconstruction.txt
ADDED
The diff for this file is too large to render.
dataset/filelists/lrw_reconstruction_relative_path.txt
ADDED
The diff for this file is too large to render.
dataset/filelists/voxceleb2_test_n_5000_reconstruction_5k.txt
ADDED
The diff for this file is too large to render.
dataset/filelists/voxceleb2_test_n_5000_seed_797_cross_5K.txt
ADDED
The diff for this file is too large to render.
dataset/filelists/voxceleb2_test_n_500_reconstruction.txt
ADDED
@@ -0,0 +1,500 @@
id09017/SjCgiXBHfNU/00111 id09017/SjCgiXBHfNU/00111
id05055/HobsYUHmgr0/00138 id05055/HobsYUHmgr0/00138
id01567/M47d5UckOV8/00099 id01567/M47d5UckOV8/00099
id01228/SH3eBLMsRwY/00211 id01228/SH3eBLMsRwY/00211
id07312/m1VY1sC_P_o/00093 id07312/m1VY1sC_P_o/00093
id08696/dJH9aBSs1nE/00370 id08696/dJH9aBSs1nE/00370
id07312/9PCY4xwxgcE/00006 id07312/9PCY4xwxgcE/00006
id07494/1A8ZDo11tzY/00006 id07494/1A8ZDo11tzY/00006
id00061/thHLZ8tDJ-M/00276 id00061/thHLZ8tDJ-M/00276
id03862/YQMdTzyG-P8/00297 id03862/YQMdTzyG-P8/00297
id04570/zsnG6eKzOGE/00406 id04570/zsnG6eKzOGE/00406
id07414/1m1C-CdhmZ0/00016 id07414/1m1C-CdhmZ0/00016
id01509/2uKpHd-euIo/00038 id01509/2uKpHd-euIo/00038
id04276/qHXwXqxL0mk/00401 id04276/qHXwXqxL0mk/00401
id04366/x-VQ6z2QC4w/00252 id04366/x-VQ6z2QC4w/00252
id07620/um6sY627GaE/00475 id07620/um6sY627GaE/00475
id01000/RvjbLfo3XDM/00052 id01000/RvjbLfo3XDM/00052
id07868/fnWDbUI_Zbg/00289 id07868/fnWDbUI_Zbg/00289
id01333/cymDCPEhalE/00351 id01333/cymDCPEhalE/00351
id02317/Mv16h1Bx7HE/00241 id02317/Mv16h1Bx7HE/00241
id02317/Vi4k3cuwfgc/00342 id02317/Vi4k3cuwfgc/00342
id01000/eGeGHhuOJJ0/00077 id01000/eGeGHhuOJJ0/00077
id03980/zaDLb12pDBQ/00130 id03980/zaDLb12pDBQ/00130
id05124/c-Pa7b81coQ/00354 id05124/c-Pa7b81coQ/00354
id04478/nhLuGj0vGb8/00234 id04478/nhLuGj0vGb8/00234
id01541/3su8tn9nwi4/00007 id01541/3su8tn9nwi4/00007
id06484/cmIyVotzXiE/00125 id06484/cmIyVotzXiE/00125
id06209/oxofNHGCj7s/00139 id06209/oxofNHGCj7s/00139
id02181/rxX3t2rzLbg/00146 id02181/rxX3t2rzLbg/00146
id02286/YL75-u9XYUM/00105 id02286/YL75-u9XYUM/00105
id04276/v9mSslwD0Kg/00470 id04276/v9mSslwD0Kg/00470
id07802/6qBSFfV_Mig/00042 id07802/6qBSFfV_Mig/00042
id04295/DtC2X1KG8TE/00057 id04295/DtC2X1KG8TE/00057
id00866/shG_183xFlw/00243 id00866/shG_183xFlw/00243
id03862/2nagLhV_Yvw/00012 id03862/2nagLhV_Yvw/00012
id04119/Yndoy1jgHWs/00042 id04119/Yndoy1jgHWs/00042
id04295/mTCDT_Fv5So/00203 id04295/mTCDT_Fv5So/00203
id08456/o-5hKwhGqac/00354 id08456/o-5hKwhGqac/00354
id07494/tv6GJkx_Wy4/00331 id07494/tv6GJkx_Wy4/00331
id04295/mClPHVzTCLI/00196 id04295/mClPHVzTCLI/00196
id04478/81Tb6kjlNIk/00019 id04478/81Tb6kjlNIk/00019
id00812/NeNXGI8mox8/00158 id00812/NeNXGI8mox8/00158
id04536/sfldoEPrFPI/00438 id04536/sfldoEPrFPI/00438
id07620/aJVbccKJwEw/00327 id07620/aJVbccKJwEw/00327
id02286/4LAIxvdvguc/00001 id02286/4LAIxvdvguc/00001
id07802/BfQUBDw7TiM/00080 id07802/BfQUBDw7TiM/00080
id01066/65k0p7fUBVI/00026 id01066/65k0p7fUBVI/00026
id03862/w97YzyPYm1k/00460 id03862/w97YzyPYm1k/00460
id05816/njFBkJSpUrY/00414 id05816/njFBkJSpUrY/00414
id05124/_Oxp6absIhY/00341 id05124/_Oxp6absIhY/00341
id07663/mUw-kxAavdM/00192 id07663/mUw-kxAavdM/00192
id05999/Ls440srvfR4/00127 id05999/Ls440srvfR4/00127
id02548/Hmlw5PIf64o/00098 id02548/Hmlw5PIf64o/00098
id04276/Pbo_nlcZ0Lc/00190 id04276/Pbo_nlcZ0Lc/00190
id07802/FhKML4dLE60/00115 id07802/FhKML4dLE60/00115
id07621/1L2IUy6gqaM/00012 id07621/1L2IUy6gqaM/00012
id05654/veGIQ7p2ZSk/00130 id05654/veGIQ7p2ZSk/00130
id04094/0z1JYPKGBI8/00007 id04094/0z1JYPKGBI8/00007
id02576/wWUREnOwYo0/00136 id02576/wWUREnOwYo0/00136
id09017/PLNK1g5w4FY/00099 id09017/PLNK1g5w4FY/00099
id06484/USbx34RUkVI/00096 id06484/USbx34RUkVI/00096
id03030/FXbzdRO7t98/00101 id03030/FXbzdRO7t98/00101
id02057/VCXnx-ozS8c/00263 id02057/VCXnx-ozS8c/00263
id02542/JUodrwt9ucI/00033 id02542/JUodrwt9ucI/00033
id03030/DM_Z5D2fkRA/00068 id03030/DM_Z5D2fkRA/00068
id08552/irj3SqKAe0c/00196 id08552/irj3SqKAe0c/00196
id03030/YxBoufnVIMw/00177 id03030/YxBoufnVIMw/00177
id07868/Eaf-dgA59Gs/00061 id07868/Eaf-dgA59Gs/00061
id08456/6xVSlQDr7-w/00031 id08456/6xVSlQDr7-w/00031
id06811/OYFkt_n18hg/00128 id06811/OYFkt_n18hg/00128
id00817/tCnW5E8cMow/00383 id00817/tCnW5E8cMow/00383
id02542/fXQbNcIbcek/00053 id02542/fXQbNcIbcek/00053
id01567/oi2g17EF55s/00377 id01567/oi2g17EF55s/00377
id04366/HsG3OGE22DY/00117 id04366/HsG3OGE22DY/00117
id01509/1y0aWmgYDtw/00006 id01509/1y0aWmgYDtw/00006
id04295/pYfyopS672Y/00213 id04295/pYfyopS672Y/00213
id01989/6JfW9CPAoGY/00006 id01989/6JfW9CPAoGY/00006
id04366/tbcKV-IjZdI/00243 id04366/tbcKV-IjZdI/00243
id01298/UY0fkYSUFrY/00208 id01298/UY0fkYSUFrY/00208
id00817/GAs8WnyFKJM/00120 id00817/GAs8WnyFKJM/00120
id06484/TCp2-XVatIE/00079 id06484/TCp2-XVatIE/00079
id08374/Kf9N5AWprG8/00150 id08374/Kf9N5AWprG8/00150
id01822/QDWgjZqOkvM/00065 id01822/QDWgjZqOkvM/00065
id03030/pTz652Dx_6w/00230 id03030/pTz652Dx_6w/00230
id01460/chrI43l2Nuw/00201 id01460/chrI43l2Nuw/00201
id08374/85f-qB_KJP8/00041 id08374/85f-qB_KJP8/00041
id07961/PoSkUxZ4ags/00172 id07961/PoSkUxZ4ags/00172
id01437/uFPYqotT7tU/00233 id01437/uFPYqotT7tU/00233
id07621/Aan8MoozxII/00095 id07621/Aan8MoozxII/00095
id08456/fWTULQWYVoA/00250 id08456/fWTULQWYVoA/00250
id05055/da7Z8oWhFPY/00351 id05055/da7Z8oWhFPY/00351
id02181/hIvctbfcBx8/00106 id02181/hIvctbfcBx8/00106
id01541/dEmuPb4A7do/00184 id01541/dEmuPb4A7do/00184
id00419/a3Y7pQzcn40/00305 id00419/a3Y7pQzcn40/00305
id07354/dsDxN33xvL0/00262 id07354/dsDxN33xvL0/00262
id04478/MZh3AEgJ9pc/00092 id04478/MZh3AEgJ9pc/00092
id05124/UBUFmICrT-I/00281 id05124/UBUFmICrT-I/00281
id03127/SmGJu-t24hY/00195 id03127/SmGJu-t24hY/00195
id02465/coOp_DnsmEI/00150 id02465/coOp_DnsmEI/00150
id01618/qrOl1aaXBH0/00187 id01618/qrOl1aaXBH0/00187
id03969/WZVnB-m0X9g/00038 id03969/WZVnB-m0X9g/00038
id05202/s0m_4-SCn44/00186 id05202/s0m_4-SCn44/00186
id04657/SYVkfHq-pro/00172 id04657/SYVkfHq-pro/00172
id05176/p2IOP5_s_LM/00093 id05176/p2IOP5_s_LM/00093
id04950/XJS6SLQuCNM/00169 id04950/XJS6SLQuCNM/00169
id02019/anSrwA_9RPE/00152 id02019/anSrwA_9RPE/00152
id04570/Q-faEy1VXxQ/00140 id04570/Q-faEy1VXxQ/00140
id07621/bMvG2mQMZZw/00303 id07621/bMvG2mQMZZw/00303
id06811/vC3yQiWuuOI/00354 id06811/vC3yQiWuuOI/00354
id03839/aWMP8xzq2BE/00292 id03839/aWMP8xzq2BE/00292
id04094/j1ajUkR6_Q4/00326 id04094/j1ajUkR6_Q4/00326
id08149/o0Zdr9Jla7U/00047 id08149/o0Zdr9Jla7U/00047
id00017/hcr4tT9y3xs/00117 id00017/hcr4tT9y3xs/00117
id04950/Cu4jGRmYa4c/00064 id04950/Cu4jGRmYa4c/00064
id01567/TMozlhoPMfI/00223 id01567/TMozlhoPMfI/00223
id08374/QltFme-lqeI/00226 id08374/QltFme-lqeI/00226
id06816/tHor4VN8090/00259 id06816/tHor4VN8090/00259
id07494/xQ0YMPe-9u8/00413 id07494/xQ0YMPe-9u8/00413
id08374/FwR1K1rL3QI/00110 id08374/FwR1K1rL3QI/00110
id06692/Hlahj5abifM/00257 id06692/Hlahj5abifM/00257
id00419/J2LscHjRX7Q/00154 id00419/J2LscHjRX7Q/00154
id02057/CI5-q_qTR5I/00112 id02057/CI5-q_qTR5I/00112
id03862/7IccaH4HXRs/00069 id03862/7IccaH4HXRs/00069
id04656/ar3rKrkbjqI/00257 id04656/ar3rKrkbjqI/00257
id07494/XMEIdqio6ic/00184 id07494/XMEIdqio6ic/00184
id04657/dn4XY5c6mEw/00265 id04657/dn4XY5c6mEw/00265
id04570/SFKt669qIqs/00156 id04570/SFKt669qIqs/00156
id01541/sMDYdAB0MPs/00306 id01541/sMDYdAB0MPs/00306
id08456/F2O-frqyr9c/00101 id08456/F2O-frqyr9c/00101
id08701/_Ysb9mVibbk/00253 id08701/_Ysb9mVibbk/00253
id01333/e4FoER8nqx0/00365 id01333/e4FoER8nqx0/00365
id05124/F0Xpd6OoiDY/00161 id05124/F0Xpd6OoiDY/00161
id01593/AVmZf6Kl1So/00071 id01593/AVmZf6Kl1So/00071
id01567/fOlxxDqdrgc/00299 id01567/fOlxxDqdrgc/00299
id06484/2KVWoftPf2o/00001 id06484/2KVWoftPf2o/00001
id01224/g4jVqkEm1Gs/00274 id01224/g4jVqkEm1Gs/00274
id02445/ZX_6RMrTEP0/00066 id02445/ZX_6RMrTEP0/00066
id04656/5TR-W77XgF4/00032 id04656/5TR-W77XgF4/00032
id01618/F_ExF9xDajc/00060 id01618/F_ExF9xDajc/00060
id08392/gPX4IC53KwI/00355 id08392/gPX4IC53KwI/00355
id00866/pNbDtfW1JW4/00221 id00866/pNbDtfW1JW4/00221
id00812/b3dBqOtzsx0/00276 id00812/b3dBqOtzsx0/00276
id08701/61Al05HARgA/00001 id08701/61Al05HARgA/00001
id07663/FFo4JwVXeUM/00119 id07663/FFo4JwVXeUM/00119
id02057/22zJ50ky7CQ/00013 id02057/22zJ50ky7CQ/00013
id05055/2onVoeSgouI/00028 id05055/2onVoeSgouI/00028
id04006/zvUZFL0NyhM/00260 id04006/zvUZFL0NyhM/00260
id04950/EpOnsaBin0A/00077 id04950/EpOnsaBin0A/00077
id05015/RhBpC9Fc7a4/00154 id05015/RhBpC9Fc7a4/00154
id04656/Z_JFBDW9eZE/00251 id04656/Z_JFBDW9eZE/00251
id01509/2sb83ZBlbJg/00034 id01509/2sb83ZBlbJg/00034
id04030/JbcD0P6KGe0/00036 id04030/JbcD0P6KGe0/00036
id02542/cwgUjse_REU/00040 id02542/cwgUjse_REU/00040
id07620/xFc9X6EXtRM/00478 id07620/xFc9X6EXtRM/00478
id07354/Qrg89rvtZ1k/00217 id07354/Qrg89rvtZ1k/00217
id03839/wSQMEZMxxx4/00461 id03839/wSQMEZMxxx4/00461
id03127/iWeklsXc0H8/00268 id03127/iWeklsXc0H8/00268
id07663/54qlJ2HZ08s/00096 id07663/54qlJ2HZ08s/00096
id07961/Orp8s5aHYc8/00158 id07961/Orp8s5aHYc8/00158
id03347/y_F4aAkN0d8/00417 id03347/y_F4aAkN0d8/00417
id06913/KNDyf594xQg/00056 id06913/KNDyf594xQg/00056
id04366/DIgAc22fq9c/00080 id04366/DIgAc22fq9c/00080
id07396/uJPtbxlXi2c/00187 id07396/uJPtbxlXi2c/00187
id07868/gVspdH-U2XE/00290 id07868/gVspdH-U2XE/00290
id05594/u7qCFBP1nH4/00184 id05594/u7qCFBP1nH4/00184
id01541/mDoT5mpo_2c/00241 id01541/mDoT5mpo_2c/00241
id07354/0y9b8qlM170/00011 id07354/0y9b8qlM170/00011
id01460/DnnphhTlRPE/00075 id01460/DnnphhTlRPE/00075
id02548/1CNhmMmirfA/00009 id02548/1CNhmMmirfA/00009
id03127/k8z6DxdyF9w/00291 id03127/k8z6DxdyF9w/00291
id01437/zLRJ_8_M5Wg/00263 id01437/zLRJ_8_M5Wg/00263
id02576/WnbNQuJzErQ/00086 id02576/WnbNQuJzErQ/00086
id01333/M0UD9g1x18c/00128 id01333/M0UD9g1x18c/00128
id04295/1fSjOItVYVg/00001 id04295/1fSjOItVYVg/00001
id08456/8tt1LbCoU0E/00054 id08456/8tt1LbCoU0E/00054
id07494/r-ToqH_EJNs/00318 id07494/r-ToqH_EJNs/00318
id06816/XBKj9XWlZCw/00123 id06816/XBKj9XWlZCw/00123
id03030/haoNit7a4W0/00201 id03030/haoNit7a4W0/00201
id03839/aeObhOJLQzQ/00293 id03839/aeObhOJLQzQ/00293
id07868/COb1gFHXsBQ/00059 id07868/COb1gFHXsBQ/00059
id01224/eYWcMCsgkLY/00255 id01224/eYWcMCsgkLY/00255
id04006/K5ueXBlS6rc/00049 id04006/K5ueXBlS6rc/00049
id07620/G5-1CUbaz0c/00107 id07620/G5-1CUbaz0c/00107
id06104/cj0TAnwndoc/00230 id06104/cj0TAnwndoc/00230
id00061/STX1ycPt8fU/00076 id00061/STX1ycPt8fU/00076
id04478/wMbobxEQ7j8/00336 id04478/wMbobxEQ7j8/00336
id01106/7X_xtnJhEc0/00031 id01106/7X_xtnJhEc0/00031
id08374/zaYzRbE_2C8/00494 id08374/zaYzRbE_2C8/00494
id04276/MgOqCfwKE70/00173 id04276/MgOqCfwKE70/00173
id03127/Lgd5qn2-kMo/00079 id03127/Lgd5qn2-kMo/00079
id00154/xH3Pp_5yxOk/00153 id00154/xH3Pp_5yxOk/00153
id04030/7mXUMuo5_NE/00001 id04030/7mXUMuo5_NE/00001
id02542/p7bvjcLbZm4/00097 id02542/p7bvjcLbZm4/00097
id04232/T7dROCqmwNQ/00235 id04232/T7dROCqmwNQ/00235
id02548/KrXU-_jrtxY/00147 id02548/KrXU-_jrtxY/00147
id01567/SZyTC5dxJOY/00219 id01567/SZyTC5dxJOY/00219
id03524/2DD4Np7SaWw/00007 id03524/2DD4Np7SaWw/00007
id04094/DRq5F2261Ko/00072 id04094/DRq5F2261Ko/00072
id07802/HrpJg06dowY/00152 id07802/HrpJg06dowY/00152
id06816/pBt-DxsTFc8/00231 id06816/pBt-DxsTFc8/00231
id00154/2pSNL5YdcoQ/00002 id00154/2pSNL5YdcoQ/00002
id01541/C29fUBtimOE/00038 id01541/C29fUBtimOE/00038
id06310/b6qPjJ0isPI/00155 id06310/b6qPjJ0isPI/00155
id05714/wFGNufaMbDY/00025 id05714/wFGNufaMbDY/00025
id03980/m-8Ffv2RqYs/00092 id03980/m-8Ffv2RqYs/00092
id01437/uXAe0vbNWeo/00238 id01437/uXAe0vbNWeo/00238
id04232/tPZ-zVT67gs/00479 id04232/tPZ-zVT67gs/00479
id06811/ImzUwwYU6SQ/00067 id06811/ImzUwwYU6SQ/00067
id05459/wq3Z0I944wU/00436 id05459/wq3Z0I944wU/00436
id03969/Evoldg-U2_c/00024 id03969/Evoldg-U2_c/00024
id08548/BSChFozahbU/00019 id08548/BSChFozahbU/00019
id04950/PQEAck-3wcA/00134 id04950/PQEAck-3wcA/00134
id04295/G4YnExZSzlM/00066 id04295/G4YnExZSzlM/00066
id05176/mc7rFp2B1j0/00092 id05176/mc7rFp2B1j0/00092
id00812/1Xfgvdu7oDo/00001 id00812/1Xfgvdu7oDo/00001
id05459/UPSPGawaVsg/00233 id05459/UPSPGawaVsg/00233
id04656/7nG3rOv0oBw/00050 id04656/7nG3rOv0oBw/00050
id02548/nvYBpt14BrQ/00309 id02548/nvYBpt14BrQ/00309
id02317/A3AvljK8Upk/00102 id02317/A3AvljK8Upk/00102
id04478/qLNvRwMkhik/00242 id04478/qLNvRwMkhik/00242
id01228/lCDMC8JvKyU/00295 id01228/lCDMC8JvKyU/00295
id03041/5CfnYwQCW48/00001 id03041/5CfnYwQCW48/00001
id04950/LnsriCjCIV4/00116 id04950/LnsriCjCIV4/00116
id04094/plxNYSFgDTM/00384 id04094/plxNYSFgDTM/00384
id01460/30_QmGw7lmE/00030 id01460/30_QmGw7lmE/00030
id04366/6rX7hCNSjaw/00056 id04366/6rX7hCNSjaw/00056
id01041/m-xolqIq8p4/00370 id01041/m-xolqIq8p4/00370
id04950/BG4CCg2RiuQ/00052 id04950/BG4CCg2RiuQ/00052
id01989/7g0A7pF94r0/00018 id01989/7g0A7pF94r0/00018
id03382/b_NJ2Xz3G4Y/00030 id03382/b_NJ2Xz3G4Y/00030
id00812/IteHRVKyzaE/00138 id00812/IteHRVKyzaE/00138
id00061/bdkqfVtDZVY/00121 id00061/bdkqfVtDZVY/00121
id03839/YkYIh4cYwwg/00275 id03839/YkYIh4cYwwg/00275
id07354/wyTuCRGjUIQ/00477 id07354/wyTuCRGjUIQ/00477
id02057/TddnW2TaXrc/00246 id02057/TddnW2TaXrc/00246
id01989/gHVHtKTQBsw/00128 id01989/gHVHtKTQBsw/00128
id08374/bXlUHb5hxxA/00266 id08374/bXlUHb5hxxA/00266
id03862/TE2zQc8_W-g/00252 id03862/TE2zQc8_W-g/00252
id08696/86-k8TuowAE/00033 id08696/86-k8TuowAE/00033
id05176/K8yZYHg_4ro/00050 id05176/K8yZYHg_4ro/00050
id04253/SKsPkHMGHYY/00240 id04253/SKsPkHMGHYY/00240
id07874/2KK4ozkjaEE/00002 id07874/2KK4ozkjaEE/00002
id08392/g-SJYYaaLgE/00352 id08392/g-SJYYaaLgE/00352
id02542/glhCf1hwJhE/00065 id02542/glhCf1hwJhE/00065
id00817/FsL-bTbDTyw/00112 id00817/FsL-bTbDTyw/00112
id04862/IuXPj9VhUVA/00100 id04862/IuXPj9VhUVA/00100
id06811/f9-8d3lNNcw/00237 id06811/f9-8d3lNNcw/00237
id04094/JUYMzfVp8zI/00113 id04094/JUYMzfVp8zI/00113
id03347/r-xJUB0A4ok/00346 id03347/r-xJUB0A4ok/00346
id07868/MNibTv_ODQ8/00148 id07868/MNibTv_ODQ8/00148
id08392/3e5zvNaT-eU/00020 id08392/3e5zvNaT-eU/00020
id04295/bKMKvAr440A/00141 id04295/bKMKvAr440A/00141
id04295/l62YPD0ZkZI/00185 id04295/l62YPD0ZkZI/00185
id07312/RO9DsspwXiE/00047 id07312/RO9DsspwXiE/00047
id03030/rmFsUV5ICKk/00267 id03030/rmFsUV5ICKk/00267
id03677/nVWTTopGQdU/00181 id03677/nVWTTopGQdU/00181
id00866/xQ1Yy0kjvjA/00256 id00866/xQ1Yy0kjvjA/00256
|
258 |
+
id01333/fRnqtJR0rws/00371 id01333/fRnqtJR0rws/00371
|
259 |
+
id05055/AZoIKG33E8s/00115 id05055/AZoIKG33E8s/00115
|
260 |
+
id01822/_CkfCmQXII8/00098 id01822/_CkfCmQXII8/00098
|
261 |
+
id01593/_gyaAyVi6SA/00344 id01593/_gyaAyVi6SA/00344
|
262 |
+
id04295/DS3RDwf2xI8/00049 id04295/DS3RDwf2xI8/00049
|
263 |
+
id00812/EjO-VORTv_o/00098 id00812/EjO-VORTv_o/00098
|
264 |
+
id04657/WdJ_DuU0ack/00236 id04657/WdJ_DuU0ack/00236
|
265 |
+
id04232/AB9fk1MH2rA/00035 id04232/AB9fk1MH2rA/00035
|
266 |
+
id00419/chfgCUm9-Mg/00364 id00419/chfgCUm9-Mg/00364
|
267 |
+
id02577/Az0BGrX_TwI/00021 id02577/Az0BGrX_TwI/00021
|
268 |
+
id01437/hyj4OYm0cvA/00195 id01437/hyj4OYm0cvA/00195
|
269 |
+
id01593/tLFWX-IdAwI/00431 id01593/tLFWX-IdAwI/00431
|
270 |
+
id04536/MNDmkEXRS7s/00312 id04536/MNDmkEXRS7s/00312
|
271 |
+
id03789/7qhkM8qY3Fw/00077 id03789/7qhkM8qY3Fw/00077
|
272 |
+
id01593/neAk6K8BvTA/00397 id01593/neAk6K8BvTA/00397
|
273 |
+
id06484/jTHSVo6NvS4/00151 id06484/jTHSVo6NvS4/00151
|
274 |
+
id07414/cAudd_5Yv2I/00256 id07414/cAudd_5Yv2I/00256
|
275 |
+
id00866/ADzqaRZtJNA/00087 id00866/ADzqaRZtJNA/00087
|
276 |
+
id06484/ZySpn0Aj09k/00108 id06484/ZySpn0Aj09k/00108
|
277 |
+
id07312/ZHBjHQENqW8/00053 id07312/ZHBjHQENqW8/00053
|
278 |
+
id04656/LDuq2UPHKoA/00157 id04656/LDuq2UPHKoA/00157
|
279 |
+
id01509/UZL8Obdt--8/00181 id01509/UZL8Obdt--8/00181
|
280 |
+
id05816/7jt8zGB27QQ/00017 id05816/7jt8zGB27QQ/00017
|
281 |
+
id08456/7PKsuBS5LQI/00050 id08456/7PKsuBS5LQI/00050
|
282 |
+
id06913/Tx0vAZhSPuE/00077 id06913/Tx0vAZhSPuE/00077
|
283 |
+
id02465/UEmI4r5G-5Y/00117 id02465/UEmI4r5G-5Y/00117
|
284 |
+
id01460/9sefvU9y4Kw/00046 id01460/9sefvU9y4Kw/00046
|
285 |
+
id01567/uYDx0vIVy_A/00429 id01567/uYDx0vIVy_A/00429
|
286 |
+
id07961/qott7SmhA-A/00351 id07961/qott7SmhA-A/00351
|
287 |
+
id00866/Awi1Q0yib1s/00092 id00866/Awi1Q0yib1s/00092
|
288 |
+
id02086/CqJKcn8m_Xo/00152 id02086/CqJKcn8m_Xo/00152
|
289 |
+
id05015/Obbv73CqtmQ/00137 id05015/Obbv73CqtmQ/00137
|
290 |
+
id01041/1UYZqPpavtk/00001 id01041/1UYZqPpavtk/00001
|
291 |
+
id01593/GiLxqKSI68o/00188 id01593/GiLxqKSI68o/00188
|
292 |
+
id02317/IR0psXbOjdc/00176 id02317/IR0psXbOjdc/00176
|
293 |
+
id01066/X33aJxc3Kt0/00112 id01066/X33aJxc3Kt0/00112
|
294 |
+
id08456/VU3fkD-QqPw/00206 id08456/VU3fkD-QqPw/00206
|
295 |
+
id04536/wat5sbCSs0k/00470 id04536/wat5sbCSs0k/00470
|
296 |
+
id01066/4KOSmyAMipc/00020 id01066/4KOSmyAMipc/00020
|
297 |
+
id02445/f5u3ktNPHAk/00074 id02445/f5u3ktNPHAk/00074
|
298 |
+
id03041/NJUcU7j30JI/00011 id03041/NJUcU7j30JI/00011
|
299 |
+
id00817/vUezvJDh_tA/00394 id00817/vUezvJDh_tA/00394
|
300 |
+
id04478/sw50KQMY8vw/00298 id04478/sw50KQMY8vw/00298
|
301 |
+
id04657/hMrgeYf5ToQ/00267 id04657/hMrgeYf5ToQ/00267
|
302 |
+
id02548/VdjlKRtLD_w/00206 id02548/VdjlKRtLD_w/00206
|
303 |
+
id06310/4oJF1NW2bIg/00006 id06310/4oJF1NW2bIg/00006
|
304 |
+
id01509/jqbtAt91alI/00329 id01509/jqbtAt91alI/00329
|
305 |
+
id07414/oXx9CvIeFFY/00407 id07414/oXx9CvIeFFY/00407
|
306 |
+
id04570/mwhiZtTZYX0/00271 id04570/mwhiZtTZYX0/00271
|
307 |
+
id00812/AzDjo0Uyk4Y/00061 id00812/AzDjo0Uyk4Y/00061
|
308 |
+
id05999/MJwLq17VoMA/00146 id05999/MJwLq17VoMA/00146
|
309 |
+
id07414/dsqrI97WQHE/00319 id07414/dsqrI97WQHE/00319
|
310 |
+
id05015/C3KsCD-pUgs/00046 id05015/C3KsCD-pUgs/00046
|
311 |
+
id06484/Gh6H7Md_L2k/00053 id06484/Gh6H7Md_L2k/00053
|
312 |
+
id00081/xlwJqdrzeMA/00291 id00081/xlwJqdrzeMA/00291
|
313 |
+
id05055/RLN5nKfza4A/00219 id05055/RLN5nKfza4A/00219
|
314 |
+
id05055/OKw_hph-hK8/00197 id05055/OKw_hph-hK8/00197
|
315 |
+
id03839/xtBkY9xYpjA/00464 id03839/xtBkY9xYpjA/00464
|
316 |
+
id07620/HEX00yF8LTs/00117 id07620/HEX00yF8LTs/00117
|
317 |
+
id05816/hjrZgsKuvpw/00349 id05816/hjrZgsKuvpw/00349
|
318 |
+
id02548/6LPbT49zy38/00050 id02548/6LPbT49zy38/00050
|
319 |
+
id01000/7eYakM6qrTs/00006 id01000/7eYakM6qrTs/00006
|
320 |
+
id02181/cNCj0pLxR24/00084 id02181/cNCj0pLxR24/00084
|
321 |
+
id02086/sSliWvu6Ufs/00453 id02086/sSliWvu6Ufs/00453
|
322 |
+
id03178/KHelFt1Jyyg/00057 id03178/KHelFt1Jyyg/00057
|
323 |
+
id05594/8dYcSoUAQO8/00014 id05594/8dYcSoUAQO8/00014
|
324 |
+
id05015/JmvJemqIeS0/00102 id05015/JmvJemqIeS0/00102
|
325 |
+
id00081/EvCyt2keqW4/00065 id00081/EvCyt2keqW4/00065
|
326 |
+
id07663/QWe7IIGrv5s/00146 id07663/QWe7IIGrv5s/00146
|
327 |
+
id01618/kzxW2WAFWLI/00126 id01618/kzxW2WAFWLI/00126
|
328 |
+
id00562/X7FJ3M3bz3c/00124 id00562/X7FJ3M3bz3c/00124
|
329 |
+
id07961/bvPOvzukTE4/00224 id07961/bvPOvzukTE4/00224
|
330 |
+
id03789/nv8sQplhvX0/00357 id03789/nv8sQplhvX0/00357
|
331 |
+
id04295/VUHarbuO_eE/00125 id04295/VUHarbuO_eE/00125
|
332 |
+
id01822/IaBziWYcwK4/00037 id01822/IaBziWYcwK4/00037
|
333 |
+
id05015/X1opVctkTE8/00170 id05015/X1opVctkTE8/00170
|
334 |
+
id01041/MMXznNig_iU/00248 id01041/MMXznNig_iU/00248
|
335 |
+
id02465/EZ_F0hUZdS4/00054 id02465/EZ_F0hUZdS4/00054
|
336 |
+
id04656/Bi7kCsbg5L0/00061 id04656/Bi7kCsbg5L0/00061
|
337 |
+
id07494/K4ndWNAHgdU/00093 id07494/K4ndWNAHgdU/00093
|
338 |
+
id07354/TKTT7fArInQ/00218 id07354/TKTT7fArInQ/00218
|
339 |
+
id05714/Lu4PPvWXGn8/00014 id05714/Lu4PPvWXGn8/00014
|
340 |
+
id05654/07pANazoyJg/00001 id05654/07pANazoyJg/00001
|
341 |
+
id01066/FDp-ZLCWrIc/00054 id01066/FDp-ZLCWrIc/00054
|
342 |
+
id05999/ZQJVmCJFjNs/00182 id05999/ZQJVmCJFjNs/00182
|
343 |
+
id04570/5Fg6CLuRntk/00041 id04570/5Fg6CLuRntk/00041
|
344 |
+
id08696/vqLNqYW4TQA/00476 id08696/vqLNqYW4TQA/00476
|
345 |
+
id04862/2uYHadPvHRU/00016 id04862/2uYHadPvHRU/00016
|
346 |
+
id03980/7MRUusImkno/00001 id03980/7MRUusImkno/00001
|
347 |
+
id02542/QJKFnt1lHeE/00035 id02542/QJKFnt1lHeE/00035
|
348 |
+
id04536/OYH-6uGB6jI/00322 id04536/OYH-6uGB6jI/00322
|
349 |
+
id06484/dOTMnYZcY9Q/00126 id06484/dOTMnYZcY9Q/00126
|
350 |
+
id04478/GZQGZOmFU5U/00063 id04478/GZQGZOmFU5U/00063
|
351 |
+
id01224/tELp6C7FELU/00421 id01224/tELp6C7FELU/00421
|
352 |
+
id03862/5m5iPZNJS6c/00022 id03862/5m5iPZNJS6c/00022
|
353 |
+
id05124/lcDhSnyeN5E/00381 id05124/lcDhSnyeN5E/00381
|
354 |
+
id08149/3V9V5sDAWTc/00001 id08149/3V9V5sDAWTc/00001
|
355 |
+
id02181/iEF0MWApQms/00108 id02181/iEF0MWApQms/00108
|
356 |
+
id04536/xrsxSF2qey8/00471 id04536/xrsxSF2qey8/00471
|
357 |
+
id03178/9AJzTUwGbRk/00005 id03178/9AJzTUwGbRk/00005
|
358 |
+
id01041/Izmh75CZNW0/00207 id01041/Izmh75CZNW0/00207
|
359 |
+
id03041/g5YLpUZBNKc/00018 id03041/g5YLpUZBNKc/00018
|
360 |
+
id03347/nSAKXYdEOOM/00297 id03347/nSAKXYdEOOM/00297
|
361 |
+
id03347/pPWGEPixOoM/00337 id03347/pPWGEPixOoM/00337
|
362 |
+
id07312/XBBpLMEjfUo/00048 id07312/XBBpLMEjfUo/00048
|
363 |
+
id08456/6QFe7cYnZk4/00023 id08456/6QFe7cYnZk4/00023
|
364 |
+
id05176/5Hk_hj0oXN8/00004 id05176/5Hk_hj0oXN8/00004
|
365 |
+
id07426/DBBfi7aKLx4/00038 id07426/DBBfi7aKLx4/00038
|
366 |
+
id07494/uhPKcTLLwcM/00347 id07494/uhPKcTLLwcM/00347
|
367 |
+
id02576/agxjz_O2Wfs/00088 id02576/agxjz_O2Wfs/00088
|
368 |
+
id01541/SvTz_Pn15Vk/00119 id01541/SvTz_Pn15Vk/00119
|
369 |
+
id07414/Uxggn91FBog/00214 id07414/Uxggn91FBog/00214
|
370 |
+
id04253/1HOlzefgLu8/00001 id04253/1HOlzefgLu8/00001
|
371 |
+
id01567/RPUd0ua7RR0/00216 id01567/RPUd0ua7RR0/00216
|
372 |
+
id04657/5DzZTPLgwTM/00044 id04657/5DzZTPLgwTM/00044
|
373 |
+
id04006/zSMWS35kYdQ/00253 id04006/zSMWS35kYdQ/00253
|
374 |
+
id03347/KT7B07WFWyM/00104 id03347/KT7B07WFWyM/00104
|
375 |
+
id02445/z5u4yO1EsZo/00109 id02445/z5u4yO1EsZo/00109
|
376 |
+
id00154/z1dLArSg5PQ/00190 id00154/z1dLArSg5PQ/00190
|
377 |
+
id07414/Cn6Ws4oK1jg/00095 id07414/Cn6Ws4oK1jg/00095
|
378 |
+
id02286/WHS1n7XUt_8/00103 id02286/WHS1n7XUt_8/00103
|
379 |
+
id01509/Zmmnr4iRsCM/00230 id01509/Zmmnr4iRsCM/00230
|
380 |
+
id04276/tGOA4fVnSgw/00448 id04276/tGOA4fVnSgw/00448
|
381 |
+
id00419/nu9cRW2J4Dk/00420 id00419/nu9cRW2J4Dk/00420
|
382 |
+
id07868/6RQX9l98N-g/00002 id07868/6RQX9l98N-g/00002
|
383 |
+
id03839/1lh57VnuaKE/00004 id03839/1lh57VnuaKE/00004
|
384 |
+
id03178/LT-BNQKA9NU/00075 id03178/LT-BNQKA9NU/00075
|
385 |
+
id01460/Es6CkRmkIBY/00080 id01460/Es6CkRmkIBY/00080
|
386 |
+
id06692/T2Xk7MO6m2g/00297 id06692/T2Xk7MO6m2g/00297
|
387 |
+
id01892/d8b9y_CRE3M/00102 id01892/d8b9y_CRE3M/00102
|
388 |
+
id07426/K_25cVSB-JU/00063 id07426/K_25cVSB-JU/00063
|
389 |
+
id01333/LI6eLfuTn6I/00127 id01333/LI6eLfuTn6I/00127
|
390 |
+
id00081/hIBFutPzn8s/00158 id00081/hIBFutPzn8s/00158
|
391 |
+
id04536/2j8I_WX5mhY/00009 id04536/2j8I_WX5mhY/00009
|
392 |
+
id04232/UElg0R7fmlk/00253 id04232/UElg0R7fmlk/00253
|
393 |
+
id01460/eZR__GGkVw4/00221 id01460/eZR__GGkVw4/00221
|
394 |
+
id01041/GymfYtTsKEU/00119 id01041/GymfYtTsKEU/00119
|
395 |
+
id07396/xK1gClL60tY/00191 id07396/xK1gClL60tY/00191
|
396 |
+
id05459/81o3ictaOnU/00075 id05459/81o3ictaOnU/00075
|
397 |
+
id02685/yN8ilDTW-o4/00114 id02685/yN8ilDTW-o4/00114
|
398 |
+
id02286/c8LjgwDQAkw/00137 id02286/c8LjgwDQAkw/00137
|
399 |
+
id01541/SWcGs-DbV9Q/00100 id01541/SWcGs-DbV9Q/00100
|
400 |
+
id01822/x4Fr2ceg_f8/00231 id01822/x4Fr2ceg_f8/00231
|
401 |
+
id03347/FKY5V8wmX5k/00043 id03347/FKY5V8wmX5k/00043
|
402 |
+
id00817/0GmSijZelGY/00001 id00817/0GmSijZelGY/00001
|
403 |
+
id06209/ahL3F1x5sE4/00091 id06209/ahL3F1x5sE4/00091
|
404 |
+
id06692/4k3Eo5s1Rwo/00057 id06692/4k3Eo5s1Rwo/00057
|
405 |
+
id09017/sduESYpj2-I/00297 id09017/sduESYpj2-I/00297
|
406 |
+
id07354/grg37qaxKjI/00329 id07354/grg37qaxKjI/00329
|
407 |
+
id07802/X8I5FN64_Oc/00199 id07802/X8I5FN64_Oc/00199
|
408 |
+
id07494/JV5S_SUcHmI/00088 id07494/JV5S_SUcHmI/00088
|
409 |
+
id03524/eHrI5bD8hSs/00282 id03524/eHrI5bD8hSs/00282
|
410 |
+
id01460/HNjuGz9ayBk/00109 id01460/HNjuGz9ayBk/00109
|
411 |
+
id04570/961AefP1-is/00056 id04570/961AefP1-is/00056
|
412 |
+
id00419/749eTxP4Us8/00061 id00419/749eTxP4Us8/00061
|
413 |
+
id00017/OLguY5ofUrY/00039 id00017/OLguY5ofUrY/00039
|
414 |
+
id08392/RogKVSjaAH0/00293 id08392/RogKVSjaAH0/00293
|
415 |
+
id01066/lI1wGa1UhEM/00205 id01066/lI1wGa1UhEM/00205
|
416 |
+
id07621/zSdriAuJUKo/00485 id07621/zSdriAuJUKo/00485
|
417 |
+
id03862/JBkaiUNeMmk/00166 id03862/JBkaiUNeMmk/00166
|
418 |
+
id00017/E6aqL_Nc410/00027 id00017/E6aqL_Nc410/00027
|
419 |
+
id03839/fi-g--cBwnU/00348 id03839/fi-g--cBwnU/00348
|
420 |
+
id05654/eLztZmvnk-k/00095 id05654/eLztZmvnk-k/00095
|
421 |
+
id02548/wF5HfFXZCBI/00349 id02548/wF5HfFXZCBI/00349
|
422 |
+
id02576/LAipS5WJ29s/00075 id02576/LAipS5WJ29s/00075
|
423 |
+
id06692/SEPs17_AkTI/00295 id06692/SEPs17_AkTI/00295
|
424 |
+
id05459/kkaYxtBZnNo/00348 id05459/kkaYxtBZnNo/00348
|
425 |
+
id04232/MEGVEqgGCME/00167 id04232/MEGVEqgGCME/00167
|
426 |
+
id01989/8CUktsB_2bA/00031 id01989/8CUktsB_2bA/00031
|
427 |
+
id01066/kqP_NZ1FRlM/00176 id01066/kqP_NZ1FRlM/00176
|
428 |
+
id03382/ockh8KdXJP8/00059 id03382/ockh8KdXJP8/00059
|
429 |
+
id01593/pO180haP_vo/00410 id01593/pO180haP_vo/00410
|
430 |
+
id07396/nTQDZrnGXXY/00179 id07396/nTQDZrnGXXY/00179
|
431 |
+
id03030/rg-VUeksKaU/00257 id03030/rg-VUeksKaU/00257
|
432 |
+
id08911/IddDkZwRflE/00053 id08911/IddDkZwRflE/00053
|
433 |
+
id02317/K2GT02zavxo/00193 id02317/K2GT02zavxo/00193
|
434 |
+
id01298/5P4ldDRuo5c/00065 id01298/5P4ldDRuo5c/00065
|
435 |
+
id01989/Evbf6fMJNmk/00060 id01989/Evbf6fMJNmk/00060
|
436 |
+
id05124/fNJI2A0v8yI/00357 id05124/fNJI2A0v8yI/00357
|
437 |
+
id02465/RLi2ItGherA/00098 id02465/RLi2ItGherA/00098
|
438 |
+
id07868/qMNfMcG6sh0/00346 id07868/qMNfMcG6sh0/00346
|
439 |
+
id04366/tmoYV4kPOGU/00246 id04366/tmoYV4kPOGU/00246
|
440 |
+
id06484/_ZkoebnFkVA/00110 id06484/_ZkoebnFkVA/00110
|
441 |
+
id04276/I9gCyrZWFn0/00097 id04276/I9gCyrZWFn0/00097
|
442 |
+
id03978/IMn6f0iDOtE/00032 id03978/IMn6f0iDOtE/00032
|
443 |
+
id00419/w_0sK8WuSsg/00472 id00419/w_0sK8WuSsg/00472
|
444 |
+
id04478/RwcHXQ3MvsQ/00109 id04478/RwcHXQ3MvsQ/00109
|
445 |
+
id08696/cUmyIjpOYlY/00360 id08696/cUmyIjpOYlY/00360
|
446 |
+
id04366/DqBQx6AZ1Nk/00083 id04366/DqBQx6AZ1Nk/00083
|
447 |
+
id05459/RhOon49C3g8/00201 id05459/RhOon49C3g8/00201
|
448 |
+
id04656/OzgjshkHUiA/00166 id04656/OzgjshkHUiA/00166
|
449 |
+
id03969/x38Sqv819yE/00110 id03969/x38Sqv819yE/00110
|
450 |
+
id00061/0G9G9oyFHI8/00001 id00061/0G9G9oyFHI8/00001
|
451 |
+
id06913/IreNhnVfTkQ/00043 id06913/IreNhnVfTkQ/00043
|
452 |
+
id01618/NqYUgbuImpk/00096 id01618/NqYUgbuImpk/00096
|
453 |
+
id08552/y05_B9NXizo/00237 id08552/y05_B9NXizo/00237
|
454 |
+
id01460/zcTt06bjKuA/00365 id01460/zcTt06bjKuA/00365
|
455 |
+
id00866/nI-zVYcQX40/00220 id00866/nI-zVYcQX40/00220
|
456 |
+
id08374/9eMfNJiKBPQ/00056 id08374/9eMfNJiKBPQ/00056
|
457 |
+
id03524/nKxz0LxKZ58/00344 id03524/nKxz0LxKZ58/00344
|
458 |
+
id09017/A3CAugN2cjk/00021 id09017/A3CAugN2cjk/00021
|
459 |
+
id02685/NtHmnSLaGCA/00036 id02685/NtHmnSLaGCA/00036
|
460 |
+
id01224/atjwjz0vAk8/00213 id01224/atjwjz0vAk8/00213
|
461 |
+
id07961/gvLf2DggTu0/00271 id07961/gvLf2DggTu0/00271
|
462 |
+
id01567/CCs8rZLCdVw/00043 id01567/CCs8rZLCdVw/00043
|
463 |
+
id03347/nbmPriSE9NY/00316 id03347/nbmPriSE9NY/00316
|
464 |
+
id06104/snzG1OymFgs/00273 id06104/snzG1OymFgs/00273
|
465 |
+
id02019/xsXm-MSuD-E/00290 id02019/xsXm-MSuD-E/00290
|
466 |
+
id00061/VugwXDj1ka4/00088 id00061/VugwXDj1ka4/00088
|
467 |
+
id01224/4z68GFZuYKU/00028 id01224/4z68GFZuYKU/00028
|
468 |
+
id03839/ajkGXKUvTWY/00296 id03839/ajkGXKUvTWY/00296
|
469 |
+
id07874/N7fMpS_yaF4/00047 id07874/N7fMpS_yaF4/00047
|
470 |
+
id05124/fRhAX7v_R6A/00365 id05124/fRhAX7v_R6A/00365
|
471 |
+
id02181/ci_22Oqhwtc/00088 id02181/ci_22Oqhwtc/00088
|
472 |
+
id07414/njxmqS9ncTA/00399 id07414/njxmqS9ncTA/00399
|
473 |
+
id05176/yEMRxKA0vSw/00101 id05176/yEMRxKA0vSw/00101
|
474 |
+
id03862/VVaxYHNmtA8/00269 id03862/VVaxYHNmtA8/00269
|
475 |
+
id07396/X6KkvYh6rPA/00148 id07396/X6KkvYh6rPA/00148
|
476 |
+
id06310/TkxTnoic67U/00130 id06310/TkxTnoic67U/00130
|
477 |
+
id08374/Yh9O9ETuF_0/00250 id08374/Yh9O9ETuF_0/00250
|
478 |
+
id02317/5moKZXlJTEs/00058 id02317/5moKZXlJTEs/00058
|
479 |
+
id04536/EDCwhtRFARA/00172 id04536/EDCwhtRFARA/00172
|
480 |
+
id03789/pz1jGMsPY9M/00381 id03789/pz1jGMsPY9M/00381
|
481 |
+
id03127/wzS06bKAZ48/00354 id03127/wzS06bKAZ48/00354
|
482 |
+
id08911/wedpC4fN4YY/00096 id08911/wedpC4fN4YY/00096
|
483 |
+
id01106/6SFpvp42pMA/00014 id01106/6SFpvp42pMA/00014
|
484 |
+
id02465/6jp5YsZYtHI/00021 id02465/6jp5YsZYtHI/00021
|
485 |
+
id01618/Ay_BKx5-JOc/00046 id01618/Ay_BKx5-JOc/00046
|
486 |
+
id04478/x07vvSVm2Yo/00363 id04478/x07vvSVm2Yo/00363
|
487 |
+
id01593/u5AgUWl3fFU/00437 id01593/u5AgUWl3fFU/00437
|
488 |
+
id03030/IpwcoJajjJI/00124 id03030/IpwcoJajjJI/00124
|
489 |
+
id01593/t9TUbyp3xfs/00423 id01593/t9TUbyp3xfs/00423
|
490 |
+
id07414/hUxcsEMKssA/00320 id07414/hUxcsEMKssA/00320
|
491 |
+
id04366/L-56A5RNeWg/00124 id04366/L-56A5RNeWg/00124
|
492 |
+
id07961/3EPjXGhfst4/00001 id07961/3EPjXGhfst4/00001
|
493 |
+
id00061/mMOd25Ag7XY/00239 id00061/mMOd25Ag7XY/00239
|
494 |
+
id01567/RQMG0K5AchU/00218 id01567/RQMG0K5AchU/00218
|
495 |
+
id08552/PL5vk3XeKRM/00114 id08552/PL5vk3XeKRM/00114
|
496 |
+
id04862/eX3wAZ0yr7w/00260 id04862/eX3wAZ0yr7w/00260
|
497 |
+
id02086/CBNOvx4Phxw/00146 id02086/CBNOvx4Phxw/00146
|
498 |
+
id01228/3wAkCYQR3fQ/00011 id01228/3wAkCYQR3fQ/00011
|
499 |
+
id06484/MXwPpo1Dg7U/00073 id06484/MXwPpo1Dg7U/00073
|
500 |
+
id01460/9fJy9zGdESI/00045 id01460/9fJy9zGdESI/00045
|
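Both filelists share one plain-text format: two whitespace-separated VoxCeleb2-style relative paths (speaker_id/video_id/clip_id) per line. In this reconstruction list the two fields are identical; in the cross list that follows they pair two different clips. A minimal parsing sketch (assuming it is run from the repo root, where the filelist path above resolves):

```python
# Minimal sketch: read a filelist into (source, target) clip-path pairs.
# In the reconstruction list both fields match; in the cross list they differ.
with open('dataset/filelists/voxceleb2_test_n_500_reconstruction.txt') as f:
    pairs = [tuple(line.split()) for line in f if line.strip()]

for src, tgt in pairs[:3]:
    print(src, '->', tgt)
```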
dataset/filelists/voxceleb2_test_n_500_seed_797_cross.txt
ADDED
@@ -0,0 +1,500 @@
+id05459/18XmQEiGLnQ/00001 id07961/3EPjXGhfst4/00001
+id03980/7MRUusImkno/00001 id08696/0H1PxInJCK0/00001
+id05654/07pANazoyJg/00001 id04570/0YMGn6BI9rg/00001
+id00817/0GmSijZelGY/00001 id07354/0NjekFZqaY0/00001
+id05202/2gnLcAbAoSc/00001 id00817/0GmSijZelGY/00001
+id03041/5CfnYwQCW48/00001 id07354/0NjekFZqaY0/00001
+id03980/7MRUusImkno/00001 id07621/0CiFdFegqZM/00001
+id05850/B8kp8ed48JE/00001 id04253/1HOlzefgLu8/00001
+id01298/2K5F6xG-Rbs/00001 id05816/1dyCBbJ94iw/00001
+id07494/0P1wPmgz0Bk/00001 id07621/0CiFdFegqZM/00001
+id06913/4Ug7aJemzpg/00001 id04030/7mXUMuo5_NE/00001
+id02286/4LAIxvdvguc/00001 id05850/B8kp8ed48JE/00001
+id02548/0pAkJZmlFqc/00001 id05459/18XmQEiGLnQ/00001
+id08456/29EhSZDqzas/00001 id04295/1fSjOItVYVg/00001
+id04295/1fSjOItVYVg/00001 id02685/4JDRxqYC0a4/00001
+id04276/5M8NmCwTHZ0/00001 id05654/07pANazoyJg/00001
+id03030/5wOxV1wAgqA/00001 id03041/5CfnYwQCW48/00001
+id04656/1tZYt8jey54/00001 id07961/3EPjXGhfst4/00001
+id03980/7MRUusImkno/00001 id04536/0f_Yi_1CoeM/00001
+id03980/7MRUusImkno/00001 id05202/2gnLcAbAoSc/00001
+id01298/2K5F6xG-Rbs/00001 id04862/0zJh2FMTaDE/00001
+id02548/0pAkJZmlFqc/00001 id04478/2grMtwdG93I/00001
+id02685/4JDRxqYC0a4/00001 id01892/3vKPgjwFjbo/00001
+id07494/0P1wPmgz0Bk/00001 id04656/1tZYt8jey54/00001
+id00812/1Xfgvdu7oDo/00001 id00926/2Nd7f1yNQzE/00001
+id07426/1KNFfOFEhyI/00001 id03030/5wOxV1wAgqA/00001
+id00866/03SSllwNkGk/00001 id00812/1Xfgvdu7oDo/00001
+id04570/0YMGn6BI9rg/00001 id01892/3vKPgjwFjbo/00001
+id03041/5CfnYwQCW48/00001 id04030/7mXUMuo5_NE/00001
+id07494/0P1wPmgz0Bk/00001 id00081/2xYrsnvtUWc/00001
+id08392/0fwuibKviJU/00001 id05015/0Cu3AvWWOFI/00001
+id06692/2ptBBNIZXtI/00001 id04536/0f_Yi_1CoeM/00001
+id04253/1HOlzefgLu8/00001 id06104/02L1L9RFAgI/00001
+id02725/37kUrf6RJdw/00001 id02685/4JDRxqYC0a4/00001
+id04006/113VkmVVz1Q/00001 id04119/1uH67UruKlE/00001
+id01567/1Lx_ZqrK1bM/00001 id04030/7mXUMuo5_NE/00001
+id02445/3Rnk8eja3TU/00001 id05816/1dyCBbJ94iw/00001
+id03041/5CfnYwQCW48/00001 id03347/4xXZ75_TeSM/00001
+id04570/0YMGn6BI9rg/00001 id02317/0q4X8kPTlEY/00001
+id07426/1KNFfOFEhyI/00001 id01822/0QcHowaLAF0/00001
+id02577/0euHS_r5JH4/00001 id02725/37kUrf6RJdw/00001
+id07354/0NjekFZqaY0/00001 id05459/18XmQEiGLnQ/00001
+id06692/2ptBBNIZXtI/00001 id05850/B8kp8ed48JE/00001
+id01822/0QcHowaLAF0/00001 id07961/3EPjXGhfst4/00001
+id04366/0iG2Ub9zETM/00001 id03347/4xXZ75_TeSM/00001
+id03030/5wOxV1wAgqA/00001 id03789/0kdVSujPa9g/00001
+id04366/0iG2Ub9zETM/00001 id02286/4LAIxvdvguc/00001
+id00926/2Nd7f1yNQzE/00001 id02548/0pAkJZmlFqc/00001
+id03030/5wOxV1wAgqA/00001 id01298/2K5F6xG-Rbs/00001
+id01892/3vKPgjwFjbo/00001 id02317/0q4X8kPTlEY/00001
+id05202/2gnLcAbAoSc/00001 id04253/1HOlzefgLu8/00001
+id05714/2gvpaZcvAY4/00001 id06692/2ptBBNIZXtI/00001
+id07621/0CiFdFegqZM/00001 id07802/0RUpqvi3sPU/00001
+id03030/5wOxV1wAgqA/00001 id02317/0q4X8kPTlEY/00001
+id01822/0QcHowaLAF0/00001 id02445/3Rnk8eja3TU/00001
+id07961/3EPjXGhfst4/00001 id05459/18XmQEiGLnQ/00001
+id04950/2n4sGPqU9M8/00001 id07426/1KNFfOFEhyI/00001
+id04862/0zJh2FMTaDE/00001 id02465/0Ocu8l1eAng/00001
+id06104/02L1L9RFAgI/00001 id07312/0LWllHGohPY/00001
+id07414/110UMQovTR0/00001 id06692/2ptBBNIZXtI/00001
+id05015/0Cu3AvWWOFI/00001 id08696/0H1PxInJCK0/00001
+id02181/02gIO4WrZLY/00001 id00812/1Xfgvdu7oDo/00001
+id08392/0fwuibKviJU/00001 id01041/1UYZqPpavtk/00001
+id03347/4xXZ75_TeSM/00001 id04950/2n4sGPqU9M8/00001
+id07312/0LWllHGohPY/00001 id04950/2n4sGPqU9M8/00001
+id05202/2gnLcAbAoSc/00001 id05654/07pANazoyJg/00001
+id01041/1UYZqPpavtk/00001 id02317/0q4X8kPTlEY/00001
+id02057/0xZU7Oi9nvM/00001 id03178/2CT-6fnBC_o/00001
+id04006/113VkmVVz1Q/00001 id00817/0GmSijZelGY/00001
+id05850/B8kp8ed48JE/00001 id01892/3vKPgjwFjbo/00001
+id08696/0H1PxInJCK0/00001 id06692/2ptBBNIZXtI/00001
+id02057/0xZU7Oi9nvM/00001 id01541/2P7hzPq5iDw/00001
+id04006/113VkmVVz1Q/00001 id02057/0xZU7Oi9nvM/00001
+id04276/5M8NmCwTHZ0/00001 id04570/0YMGn6BI9rg/00001
+id07868/5YYJq3fSbH8/00001 id03030/5wOxV1wAgqA/00001
+id00812/1Xfgvdu7oDo/00001 id00154/0hjW3eTGAy8/00001
+id06692/2ptBBNIZXtI/00001 id05594/0ohBiepcHWI/00001
+id04536/0f_Yi_1CoeM/00001 id05202/2gnLcAbAoSc/00001
+id06310/1IAgr_CRnuE/00001 id05816/1dyCBbJ94iw/00001
+id01541/2P7hzPq5iDw/00001 id00419/1zffAxBod_c/00001
+id07354/0NjekFZqaY0/00001 id00866/03SSllwNkGk/00001
+id03347/4xXZ75_TeSM/00001 id02577/0euHS_r5JH4/00001
+id04119/1uH67UruKlE/00001 id04006/113VkmVVz1Q/00001
+id05714/2gvpaZcvAY4/00001 id07961/3EPjXGhfst4/00001
+id06104/02L1L9RFAgI/00001 id03178/2CT-6fnBC_o/00001
+id07354/0NjekFZqaY0/00001 id02445/3Rnk8eja3TU/00001
+id04030/7mXUMuo5_NE/00001 id03030/5wOxV1wAgqA/00001
+id07312/0LWllHGohPY/00001 id04536/0f_Yi_1CoeM/00001
+id03839/1jWHvl2qCq0/00001 id07802/0RUpqvi3sPU/00001
+id07621/0CiFdFegqZM/00001 id05816/1dyCBbJ94iw/00001
+id03839/1jWHvl2qCq0/00001 id03980/7MRUusImkno/00001
+id03030/5wOxV1wAgqA/00001 id02445/3Rnk8eja3TU/00001
+id03862/0w8W8jp7MJk/00001 id04253/1HOlzefgLu8/00001
+id05714/2gvpaZcvAY4/00001 id04119/1uH67UruKlE/00001
+id08392/0fwuibKviJU/00001 id07868/5YYJq3fSbH8/00001
+id01298/2K5F6xG-Rbs/00001 id03030/5wOxV1wAgqA/00001
+id05459/18XmQEiGLnQ/00001 id00817/0GmSijZelGY/00001
+id05850/B8kp8ed48JE/00001 id06692/2ptBBNIZXtI/00001
+id04295/1fSjOItVYVg/00001 id08456/29EhSZDqzas/00001
+id04570/0YMGn6BI9rg/00001 id02057/0xZU7Oi9nvM/00001
+id01541/2P7hzPq5iDw/00001 id00817/0GmSijZelGY/00001
+id07426/1KNFfOFEhyI/00001 id07354/0NjekFZqaY0/00001
+id04253/1HOlzefgLu8/00001 id06209/2zM9EAPsZZQ/00001
+id05850/B8kp8ed48JE/00001 id08392/0fwuibKviJU/00001
+id07802/0RUpqvi3sPU/00001 id02465/0Ocu8l1eAng/00001
+id04119/1uH67UruKlE/00001 id04862/0zJh2FMTaDE/00001
+id01541/2P7hzPq5iDw/00001 id08696/0H1PxInJCK0/00001
+id08696/0H1PxInJCK0/00001 id07802/0RUpqvi3sPU/00001
+id01228/2TIFacjgehY/00001 id07621/0CiFdFegqZM/00001
+id03178/2CT-6fnBC_o/00001 id07868/5YYJq3fSbH8/00001
+id05654/07pANazoyJg/00001 id01298/2K5F6xG-Rbs/00001
+id01822/0QcHowaLAF0/00001 id02548/0pAkJZmlFqc/00001
+id01618/0iFlmfmWVlY/00001 id08696/0H1PxInJCK0/00001
+id00812/1Xfgvdu7oDo/00001 id08456/29EhSZDqzas/00001
+id05594/0ohBiepcHWI/00001 id07312/0LWllHGohPY/00001
+id05714/2gvpaZcvAY4/00001 id06104/02L1L9RFAgI/00001
+id02445/3Rnk8eja3TU/00001 id07426/1KNFfOFEhyI/00001
+id05714/2gvpaZcvAY4/00001 id00817/0GmSijZelGY/00001
+id08696/0H1PxInJCK0/00001 id02317/0q4X8kPTlEY/00001
+id04950/2n4sGPqU9M8/00001 id04478/2grMtwdG93I/00001
+id01228/2TIFacjgehY/00001 id07414/110UMQovTR0/00001
+id00926/2Nd7f1yNQzE/00001 id01541/2P7hzPq5iDw/00001
+id05714/2gvpaZcvAY4/00001 id06913/4Ug7aJemzpg/00001
+id01228/2TIFacjgehY/00001 id03862/0w8W8jp7MJk/00001
+id03030/5wOxV1wAgqA/00001 id05015/0Cu3AvWWOFI/00001
+id02548/0pAkJZmlFqc/00001 id06692/2ptBBNIZXtI/00001
+id05202/2gnLcAbAoSc/00001 id04119/1uH67UruKlE/00001
+id04656/1tZYt8jey54/00001 id07426/1KNFfOFEhyI/00001
+id07312/0LWllHGohPY/00001 id03980/7MRUusImkno/00001
+id04366/0iG2Ub9zETM/00001 id00817/0GmSijZelGY/00001
+id07961/3EPjXGhfst4/00001 id01228/2TIFacjgehY/00001
+id00154/0hjW3eTGAy8/00001 id04295/1fSjOItVYVg/00001
+id04478/2grMtwdG93I/00001 id00154/0hjW3eTGAy8/00001
+id04570/0YMGn6BI9rg/00001 id05202/2gnLcAbAoSc/00001
+id04478/2grMtwdG93I/00001 id06913/4Ug7aJemzpg/00001
+id06104/02L1L9RFAgI/00001 id04295/1fSjOItVYVg/00001
+id05816/1dyCBbJ94iw/00001 id08392/0fwuibKviJU/00001
+id00926/2Nd7f1yNQzE/00001 id04536/0f_Yi_1CoeM/00001
+id00926/2Nd7f1yNQzE/00001 id02181/02gIO4WrZLY/00001
+id05459/18XmQEiGLnQ/00001 id02317/0q4X8kPTlEY/00001
+id05594/0ohBiepcHWI/00001 id01228/2TIFacjgehY/00001
+id02181/02gIO4WrZLY/00001 id07312/0LWllHGohPY/00001
+id00154/0hjW3eTGAy8/00001 id03839/1jWHvl2qCq0/00001
+id04030/7mXUMuo5_NE/00001 id02725/37kUrf6RJdw/00001
+id04295/1fSjOItVYVg/00001 id05714/2gvpaZcvAY4/00001
+id02548/0pAkJZmlFqc/00001 id04570/0YMGn6BI9rg/00001
+id04478/2grMtwdG93I/00001 id00866/03SSllwNkGk/00001
+id03030/5wOxV1wAgqA/00001 id04366/0iG2Ub9zETM/00001
+id02685/4JDRxqYC0a4/00001 id07426/1KNFfOFEhyI/00001
+id07802/0RUpqvi3sPU/00001 id07312/0LWllHGohPY/00001
+id02317/0q4X8kPTlEY/00001 id01892/3vKPgjwFjbo/00001
+id00154/0hjW3eTGAy8/00001 id00866/03SSllwNkGk/00001
+id02181/02gIO4WrZLY/00001 id02685/4JDRxqYC0a4/00001
+id03178/2CT-6fnBC_o/00001 id05459/18XmQEiGLnQ/00001
+id00926/2Nd7f1yNQzE/00001 id05202/2gnLcAbAoSc/00001
+id03041/5CfnYwQCW48/00001 id03178/2CT-6fnBC_o/00001
+id05850/B8kp8ed48JE/00001 id04006/113VkmVVz1Q/00001
+id01822/0QcHowaLAF0/00001 id04570/0YMGn6BI9rg/00001
+id04478/2grMtwdG93I/00001 id03839/1jWHvl2qCq0/00001
+id01298/2K5F6xG-Rbs/00001 id01228/2TIFacjgehY/00001
+id06310/1IAgr_CRnuE/00001 id04006/113VkmVVz1Q/00001
+id00154/0hjW3eTGAy8/00001 id04006/113VkmVVz1Q/00001
+id05816/1dyCBbJ94iw/00001 id01041/1UYZqPpavtk/00001
+id04570/0YMGn6BI9rg/00001 id04862/0zJh2FMTaDE/00001
+id06913/4Ug7aJemzpg/00001 id04862/0zJh2FMTaDE/00001
+id03862/0w8W8jp7MJk/00001 id02465/0Ocu8l1eAng/00001
+id04253/1HOlzefgLu8/00001 id01567/1Lx_ZqrK1bM/00001
+id06209/2zM9EAPsZZQ/00001 id01298/2K5F6xG-Rbs/00001
+id01822/0QcHowaLAF0/00001 id01541/2P7hzPq5iDw/00001
+id07312/0LWllHGohPY/00001 id02317/0q4X8kPTlEY/00001
+id06692/2ptBBNIZXtI/00001 id02445/3Rnk8eja3TU/00001
+id07414/110UMQovTR0/00001 id00154/0hjW3eTGAy8/00001
+id04478/2grMtwdG93I/00001 id03347/4xXZ75_TeSM/00001
+id04656/1tZYt8jey54/00001 id07802/0RUpqvi3sPU/00001
+id03839/1jWHvl2qCq0/00001 id06310/1IAgr_CRnuE/00001
+id02057/0xZU7Oi9nvM/00001 id01228/2TIFacjgehY/00001
+id00081/2xYrsnvtUWc/00001 id02057/0xZU7Oi9nvM/00001
+id03862/0w8W8jp7MJk/00001 id01892/3vKPgjwFjbo/00001
+id04570/0YMGn6BI9rg/00001 id06913/4Ug7aJemzpg/00001
+id08392/0fwuibKviJU/00001 id01567/1Lx_ZqrK1bM/00001
+id00081/2xYrsnvtUWc/00001 id07494/0P1wPmgz0Bk/00001
+id04536/0f_Yi_1CoeM/00001 id00081/2xYrsnvtUWc/00001
+id03839/1jWHvl2qCq0/00001 id05850/B8kp8ed48JE/00001
+id07621/0CiFdFegqZM/00001 id08456/29EhSZDqzas/00001
+id01822/0QcHowaLAF0/00001 id07868/5YYJq3fSbH8/00001
+id05202/2gnLcAbAoSc/00001 id03178/2CT-6fnBC_o/00001
+id06692/2ptBBNIZXtI/00001 id06913/4Ug7aJemzpg/00001
+id01041/1UYZqPpavtk/00001 id03030/5wOxV1wAgqA/00001
+id07426/1KNFfOFEhyI/00001 id08456/29EhSZDqzas/00001
+id04478/2grMtwdG93I/00001 id02548/0pAkJZmlFqc/00001
+id08392/0fwuibKviJU/00001 id01298/2K5F6xG-Rbs/00001
+id03041/5CfnYwQCW48/00001 id08696/0H1PxInJCK0/00001
+id04366/0iG2Ub9zETM/00001 id07426/1KNFfOFEhyI/00001
+id04950/2n4sGPqU9M8/00001 id07494/0P1wPmgz0Bk/00001
+id01822/0QcHowaLAF0/00001 id08392/0fwuibKviJU/00001
+id02577/0euHS_r5JH4/00001 id06692/2ptBBNIZXtI/00001
+id04570/0YMGn6BI9rg/00001 id00866/03SSllwNkGk/00001
+id05850/B8kp8ed48JE/00001 id08456/29EhSZDqzas/00001
+id01618/0iFlmfmWVlY/00001 id01041/1UYZqPpavtk/00001
+id07414/110UMQovTR0/00001 id04536/0f_Yi_1CoeM/00001
+id02057/0xZU7Oi9nvM/00001 id06913/4Ug7aJemzpg/00001
+id04536/0f_Yi_1CoeM/00001 id01041/1UYZqPpavtk/00001
+id04030/7mXUMuo5_NE/00001 id05850/B8kp8ed48JE/00001
+id04656/1tZYt8jey54/00001 id05459/18XmQEiGLnQ/00001
+id03789/0kdVSujPa9g/00001 id02057/0xZU7Oi9nvM/00001
+id01041/1UYZqPpavtk/00001 id05594/0ohBiepcHWI/00001
+id07494/0P1wPmgz0Bk/00001 id04006/113VkmVVz1Q/00001
+id00812/1Xfgvdu7oDo/00001 id04295/1fSjOItVYVg/00001
+id01541/2P7hzPq5iDw/00001 id02465/0Ocu8l1eAng/00001
+id04862/0zJh2FMTaDE/00001 id05594/0ohBiepcHWI/00001
+id05714/2gvpaZcvAY4/00001 id02286/4LAIxvdvguc/00001
+id06209/2zM9EAPsZZQ/00001 id05816/1dyCBbJ94iw/00001
+id05850/B8kp8ed48JE/00001 id00866/03SSllwNkGk/00001
+id07494/0P1wPmgz0Bk/00001 id07312/0LWllHGohPY/00001
+id04366/0iG2Ub9zETM/00001 id04570/0YMGn6BI9rg/00001
+id00866/03SSllwNkGk/00001 id03347/4xXZ75_TeSM/00001
+id02445/3Rnk8eja3TU/00001 id07802/0RUpqvi3sPU/00001
+id08696/0H1PxInJCK0/00001 id06209/2zM9EAPsZZQ/00001
+id02445/3Rnk8eja3TU/00001 id07621/0CiFdFegqZM/00001
+id08392/0fwuibKviJU/00001 id05850/B8kp8ed48JE/00001
+id00419/1zffAxBod_c/00001 id01228/2TIFacjgehY/00001
+id07354/0NjekFZqaY0/00001 id01041/1UYZqPpavtk/00001
+id04570/0YMGn6BI9rg/00001 id03347/4xXZ75_TeSM/00001
+id01892/3vKPgjwFjbo/00001 id02445/3Rnk8eja3TU/00001
+id00081/2xYrsnvtUWc/00001 id05459/18XmQEiGLnQ/00001
+id06104/02L1L9RFAgI/00001 id04570/0YMGn6BI9rg/00001
+id07961/3EPjXGhfst4/00001 id05654/07pANazoyJg/00001
+id00926/2Nd7f1yNQzE/00001 id03839/1jWHvl2qCq0/00001
+id02181/02gIO4WrZLY/00001 id08696/0H1PxInJCK0/00001
+id07426/1KNFfOFEhyI/00001 id05459/18XmQEiGLnQ/00001
+id03041/5CfnYwQCW48/00001 id06104/02L1L9RFAgI/00001
+id01298/2K5F6xG-Rbs/00001 id01541/2P7hzPq5iDw/00001
+id04570/0YMGn6BI9rg/00001 id01618/0iFlmfmWVlY/00001
+id02685/4JDRxqYC0a4/00001 id02548/0pAkJZmlFqc/00001
+id01822/0QcHowaLAF0/00001 id07426/1KNFfOFEhyI/00001
+id07868/5YYJq3fSbH8/00001 id07494/0P1wPmgz0Bk/00001
+id07802/0RUpqvi3sPU/00001 id03041/5CfnYwQCW48/00001
+id04656/1tZYt8jey54/00001 id01541/2P7hzPq5iDw/00001
+id03347/4xXZ75_TeSM/00001 id02445/3Rnk8eja3TU/00001
+id02548/0pAkJZmlFqc/00001 id01298/2K5F6xG-Rbs/00001
+id07354/0NjekFZqaY0/00001 id07426/1KNFfOFEhyI/00001
+id03862/0w8W8jp7MJk/00001 id01298/2K5F6xG-Rbs/00001
+id04536/0f_Yi_1CoeM/00001 id02465/0Ocu8l1eAng/00001
+id00081/2xYrsnvtUWc/00001 id04366/0iG2Ub9zETM/00001
+id04950/2n4sGPqU9M8/00001 id01822/0QcHowaLAF0/00001
+id06692/2ptBBNIZXtI/00001 id03030/5wOxV1wAgqA/00001
+id07312/0LWllHGohPY/00001 id04478/2grMtwdG93I/00001
+id03862/0w8W8jp7MJk/00001 id03030/5wOxV1wAgqA/00001
+id00081/2xYrsnvtUWc/00001 id08392/0fwuibKviJU/00001
+id02317/0q4X8kPTlEY/00001 id00154/0hjW3eTGAy8/00001
+id05594/0ohBiepcHWI/00001 id04536/0f_Yi_1CoeM/00001
+id07868/5YYJq3fSbH8/00001 id03839/1jWHvl2qCq0/00001
+id02577/0euHS_r5JH4/00001 id06913/4Ug7aJemzpg/00001
+id08456/29EhSZDqzas/00001 id01541/2P7hzPq5iDw/00001
+id01567/1Lx_ZqrK1bM/00001 id04119/1uH67UruKlE/00001
+id04253/1HOlzefgLu8/00001 id01228/2TIFacjgehY/00001
+id02445/3Rnk8eja3TU/00001 id02685/4JDRxqYC0a4/00001
+id05015/0Cu3AvWWOFI/00001 id02465/0Ocu8l1eAng/00001
+id07494/0P1wPmgz0Bk/00001 id05714/2gvpaZcvAY4/00001
+id02548/0pAkJZmlFqc/00001 id04006/113VkmVVz1Q/00001
+id00866/03SSllwNkGk/00001 id02317/0q4X8kPTlEY/00001
+id07354/0NjekFZqaY0/00001 id04253/1HOlzefgLu8/00001
+id00812/1Xfgvdu7oDo/00001 id03030/5wOxV1wAgqA/00001
+id02465/0Ocu8l1eAng/00001 id07354/0NjekFZqaY0/00001
+id04276/5M8NmCwTHZ0/00001 id03862/0w8W8jp7MJk/00001
+id01567/1Lx_ZqrK1bM/00001 id04253/1HOlzefgLu8/00001
+id01618/0iFlmfmWVlY/00001 id06913/4Ug7aJemzpg/00001
+id03862/0w8W8jp7MJk/00001 id08392/0fwuibKviJU/00001
+id07961/3EPjXGhfst4/00001 id00154/0hjW3eTGAy8/00001
+id02577/0euHS_r5JH4/00001 id01228/2TIFacjgehY/00001
+id05654/07pANazoyJg/00001 id03041/5CfnYwQCW48/00001
+id03980/7MRUusImkno/00001 id08392/0fwuibKviJU/00001
+id03178/2CT-6fnBC_o/00001 id04295/1fSjOItVYVg/00001
+id02317/0q4X8kPTlEY/00001 id03347/4xXZ75_TeSM/00001
+id02548/0pAkJZmlFqc/00001 id07426/1KNFfOFEhyI/00001
+id03839/1jWHvl2qCq0/00001 id05654/07pANazoyJg/00001
+id02548/0pAkJZmlFqc/00001 id07868/5YYJq3fSbH8/00001
+id04570/0YMGn6BI9rg/00001 id01041/1UYZqPpavtk/00001
+id07414/110UMQovTR0/00001 id00419/1zffAxBod_c/00001
+id00154/0hjW3eTGAy8/00001 id01618/0iFlmfmWVlY/00001
+id07494/0P1wPmgz0Bk/00001 id05654/07pANazoyJg/00001
+id01822/0QcHowaLAF0/00001 id06310/1IAgr_CRnuE/00001
+id05015/0Cu3AvWWOFI/00001 id05459/18XmQEiGLnQ/00001
+id05816/1dyCBbJ94iw/00001 id02317/0q4X8kPTlEY/00001
+id01541/2P7hzPq5iDw/00001 id05816/1dyCBbJ94iw/00001
+id06104/02L1L9RFAgI/00001 id01892/3vKPgjwFjbo/00001
+id04862/0zJh2FMTaDE/00001 id05850/B8kp8ed48JE/00001
+id05202/2gnLcAbAoSc/00001 id04366/0iG2Ub9zETM/00001
+id02286/4LAIxvdvguc/00001 id02725/37kUrf6RJdw/00001
+id04276/5M8NmCwTHZ0/00001 id01541/2P7hzPq5iDw/00001
+id02057/0xZU7Oi9nvM/00001 id03862/0w8W8jp7MJk/00001
+id06104/02L1L9RFAgI/00001 id00419/1zffAxBod_c/00001
+id04950/2n4sGPqU9M8/00001 id02181/02gIO4WrZLY/00001
+id04478/2grMtwdG93I/00001 id02685/4JDRxqYC0a4/00001
+id04006/113VkmVVz1Q/00001 id00081/2xYrsnvtUWc/00001
+id06692/2ptBBNIZXtI/00001 id03347/4xXZ75_TeSM/00001
+id03030/5wOxV1wAgqA/00001 id02465/0Ocu8l1eAng/00001
+id07312/0LWllHGohPY/00001 id03839/1jWHvl2qCq0/00001
+id04950/2n4sGPqU9M8/00001 id05654/07pANazoyJg/00001
+id02465/0Ocu8l1eAng/00001 id01618/0iFlmfmWVlY/00001
+id00419/1zffAxBod_c/00001 id02181/02gIO4WrZLY/00001
+id07426/1KNFfOFEhyI/00001 id05202/2gnLcAbAoSc/00001
+id07621/0CiFdFegqZM/00001 id08696/0H1PxInJCK0/00001
+id04006/113VkmVVz1Q/00001 id08392/0fwuibKviJU/00001
+id04478/2grMtwdG93I/00001 id02445/3Rnk8eja3TU/00001
+id03347/4xXZ75_TeSM/00001 id00154/0hjW3eTGAy8/00001
+id07312/0LWllHGohPY/00001 id02181/02gIO4WrZLY/00001
+id06310/1IAgr_CRnuE/00001 id02057/0xZU7Oi9nvM/00001
+id04366/0iG2Ub9zETM/00001 id05654/07pANazoyJg/00001
+id00419/1zffAxBod_c/00001 id04570/0YMGn6BI9rg/00001
+id04862/0zJh2FMTaDE/00001 id03862/0w8W8jp7MJk/00001
+id04366/0iG2Ub9zETM/00001 id00154/0hjW3eTGAy8/00001
+id00866/03SSllwNkGk/00001 id00081/2xYrsnvtUWc/00001
+id01618/0iFlmfmWVlY/00001 id02725/37kUrf6RJdw/00001
+id01892/3vKPgjwFjbo/00001 id07621/0CiFdFegqZM/00001
+id05015/0Cu3AvWWOFI/00001 id00926/2Nd7f1yNQzE/00001
+id06913/4Ug7aJemzpg/00001 id03839/1jWHvl2qCq0/00001
+id07312/0LWllHGohPY/00001 id07802/0RUpqvi3sPU/00001
+id06104/02L1L9RFAgI/00001 id02465/0Ocu8l1eAng/00001
+id04295/1fSjOItVYVg/00001 id01298/2K5F6xG-Rbs/00001
+id00866/03SSllwNkGk/00001 id05714/2gvpaZcvAY4/00001
+id06104/02L1L9RFAgI/00001 id01541/2P7hzPq5iDw/00001
+id02445/3Rnk8eja3TU/00001 id03789/0kdVSujPa9g/00001
+id00081/2xYrsnvtUWc/00001 id05816/1dyCBbJ94iw/00001
+id02548/0pAkJZmlFqc/00001 id03030/5wOxV1wAgqA/00001
+id04276/5M8NmCwTHZ0/00001 id01041/1UYZqPpavtk/00001
+id06913/4Ug7aJemzpg/00001 id07868/5YYJq3fSbH8/00001
+id04656/1tZYt8jey54/00001 id06692/2ptBBNIZXtI/00001
+id07494/0P1wPmgz0Bk/00001 id08696/0H1PxInJCK0/00001
+id04119/1uH67UruKlE/00001 id02317/0q4X8kPTlEY/00001
+id00419/1zffAxBod_c/00001 id04862/0zJh2FMTaDE/00001
+id03862/0w8W8jp7MJk/00001 id02445/3Rnk8eja3TU/00001
+id01892/3vKPgjwFjbo/00001 id04862/0zJh2FMTaDE/00001
+id04950/2n4sGPqU9M8/00001 id01618/0iFlmfmWVlY/00001
+id01228/2TIFacjgehY/00001 id01298/2K5F6xG-Rbs/00001
+id01041/1UYZqPpavtk/00001 id07961/3EPjXGhfst4/00001
+id07802/0RUpqvi3sPU/00001 id06913/4Ug7aJemzpg/00001
+id04276/5M8NmCwTHZ0/00001 id03030/5wOxV1wAgqA/00001
+id01567/1Lx_ZqrK1bM/00001 id05459/18XmQEiGLnQ/00001
+id02465/0Ocu8l1eAng/00001 id02725/37kUrf6RJdw/00001
+id05816/1dyCBbJ94iw/00001 id02181/02gIO4WrZLY/00001
+id06913/4Ug7aJemzpg/00001 id04950/2n4sGPqU9M8/00001
+id04276/5M8NmCwTHZ0/00001 id04253/1HOlzefgLu8/00001
+id07414/110UMQovTR0/00001 id06209/2zM9EAPsZZQ/00001
+id06310/1IAgr_CRnuE/00001 id03839/1jWHvl2qCq0/00001
+id03347/4xXZ75_TeSM/00001 id04006/113VkmVVz1Q/00001
+id01541/2P7hzPq5iDw/00001 id04253/1HOlzefgLu8/00001
+id08456/29EhSZDqzas/00001 id07494/0P1wPmgz0Bk/00001
+id07621/0CiFdFegqZM/00001 id05594/0ohBiepcHWI/00001
+id02685/4JDRxqYC0a4/00001 id04536/0f_Yi_1CoeM/00001
+id02317/0q4X8kPTlEY/00001 id08696/0H1PxInJCK0/00001
+id04253/1HOlzefgLu8/00001 id01041/1UYZqPpavtk/00001
+id01041/1UYZqPpavtk/00001 id03178/2CT-6fnBC_o/00001
+id05654/07pANazoyJg/00001 id01892/3vKPgjwFjbo/00001
+id04862/0zJh2FMTaDE/00001 id06310/1IAgr_CRnuE/00001
+id01541/2P7hzPq5iDw/00001 id04478/2grMtwdG93I/00001
+id02445/3Rnk8eja3TU/00001 id02057/0xZU7Oi9nvM/00001
+id08392/0fwuibKviJU/00001 id04570/0YMGn6BI9rg/00001
+id06692/2ptBBNIZXtI/00001 id02057/0xZU7Oi9nvM/00001
+id04950/2n4sGPqU9M8/00001 id04862/0zJh2FMTaDE/00001
+id03862/0w8W8jp7MJk/00001 id07621/0CiFdFegqZM/00001
+id07312/0LWllHGohPY/00001 id04656/1tZYt8jey54/00001
+id02577/0euHS_r5JH4/00001 id00866/03SSllwNkGk/00001
+id01228/2TIFacjgehY/00001 id02685/4JDRxqYC0a4/00001
+id00081/2xYrsnvtUWc/00001 id00419/1zffAxBod_c/00001
+id00154/0hjW3eTGAy8/00001 id04656/1tZYt8jey54/00001
+id03839/1jWHvl2qCq0/00001 id01618/0iFlmfmWVlY/00001
+id03862/0w8W8jp7MJk/00001 id02286/4LAIxvdvguc/00001
+id06310/1IAgr_CRnuE/00001 id08456/29EhSZDqzas/00001
+id02317/0q4X8kPTlEY/00001 id04276/5M8NmCwTHZ0/00001
+id06913/4Ug7aJemzpg/00001 id04366/0iG2Ub9zETM/00001
+id06310/1IAgr_CRnuE/00001 id00926/2Nd7f1yNQzE/00001
+id01228/2TIFacjgehY/00001 id02181/02gIO4WrZLY/00001
+id07414/110UMQovTR0/00001 id05594/0ohBiepcHWI/00001
+id03980/7MRUusImkno/00001 id03178/2CT-6fnBC_o/00001
+id03347/4xXZ75_TeSM/00001 id04478/2grMtwdG93I/00001
+id06692/2ptBBNIZXtI/00001 id05459/18XmQEiGLnQ/00001
+id00154/0hjW3eTGAy8/00001 id02725/37kUrf6RJdw/00001
+id01228/2TIFacjgehY/00001 id04006/113VkmVVz1Q/00001
+id00866/03SSllwNkGk/00001 id00926/2Nd7f1yNQzE/00001
+id05594/0ohBiepcHWI/00001 id04006/113VkmVVz1Q/00001
+id04656/1tZYt8jey54/00001 id01822/0QcHowaLAF0/00001
+id07354/0NjekFZqaY0/00001 id04536/0f_Yi_1CoeM/00001
+id07354/0NjekFZqaY0/00001 id04656/1tZYt8jey54/00001
+id04366/0iG2Ub9zETM/00001 id02057/0xZU7Oi9nvM/00001
+id03789/0kdVSujPa9g/00001 id01822/0QcHowaLAF0/00001
+id07621/0CiFdFegqZM/00001 id03347/4xXZ75_TeSM/00001
+id04030/7mXUMuo5_NE/00001 id04366/0iG2Ub9zETM/00001
+id00812/1Xfgvdu7oDo/00001 id07354/0NjekFZqaY0/00001
+id04536/0f_Yi_1CoeM/00001 id07494/0P1wPmgz0Bk/00001
+id04536/0f_Yi_1CoeM/00001 id05816/1dyCBbJ94iw/00001
+id03862/0w8W8jp7MJk/00001 id07868/5YYJq3fSbH8/00001
+id02685/4JDRxqYC0a4/00001 id05459/18XmQEiGLnQ/00001
+id06209/2zM9EAPsZZQ/00001 id07426/1KNFfOFEhyI/00001
+id07426/1KNFfOFEhyI/00001 id02317/0q4X8kPTlEY/00001
+id00926/2Nd7f1yNQzE/00001 id05594/0ohBiepcHWI/00001
+id00154/0hjW3eTGAy8/00001 id04950/2n4sGPqU9M8/00001
+id03041/5CfnYwQCW48/00001 id01892/3vKPgjwFjbo/00001
+id00419/1zffAxBod_c/00001 id00866/03SSllwNkGk/00001
+id02725/37kUrf6RJdw/00001 id05202/2gnLcAbAoSc/00001
+id04656/1tZYt8jey54/00001 id06913/4Ug7aJemzpg/00001
+id03862/0w8W8jp7MJk/00001 id04006/113VkmVVz1Q/00001
+id00419/1zffAxBod_c/00001 id04030/7mXUMuo5_NE/00001
+id06692/2ptBBNIZXtI/00001 id01541/2P7hzPq5iDw/00001
+id07354/0NjekFZqaY0/00001 id03041/5CfnYwQCW48/00001
+id03347/4xXZ75_TeSM/00001 id07802/0RUpqvi3sPU/00001
+id07354/0NjekFZqaY0/00001 id01298/2K5F6xG-Rbs/00001
+id02725/37kUrf6RJdw/00001 id03980/7MRUusImkno/00001
+id01618/0iFlmfmWVlY/00001 id02445/3Rnk8eja3TU/00001
+id05816/1dyCBbJ94iw/00001 id00081/2xYrsnvtUWc/00001
+id07354/0NjekFZqaY0/00001 id04478/2grMtwdG93I/00001
+id03980/7MRUusImkno/00001 id04295/1fSjOItVYVg/00001
+id02548/0pAkJZmlFqc/00001 id00081/2xYrsnvtUWc/00001
+id05459/18XmQEiGLnQ/00001 id03347/4xXZ75_TeSM/00001
+id04570/0YMGn6BI9rg/00001 id04006/113VkmVVz1Q/00001
+id06209/2zM9EAPsZZQ/00001 id01041/1UYZqPpavtk/00001
+id01228/2TIFacjgehY/00001 id02317/0q4X8kPTlEY/00001
+id07802/0RUpqvi3sPU/00001 id01541/2P7hzPq5iDw/00001
+id04862/0zJh2FMTaDE/00001 id01892/3vKPgjwFjbo/00001
+id04253/1HOlzefgLu8/00001 id07802/0RUpqvi3sPU/00001
+id06692/2ptBBNIZXtI/00001 id02286/4LAIxvdvguc/00001
+id01228/2TIFacjgehY/00001 id07961/3EPjXGhfst4/00001
+id05714/2gvpaZcvAY4/00001 id00812/1Xfgvdu7oDo/00001
+id03789/0kdVSujPa9g/00001 id03862/0w8W8jp7MJk/00001
+id04295/1fSjOItVYVg/00001 id07868/5YYJq3fSbH8/00001
+id04276/5M8NmCwTHZ0/00001 id02057/0xZU7Oi9nvM/00001
+id02286/4LAIxvdvguc/00001 id03862/0w8W8jp7MJk/00001
+id04478/2grMtwdG93I/00001 id05816/1dyCBbJ94iw/00001
+id08456/29EhSZDqzas/00001 id02725/37kUrf6RJdw/00001
+id02577/0euHS_r5JH4/00001 id07961/3EPjXGhfst4/00001
+id01618/0iFlmfmWVlY/00001 id00812/1Xfgvdu7oDo/00001
+id07312/0LWllHGohPY/00001 id03789/0kdVSujPa9g/00001
+id02685/4JDRxqYC0a4/00001 id03839/1jWHvl2qCq0/00001
+id04030/7mXUMuo5_NE/00001 id07802/0RUpqvi3sPU/00001
+id01567/1Lx_ZqrK1bM/00001 id04478/2grMtwdG93I/00001
+id02577/0euHS_r5JH4/00001 id02548/0pAkJZmlFqc/00001
+id04536/0f_Yi_1CoeM/00001 id03030/5wOxV1wAgqA/00001
+id03347/4xXZ75_TeSM/00001 id00081/2xYrsnvtUWc/00001
+id03980/7MRUusImkno/00001 id06209/2zM9EAPsZZQ/00001
+id01567/1Lx_ZqrK1bM/00001 id00154/0hjW3eTGAy8/00001
+id06104/02L1L9RFAgI/00001 id02057/0xZU7Oi9nvM/00001
+id04570/0YMGn6BI9rg/00001 id03980/7MRUusImkno/00001
+id08456/29EhSZDqzas/00001 id02286/4LAIxvdvguc/00001
+id07312/0LWllHGohPY/00001 id04366/0iG2Ub9zETM/00001
+id05654/07pANazoyJg/00001 id07426/1KNFfOFEhyI/00001
+id03839/1jWHvl2qCq0/00001 id03347/4xXZ75_TeSM/00001
+id04536/0f_Yi_1CoeM/00001 id04478/2grMtwdG93I/00001
+id05816/1dyCBbJ94iw/00001 id04862/0zJh2FMTaDE/00001
+id04950/2n4sGPqU9M8/00001 id00817/0GmSijZelGY/00001
+id07426/1KNFfOFEhyI/00001 id04862/0zJh2FMTaDE/00001
+id05459/18XmQEiGLnQ/00001 id00812/1Xfgvdu7oDo/00001
+id00154/0hjW3eTGAy8/00001 id03178/2CT-6fnBC_o/00001
+id04295/1fSjOItVYVg/00001 id07312/0LWllHGohPY/00001
+id05594/0ohBiepcHWI/00001 id04862/0zJh2FMTaDE/00001
+id03347/4xXZ75_TeSM/00001 id01541/2P7hzPq5iDw/00001
+id04536/0f_Yi_1CoeM/00001 id02445/3Rnk8eja3TU/00001
+id03862/0w8W8jp7MJk/00001 id04030/7mXUMuo5_NE/00001
+id00154/0hjW3eTGAy8/00001 id01541/2P7hzPq5iDw/00001
+id06913/4Ug7aJemzpg/00001 id03347/4xXZ75_TeSM/00001
+id08696/0H1PxInJCK0/00001 id04478/2grMtwdG93I/00001
+id04366/0iG2Ub9zETM/00001 id02445/3Rnk8eja3TU/00001
+id07354/0NjekFZqaY0/00001 id01567/1Lx_ZqrK1bM/00001
+id06913/4Ug7aJemzpg/00001 id05202/2gnLcAbAoSc/00001
+id04862/0zJh2FMTaDE/00001 id08696/0H1PxInJCK0/00001
+id03178/2CT-6fnBC_o/00001 id02685/4JDRxqYC0a4/00001
+id01822/0QcHowaLAF0/00001 id04950/2n4sGPqU9M8/00001
+id00081/2xYrsnvtUWc/00001 id06913/4Ug7aJemzpg/00001
+id07868/5YYJq3fSbH8/00001 id02465/0Ocu8l1eAng/00001
+id02181/02gIO4WrZLY/00001 id03862/0w8W8jp7MJk/00001
+id07868/5YYJq3fSbH8/00001 id05202/2gnLcAbAoSc/00001
+id02286/4LAIxvdvguc/00001 id03178/2CT-6fnBC_o/00001
+id01298/2K5F6xG-Rbs/00001 id01618/0iFlmfmWVlY/00001
+id03980/7MRUusImkno/00001 id04006/113VkmVVz1Q/00001
+id03862/0w8W8jp7MJk/00001 id08456/29EhSZDqzas/00001
+id01567/1Lx_ZqrK1bM/00001 id03041/5CfnYwQCW48/00001
+id02465/0Ocu8l1eAng/00001 id00419/1zffAxBod_c/00001
+id04570/0YMGn6BI9rg/00001 id04295/1fSjOItVYVg/00001
+id03862/0w8W8jp7MJk/00001 id04295/1fSjOItVYVg/00001
+id03789/0kdVSujPa9g/00001 id00866/03SSllwNkGk/00001
+id05654/07pANazoyJg/00001 id00926/2Nd7f1yNQzE/00001
+id05850/B8kp8ed48JE/00001 id02685/4JDRxqYC0a4/00001
+id03347/4xXZ75_TeSM/00001 id08392/0fwuibKviJU/00001
+id00926/2Nd7f1yNQzE/00001 id07312/0LWllHGohPY/00001
+id05850/B8kp8ed48JE/00001 id01041/1UYZqPpavtk/00001
+id03030/5wOxV1wAgqA/00001 id06913/4Ug7aJemzpg/00001
+id02057/0xZU7Oi9nvM/00001 id01041/1UYZqPpavtk/00001
+id03030/5wOxV1wAgqA/00001 id01041/1UYZqPpavtk/00001
+id01618/0iFlmfmWVlY/00001 id04366/0iG2Ub9zETM/00001
+id06310/1IAgr_CRnuE/00001 id04119/1uH67UruKlE/00001
+id05594/0ohBiepcHWI/00001 id02317/0q4X8kPTlEY/00001
+id01228/2TIFacjgehY/00001 id04119/1uH67UruKlE/00001
+id02286/4LAIxvdvguc/00001 id02445/3Rnk8eja3TU/00001
+id04030/7mXUMuo5_NE/00001 id00419/1zffAxBod_c/00001
+id01298/2K5F6xG-Rbs/00001 id02445/3Rnk8eja3TU/00001
+id07802/0RUpqvi3sPU/00001 id04862/0zJh2FMTaDE/00001
+id04006/113VkmVVz1Q/00001 id03347/4xXZ75_TeSM/00001
+id02317/0q4X8kPTlEY/00001 id05850/B8kp8ed48JE/00001
+id08456/29EhSZDqzas/00001 id04656/1tZYt8jey54/00001
+id04656/1tZYt8jey54/00001 id05816/1dyCBbJ94iw/00001
+id05202/2gnLcAbAoSc/00001 id06209/2zM9EAPsZZQ/00001
face_detection/README.md
ADDED
@@ -0,0 +1 @@
+The code for Face Detection in this folder has been taken from the wonderful [face_alignment](https://github.com/1adrianb/face-alignment) repository. This has been modified to take batches of faces at a time.
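A minimal usage sketch for this batched detector, assuming the repo root is on `PYTHONPATH` and frames arrive as a uint8 BGR batch of shape (N, H, W, 3), which is what `get_detections_for_batch` in `api.py` below expects before its BGR-to-RGB flip:

```python
# Minimal sketch, assuming the repo root is importable and detector weights
# are available; `frames` stands in for a batch of BGR video frames.
import numpy as np
import face_detection

detector = face_detection.FaceAlignment(face_detection.LandmarksType._2D,
                                        flip_input=False, device='cpu')
frames = np.zeros((4, 256, 256, 3), dtype=np.uint8)  # stand-in for 4 frames
boxes = detector.get_detections_for_batch(frames)    # (x1, y1, x2, y2) or None per frame
```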
face_detection/__init__.py
ADDED
@@ -0,0 +1,7 @@
+# -*- coding: utf-8 -*-
+
+__author__ = """Adrian Bulat"""
+__email__ = 'adrian.bulat@nottingham.ac.uk'
+__version__ = '1.0.1'
+
+from .api import FaceAlignment, LandmarksType, NetworkSize
face_detection/api.py
ADDED
@@ -0,0 +1,98 @@
+from __future__ import print_function
+import os
+import torch
+from torch.utils.model_zoo import load_url
+from enum import Enum
+import numpy as np
+import cv2
+try:
+    import urllib.request as request_file
+except BaseException:
+    import urllib as request_file
+
+from .models import FAN, ResNetDepth
+from .utils import *
+
+
+class LandmarksType(Enum):
+    """Enum class defining the type of landmarks to detect.
+
+    ``_2D`` - the detected points ``(x,y)`` are detected in a 2D space and follow the visible contour of the face
+    ``_2halfD`` - these points represent the projection of the 3D points into a 2D space
+    ``_3D`` - detect the points ``(x,y,z)`` in a 3D space
+
+    """
+    _2D = 1
+    _2halfD = 2
+    _3D = 3
+
+
+class NetworkSize(Enum):
+    # TINY = 1
+    # SMALL = 2
+    # MEDIUM = 3
+    LARGE = 4
+
+    def __new__(cls, value):
+        member = object.__new__(cls)
+        member._value_ = value
+        return member
+
+    def __int__(self):
+        return self.value
+
+ROOT = os.path.dirname(os.path.abspath(__file__))
+
+class FaceAlignment:
+    def __init__(self, landmarks_type, network_size=NetworkSize.LARGE,
+                 device='cuda', flip_input=False, face_detector='sfd', verbose=False):
+        self.device = device
+        self.flip_input = flip_input
+        self.landmarks_type = landmarks_type
+        self.verbose = verbose
+
+        network_size = int(network_size)
+
+        if 'cuda' in device:
+            torch.backends.cudnn.benchmark = True
+
+        # Get the face detector
+        face_detector_module = __import__('face_detection.detection.' + face_detector,
+                                          globals(), locals(), [face_detector], 0)
+        self.face_detector = face_detector_module.FaceDetector(device=device, verbose=verbose)
+
+    def get_detections_for_batch(self, images):
+        images = images[..., ::-1]  # BGR -> RGB
+        detected_faces = self.face_detector.detect_from_batch(images.copy())
+        results = []
+
+        for i, d in enumerate(detected_faces):
+            # print("Inside facedection:", i, len(d))
+            if len(d) == 0:
+                results.append(None)
+                continue
+            d = d[0]  # keep only the first detected face per frame
+            d = np.clip(d, 0, None)
+
+            x1, y1, x2, y2 = map(int, d[:-1])  # drop the trailing score
+            results.append((x1, y1, x2, y2))
+
+        return results
+
+    def get_all_detections_for_batch(self, images):
+        # for multiface facedetection
+        images = images[..., ::-1]  # BGR -> RGB
+        detected_faces = self.face_detector.detect_from_batch(images.copy())
+        results = []
+
+        for i, d in enumerate(detected_faces):
+            # print("Inside facedection:", i, len(d))
+            if len(d) == 0:
+                results.append(None)
+                continue
+            d = [np.clip(dd, 0, None) for dd in d]
+            # d = [map(int, dd[:-1]) for dd in d]
+            d = [[int(ddd) for ddd in dd[:-1]] for dd in d]  # all boxes, scores dropped
+            results.append(d)
+
+        return results
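The two batch methods above differ only in what they keep per frame: `get_detections_for_batch` returns the first detected box (or `None`), while `get_all_detections_for_batch` keeps every box. A sketch of consuming the multi-face variant, reusing `detector` and `frames` from the earlier sketch:

```python
# Sketch reusing `detector` and `frames` from the README example above.
all_boxes = detector.get_all_detections_for_batch(frames)
for frame_idx, face_boxes in enumerate(all_boxes):
    if face_boxes is None:  # no face detected in this frame
        continue
    for x1, y1, x2, y2 in face_boxes:  # one box per detected face
        print(frame_idx, (x1, y1, x2, y2))
```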
face_detection/detection/__init__.py
ADDED
@@ -0,0 +1 @@
+from .core import FaceDetector
face_detection/detection/core.py
ADDED
@@ -0,0 +1,130 @@
import logging
import glob
from tqdm import tqdm
import numpy as np
import torch
import cv2


class FaceDetector(object):
    """An abstract class representing a face detector.

    Any other face detection implementation must subclass it. All subclasses
    must implement ``detect_from_image``, which returns a list of detected
    bounding boxes. Optionally, for speed considerations, implementing
    detection from a path is recommended.
    """

    def __init__(self, device, verbose):
        self.device = device
        self.verbose = verbose
        # Defined unconditionally so the error path below cannot hit an undefined name.
        logger = logging.getLogger(__name__)

        if verbose and 'cpu' in device:
            logger.warning("Detection running on CPU, this may be potentially slow.")

        if 'cpu' not in device and 'cuda' not in device:
            if verbose:
                logger.error("Expected values for device are: {cpu, cuda} but got: %s", device)
            raise ValueError

    def detect_from_image(self, tensor_or_path):
        """Detects faces in a given image.

        This function detects the faces present in a provided BGR (usually)
        image. The input can be either the image itself or the path to it.

        Arguments:
            tensor_or_path {numpy.ndarray, torch.tensor or string} -- the path
            to an image or the image itself.

        Example::

            >>> path_to_image = 'data/image_01.jpg'
            ... detected_faces = detect_from_image(path_to_image)
            [A list of bounding boxes (x1, y1, x2, y2)]
            >>> image = cv2.imread(path_to_image)
            ... detected_faces = detect_from_image(image)
            [A list of bounding boxes (x1, y1, x2, y2)]

        """
        raise NotImplementedError

    def detect_from_directory(self, path, extensions=['.jpg', '.png'], recursive=False, show_progress_bar=True):
        """Detects faces from all the images present in a given directory.

        Arguments:
            path {string} -- a string containing a path that points to the folder containing the images

        Keyword Arguments:
            extensions {list} -- list of strings containing the extensions to be
            considered in the following format: ``.extension_name`` (default:
            {['.jpg', '.png']}) recursive {bool} -- whether to scan the
            folder recursively (default: {False}) show_progress_bar {bool} --
            display a progress bar (default: {True})

        Example:
            >>> directory = 'data'
            ... detected_faces = detect_from_directory(directory)
            {A dictionary of [lists containing bounding boxes(x1, y1, x2, y2)]}

        """
        logger = logging.getLogger(__name__)

        if len(extensions) == 0:
            if self.verbose:
                logger.error("Expected at least one extension, but none was received.")
            raise ValueError

        if self.verbose:
            logger.info("Constructing the list of images.")
        additional_pattern = '/**/*' if recursive else '/*'
        files = []
        for extension in extensions:
            files.extend(glob.glob(path + additional_pattern + extension, recursive=recursive))

        if self.verbose:
            logger.info("Finished searching for images. %s images found", len(files))
            logger.info("Preparing to run the detection.")

        predictions = {}
        for image_path in tqdm(files, disable=not show_progress_bar):
            if self.verbose:
                logger.info("Running the face detector on image: %s", image_path)
            predictions[image_path] = self.detect_from_image(image_path)

        if self.verbose:
            logger.info("The detector was successfully run on all %s images", len(files))

        return predictions

    @property
    def reference_scale(self):
        raise NotImplementedError

    @property
    def reference_x_shift(self):
        raise NotImplementedError

    @property
    def reference_y_shift(self):
        raise NotImplementedError

    @staticmethod
    def tensor_or_path_to_ndarray(tensor_or_path, rgb=True):
        """Convert path (represented as a string) or torch.tensor to a numpy.ndarray

        Arguments:
            tensor_or_path {numpy.ndarray, torch.tensor or string} -- path to the image, or the image itself
        """
        if isinstance(tensor_or_path, str):
            return cv2.imread(tensor_or_path) if not rgb else cv2.imread(tensor_or_path)[..., ::-1]
        elif torch.is_tensor(tensor_or_path):
            # Call cpu in case it is coming from cuda
            return tensor_or_path.cpu().numpy()[..., ::-1].copy() if not rgb else tensor_or_path.cpu().numpy()
        elif isinstance(tensor_or_path, np.ndarray):
            return tensor_or_path[..., ::-1].copy() if not rgb else tensor_or_path
        else:
            raise TypeError
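Concretely, a new backend only has to subclass `FaceDetector` and implement `detect_from_image` (plus `detect_from_batch` for the batch path used by `api.py`). A minimal sketch; the "detector" here is a dummy that just returns the full frame:

# Minimal subclass sketch (dummy logic, for illustration only).
from face_detection.detection.core import FaceDetector

class DummyDetector(FaceDetector):
    def detect_from_image(self, tensor_or_path):
        image = self.tensor_or_path_to_ndarray(tensor_or_path)  # str/tensor/ndarray -> RGB ndarray
        h, w = image.shape[:2]
        # Pretend the whole frame is one face with confidence 1.0: (x1, y1, x2, y2, score)
        return [[0, 0, w, h, 1.0]]

    def detect_from_batch(self, images):
        return [self.detect_from_image(img) for img in images]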
face_detection/detection/sfd/__init__.py
ADDED
@@ -0,0 +1 @@
from .sfd_detector import SFDDetector as FaceDetector
face_detection/detection/sfd/bbox.py
ADDED
@@ -0,0 +1,129 @@
from __future__ import print_function
import os
import sys
import cv2
import random
import datetime
import time
import math
import argparse
import numpy as np
import torch

try:
    from iou import IOU
except BaseException:
    # IOU cython speedup 10x
    def IOU(ax1, ay1, ax2, ay2, bx1, by1, bx2, by2):
        sa = abs((ax2 - ax1) * (ay2 - ay1))
        sb = abs((bx2 - bx1) * (by2 - by1))
        x1, y1 = max(ax1, bx1), max(ay1, by1)
        x2, y2 = min(ax2, bx2), min(ay2, by2)
        w = x2 - x1
        h = y2 - y1
        if w < 0 or h < 0:
            return 0.0
        else:
            return 1.0 * w * h / (sa + sb - w * h)


def bboxlog(x1, y1, x2, y2, axc, ayc, aww, ahh):
    xc, yc, ww, hh = (x2 + x1) / 2, (y2 + y1) / 2, x2 - x1, y2 - y1
    dx, dy = (xc - axc) / aww, (yc - ayc) / ahh
    dw, dh = math.log(ww / aww), math.log(hh / ahh)
    return dx, dy, dw, dh


def bboxloginv(dx, dy, dw, dh, axc, ayc, aww, ahh):
    xc, yc = dx * aww + axc, dy * ahh + ayc
    ww, hh = math.exp(dw) * aww, math.exp(dh) * ahh
    x1, x2, y1, y2 = xc - ww / 2, xc + ww / 2, yc - hh / 2, yc + hh / 2
    return x1, y1, x2, y2


def nms(dets, thresh):
    if 0 == len(dets):
        return []
    x1, y1, x2, y2, scores = dets[:, 0], dets[:, 1], dets[:, 2], dets[:, 3], dets[:, 4]
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]

    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1, yy1 = np.maximum(x1[i], x1[order[1:]]), np.maximum(y1[i], y1[order[1:]])
        xx2, yy2 = np.minimum(x2[i], x2[order[1:]]), np.minimum(y2[i], y2[order[1:]])

        w, h = np.maximum(0.0, xx2 - xx1 + 1), np.maximum(0.0, yy2 - yy1 + 1)
        ovr = w * h / (areas[i] + areas[order[1:]] - w * h)

        inds = np.where(ovr <= thresh)[0]
        order = order[inds + 1]

    return keep


def encode(matched, priors, variances):
    """Encode the variances from the priorbox layers into the ground truth boxes
    we have matched (based on jaccard overlap) with the prior boxes.
    Args:
        matched: (tensor) Coords of ground truth for each prior in point-form
            Shape: [num_priors, 4].
        priors: (tensor) Prior boxes in center-offset form
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        encoded boxes (tensor), Shape: [num_priors, 4]
    """

    # dist b/t match center and prior's center
    g_cxcy = (matched[:, :2] + matched[:, 2:]) / 2 - priors[:, :2]
    # encode variance
    g_cxcy /= (variances[0] * priors[:, 2:])
    # match wh / prior wh
    g_wh = (matched[:, 2:] - matched[:, :2]) / priors[:, 2:]
    g_wh = torch.log(g_wh) / variances[1]
    # return target for smooth_l1_loss
    return torch.cat([g_cxcy, g_wh], 1)  # [num_priors,4]


def decode(loc, priors, variances):
    """Decode locations from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
        loc (tensor): location predictions for loc layers,
            Shape: [num_priors,4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions
    """

    boxes = torch.cat((
        priors[:, :2] + loc[:, :2] * variances[0] * priors[:, 2:],
        priors[:, 2:] * torch.exp(loc[:, 2:] * variances[1])), 1)
    boxes[:, :2] -= boxes[:, 2:] / 2
    boxes[:, 2:] += boxes[:, :2]
    return boxes


def batch_decode(loc, priors, variances):
    """Decode locations from predictions using priors to undo
    the encoding we did for offset regression at train time.
    Args:
        loc (tensor): location predictions for loc layers,
            Shape: [num_priors,4]
        priors (tensor): Prior boxes in center-offset form.
            Shape: [num_priors,4].
        variances: (list[float]) Variances of priorboxes
    Return:
        decoded bounding box predictions
    """

    boxes = torch.cat((
        priors[:, :, :2] + loc[:, :, :2] * variances[0] * priors[:, :, 2:],
        priors[:, :, 2:] * torch.exp(loc[:, :, 2:] * variances[1])), 2)
    boxes[:, :, :2] -= boxes[:, :, 2:] / 2
    boxes[:, :, 2:] += boxes[:, :, :2]
    return boxes
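To make the `nms` bookkeeping concrete, here is a small worked example (values chosen by hand): two heavily overlapping boxes and one disjoint box, with a 0.3 IoU threshold.

import numpy as np
from face_detection.detection.sfd.bbox import nms

dets = np.array([
    [10, 10, 50, 50, 0.9],     # kept: highest score
    [12, 12, 48, 48, 0.8],     # suppressed: IoU with the first box is ~0.81 > 0.3
    [100, 100, 150, 150, 0.7], # kept: no overlap with the first box
], dtype=np.float64)

print(nms(dets, 0.3))  # -> indices [0, 2]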
face_detection/detection/sfd/detect.py
ADDED
@@ -0,0 +1,112 @@
import torch
import torch.nn.functional as F

import os
import sys
import cv2
import random
import datetime
import math
import argparse
import numpy as np

import scipy.io as sio
import zipfile
from .net_s3fd import s3fd
from .bbox import *


def detect(net, img, device):
    img = img - np.array([104, 117, 123])
    img = img.transpose(2, 0, 1)
    img = img.reshape((1,) + img.shape)

    if 'cuda' in device:
        torch.backends.cudnn.benchmark = True

    img = torch.from_numpy(img).float().to(device)
    BB, CC, HH, WW = img.size()
    with torch.no_grad():
        olist = net(img)

    bboxlist = []
    for i in range(len(olist) // 2):
        olist[i * 2] = F.softmax(olist[i * 2], dim=1)
    olist = [oelem.data.cpu() for oelem in olist]
    for i in range(len(olist) // 2):
        ocls, oreg = olist[i * 2], olist[i * 2 + 1]
        FB, FC, FH, FW = ocls.size()  # feature map size
        stride = 2**(i + 2)  # 4,8,16,32,64,128
        anchor = stride * 4
        poss = zip(*np.where(ocls[:, 1, :, :] > 0.05))
        for Iindex, hindex, windex in poss:
            axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride
            score = ocls[0, 1, hindex, windex]
            loc = oreg[0, :, hindex, windex].contiguous().view(1, 4)
            priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]])
            variances = [0.1, 0.2]
            box = decode(loc, priors, variances)
            x1, y1, x2, y2 = box[0] * 1.0
            # cv2.rectangle(imgshow,(int(x1),int(y1)),(int(x2),int(y2)),(0,0,255),1)
            bboxlist.append([x1, y1, x2, y2, score])
    bboxlist = np.array(bboxlist)
    if 0 == len(bboxlist):
        bboxlist = np.zeros((1, 5))

    return bboxlist


def batch_detect(net, imgs, device):
    imgs = imgs - np.array([104, 117, 123])
    imgs = imgs.transpose(0, 3, 1, 2)

    if 'cuda' in device:
        torch.backends.cudnn.benchmark = True

    imgs = torch.from_numpy(imgs).float().to(device)
    BB, CC, HH, WW = imgs.size()
    with torch.no_grad():
        olist = net(imgs)

    bboxlist = []
    for i in range(len(olist) // 2):
        olist[i * 2] = F.softmax(olist[i * 2], dim=1)
    olist = [oelem.data.cpu() for oelem in olist]
    for i in range(len(olist) // 2):
        ocls, oreg = olist[i * 2], olist[i * 2 + 1]
        FB, FC, FH, FW = ocls.size()  # feature map size
        stride = 2**(i + 2)  # 4,8,16,32,64,128
        anchor = stride * 4
        poss = zip(*np.where(ocls[:, 1, :, :] > 0.05))
        for Iindex, hindex, windex in poss:
            axc, ayc = stride / 2 + windex * stride, stride / 2 + hindex * stride
            score = ocls[:, 1, hindex, windex]
            loc = oreg[:, :, hindex, windex].contiguous().view(BB, 1, 4)
            priors = torch.Tensor([[axc / 1.0, ayc / 1.0, stride * 4 / 1.0, stride * 4 / 1.0]]).view(1, 1, 4)
            variances = [0.1, 0.2]
            box = batch_decode(loc, priors, variances)
            box = box[:, 0] * 1.0
            # cv2.rectangle(imgshow,(int(x1),int(y1)),(int(x2),int(y2)),(0,0,255),1)
            bboxlist.append(torch.cat([box, score.unsqueeze(1)], 1).cpu().numpy())
    bboxlist = np.array(bboxlist)
    if 0 == len(bboxlist):
        bboxlist = np.zeros((1, BB, 5))

    return bboxlist


def flip_detect(net, img, device):
    img = cv2.flip(img, 1)
    b = detect(net, img, device)

    bboxlist = np.zeros(b.shape)
    bboxlist[:, 0] = img.shape[1] - b[:, 2]
    bboxlist[:, 1] = b[:, 1]
    bboxlist[:, 2] = img.shape[1] - b[:, 0]
    bboxlist[:, 3] = b[:, 3]
    bboxlist[:, 4] = b[:, 4]
    return bboxlist


def pts_to_bb(pts):
    min_x, min_y = np.min(pts, axis=0)
    max_x, max_y = np.max(pts, axis=0)
    return np.array([min_x, min_y, max_x, max_y])
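As a sanity check on the prior-box arithmetic above, note that a zero regression output decodes to the prior itself: on the stride-4 map, cell `(hindex=2, windex=2)` has its anchor centre at `stride/2 + 2*stride = 10` and side `stride*4 = 16`. A short sketch:

import torch
from face_detection.detection.sfd.bbox import decode

# stride-4 cell (hindex=windex=2): anchor centre (10, 10), side 16
priors = torch.tensor([[10.0, 10.0, 16.0, 16.0]])
loc = torch.zeros(1, 4)                   # zero offsets -> box equals the prior
print(decode(loc, priors, [0.1, 0.2]))    # tensor([[ 2.,  2., 18., 18.]])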
face_detection/detection/sfd/net_s3fd.py
ADDED
@@ -0,0 +1,129 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class L2Norm(nn.Module):
    def __init__(self, n_channels, scale=1.0):
        super(L2Norm, self).__init__()
        self.n_channels = n_channels
        self.scale = scale
        self.eps = 1e-10
        self.weight = nn.Parameter(torch.Tensor(self.n_channels))
        self.weight.data *= 0.0
        self.weight.data += self.scale

    def forward(self, x):
        norm = x.pow(2).sum(dim=1, keepdim=True).sqrt() + self.eps
        x = x / norm * self.weight.view(1, -1, 1, 1)
        return x


class s3fd(nn.Module):
    def __init__(self):
        super(s3fd, self).__init__()
        self.conv1_1 = nn.Conv2d(3, 64, kernel_size=3, stride=1, padding=1)
        self.conv1_2 = nn.Conv2d(64, 64, kernel_size=3, stride=1, padding=1)

        self.conv2_1 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.conv2_2 = nn.Conv2d(128, 128, kernel_size=3, stride=1, padding=1)

        self.conv3_1 = nn.Conv2d(128, 256, kernel_size=3, stride=1, padding=1)
        self.conv3_2 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)
        self.conv3_3 = nn.Conv2d(256, 256, kernel_size=3, stride=1, padding=1)

        self.conv4_1 = nn.Conv2d(256, 512, kernel_size=3, stride=1, padding=1)
        self.conv4_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv4_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)

        self.conv5_1 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv5_2 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)
        self.conv5_3 = nn.Conv2d(512, 512, kernel_size=3, stride=1, padding=1)

        self.fc6 = nn.Conv2d(512, 1024, kernel_size=3, stride=1, padding=3)
        self.fc7 = nn.Conv2d(1024, 1024, kernel_size=1, stride=1, padding=0)

        self.conv6_1 = nn.Conv2d(1024, 256, kernel_size=1, stride=1, padding=0)
        self.conv6_2 = nn.Conv2d(256, 512, kernel_size=3, stride=2, padding=1)

        self.conv7_1 = nn.Conv2d(512, 128, kernel_size=1, stride=1, padding=0)
        self.conv7_2 = nn.Conv2d(128, 256, kernel_size=3, stride=2, padding=1)

        self.conv3_3_norm = L2Norm(256, scale=10)
        self.conv4_3_norm = L2Norm(512, scale=8)
        self.conv5_3_norm = L2Norm(512, scale=5)

        self.conv3_3_norm_mbox_conf = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1)
        self.conv3_3_norm_mbox_loc = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1)
        self.conv4_3_norm_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1)
        self.conv4_3_norm_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1)
        self.conv5_3_norm_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1)
        self.conv5_3_norm_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1)

        self.fc7_mbox_conf = nn.Conv2d(1024, 2, kernel_size=3, stride=1, padding=1)
        self.fc7_mbox_loc = nn.Conv2d(1024, 4, kernel_size=3, stride=1, padding=1)
        self.conv6_2_mbox_conf = nn.Conv2d(512, 2, kernel_size=3, stride=1, padding=1)
        self.conv6_2_mbox_loc = nn.Conv2d(512, 4, kernel_size=3, stride=1, padding=1)
        self.conv7_2_mbox_conf = nn.Conv2d(256, 2, kernel_size=3, stride=1, padding=1)
        self.conv7_2_mbox_loc = nn.Conv2d(256, 4, kernel_size=3, stride=1, padding=1)

    def forward(self, x):
        h = F.relu(self.conv1_1(x))
        h = F.relu(self.conv1_2(h))
        h = F.max_pool2d(h, 2, 2)

        h = F.relu(self.conv2_1(h))
        h = F.relu(self.conv2_2(h))
        h = F.max_pool2d(h, 2, 2)

        h = F.relu(self.conv3_1(h))
        h = F.relu(self.conv3_2(h))
        h = F.relu(self.conv3_3(h))
        f3_3 = h
        h = F.max_pool2d(h, 2, 2)

        h = F.relu(self.conv4_1(h))
        h = F.relu(self.conv4_2(h))
        h = F.relu(self.conv4_3(h))
        f4_3 = h
        h = F.max_pool2d(h, 2, 2)

        h = F.relu(self.conv5_1(h))
        h = F.relu(self.conv5_2(h))
        h = F.relu(self.conv5_3(h))
        f5_3 = h
        h = F.max_pool2d(h, 2, 2)

        h = F.relu(self.fc6(h))
        h = F.relu(self.fc7(h))
        ffc7 = h
        h = F.relu(self.conv6_1(h))
        h = F.relu(self.conv6_2(h))
        f6_2 = h
        h = F.relu(self.conv7_1(h))
        h = F.relu(self.conv7_2(h))
        f7_2 = h

        f3_3 = self.conv3_3_norm(f3_3)
        f4_3 = self.conv4_3_norm(f4_3)
        f5_3 = self.conv5_3_norm(f5_3)

        cls1 = self.conv3_3_norm_mbox_conf(f3_3)
        reg1 = self.conv3_3_norm_mbox_loc(f3_3)
        cls2 = self.conv4_3_norm_mbox_conf(f4_3)
        reg2 = self.conv4_3_norm_mbox_loc(f4_3)
        cls3 = self.conv5_3_norm_mbox_conf(f5_3)
        reg3 = self.conv5_3_norm_mbox_loc(f5_3)
        cls4 = self.fc7_mbox_conf(ffc7)
        reg4 = self.fc7_mbox_loc(ffc7)
        cls5 = self.conv6_2_mbox_conf(f6_2)
        reg5 = self.conv6_2_mbox_loc(f6_2)
        cls6 = self.conv7_2_mbox_conf(f7_2)
        reg6 = self.conv7_2_mbox_loc(f7_2)

        # max-out background label
        chunk = torch.chunk(cls1, 4, 1)
        bmax = torch.max(torch.max(chunk[0], chunk[1]), chunk[2])
        cls1 = torch.cat([bmax, chunk[3]], dim=1)

        return [cls1, reg1, cls2, reg2, cls3, reg3, cls4, reg4, cls5, reg5, cls6, reg6]
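A quick shape walk-through of the network above (randomly initialised, on CPU; purely a sketch): the forward pass returns six `(confidence, regression)` pairs, ordered fine-to-coarse, matching the `stride = 2**(i + 2)` assumption in `detect.py`.

import torch
from face_detection.detection.sfd.net_s3fd import s3fd

net = s3fd().eval()
x = torch.randn(1, 3, 256, 256)
with torch.no_grad():
    outputs = net(x)
for i in range(0, len(outputs), 2):
    cls, reg = outputs[i], outputs[i + 1]
    stride = 2 ** (i // 2 + 2)  # 4, 8, 16, 32, 64, 128 as assumed in detect.py
    print('stride {:3d}: cls {}, reg {}'.format(stride, tuple(cls.shape), tuple(reg.shape)))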
face_detection/detection/sfd/sfd_detector.py
ADDED
@@ -0,0 +1,59 @@
import os
import cv2
import torch  # used by torch.load below; previously only reachable via the star imports
from torch.utils.model_zoo import load_url

from ..core import FaceDetector

from .net_s3fd import s3fd
from .bbox import *
from .detect import *

models_urls = {
    's3fd': 'https://www.adrianbulat.com/downloads/python-fan/s3fd-619a316812.pth',
}


class SFDDetector(FaceDetector):
    def __init__(self, device, path_to_detector=os.path.join(os.path.dirname(os.path.abspath(__file__)), 's3fd.pth'), verbose=False):
        super(SFDDetector, self).__init__(device, verbose)

        # Initialise the face detector
        if not os.path.isfile(path_to_detector):
            model_weights = load_url(models_urls['s3fd'])
        else:
            model_weights = torch.load(path_to_detector)

        self.face_detector = s3fd()
        self.face_detector.load_state_dict(model_weights)
        self.face_detector.to(device)
        self.face_detector.eval()

    def detect_from_image(self, tensor_or_path):
        image = self.tensor_or_path_to_ndarray(tensor_or_path)

        bboxlist = detect(self.face_detector, image, device=self.device)
        keep = nms(bboxlist, 0.3)
        bboxlist = bboxlist[keep, :]
        bboxlist = [x for x in bboxlist if x[-1] > 0.5]

        return bboxlist

    def detect_from_batch(self, images):
        bboxlists = batch_detect(self.face_detector, images, device=self.device)
        keeps = [nms(bboxlists[:, i, :], 0.3) for i in range(bboxlists.shape[1])]
        bboxlists = [bboxlists[keep, i, :] for i, keep in enumerate(keeps)]
        bboxlists = [[x for x in bboxlist if x[-1] > 0.5] for bboxlist in bboxlists]

        return bboxlists

    @property
    def reference_scale(self):
        return 195

    @property
    def reference_x_shift(self):
        return 0

    @property
    def reference_y_shift(self):
        return 0
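End-to-end, the detector can be exercised on a single image as follows (a sketch; `face.jpg` is a placeholder, and the weights are fetched from `models_urls` on first use if `s3fd.pth` is absent):

from face_detection.detection.sfd import FaceDetector  # alias of SFDDetector

detector = FaceDetector(device='cpu')
boxes = detector.detect_from_image('face.jpg')  # list of [x1, y1, x2, y2, score], score > 0.5
for x1, y1, x2, y2, score in boxes:
    print(int(x1), int(y1), int(x2), int(y2), float(score))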
face_detection/models.py
ADDED
@@ -0,0 +1,261 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
import math


def conv3x3(in_planes, out_planes, strd=1, padding=1, bias=False):
    "3x3 convolution with padding"
    return nn.Conv2d(in_planes, out_planes, kernel_size=3,
                     stride=strd, padding=padding, bias=bias)


class ConvBlock(nn.Module):
    def __init__(self, in_planes, out_planes):
        super(ConvBlock, self).__init__()
        self.bn1 = nn.BatchNorm2d(in_planes)
        self.conv1 = conv3x3(in_planes, int(out_planes / 2))
        self.bn2 = nn.BatchNorm2d(int(out_planes / 2))
        self.conv2 = conv3x3(int(out_planes / 2), int(out_planes / 4))
        self.bn3 = nn.BatchNorm2d(int(out_planes / 4))
        self.conv3 = conv3x3(int(out_planes / 4), int(out_planes / 4))

        if in_planes != out_planes:
            self.downsample = nn.Sequential(
                nn.BatchNorm2d(in_planes),
                nn.ReLU(True),
                nn.Conv2d(in_planes, out_planes,
                          kernel_size=1, stride=1, bias=False),
            )
        else:
            self.downsample = None

    def forward(self, x):
        residual = x

        out1 = self.bn1(x)
        out1 = F.relu(out1, True)
        out1 = self.conv1(out1)

        out2 = self.bn2(out1)
        out2 = F.relu(out2, True)
        out2 = self.conv2(out2)

        out3 = self.bn3(out2)
        out3 = F.relu(out3, True)
        out3 = self.conv3(out3)

        out3 = torch.cat((out1, out2, out3), 1)

        if self.downsample is not None:
            residual = self.downsample(residual)

        out3 += residual

        return out3


class Bottleneck(nn.Module):

    expansion = 4

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * 4, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * 4)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = self.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)
        out = self.relu(out)

        out = self.conv3(out)
        out = self.bn3(out)

        if self.downsample is not None:
            residual = self.downsample(x)

        out += residual
        out = self.relu(out)

        return out


class HourGlass(nn.Module):
    def __init__(self, num_modules, depth, num_features):
        super(HourGlass, self).__init__()
        self.num_modules = num_modules
        self.depth = depth
        self.features = num_features

        self._generate_network(self.depth)

    def _generate_network(self, level):
        self.add_module('b1_' + str(level), ConvBlock(self.features, self.features))

        self.add_module('b2_' + str(level), ConvBlock(self.features, self.features))

        if level > 1:
            self._generate_network(level - 1)
        else:
            self.add_module('b2_plus_' + str(level), ConvBlock(self.features, self.features))

        self.add_module('b3_' + str(level), ConvBlock(self.features, self.features))

    def _forward(self, level, inp):
        # Upper branch
        up1 = inp
        up1 = self._modules['b1_' + str(level)](up1)

        # Lower branch
        low1 = F.avg_pool2d(inp, 2, stride=2)
        low1 = self._modules['b2_' + str(level)](low1)

        if level > 1:
            low2 = self._forward(level - 1, low1)
        else:
            low2 = low1
            low2 = self._modules['b2_plus_' + str(level)](low2)

        low3 = low2
        low3 = self._modules['b3_' + str(level)](low3)

        up2 = F.interpolate(low3, scale_factor=2, mode='nearest')

        return up1 + up2

    def forward(self, x):
        return self._forward(self.depth, x)


class FAN(nn.Module):

    def __init__(self, num_modules=1):
        super(FAN, self).__init__()
        self.num_modules = num_modules

        # Base part
        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.conv2 = ConvBlock(64, 128)
        self.conv3 = ConvBlock(128, 128)
        self.conv4 = ConvBlock(128, 256)

        # Stacking part
        for hg_module in range(self.num_modules):
            self.add_module('m' + str(hg_module), HourGlass(1, 4, 256))
            self.add_module('top_m_' + str(hg_module), ConvBlock(256, 256))
            self.add_module('conv_last' + str(hg_module),
                            nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0))
            self.add_module('bn_end' + str(hg_module), nn.BatchNorm2d(256))
            self.add_module('l' + str(hg_module), nn.Conv2d(256,
                                                            68, kernel_size=1, stride=1, padding=0))

            if hg_module < self.num_modules - 1:
                self.add_module(
                    'bl' + str(hg_module), nn.Conv2d(256, 256, kernel_size=1, stride=1, padding=0))
                self.add_module('al' + str(hg_module), nn.Conv2d(68,
                                                                 256, kernel_size=1, stride=1, padding=0))

    def forward(self, x):
        x = F.relu(self.bn1(self.conv1(x)), True)
        x = F.avg_pool2d(self.conv2(x), 2, stride=2)
        x = self.conv3(x)
        x = self.conv4(x)

        previous = x

        outputs = []
        for i in range(self.num_modules):
            hg = self._modules['m' + str(i)](previous)

            ll = hg
            ll = self._modules['top_m_' + str(i)](ll)

            ll = F.relu(self._modules['bn_end' + str(i)]
                        (self._modules['conv_last' + str(i)](ll)), True)

            # Predict heatmaps
            tmp_out = self._modules['l' + str(i)](ll)
            outputs.append(tmp_out)

            if i < self.num_modules - 1:
                ll = self._modules['bl' + str(i)](ll)
                tmp_out_ = self._modules['al' + str(i)](tmp_out)
                previous = previous + ll + tmp_out_

        return outputs


class ResNetDepth(nn.Module):

    def __init__(self, block=Bottleneck, layers=[3, 8, 36, 3], num_classes=68):
        self.inplanes = 64
        super(ResNetDepth, self).__init__()
        self.conv1 = nn.Conv2d(3 + 68, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)
        self.avgpool = nn.AvgPool2d(7)
        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))
        self.inplanes = planes * block.expansion
        for i in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x
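For reference, the default single-stack FAN maps a 256x256 crop to one 68-channel heatmap tensor at quarter resolution (the stride-2 stem conv plus one average pool). A minimal sketch with random weights:

import torch
from face_detection.models import FAN

fan = FAN(num_modules=1).eval()
x = torch.randn(1, 3, 256, 256)
with torch.no_grad():
    heatmaps = fan(x)        # one entry per hourglass module
print(len(heatmaps))          # 1
print(heatmaps[0].shape)      # torch.Size([1, 68, 64, 64])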
face_detection/utils.py
ADDED
@@ -0,0 +1,313 @@
from __future__ import print_function
import os
import sys
import time
import torch
import math
import numpy as np
import cv2


def _gaussian(
        size=3, sigma=0.25, amplitude=1, normalize=False, width=None,
        height=None, sigma_horz=None, sigma_vert=None, mean_horz=0.5,
        mean_vert=0.5):
    # handle some defaults
    if width is None:
        width = size
    if height is None:
        height = size
    if sigma_horz is None:
        sigma_horz = sigma
    if sigma_vert is None:
        sigma_vert = sigma
    center_x = mean_horz * width + 0.5
    center_y = mean_vert * height + 0.5
    gauss = np.empty((height, width), dtype=np.float32)
    # generate kernel
    for i in range(height):
        for j in range(width):
            gauss[i][j] = amplitude * math.exp(-(math.pow((j + 1 - center_x) / (
                sigma_horz * width), 2) / 2.0 + math.pow((i + 1 - center_y) / (sigma_vert * height), 2) / 2.0))
    if normalize:
        gauss = gauss / np.sum(gauss)
    return gauss


def draw_gaussian(image, point, sigma):
    # Check if the gaussian is inside
    ul = [math.floor(point[0] - 3 * sigma), math.floor(point[1] - 3 * sigma)]
    br = [math.floor(point[0] + 3 * sigma), math.floor(point[1] + 3 * sigma)]
    if (ul[0] > image.shape[1] or ul[1] > image.shape[0] or br[0] < 1 or br[1] < 1):
        return image
    size = 6 * sigma + 1
    g = _gaussian(size)
    g_x = [int(max(1, -ul[0])), int(min(br[0], image.shape[1])) - int(max(1, ul[0])) + int(max(1, -ul[0]))]
    g_y = [int(max(1, -ul[1])), int(min(br[1], image.shape[0])) - int(max(1, ul[1])) + int(max(1, -ul[1]))]
    img_x = [int(max(1, ul[0])), int(min(br[0], image.shape[1]))]
    img_y = [int(max(1, ul[1])), int(min(br[1], image.shape[0]))]
    assert (g_x[0] > 0 and g_y[1] > 0)
    image[img_y[0] - 1:img_y[1], img_x[0] - 1:img_x[1]
          ] = image[img_y[0] - 1:img_y[1], img_x[0] - 1:img_x[1]] + g[g_y[0] - 1:g_y[1], g_x[0] - 1:g_x[1]]
    image[image > 1] = 1
    return image


def transform(point, center, scale, resolution, invert=False):
    """Generate an affine transformation matrix.

    Given a point, a center, a scale and a target resolution, the function
    generates an affine transformation matrix. If invert is ``True``
    it will produce the inverse transformation.

    Arguments:
        point {torch.tensor} -- the input 2D point
        center {torch.tensor or numpy.array} -- the center around which to perform the transformations
        scale {float} -- the scale of the face/object
        resolution {float} -- the output resolution

    Keyword Arguments:
        invert {bool} -- whether the function should produce the direct or the
        inverse transformation matrix (default: {False})
    """
    _pt = torch.ones(3)
    _pt[0] = point[0]
    _pt[1] = point[1]

    h = 200.0 * scale
    t = torch.eye(3)
    t[0, 0] = resolution / h
    t[1, 1] = resolution / h
    t[0, 2] = resolution * (-center[0] / h + 0.5)
    t[1, 2] = resolution * (-center[1] / h + 0.5)

    if invert:
        t = torch.inverse(t)

    new_point = (torch.matmul(t, _pt))[0:2]

    return new_point.int()


def crop(image, center, scale, resolution=256.0):
    """Center crops an image or set of heatmaps.

    Crops the image around the center point; the input is expected to be an np.ndarray.

    Arguments:
        image {numpy.array} -- an rgb image
        center {numpy.array} -- the center of the object, usually the same as of the bounding box
        scale {float} -- scale of the face

    Keyword Arguments:
        resolution {float} -- the size of the output cropped image (default: {256.0})

    Returns:
        numpy.array -- the cropped image, resized to (resolution, resolution)
    """
    ul = transform([1, 1], center, scale, resolution, True)
    br = transform([resolution, resolution], center, scale, resolution, True)
    # pad = math.ceil(torch.norm((ul - br).float()) / 2.0 - (br[0] - ul[0]) / 2.0)
    if image.ndim > 2:
        newDim = np.array([br[1] - ul[1], br[0] - ul[0],
                           image.shape[2]], dtype=np.int32)
        newImg = np.zeros(newDim, dtype=np.uint8)
    else:
        # np.int32 instead of the np.int alias, which was removed in NumPy >= 1.24
        newDim = np.array([br[1] - ul[1], br[0] - ul[0]], dtype=np.int32)
        newImg = np.zeros(newDim, dtype=np.uint8)
    ht = image.shape[0]
    wd = image.shape[1]
    newX = np.array(
        [max(1, -ul[0] + 1), min(br[0], wd) - ul[0]], dtype=np.int32)
    newY = np.array(
        [max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32)
    oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32)
    oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32)
    newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1]
           ] = image[oldY[0] - 1:oldY[1], oldX[0] - 1:oldX[1], :]
    newImg = cv2.resize(newImg, dsize=(int(resolution), int(resolution)),
                        interpolation=cv2.INTER_LINEAR)
    return newImg


def get_preds_fromhm(hm, center=None, scale=None):
    """Obtain (x,y) coordinates given a set of N heatmaps. If the center
    and the scale is provided the function will return the points also in
    the original coordinate frame.

    Arguments:
        hm {torch.tensor} -- the predicted heatmaps, of shape [B, N, W, H]

    Keyword Arguments:
        center {torch.tensor} -- the center of the bounding box (default: {None})
        scale {float} -- face scale (default: {None})
    """
    max, idx = torch.max(
        hm.view(hm.size(0), hm.size(1), hm.size(2) * hm.size(3)), 2)
    idx += 1
    preds = idx.view(idx.size(0), idx.size(1), 1).repeat(1, 1, 2).float()
    preds[..., 0].apply_(lambda x: (x - 1) % hm.size(3) + 1)
    preds[..., 1].add_(-1).div_(hm.size(2)).floor_().add_(1)

    for i in range(preds.size(0)):
        for j in range(preds.size(1)):
            hm_ = hm[i, j, :]
            pX, pY = int(preds[i, j, 0]) - 1, int(preds[i, j, 1]) - 1
            if pX > 0 and pX < 63 and pY > 0 and pY < 63:
                diff = torch.FloatTensor(
                    [hm_[pY, pX + 1] - hm_[pY, pX - 1],
                     hm_[pY + 1, pX] - hm_[pY - 1, pX]])
                preds[i, j].add_(diff.sign_().mul_(.25))

    preds.add_(-.5)

    preds_orig = torch.zeros(preds.size())
    if center is not None and scale is not None:
        for i in range(hm.size(0)):
            for j in range(hm.size(1)):
                preds_orig[i, j] = transform(
                    preds[i, j], center, scale, hm.size(2), True)

    return preds, preds_orig


def get_preds_fromhm_batch(hm, centers=None, scales=None):
    """Obtain (x,y) coordinates given a set of N heatmaps. If the centers
    and the scales are provided the function will return the points also in
    the original coordinate frame.

    Arguments:
        hm {torch.tensor} -- the predicted heatmaps, of shape [B, N, W, H]

    Keyword Arguments:
        centers {torch.tensor} -- the centers of the bounding box (default: {None})
        scales {float} -- face scales (default: {None})
    """
    max, idx = torch.max(
        hm.view(hm.size(0), hm.size(1), hm.size(2) * hm.size(3)), 2)
    idx += 1
    preds = idx.view(idx.size(0), idx.size(1), 1).repeat(1, 1, 2).float()
    preds[..., 0].apply_(lambda x: (x - 1) % hm.size(3) + 1)
    preds[..., 1].add_(-1).div_(hm.size(2)).floor_().add_(1)

    for i in range(preds.size(0)):
        for j in range(preds.size(1)):
            hm_ = hm[i, j, :]
            pX, pY = int(preds[i, j, 0]) - 1, int(preds[i, j, 1]) - 1
            if pX > 0 and pX < 63 and pY > 0 and pY < 63:
                diff = torch.FloatTensor(
                    [hm_[pY, pX + 1] - hm_[pY, pX - 1],
                     hm_[pY + 1, pX] - hm_[pY - 1, pX]])
                preds[i, j].add_(diff.sign_().mul_(.25))

    preds.add_(-.5)

    preds_orig = torch.zeros(preds.size())
    if centers is not None and scales is not None:
        for i in range(hm.size(0)):
            for j in range(hm.size(1)):
                preds_orig[i, j] = transform(
                    preds[i, j], centers[i], scales[i], hm.size(2), True)

    return preds, preds_orig


def shuffle_lr(parts, pairs=None):
    """Shuffle the points left-right according to the axis of symmetry
    of the object.

    Arguments:
        parts {torch.tensor} -- a 3D or 4D object containing the
        heatmaps.

    Keyword Arguments:
        pairs {list of integers} -- [order of the flipped points] (default: {None})
    """
    if pairs is None:
        pairs = [16, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
                 26, 25, 24, 23, 22, 21, 20, 19, 18, 17, 27, 28, 29, 30, 35,
                 34, 33, 32, 31, 45, 44, 43, 42, 47, 46, 39, 38, 37, 36, 41,
                 40, 54, 53, 52, 51, 50, 49, 48, 59, 58, 57, 56, 55, 64, 63,
                 62, 61, 60, 67, 66, 65]
    if parts.ndimension() == 3:
        parts = parts[pairs, ...]
    else:
        parts = parts[:, pairs, ...]

    return parts


def flip(tensor, is_label=False):
    """Flip an image or a set of heatmaps left-right

    Arguments:
        tensor {numpy.array or torch.tensor} -- [the input image or heatmaps]

    Keyword Arguments:
        is_label {bool} -- [whether the input is a set of heatmaps rather than an image] (default: {False})
    """
    if not torch.is_tensor(tensor):
        tensor = torch.from_numpy(tensor)

    if is_label:
        tensor = shuffle_lr(tensor).flip(tensor.ndimension() - 1)
    else:
        tensor = tensor.flip(tensor.ndimension() - 1)

    return tensor

# From pyzolib/paths.py (https://bitbucket.org/pyzo/pyzolib/src/tip/paths.py)


def appdata_dir(appname=None, roaming=False):
    """ appdata_dir(appname=None, roaming=False)

    Get the path to the application directory, where applications are allowed
    to write user specific files (e.g. configurations). For non-user specific
    data, consider using common_appdata_dir().
    If appname is given, a subdir is appended (and created if necessary).
    If roaming is True, will prefer a roaming directory (Windows Vista/7).
    """

    # Define default user directory
    userDir = os.getenv('FACEALIGNMENT_USERDIR', None)
    if userDir is None:
        userDir = os.path.expanduser('~')
        if not os.path.isdir(userDir):  # pragma: no cover
            userDir = '/var/tmp'  # issue #54

    # Get system app data dir
    path = None
    if sys.platform.startswith('win'):
        path1, path2 = os.getenv('LOCALAPPDATA'), os.getenv('APPDATA')
        path = (path2 or path1) if roaming else (path1 or path2)
    elif sys.platform.startswith('darwin'):
        path = os.path.join(userDir, 'Library', 'Application Support')
    # On Linux and as fallback
    if not (path and os.path.isdir(path)):
        path = userDir

    # Maybe we should store things local to the executable (in case of a
    # portable distro or a frozen application that wants to be portable)
    prefix = sys.prefix
    if getattr(sys, 'frozen', None):
        prefix = os.path.abspath(os.path.dirname(sys.executable))
    for reldir in ('settings', '../settings'):
        localpath = os.path.abspath(os.path.join(prefix, reldir))
        if os.path.isdir(localpath):  # pragma: no cover
            try:
                open(os.path.join(localpath, 'test.write'), 'wb').close()
                os.remove(os.path.join(localpath, 'test.write'))
            except IOError:
                pass  # We cannot write in this directory
            else:
                path = localpath
                break

    # Get path specific for this app
    if appname:
        if path == userDir:
            appname = '.' + appname.lstrip('.')  # Make it a hidden directory
        path = os.path.join(path, appname)
        if not os.path.isdir(path):  # pragma: no cover
            os.mkdir(path)

    # Done
    return path
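The heatmap decoding above is easiest to see on synthetic data: plant a single peak and check where `get_preds_fromhm` puts the landmark (coordinates come back 1-based and then shifted by the final `-0.5`). A sketch:

import torch
from face_detection.utils import get_preds_fromhm

hm = torch.zeros(1, 68, 64, 64)
hm[0, :, 20, 32] = 1.0               # peak at row 20, column 32 for every landmark
preds, preds_orig = get_preds_fromhm(hm)
print(preds.shape)                    # torch.Size([1, 68, 2])
print(preds[0, 0])                    # tensor([32.5000, 20.5000]) -> (x, y) of the peak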
generate.py
ADDED
@@ -0,0 +1,399 @@
''' consistent initial noise for video generation'''
import cv2
import os
from os.path import join, basename, dirname, splitext
import shutil
import argparse
import numpy as np
import random
import torch, torchvision
import subprocess
from audio import audio
import face_detection
from tqdm import tqdm

from guided_diffusion import dist_util, logger
from guided_diffusion.resample import create_named_schedule_sampler
from guided_diffusion.script_util import (
    tfg_model_and_diffusion_defaults,
    tfg_create_model_and_diffusion,
    args_to_dict,
    add_dict_to_argparser,
)

from guided_diffusion.tfg_data_util import (
    tfg_process_batch,
)

def get_frame_id(frame):
    return int(basename(frame).split('.')[0])

def crop_audio_window(spec, start_frame, args):
    if type(start_frame) == int:
        start_frame_num = start_frame
    else:
        start_frame_num = get_frame_id(start_frame)
    start_idx = int(args.mel_steps_per_sec * (start_frame_num / float(args.video_fps)))
    end_idx = start_idx + args.syncnet_mel_step_size
    return spec[start_idx : end_idx, :]
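The index arithmetic is easier to check with concrete numbers. The exact parameter values are an assumption here (Wav2Lip-style defaults: `video_fps=25`, `mel_steps_per_sec=80`, `syncnet_mel_step_size=16`); under them, frame `f` maps to mel row `int(80 * f / 25)`. Run in this module's scope:

import numpy as np

class Args:  # hypothetical stand-in for the parsed CLI args
    video_fps = 25
    mel_steps_per_sec = 80
    syncnet_mel_step_size = 16

spec = np.zeros((800, 80))                    # 10 s of mel frames, [T, n_mels]
window = crop_audio_window(spec, 40, Args())  # frame 40 -> rows 128:144
print(window.shape)                           # (16, 80)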
def load_all_indiv_mels(path, args):
    in_path = path
    out_dir = join(args.sample_path, "temp", basename(in_path).replace(".mp4", ""))
    os.makedirs(out_dir, exist_ok=True)
    out_path = join(out_dir, "audio.wav")
    command2 = 'ffmpeg -loglevel error -y -i {} -strict -2 {}'.format(in_path, out_path)
    subprocess.call(command2, shell=True)
    wav = audio.load_wav(out_path, args.sample_rate)
    orig_mel = audio.melspectrogram(wav).T

    all_indiv_mels = []
    # i = 0
    i = 1
    while True:
        m = crop_audio_window(orig_mel.copy(), max(i - args.syncnet_T // 2, 0), args)
        if (m.shape[0] != args.syncnet_mel_step_size):
            break
        all_indiv_mels.append(m.T)
        i += 1

    # clean up
    shutil.rmtree(join(args.sample_path, "temp"))

    return all_indiv_mels, wav

def load_video_frames(path, args):
    in_path = path
    out_dir = join(args.sample_path, "temp", basename(in_path).replace(".mp4", ""), "image")
    os.makedirs(out_dir, exist_ok=True)

    command = "ffmpeg -loglevel error -y -i {} -vf fps={} -q:v 2 -qmin 1 {}/%05d.jpg".format(in_path, args.video_fps, out_dir)
    subprocess.call(command, shell=True)

    video_frames = []
    for i, img_name in enumerate(sorted(os.listdir(out_dir))):
        img_path = join(out_dir, img_name)
        img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
        video_frames.append(img)

    # clean up
    shutil.rmtree(join(args.sample_path, "temp"))

    return video_frames


def get_smoothened_boxes(boxes, T):
    for i in range(len(boxes)):
        if i + T > len(boxes):
            window = boxes[len(boxes) - T:]
        else:
            window = boxes[i : i + T]
        boxes[i] = np.mean(window, axis=0)
    return boxes

def my_voxceleb2_crop(img):
    return img[:-int(img.shape[0]*2.36/8), int(img.shape[1]*1.8/8): -int(img.shape[1]*1.8/8)]

def my_voxceleb2_crop_bboxs(img):
    return 0, img.shape[0]-int(img.shape[0]*2.36/8), int(img.shape[1]*1.8/8), img.shape[1]-int(img.shape[1]*1.8/8)

def face_detect(images, detector, args, resize=False):
    batch_size = args.face_det_batch_size

    while 1:
        predictions = []
        try:
            for i in range(0, len(images), batch_size):
                predictions.extend(detector.get_detections_for_batch(np.array(images[i:i + batch_size])))
        except RuntimeError:
            if batch_size == 1:
                raise RuntimeError('Image too big to run face detection on GPU')
            batch_size //= 2
            args.face_det_batch_size = batch_size
            print('Recovering from OOM error; New batch size: {}'.format(batch_size))
            continue
        break

    results = []
    if type(args.pads) == str:
        args.pads = [int(x) for x in args.pads.split(",")]
    pady1, pady2, padx1, padx2 = args.pads
    for rect, image in zip(predictions, images):
        if rect is None:
            raise ValueError('Face not detected!')

        y1 = max(0, rect[1] - pady1)
        y2 = min(image.shape[0], rect[3] + pady2)
        x1 = max(0, rect[0] - padx1)
        x2 = min(image.shape[1], rect[2] + padx2)

        results.append([x1, y1, x2, y2])

    boxes = get_smoothened_boxes(np.array(results), T=5)

    if resize:
        if args.is_voxceleb2:
            results = [[cv2.resize(my_voxceleb2_crop(image), (args.image_size, args.image_size)), my_voxceleb2_crop_bboxs(image), True] for image, (x1, y1, x2, y2) in zip(images, boxes)]
        else:
            results = [[cv2.resize(image[y1: y2, x1:x2], (args.image_size, args.image_size)), (y1, y2, x1, x2), True] for image, (x1, y1, x2, y2) in zip(images, boxes)]
    else:
        results = [[image[y1: y2, x1:x2], (y1, y2, x1, x2), True] for image, (x1, y1, x2, y2) in zip(images, boxes)]
    return results
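`get_smoothened_boxes` is a forward-looking moving average over T detections, which damps per-frame jitter in the crop region. A toy check, run in this module's scope (note the smoothing is in place, so the trailing windows see already-smoothed values):

import numpy as np

boxes = np.array([[10.], [20.], [30.], [40.]])  # 1-D "boxes" keep the arithmetic visible
print(get_smoothened_boxes(boxes, T=2).ravel())
# [15.  25.  35.  37.5]  (mean of each pair; the last window is [35, 40])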
def normalise(tensor):
    """ [-1,1]->[0,1]"""
    return ((tensor + 1) * 0.5).clamp(0, 1)

def normalise2(tensor):
    """ [0,1]->[-1,1]"""
    return (tensor * 2 - 1).clamp(-1, 1)


def sample_batch(batch, model, diffusion, args):
    B, F, C, H, W = batch['image'].shape
    sample_shape = (B * F, C, H, W)

    # generate fixed noise
    init_noise = None
    if args.sampling_seed:
        state = torch.get_rng_state()
        torch.manual_seed(args.sampling_seed)
        torch.cuda.manual_seed_all(args.sampling_seed)
        init_noise = torch.randn((1, C, H, W))
        # repeat noise for all frames
        init_noise = init_noise.repeat(B * F, 1, 1, 1)
        torch.set_rng_state(state)

    img_batch, model_kwargs = tfg_process_batch(batch, args.face_hide_percentage,
                                                use_ref=args.use_ref,
                                                use_audio=args.use_audio,
                                                # sampling_use_gt_for_ref=args.sampling_use_gt_for_ref,
                                                noise=init_noise)

    img_batch = img_batch.to(dist_util.dev())
    model_kwargs = {k: v.to(dist_util.dev()) for k, v in model_kwargs.items()}
    init_noise = init_noise.to(dist_util.dev()) if init_noise is not None else None

    sample_fn = (
        diffusion.p_sample_loop if not args.use_ddim else diffusion.ddim_sample_loop
    )
    sample = sample_fn(
        model,
        sample_shape,
        clip_denoised=args.clip_denoised,
        model_kwargs=model_kwargs,
        noise=init_noise
    )
    return sample, img_batch, model_kwargs
194 |
+
|
195 |
+
|
196 |
+
def generate(video_path, audio_path, model, diffusion, detector, args, out_path=None, save_orig=True):
|
197 |
+
video_frames = load_video_frames(video_path, args)
|
198 |
+
try:
|
199 |
+
face_det_results = face_detect(video_frames.copy(), detector, args, resize=True)
|
200 |
+
except Exception as e:
|
201 |
+
print("Error:", e, video_path, audio_path)
|
202 |
+
import traceback
|
203 |
+
print(traceback.format_exc())
|
204 |
+
wrong_all_indiv_mels, wrong_audio_wavform = load_all_indiv_mels(audio_path, args)
|
205 |
+
|
206 |
+
min_frames = min(len(video_frames), len(wrong_all_indiv_mels))
|
207 |
+
video_frames = video_frames[:min_frames]
|
208 |
+
face_det_results = face_det_results[:min_frames]
|
209 |
+
face_bboxes = [face_det_results[i][1] for i in range(min_frames)]
|
210 |
+
face_frames = torch.FloatTensor(np.transpose(np.asarray([face_det_results[i][0] for i in range(min_frames)], dtype=np.float32)/255.,(0,3,1,2)))#[N, C, H, W]
|
211 |
+
wrong_all_indiv_mels = torch.FloatTensor(np.asarray(wrong_all_indiv_mels[:min_frames])).unsqueeze(1) #[N, 1, h, w]
|
212 |
+
|
213 |
+
if save_orig:
|
214 |
+
if out_path is None:
|
215 |
+
out_path_orig = os.path.join(args.sample_path, splitext(basename(video_path))[0]+"_"+ splitext(basename(audio_path))[0]+"_orig.mp4")
|
216 |
+
else:
|
217 |
+
out_path_orig = out_path.replace(".mp4", "_orig.mp4")
|
218 |
+
torchvision.io.write_video(
|
219 |
+
out_path_orig,
|
220 |
+
video_array=torch.from_numpy(np.array(video_frames)), fps = args.video_fps, video_codec='libx264',
|
221 |
+
audio_array=torch.from_numpy(wrong_audio_wavform).unsqueeze(0), audio_fps=args.sample_rate, audio_codec='aac'
|
222 |
+
)
|
223 |
+
|
224 |
+
if args.sampling_ref_type=='gt':
|
225 |
+
ref_frames = face_frames.clone()
|
226 |
+
elif args.sampling_ref_type=='first_frame':
|
227 |
+
ref_frames = face_frames[0:1].repeat(len(face_frames),1,1,1)
|
228 |
+
elif args.sampling_ref_type=='random':
|
229 |
+
rand_idx = random.Random(args.sampling_seed).randint(0, len(face_frames)-1)
|
230 |
+
ref_frames = face_frames[rand_idx:rand_idx+1].repeat(len(face_frames),1,1,1)
|
231 |
+
|
232 |
+
if args.sampling_input_type=='first_frame':
|
233 |
+
face_frames = face_frames[0:1].repeat(len(face_frames),1,1,1)
|
234 |
+
video_frames = np.array(video_frames[0:1]*len(video_frames))
|
235 |
+
face_bboxes = np.array(face_bboxes[0:1]*len(face_bboxes))
|
236 |
+
|
237 |
+
|
238 |
+
generated_video_frames = []
|
239 |
+
b_s = args.sampling_batch_size
|
240 |
+
for i in range(0,min_frames, b_s*args.nframes):
|
241 |
+
video_frames_batch = video_frames[i:i+b_s*args.nframes]
|
242 |
+
face_bboxes_batch = face_bboxes[i:i+b_s*args.nframes]
|
243 |
+
|
244 |
+
try:
|
245 |
+
img_batch = face_frames[i:i+b_s*args.nframes] #[BF, C, H, W]
|
246 |
+
img_batch = img_batch.reshape(-1, args.nframes, img_batch.size(-3), img_batch.size(-2), img_batch.size(-1))
|
247 |
+
ref_batch = ref_frames[i:i+b_s*args.nframes]
|
248 |
+
ref_batch = ref_batch.reshape(-1, args.nframes, ref_batch.size(-3), ref_batch.size(-2), ref_batch.size(-1))
|
249 |
+
wrong_indiv_mel_batch = wrong_all_indiv_mels[i:i+b_s*args.nframes] #[BF, 1, h, w]
|
250 |
+
wrong_indiv_mel_batch = wrong_indiv_mel_batch.reshape(-1, args.nframes, wrong_indiv_mel_batch.size(-3),wrong_indiv_mel_batch.size(-2),wrong_indiv_mel_batch.size(-1))
|
251 |
+
except: # of the last batch, if B*F % nframes!=0, then the above reshape throws error
|
252 |
+
# but internally everything is going to get converted to BF
|
253 |
+
# ie. (B,F, C, H, W) -> (B*F, C, H, W) but (B*F, 1, C, H, W) -> (B*F, C, H, W)
|
254 |
+
img_batch = face_frames[i:i+b_s*args.nframes] #[BF, C, H, W]
|
255 |
+
img_batch = img_batch.reshape(-1, 1, img_batch.size(-3), img_batch.size(-2), img_batch.size(-1))
|
256 |
+
ref_batch = ref_frames[i:i+b_s*args.nframes]
|
257 |
+
ref_batch = ref_batch.reshape(-1, 1, ref_batch.size(-3), ref_batch.size(-2), ref_batch.size(-1))
|
258 |
+
wrong_indiv_mel_batch = wrong_all_indiv_mels[i:i+b_s*args.nframes] #[BF, 1, h, w]
|
259 |
+
wrong_indiv_mel_batch = wrong_indiv_mel_batch.reshape(-1, 1, wrong_indiv_mel_batch.size(-3),wrong_indiv_mel_batch.size(-2),wrong_indiv_mel_batch.size(-1))
|
260 |
+
|
261 |
+
|
262 |
+
batch = {"image":img_batch,
|
263 |
+
"ref_img":ref_batch,
|
264 |
+
"indiv_mels":wrong_indiv_mel_batch}
|
265 |
+
|
266 |
+
sample, img_batch, model_kwargs = sample_batch(batch, model, diffusion, args)
|
267 |
+
mask = model_kwargs['mask']
|
268 |
+
recon_batch = sample * mask + (1. -mask)*img_batch #[BF, C, H, W]
|
269 |
+
recon_batch = (normalise(recon_batch)*255).cpu().numpy().transpose(0,2,3,1) #[-1,1] -> [0,255]
|
270 |
+
|
271 |
+
for g,v,b in zip(recon_batch, video_frames_batch, face_bboxes_batch):
|
272 |
+
y1, y2, x1, x2 = b
|
273 |
+
g = cv2.resize(g.astype(np.uint8), (x2 - x1, y2 - y1))
|
274 |
+
v[y1:y2, x1:x2] = g
|
275 |
+
generated_video_frames.append(v)
|
276 |
+
|
277 |
+
|
278 |
+
|
279 |
+
print(wrong_audio_wavform.shape, np.array(generated_video_frames).shape)
|
280 |
+
min_time = len(generated_video_frames)/args.video_fps # because video is already smaller because it got chopped accoding to the mel array length
|
281 |
+
wrong_audio_wavform = wrong_audio_wavform[:int(min_time*args.sample_rate)]
|
282 |
+
print(wrong_audio_wavform.shape, np.array(generated_video_frames).shape)
|
283 |
+
if out_path is None:
|
284 |
+
out_path = os.path.join(args.sample_path, splitext(basename(video_path))[0]+"_"+ splitext(basename(audio_path))[0]+".mp4")
|
285 |
+
torchvision.io.write_video(
|
286 |
+
out_path,
|
287 |
+
video_array=torch.from_numpy(np.array(generated_video_frames)), fps = args.video_fps, video_codec='libx264',
|
288 |
+
audio_array=torch.from_numpy(wrong_audio_wavform).unsqueeze(0), audio_fps=args.sample_rate, audio_codec='aac'
|
289 |
+
)
|
290 |
+
|
291 |
+
|
292 |
+
|
293 |
+
|
294 |
+
|
295 |
+
def generate_from_filelist(test_video_dir, filelist, model, diffusion, detector, args):
|
296 |
+
video_names = []
|
297 |
+
audio_names = []
|
298 |
+
with open(filelist, "r") as f:
|
299 |
+
lines = f.readlines()
|
300 |
+
for line in tqdm(lines):
|
301 |
+
try:
|
302 |
+
audio_name, video_name = line.strip().split()
|
303 |
+
audio_path = join(test_video_dir, audio_name+'.mp4')
|
304 |
+
video_path = join(test_video_dir, video_name+'.mp4')
|
305 |
+
out_path = join(args.sample_path,audio_name.replace('/','.')+"_"+video_name.replace('/','.')+".mp4")
|
306 |
+
generate(video_path, audio_path, model, diffusion, detector, args, out_path=out_path ,save_orig=args.save_orig)
|
307 |
+
except Exception as e:
|
308 |
+
print("Error:", e, video_path, audio_path)
|
309 |
+
import traceback
|
310 |
+
print(traceback.format_exc())
|
311 |
+
|
312 |
+
|
313 |
+
|
314 |
+
def main():
|
315 |
+
args = create_argparser().parse_args()
|
316 |
+
dist_util.setup_dist()
|
317 |
+
logger.configure(dir=args.sample_path, format_strs=["stdout", "log"])
|
318 |
+
|
319 |
+
logger.log("creating model...")
|
320 |
+
model, diffusion = tfg_create_model_and_diffusion(
|
321 |
+
**args_to_dict(args, tfg_model_and_diffusion_defaults().keys())
|
322 |
+
)
|
323 |
+
print("Model Loaded")
|
324 |
+
model.load_state_dict(
|
325 |
+
dist_util.load_state_dict(args.model_path, map_location='cpu')
|
326 |
+
)
|
327 |
+
model.to(dist_util.dev())
|
328 |
+
if args.use_fp16:
|
329 |
+
model.convert_to_fp16()
|
330 |
+
model.eval()
|
331 |
+
|
332 |
+
detector = face_detection.FaceAlignment(face_detection.LandmarksType._2D, flip_input=False, device='cuda' if torch.cuda.is_available() else 'cpu')
|
333 |
+
|
334 |
+
if args.generate_from_filelist:
|
335 |
+
generate_from_filelist(args.test_video_dir, args.filelist, model, diffusion, detector, args)
|
336 |
+
else:
|
337 |
+
generate(args.video_path, args.audio_path, model, diffusion, detector, args, out_path=args.out_path, save_orig=args.save_orig)
|
338 |
+
|
339 |
+
|
340 |
+
def create_argparser():
|
341 |
+
defaults = dict(
|
342 |
+
# generate from a single audio-video pair
|
343 |
+
generate_from_filelist = False,
|
344 |
+
video_path = "",
|
345 |
+
audio_path = "",
|
346 |
+
out_path = None,
|
347 |
+
save_orig = True,
|
348 |
+
|
349 |
+
#generate from filelist : generate_from_filelist = True
|
350 |
+
test_video_dir = "test_videos",
|
351 |
+
filelist = "test_filelist.txt",
|
352 |
+
|
353 |
+
|
354 |
+
use_fp16 = True,
|
355 |
+
#tfg specific
|
356 |
+
face_hide_percentage=0.5,
|
357 |
+
use_ref=False,
|
358 |
+
use_audio=False,
|
359 |
+
audio_as_style=False,
|
360 |
+
audio_as_style_encoder_mlp=False,
|
361 |
+
|
362 |
+
#data args
|
363 |
+
nframes=1,
|
364 |
+
nrefer=0,
|
365 |
+
image_size=128,
|
366 |
+
syncnet_T = 5,
|
367 |
+
syncnet_mel_step_size = 16,
|
368 |
+
audio_frames_per_video = 16, #for tfg model, we use sound corresponding to 5 frames centred at that frame
|
369 |
+
audio_dim=80,
|
370 |
+
is_voxceleb2=True,
|
371 |
+
|
372 |
+
video_fps=25,
|
373 |
+
sample_rate=16000, #audio sampling rate
|
374 |
+
mel_steps_per_sec=80.,
|
375 |
+
|
376 |
+
#sampling args
|
377 |
+
clip_denoised=True, # not used in training
|
378 |
+
sampling_batch_size=2,
|
379 |
+
use_ddim=False,
|
380 |
+
model_path="",
|
381 |
+
sample_path="d2l_gen",
|
382 |
+
sample_partition="",
|
383 |
+
sampling_seed=None,
|
384 |
+
sampling_use_gt_for_ref=False,
|
385 |
+
sampling_ref_type='gt', #one of ['gt', 'first_frame', 'random']
|
386 |
+
sampling_input_type='gt', #one of ['gt', 'first_frame']
|
387 |
+
|
388 |
+
# face detection args
|
389 |
+
face_det_batch_size=64,
|
390 |
+
pads = "0,0,0,0"
|
391 |
+
)
|
392 |
+
defaults.update(tfg_model_and_diffusion_defaults())
|
393 |
+
parser = argparse.ArgumentParser()
|
394 |
+
add_dict_to_argparser(parser, defaults)
|
395 |
+
return parser
|
396 |
+
|
397 |
+
|
398 |
+
if __name__=="__main__":
|
399 |
+
main()
|
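For reference, the parser above can be exercised directly from Python. This is a minimal sketch, not part of the upload; it assumes the stock guided-diffusion add_dict_to_argparser behaviour (one --<name> flag per default) and uses placeholder input paths.

# Minimal sketch (placeholder paths): build the parser above and inspect the args.
from generate import create_argparser  # assumes the repo root is on PYTHONPATH

args = create_argparser().parse_args([
    "--model_path", "checkpoints/checkpoint.pt",  # placeholder checkpoint path
    "--video_path", "input.mp4",                  # placeholder driving video
    "--audio_path", "speech.mp4",                 # placeholder speech source
    "--nframes", "5",
])
print(args.model_path, args.nframes, args.sample_path)  # checkpoints/checkpoint.pt 5 d2l_gen
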
generate_dist.py
ADDED
@@ -0,0 +1,428 @@
''' consistent initial noise for video generation'''
import cv2
import os
from os.path import join, basename, dirname, splitext
import shutil
import argparse
import numpy as np
import random
import torch, torchvision
import subprocess
from audio import audio
import face_detection
from tqdm import tqdm
from torch.nn.parallel.distributed import DistributedDataParallel as DDP
from guided_diffusion import dist_util, logger
from guided_diffusion.resample import create_named_schedule_sampler
from guided_diffusion.script_util import (
    tfg_model_and_diffusion_defaults,
    tfg_create_model_and_diffusion,
    args_to_dict,
    add_dict_to_argparser,
)
from time import time
import torch.distributed as dist
from guided_diffusion.tfg_data_util import (
    tfg_process_batch,
)

def get_frame_id(frame):
    return int(basename(frame).split('.')[0])

def crop_audio_window(spec, start_frame, args):
    if isinstance(start_frame, int):
        start_frame_num = start_frame
    else:
        start_frame_num = get_frame_id(start_frame)
    start_idx = int(args.mel_steps_per_sec * (start_frame_num / float(args.video_fps)))
    end_idx = start_idx + args.syncnet_mel_step_size
    return spec[start_idx : end_idx, :]

def load_all_indiv_mels(path, args):
    in_path = path
    out_dir = join(args.sample_path, "temp", str(dist.get_rank()), basename(in_path).replace(".mp4", ""))
    os.makedirs(out_dir, exist_ok=True)
    out_path = join(out_dir, "audio.wav")
    command2 = 'ffmpeg -loglevel error -y -i {} -strict -2 {}'.format(in_path, out_path)
    subprocess.call(command2, shell=True)
    wav = audio.load_wav(out_path, args.sample_rate)
    orig_mel = audio.melspectrogram(wav).T

    all_indiv_mels = []
    # i = 0
    i = 1
    while True:
        m = crop_audio_window(orig_mel.copy(), max(i - args.syncnet_T//2, 0), args)
        if m.shape[0] != args.syncnet_mel_step_size:
            break
        all_indiv_mels.append(m.T)
        i += 1

    # clean up
    shutil.rmtree(join(args.sample_path, "temp", str(dist.get_rank())))

    return all_indiv_mels, wav

def load_video_frames(path, args):
    in_path = path
    out_dir = join(args.sample_path, "temp", str(dist.get_rank()), basename(in_path).replace(".mp4", ""), "image")
    os.makedirs(out_dir, exist_ok=True)

    command = "ffmpeg -loglevel error -y -i {} -vf fps={} -q:v 2 -qmin 1 {}/%05d.jpg".format(in_path, args.video_fps, out_dir)
    subprocess.call(command, shell=True)

    video_frames = []
    for i, img_name in enumerate(sorted(os.listdir(out_dir))):
        img_path = join(out_dir, img_name)
        img = cv2.cvtColor(cv2.imread(img_path), cv2.COLOR_BGR2RGB)
        video_frames.append(img)

    # clean up
    shutil.rmtree(join(args.sample_path, "temp", str(dist.get_rank())))

    return video_frames

def get_smoothened_boxes(boxes, T):
    for i in range(len(boxes)):
        if i + T > len(boxes):
            window = boxes[len(boxes) - T:]
        else:
            window = boxes[i : i + T]
        boxes[i] = np.mean(window, axis=0)
    return boxes

def my_voxceleb2_crop(img):
    return img[:-int(img.shape[0]*2.36/8), int(img.shape[1]*1.8/8): -int(img.shape[1]*1.8/8)]

def my_voxceleb2_crop_bboxs(img):
    return 0, img.shape[0]-int(img.shape[0]*2.36/8), int(img.shape[1]*1.8/8), img.shape[1]-int(img.shape[1]*1.8/8)

def face_detect(images, detector, args, resize=False):
    batch_size = args.face_det_batch_size

    while True:
        predictions = []
        try:
            for i in range(0, len(images), batch_size):
                predictions.extend(detector.get_detections_for_batch(np.array(images[i:i + batch_size])))
        except RuntimeError:
            if batch_size == 1:
                raise RuntimeError('Image too big to run face detection on GPU')
            batch_size //= 2
            args.face_det_batch_size = batch_size
            print('Recovering from OOM error; New batch size: {}'.format(batch_size))
            continue
        break

    results = []
    if isinstance(args.pads, str):
        args.pads = [int(x) for x in args.pads.split(",")]
    pady1, pady2, padx1, padx2 = args.pads
    for rect, image in zip(predictions, images):
        if rect is None:
            raise ValueError('Face not detected!')

        y1 = max(0, rect[1] - pady1)
        y2 = min(image.shape[0], rect[3] + pady2)
        x1 = max(0, rect[0] - padx1)
        x2 = min(image.shape[1], rect[2] + padx2)

        results.append([x1, y1, x2, y2])

    boxes = get_smoothened_boxes(np.array(results), T=5)

    if resize:
        if args.is_voxceleb2:
            results = [[cv2.resize(my_voxceleb2_crop(image), (args.image_size, args.image_size)), my_voxceleb2_crop_bboxs(image), True] for image, (x1, y1, x2, y2) in zip(images, boxes)]
        else:
            results = [[cv2.resize(image[y1: y2, x1:x2], (args.image_size, args.image_size)), (y1, y2, x1, x2), True] for image, (x1, y1, x2, y2) in zip(images, boxes)]
    else:
        results = [[image[y1: y2, x1:x2], (y1, y2, x1, x2), True] for image, (x1, y1, x2, y2) in zip(images, boxes)]
    return results

def normalise(tensor):
    """[-1,1] -> [0,1]"""
    return ((tensor+1)*0.5).clamp(0, 1)

def normalise2(tensor):
    """[0,1] -> [-1,1]"""
    return (tensor*2-1).clamp(-1, 1)

def sample_batch(batch, model, diffusion, args):
    B, F, C, H, W = batch['image'].shape
    sample_shape = (B*F, C, H, W)

    # generate fixed noise
    init_noise = None
    if args.sampling_seed:
        state = torch.get_rng_state()
        torch.manual_seed(args.sampling_seed)
        torch.cuda.manual_seed_all(args.sampling_seed)
        init_noise = torch.randn((1, C, H, W))
        # repeat the same noise for all frames
        init_noise = init_noise.repeat(B*F, 1, 1, 1)
        torch.set_rng_state(state)

    img_batch, model_kwargs = tfg_process_batch(batch, args.face_hide_percentage,
                                                use_ref=args.use_ref,
                                                use_audio=args.use_audio,
                                                # sampling_use_gt_for_ref=args.sampling_use_gt_for_ref,
                                                noise=init_noise)

    img_batch = img_batch.to(dist_util.dev())
    model_kwargs = {k: v.to(dist_util.dev()) for k, v in model_kwargs.items()}
    init_noise = init_noise.to(dist_util.dev()) if init_noise is not None else None

    sample_fn = (
        diffusion.p_sample_loop if not args.use_ddim else diffusion.ddim_sample_loop
    )
    sample = sample_fn(
        model,
        sample_shape,
        clip_denoised=args.clip_denoised,
        model_kwargs=model_kwargs,
        noise=init_noise
    )
    return sample, img_batch, model_kwargs

def generate(video_path, audio_path, model, diffusion, detector, args, out_path=None, save_orig=True):
    video_frames = load_video_frames(video_path, args)
    try:
        face_det_results = face_detect(video_frames.copy(), detector, args, resize=True)
    except Exception as e:
        print("Error:", e, video_path, audio_path)
        import traceback
        print(traceback.format_exc())
    wrong_all_indiv_mels, wrong_audio_wavform = load_all_indiv_mels(audio_path, args)

    min_frames = min(len(video_frames), len(wrong_all_indiv_mels))
    video_frames = video_frames[:min_frames]
    face_det_results = face_det_results[:min_frames]
    face_bboxes = [face_det_results[i][1] for i in range(min_frames)]
    face_frames = torch.FloatTensor(np.transpose(np.asarray([face_det_results[i][0] for i in range(min_frames)], dtype=np.float32)/255., (0, 3, 1, 2)))  # [N, C, H, W]
    wrong_all_indiv_mels = torch.FloatTensor(np.asarray(wrong_all_indiv_mels[:min_frames])).unsqueeze(1)  # [N, 1, h, w]

    if save_orig:
        if out_path is None:
            out_path_orig = os.path.join(args.sample_path, splitext(basename(video_path))[0]+"_"+splitext(basename(audio_path))[0]+"_orig.mp4")
        else:
            out_path_orig = out_path.replace(".mp4", "_orig.mp4")
        torchvision.io.write_video(
            out_path_orig,
            video_array=torch.from_numpy(np.array(video_frames)), fps=args.video_fps, video_codec='libx264',
            audio_array=torch.from_numpy(wrong_audio_wavform).unsqueeze(0), audio_fps=args.sample_rate, audio_codec='aac'
        )

    if args.sampling_ref_type == 'gt':
        ref_frames = face_frames.clone()
    elif args.sampling_ref_type == 'first_frame':
        ref_frames = face_frames[0:1].repeat(len(face_frames), 1, 1, 1)
    elif args.sampling_ref_type == 'random':
        rand_idx = random.Random(args.sampling_seed).randint(0, len(face_frames)-1)
        ref_frames = face_frames[rand_idx:rand_idx+1].repeat(len(face_frames), 1, 1, 1)

    if args.sampling_input_type == 'first_frame':
        face_frames = face_frames[0:1].repeat(len(face_frames), 1, 1, 1)
        video_frames = np.array(video_frames[0:1]*len(video_frames))
        face_bboxes = np.array(face_bboxes[0:1]*len(face_bboxes))

    rank = dist.get_rank()
    world_size = dist.get_world_size()
    chunk_size = int(np.ceil(min_frames/world_size))
    start_idx = rank * chunk_size
    end_idx = min(start_idx + chunk_size, min_frames)
    generated_video_frames = []
    b_s = args.sampling_batch_size

    # print(rank, "/", world_size, "chunk: [", start_idx, "-", end_idx, "/", min_frames, "]")

    dist.barrier()
    torch.cuda.synchronize()
    t1 = time()
    # for i in range(0, min_frames, b_s*args.nframes):
    for i in range(start_idx, end_idx, b_s*args.nframes):
        slice_end = min(i+b_s*args.nframes, end_idx)
        # if rank == 0:
        #     print("rank 0: slice:", i, ":", slice_end)
        video_frames_batch = video_frames[i:slice_end]
        face_bboxes_batch = face_bboxes[i:slice_end]

        if (slice_end-i) % args.nframes == 0:
            img_batch = face_frames[i:slice_end]  # [BF, C, H, W]
            img_batch = img_batch.reshape(-1, args.nframes, img_batch.size(-3), img_batch.size(-2), img_batch.size(-1))
            ref_batch = ref_frames[i:slice_end]
            ref_batch = ref_batch.reshape(-1, args.nframes, ref_batch.size(-3), ref_batch.size(-2), ref_batch.size(-1))
            wrong_indiv_mel_batch = wrong_all_indiv_mels[i:slice_end]  # [BF, 1, h, w]
            wrong_indiv_mel_batch = wrong_indiv_mel_batch.reshape(-1, args.nframes, wrong_indiv_mel_batch.size(-3), wrong_indiv_mel_batch.size(-2), wrong_indiv_mel_batch.size(-1))
        else:
            # For the last batch, if B*F % nframes != 0, the reshape above would throw an error,
            # but internally everything is going to get converted to BF anyway,
            # i.e. (B, F, C, H, W) -> (B*F, C, H, W) and (B*F, 1, C, H, W) -> (B*F, C, H, W).
            img_batch = face_frames[i:slice_end]  # [BF, C, H, W]
            img_batch = img_batch.reshape(-1, 1, img_batch.size(-3), img_batch.size(-2), img_batch.size(-1))
            ref_batch = ref_frames[i:slice_end]
            ref_batch = ref_batch.reshape(-1, 1, ref_batch.size(-3), ref_batch.size(-2), ref_batch.size(-1))
            wrong_indiv_mel_batch = wrong_all_indiv_mels[i:slice_end]  # [BF, 1, h, w]
            wrong_indiv_mel_batch = wrong_indiv_mel_batch.reshape(-1, 1, wrong_indiv_mel_batch.size(-3), wrong_indiv_mel_batch.size(-2), wrong_indiv_mel_batch.size(-1))

        batch = {"image": img_batch,
                 "ref_img": ref_batch,
                 "indiv_mels": wrong_indiv_mel_batch}

        sample, img_batch, model_kwargs = sample_batch(batch, model, diffusion, args)
        mask = model_kwargs['mask']
        recon_batch = sample * mask + (1. - mask) * img_batch  # [BF, C, H, W]
        recon_batch = (normalise(recon_batch)*255).cpu().numpy().transpose(0, 2, 3, 1)  # [-1,1] -> [0,255]

        for g, v, b in zip(recon_batch, video_frames_batch, face_bboxes_batch):
            y1, y2, x1, x2 = b
            g = cv2.resize(g.astype(np.uint8), (x2 - x1, y2 - y1))
            v[y1:y2, x1:x2] = g
            generated_video_frames.append(v)

    torch.cuda.synchronize()
    t3 = time()
    all_generated_video_frames = [None for _ in range(dist.get_world_size())]
    dist.all_gather_object(all_generated_video_frames, generated_video_frames)  # gather not supported with NCCL
    all_generated_video_frames_combined = []
    [all_generated_video_frames_combined.extend(gvf) for gvf in all_generated_video_frames]
    generated_video_frames = all_generated_video_frames_combined

    torch.cuda.synchronize()
    t2 = time()

    if dist.get_rank() == 0:
        print("Time taken for sampling, ", t2-t1, ", time without all gather, ", t3-t1, ", frames/gpu, ", len(generated_video_frames), ", total frames, ", min_frames)
        print(wrong_audio_wavform.shape, np.array(generated_video_frames).shape)
        min_time = len(generated_video_frames)/args.video_fps  # the video is already shorter because it got chopped according to the mel array length
        wrong_audio_wavform = wrong_audio_wavform[:int(min_time*args.sample_rate)]
        print(wrong_audio_wavform.shape, np.array(generated_video_frames).shape)
        if out_path is None:
            out_path = os.path.join(args.sample_path, splitext(basename(video_path))[0]+"_"+splitext(basename(audio_path))[0]+".mp4")
        torchvision.io.write_video(
            out_path,
            video_array=torch.from_numpy(np.array(generated_video_frames)), fps=args.video_fps, video_codec='libx264',
            audio_array=torch.from_numpy(wrong_audio_wavform).unsqueeze(0), audio_fps=args.sample_rate, audio_codec='aac'
        )
    dist.barrier()

def generate_from_filelist(test_video_dir, filelist, model, diffusion, detector, args):
    video_names = []
    audio_names = []
    with open(filelist, "r") as f:
        lines = f.readlines()
    for line in tqdm(lines):
        try:
            audio_name, video_name = line.strip().split()
            audio_path = join(test_video_dir, audio_name+'.mp4')
            video_path = join(test_video_dir, video_name+'.mp4')
            out_path = join(args.sample_path, audio_name.replace('/', '.')+"_"+video_name.replace('/', '.')+".mp4")
            generate(video_path, audio_path, model, diffusion, detector, args, out_path=out_path, save_orig=args.save_orig)
        except Exception as e:
            print("Error:", e, video_path, audio_path)
            import traceback
            print(traceback.format_exc())

def main():
    args = create_argparser().parse_args()
    dist_util.setup_dist()
    logger.configure(dir=args.sample_path, format_strs=["stdout", "log"])

    logger.log("creating model...")
    model, diffusion = tfg_create_model_and_diffusion(
        **args_to_dict(args, tfg_model_and_diffusion_defaults().keys())
    )
    model.load_state_dict(
        dist_util.load_state_dict(args.model_path, map_location='cpu')
    )
    model.to(dist_util.dev())
    if args.use_fp16:
        model.convert_to_fp16()
    model.eval()

    detector = face_detection.FaceAlignment(face_detection.LandmarksType._2D, flip_input=False, device='cuda' if torch.cuda.is_available() else 'cpu')

    if args.generate_from_filelist:
        generate_from_filelist(args.test_video_dir, args.filelist, model, diffusion, detector, args)
    else:
        generate(args.video_path, args.audio_path, model, diffusion, detector, args, out_path=args.out_path, save_orig=args.save_orig)

def create_argparser():
    defaults = dict(
        # generate from a single audio-video pair
        generate_from_filelist=False,
        video_path="",
        audio_path="",
        out_path=None,
        save_orig=True,

        # generate from a filelist: set generate_from_filelist = True
        test_video_dir="test_videos",
        filelist="test_filelist.txt",

        use_fp16=True,
        # tfg specific
        face_hide_percentage=0.5,
        use_ref=False,
        use_audio=False,
        audio_as_style=False,
        audio_as_style_encoder_mlp=False,

        # data args
        nframes=1,
        nrefer=0,
        image_size=128,
        syncnet_T=5,
        syncnet_mel_step_size=16,
        audio_frames_per_video=16,  # for the tfg model, we use sound corresponding to 5 frames centred at that frame
        audio_dim=80,
        is_voxceleb2=True,

        video_fps=25,
        sample_rate=16000,  # audio sampling rate
        mel_steps_per_sec=80.,

        # sampling args
        clip_denoised=True,  # not used in training
        sampling_batch_size=2,
        use_ddim=False,
        model_path="",
        sample_path="d2l_gen",
        sample_partition="",
        sampling_seed=None,
        sampling_use_gt_for_ref=False,
        sampling_ref_type='gt',  # one of ['gt', 'first_frame', 'random']
        sampling_input_type='gt',  # one of ['gt', 'first_frame']

        # face detection args
        face_det_batch_size=64,
        pads="0,0,0,0"
    )
    defaults.update(tfg_model_and_diffusion_defaults())
    parser = argparse.ArgumentParser()
    add_dict_to_argparser(parser, defaults)
    return parser

if __name__ == "__main__":
    main()

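The per-rank chunking used by generate() above is plain ceil-division over the frame range, with the last rank taking the short tail. A standalone sketch with illustrative numbers (no process group needed):

# Standalone sketch of the rank chunking in generate() above (illustrative numbers).
import numpy as np

min_frames, world_size = 103, 4
chunk_size = int(np.ceil(min_frames / world_size))  # 26 frames per rank
for rank in range(world_size):
    start_idx = rank * chunk_size
    end_idx = min(start_idx + chunk_size, min_frames)
    print(rank, start_idx, end_idx)  # rank 3 gets the shorter tail [78, 103)
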
guided-diffusion/LICENSE
ADDED
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2021 OpenAI

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

guided-diffusion/guided_diffusion.egg-info/PKG-INFO
ADDED
@@ -0,0 +1,7 @@
Metadata-Version: 2.1
Name: guided-diffusion
Version: 0.0.0
License-File: LICENSE
Requires-Dist: blobfile>=1.0.5
Requires-Dist: torch
Requires-Dist: tqdm

guided-diffusion/guided_diffusion.egg-info/SOURCES.txt
ADDED
@@ -0,0 +1,7 @@
LICENSE
setup.py
guided_diffusion.egg-info/PKG-INFO
guided_diffusion.egg-info/SOURCES.txt
guided_diffusion.egg-info/dependency_links.txt
guided_diffusion.egg-info/requires.txt
guided_diffusion.egg-info/top_level.txt

guided-diffusion/guided_diffusion.egg-info/dependency_links.txt
ADDED
@@ -0,0 +1 @@
guided-diffusion/guided_diffusion.egg-info/requires.txt
ADDED
@@ -0,0 +1,3 @@
blobfile>=1.0.5
torch
tqdm

guided-diffusion/guided_diffusion.egg-info/top_level.txt
ADDED
@@ -0,0 +1 @@
guided_diffusion

guided-diffusion/guided_diffusion/__init__.py
ADDED
@@ -0,0 +1,3 @@
"""
Codebase for "Improved Denoising Diffusion Probabilistic Models".
"""

guided-diffusion/guided_diffusion/dist_util.py
ADDED
@@ -0,0 +1,94 @@
"""
Helpers for distributed training.
"""

import io
import os
import socket

import blobfile as bf
from mpi4py import MPI
import torch as th
import torch.distributed as dist

# Change this to reflect your cluster layout.
# The GPU for a given rank is (rank % GPUS_PER_NODE).
GPUS_PER_NODE = 8

SETUP_RETRY_COUNT = 3

def setup_dist():
    """
    Set up a distributed process group.
    """
    if dist.is_initialized():
        return
    print("MPI.COMM_WORLD.Get_rank()", MPI.COMM_WORLD.Get_rank())
    os.environ["CUDA_VISIBLE_DEVICES"] = f"{MPI.COMM_WORLD.Get_rank() % GPUS_PER_NODE}"
    print('os.environ["CUDA_VISIBLE_DEVICES"]', os.environ["CUDA_VISIBLE_DEVICES"])
    comm = MPI.COMM_WORLD
    backend = "gloo" if not th.cuda.is_available() else "nccl"

    if backend == "gloo":
        hostname = "localhost"
    else:
        hostname = socket.gethostbyname(socket.getfqdn())
    os.environ["MASTER_ADDR"] = comm.bcast(hostname, root=0)
    os.environ["RANK"] = str(comm.rank)
    os.environ["WORLD_SIZE"] = str(comm.size)

    port = comm.bcast(_find_free_port(), root=0)
    os.environ["MASTER_PORT"] = str(port)
    dist.init_process_group(backend=backend, init_method="env://")

def dev():
    """
    Get the device to use for torch.distributed.
    """
    if th.cuda.is_available():
        return th.device("cuda")
    return th.device("cpu")

def load_state_dict(path, **kwargs):
    """
    Load a PyTorch file without redundant fetches across MPI ranks.
    """
    chunk_size = 2 ** 30  # MPI has a relatively small size limit
    if MPI.COMM_WORLD.Get_rank() == 0:
        with bf.BlobFile(path, "rb") as f:
            data = f.read()
        num_chunks = len(data) // chunk_size
        if len(data) % chunk_size:
            num_chunks += 1
        MPI.COMM_WORLD.bcast(num_chunks)
        for i in range(0, len(data), chunk_size):
            MPI.COMM_WORLD.bcast(data[i : i + chunk_size])
    else:
        num_chunks = MPI.COMM_WORLD.bcast(None)
        data = bytes()
        for _ in range(num_chunks):
            data += MPI.COMM_WORLD.bcast(None)

    return th.load(io.BytesIO(data), **kwargs)

def sync_params(params):
    """
    Synchronize a sequence of Tensors across ranks from rank 0.
    """
    for p in params:
        with th.no_grad():
            dist.broadcast(p, 0)

def _find_free_port():
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        s.bind(("", 0))
        s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
        return s.getsockname()[1]
    finally:
        s.close()

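The helpers above are intended to be called in a fixed order, mirroring main() in generate.py: setup_dist() once per process, then dev() and load_state_dict() as needed. A minimal sketch, assuming an mpiexec-style launch and a placeholder checkpoint path:

# Minimal sketch (placeholder checkpoint): typical call order for dist_util.
import torch.distributed as dist
from guided_diffusion import dist_util

dist_util.setup_dist()                 # MPI-driven env:// init (nccl or gloo)
device = dist_util.dev()               # cuda if available, else cpu
state = dist_util.load_state_dict("checkpoints/checkpoint.pt", map_location="cpu")
print(dist.get_rank(), dist.get_world_size(), device, len(state))
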
guided-diffusion/guided_diffusion/fp16_util.py
ADDED
@@ -0,0 +1,237 @@
"""
Helpers to train with 16-bit precision.
"""

import numpy as np
import torch as th
import torch.nn as nn
from torch._utils import _flatten_dense_tensors, _unflatten_dense_tensors

from . import logger

INITIAL_LOG_LOSS_SCALE = 20.0

def convert_module_to_f16(l):
    """
    Convert primitive modules to float16.
    """
    if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d)):
        l.weight.data = l.weight.data.half()
        if l.bias is not None:
            l.bias.data = l.bias.data.half()

def convert_module_to_f32(l):
    """
    Convert primitive modules to float32, undoing convert_module_to_f16().
    """
    if isinstance(l, (nn.Conv1d, nn.Conv2d, nn.Conv3d)):
        l.weight.data = l.weight.data.float()
        if l.bias is not None:
            l.bias.data = l.bias.data.float()

def make_master_params(param_groups_and_shapes):
    """
    Copy model parameters into a (differently-shaped) list of full-precision
    parameters.
    """
    master_params = []
    for param_group, shape in param_groups_and_shapes:
        master_param = nn.Parameter(
            _flatten_dense_tensors(
                [param.detach().float() for (_, param) in param_group]
            ).view(shape)
        )
        master_param.requires_grad = True
        master_params.append(master_param)
    return master_params

def model_grads_to_master_grads(param_groups_and_shapes, master_params):
    """
    Copy the gradients from the model parameters into the master parameters
    from make_master_params().
    """
    for master_param, (param_group, shape) in zip(
        master_params, param_groups_and_shapes
    ):
        master_param.grad = _flatten_dense_tensors(
            [param_grad_or_zeros(param) for (_, param) in param_group]
        ).view(shape)

def master_params_to_model_params(param_groups_and_shapes, master_params):
    """
    Copy the master parameter data back into the model parameters.
    """
    # Without copying to a list, if a generator is passed, this will
    # silently not copy any parameters.
    for master_param, (param_group, _) in zip(master_params, param_groups_and_shapes):
        for (_, param), unflat_master_param in zip(
            param_group, unflatten_master_params(param_group, master_param.view(-1))
        ):
            param.detach().copy_(unflat_master_param)

def unflatten_master_params(param_group, master_param):
    return _unflatten_dense_tensors(master_param, [param for (_, param) in param_group])

def get_param_groups_and_shapes(named_model_params):
    named_model_params = list(named_model_params)
    scalar_vector_named_params = (
        [(n, p) for (n, p) in named_model_params if p.ndim <= 1],
        (-1),
    )
    matrix_named_params = (
        [(n, p) for (n, p) in named_model_params if p.ndim > 1],
        (1, -1),
    )
    return [scalar_vector_named_params, matrix_named_params]

def master_params_to_state_dict(
    model, param_groups_and_shapes, master_params, use_fp16
):
    if use_fp16:
        state_dict = model.state_dict()
        for master_param, (param_group, _) in zip(
            master_params, param_groups_and_shapes
        ):
            for (name, _), unflat_master_param in zip(
                param_group, unflatten_master_params(param_group, master_param.view(-1))
            ):
                assert name in state_dict
                state_dict[name] = unflat_master_param
    else:
        state_dict = model.state_dict()
        for i, (name, _value) in enumerate(model.named_parameters()):
            assert name in state_dict
            state_dict[name] = master_params[i]
    return state_dict

def state_dict_to_master_params(model, state_dict, use_fp16):
    if use_fp16:
        named_model_params = [
            (name, state_dict[name]) for name, _ in model.named_parameters()
        ]
        param_groups_and_shapes = get_param_groups_and_shapes(named_model_params)
        master_params = make_master_params(param_groups_and_shapes)
    else:
        master_params = [state_dict[name] for name, _ in model.named_parameters()]
    return master_params

def zero_master_grads(master_params):
    for param in master_params:
        param.grad = None

def zero_grad(model_params):
    for param in model_params:
        # Taken from https://pytorch.org/docs/stable/_modules/torch/optim/optimizer.html#Optimizer.add_param_group
        if param.grad is not None:
            param.grad.detach_()
            param.grad.zero_()

def param_grad_or_zeros(param):
    if param.grad is not None:
        return param.grad.data.detach()
    else:
        return th.zeros_like(param)

class MixedPrecisionTrainer:
    def __init__(
        self,
        *,
        model,
        use_fp16=False,
        fp16_scale_growth=1e-3,
        initial_lg_loss_scale=INITIAL_LOG_LOSS_SCALE,
    ):
        self.model = model
        self.use_fp16 = use_fp16
        self.fp16_scale_growth = fp16_scale_growth

        self.model_params = list(self.model.parameters())
        self.master_params = self.model_params
        self.param_groups_and_shapes = None
        self.lg_loss_scale = initial_lg_loss_scale

        if self.use_fp16:
            self.param_groups_and_shapes = get_param_groups_and_shapes(
                self.model.named_parameters()
            )
            self.master_params = make_master_params(self.param_groups_and_shapes)
            self.model.convert_to_fp16()

    def zero_grad(self):
        zero_grad(self.model_params)

    def backward(self, loss: th.Tensor):
        if self.use_fp16:
            loss_scale = 2 ** self.lg_loss_scale
            (loss * loss_scale).backward()
        else:
            loss.backward()

    def optimize(self, opt: th.optim.Optimizer):
        if self.use_fp16:
            return self._optimize_fp16(opt)
        else:
            return self._optimize_normal(opt)

    def _optimize_fp16(self, opt: th.optim.Optimizer):
        logger.logkv_mean("lg_loss_scale", self.lg_loss_scale)
        model_grads_to_master_grads(self.param_groups_and_shapes, self.master_params)
        grad_norm, param_norm = self._compute_norms(grad_scale=2 ** self.lg_loss_scale)
        if check_overflow(grad_norm):
            self.lg_loss_scale -= 1
            logger.log(f"Found NaN, decreased lg_loss_scale to {self.lg_loss_scale}")
            zero_master_grads(self.master_params)
            return False

        logger.logkv_mean("grad_norm", grad_norm)
        logger.logkv_mean("param_norm", param_norm)

        for p in self.master_params:
            p.grad.mul_(1.0 / (2 ** self.lg_loss_scale))
        opt.step()
        zero_master_grads(self.master_params)
        master_params_to_model_params(self.param_groups_and_shapes, self.master_params)
        self.lg_loss_scale += self.fp16_scale_growth
        return True

    def _optimize_normal(self, opt: th.optim.Optimizer):
        grad_norm, param_norm = self._compute_norms()
        logger.logkv_mean("grad_norm", grad_norm)
        logger.logkv_mean("param_norm", param_norm)
        opt.step()
        return True

    def _compute_norms(self, grad_scale=1.0):
        grad_norm = 0.0
        param_norm = 0.0
        for p in self.master_params:
            with th.no_grad():
                param_norm += th.norm(p, p=2, dtype=th.float32).item() ** 2
                if p.grad is not None:
                    grad_norm += th.norm(p.grad, p=2, dtype=th.float32).item() ** 2
        return np.sqrt(grad_norm) / grad_scale, np.sqrt(param_norm)

    def master_params_to_state_dict(self, master_params):
        return master_params_to_state_dict(
            self.model, self.param_groups_and_shapes, master_params, self.use_fp16
        )

    def state_dict_to_master_params(self, state_dict):
        return state_dict_to_master_params(self.model, state_dict, self.use_fp16)

def check_overflow(value):
    return (value == float("inf")) or (value == -float("inf")) or (value != value)

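A single optimization step with MixedPrecisionTrainer reads the same with or without fp16; only the scaling behaviour changes. A minimal CPU-friendly sketch with use_fp16=False and a toy model (an illustrative assumption; with use_fp16=True the model must also expose convert_to_fp16(), as the UNet in this repo does):

# Minimal sketch: one training step through MixedPrecisionTrainer (toy model).
import torch as th
import torch.nn as nn
from guided_diffusion.fp16_util import MixedPrecisionTrainer

model = nn.Linear(8, 1)  # toy stand-in for the diffusion UNet
trainer = MixedPrecisionTrainer(model=model, use_fp16=False)
opt = th.optim.AdamW(trainer.master_params, lr=1e-4)

x, y = th.randn(4, 8), th.randn(4, 1)
trainer.zero_grad()
loss = ((model(x) - y) ** 2).mean()
trainer.backward(loss)             # would scale the loss by 2**lg_loss_scale in fp16 mode
took_step = trainer.optimize(opt)  # returns False when an fp16 overflow is detected
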
guided-diffusion/guided_diffusion/gaussian_diffusion.py
ADDED
@@ -0,0 +1,843 @@
"""
This code started out as a PyTorch port of Ho et al's diffusion models:
https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py

Docstrings have been added, as well as DDIM sampling and a new collection of beta schedules.
"""

import enum
import math

import numpy as np
import torch as th
import os

from . import dist_util
from .nn import mean_flat
from .losses import normal_kl, discretized_gaussian_log_likelihood

def get_named_beta_schedule(schedule_name, num_diffusion_timesteps):
    """
    Get a pre-defined beta schedule for the given name.

    The beta schedule library consists of beta schedules which remain similar
    in the limit of num_diffusion_timesteps.
    Beta schedules may be added, but should not be removed or changed once
    they are committed to maintain backwards compatibility.
    """
    if schedule_name == "linear":
        # Linear schedule from Ho et al, extended to work for any number of
        # diffusion steps.
        scale = 1000 / num_diffusion_timesteps
        beta_start = scale * 0.0001
        beta_end = scale * 0.02
        return np.linspace(
            beta_start, beta_end, num_diffusion_timesteps, dtype=np.float64
        )
    elif schedule_name == "cosine":
        return betas_for_alpha_bar(
            num_diffusion_timesteps,
            lambda t: math.cos((t + 0.008) / 1.008 * math.pi / 2) ** 2,
        )
    else:
        raise NotImplementedError(f"unknown beta schedule: {schedule_name}")

def betas_for_alpha_bar(num_diffusion_timesteps, alpha_bar, max_beta=0.999):
    """
    Create a beta schedule that discretizes the given alpha_t_bar function,
    which defines the cumulative product of (1-beta) over time from t = [0,1].

    :param num_diffusion_timesteps: the number of betas to produce.
    :param alpha_bar: a lambda that takes an argument t from 0 to 1 and
                      produces the cumulative product of (1-beta) up to that
                      part of the diffusion process.
    :param max_beta: the maximum beta to use; use values lower than 1 to
                     prevent singularities.
    """
    betas = []
    for i in range(num_diffusion_timesteps):
        t1 = i / num_diffusion_timesteps
        t2 = (i + 1) / num_diffusion_timesteps
        betas.append(min(1 - alpha_bar(t2) / alpha_bar(t1), max_beta))
    return np.array(betas)

class ModelMeanType(enum.Enum):
    """
    Which type of output the model predicts.
    """

    PREVIOUS_X = enum.auto()  # the model predicts x_{t-1}
    START_X = enum.auto()  # the model predicts x_0
    EPSILON = enum.auto()  # the model predicts epsilon

class ModelVarType(enum.Enum):
    """
    What is used as the model's output variance.

    The LEARNED_RANGE option has been added to allow the model to predict
    values between FIXED_SMALL and FIXED_LARGE, making its job easier.
    """

    LEARNED = enum.auto()
    FIXED_SMALL = enum.auto()
    FIXED_LARGE = enum.auto()
    LEARNED_RANGE = enum.auto()

class LossType(enum.Enum):
    MSE = enum.auto()  # use raw MSE loss (and KL when learning variances)
    RESCALED_MSE = (
        enum.auto()
    )  # use raw MSE loss (with RESCALED_KL when learning variances)
    KL = enum.auto()  # use the variational lower-bound
    RESCALED_KL = enum.auto()  # like KL, but rescale to estimate the full VLB

    def is_vb(self):
        return self == LossType.KL or self == LossType.RESCALED_KL

class GaussianDiffusion:
    """
    Utilities for training and sampling diffusion models.

    Ported directly from here, and then adapted over time to further experimentation.
    https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/diffusion_utils_2.py#L42

    :param betas: a 1-D numpy array of betas for each diffusion timestep,
                  starting at T and going to 1.
    :param model_mean_type: a ModelMeanType determining what the model outputs.
    :param model_var_type: a ModelVarType determining how variance is output.
    :param loss_type: a LossType determining the loss function to use.
    :param rescale_timesteps: if True, pass floating point timesteps into the
                              model so that they are always scaled like in the
                              original paper (0 to 1000).
    :param loss_variation: if True, then use composite loss
    """

    def __init__(
        self,
        *,
        betas,
        model_mean_type,
        model_var_type,
        loss_type,
        rescale_timesteps=False,
        loss_variation=False,
    ):
        self.model_mean_type = model_mean_type
        self.model_var_type = model_var_type
        self.loss_type = loss_type
        self.rescale_timesteps = rescale_timesteps
        self.loss_variation = loss_variation

        # Use float64 for accuracy.
        betas = np.array(betas, dtype=np.float64)
        self.betas = betas
        assert len(betas.shape) == 1, "betas must be 1-D"
        assert (betas > 0).all() and (betas <= 1).all()

        self.num_timesteps = int(betas.shape[0])

        alphas = 1.0 - betas
        self.alphas_cumprod = np.cumprod(alphas, axis=0)
        self.alphas_cumprod_prev = np.append(1.0, self.alphas_cumprod[:-1])
        self.alphas_cumprod_next = np.append(self.alphas_cumprod[1:], 0.0)
        assert self.alphas_cumprod_prev.shape == (self.num_timesteps,)

        # calculations for diffusion q(x_t | x_{t-1}) and others
        self.sqrt_alphas_cumprod = np.sqrt(self.alphas_cumprod)
        self.sqrt_one_minus_alphas_cumprod = np.sqrt(1.0 - self.alphas_cumprod)
        self.log_one_minus_alphas_cumprod = np.log(1.0 - self.alphas_cumprod)
        self.sqrt_recip_alphas_cumprod = np.sqrt(1.0 / self.alphas_cumprod)
        self.sqrt_recipm1_alphas_cumprod = np.sqrt(1.0 / self.alphas_cumprod - 1)

        # calculations for posterior q(x_{t-1} | x_t, x_0)
        self.posterior_variance = (
            betas * (1.0 - self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)
        )
        # log calculation clipped because the posterior variance is 0 at the
        # beginning of the diffusion chain.
        self.posterior_log_variance_clipped = np.log(
            np.append(self.posterior_variance[1], self.posterior_variance[1:])
        )
        self.posterior_mean_coef1 = (
            betas * np.sqrt(self.alphas_cumprod_prev) / (1.0 - self.alphas_cumprod)
        )
        self.posterior_mean_coef2 = (
            (1.0 - self.alphas_cumprod_prev)
            * np.sqrt(alphas)
            / (1.0 - self.alphas_cumprod)
        )

    def q_mean_variance(self, x_start, t):
        """
        Get the distribution q(x_t | x_0).

        :param x_start: the [N x C x ...] tensor of noiseless inputs.
        :param t: the number of diffusion steps (minus 1). Here, 0 means one step.
        :return: A tuple (mean, variance, log_variance), all of x_start's shape.
        """
        mean = (
            _extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start
        )
        variance = _extract_into_tensor(1.0 - self.alphas_cumprod, t, x_start.shape)
        log_variance = _extract_into_tensor(
            self.log_one_minus_alphas_cumprod, t, x_start.shape
        )
        return mean, variance, log_variance

    def q_sample(self, x_start, t, noise=None):
        """
        Diffuse the data for a given number of diffusion steps.

        In other words, sample from q(x_t | x_0).

        :param x_start: the initial data batch.
        :param t: the number of diffusion steps (minus 1). Here, 0 means one step.
        :param noise: if specified, the split-out normal noise.
        :return: A noisy version of x_start.
        """
        if noise is None:
            noise = th.randn_like(x_start)
        assert noise.shape == x_start.shape
        return (
            _extract_into_tensor(self.sqrt_alphas_cumprod, t, x_start.shape) * x_start
            + _extract_into_tensor(self.sqrt_one_minus_alphas_cumprod, t, x_start.shape)
            * noise
        )

    def q_posterior_mean_variance(self, x_start, x_t, t):
        """
        Compute the mean and variance of the diffusion posterior:

            q(x_{t-1} | x_t, x_0)

        """
        assert x_start.shape == x_t.shape
        posterior_mean = (
            _extract_into_tensor(self.posterior_mean_coef1, t, x_t.shape) * x_start
            + _extract_into_tensor(self.posterior_mean_coef2, t, x_t.shape) * x_t
        )
        posterior_variance = _extract_into_tensor(self.posterior_variance, t, x_t.shape)
        posterior_log_variance_clipped = _extract_into_tensor(
            self.posterior_log_variance_clipped, t, x_t.shape
        )
        assert (
            posterior_mean.shape[0]
            == posterior_variance.shape[0]
            == posterior_log_variance_clipped.shape[0]
            == x_start.shape[0]
        )
        return posterior_mean, posterior_variance, posterior_log_variance_clipped

    def p_mean_variance(
        self, model, x, t, clip_denoised=True, denoised_fn=None, model_kwargs=None
    ):
        """
        Apply the model to get p(x_{t-1} | x_t), as well as a prediction of
        the initial x, x_0.

        :param model: the model, which takes a signal and a batch of timesteps
                      as input.
        :param x: the [N x C x ...] tensor at time t.
        :param t: a 1-D Tensor of timesteps.
|
248 |
+
:param clip_denoised: if True, clip the denoised signal into [-1, 1].
|
249 |
+
:param denoised_fn: if not None, a function which applies to the
|
250 |
+
x_start prediction before it is used to sample. Applies before
|
251 |
+
clip_denoised.
|
252 |
+
:param model_kwargs: if not None, a dict of extra keyword arguments to
|
253 |
+
pass to the model. This can be used for conditioning.
|
254 |
+
:return: a dict with the following keys:
|
255 |
+
- 'mean': the model mean output.
|
256 |
+
- 'variance': the model variance output.
|
257 |
+
- 'log_variance': the log of 'variance'.
|
258 |
+
- 'pred_xstart': the prediction for x_0.
|
259 |
+
"""
|
260 |
+
if model_kwargs is None:
|
261 |
+
model_kwargs = {}
|
262 |
+
|
263 |
+
B, C = x.shape[:2]
|
264 |
+
assert t.shape == (B,)
|
265 |
+
model_output = model(x, self._scale_timesteps(t), **model_kwargs)
|
266 |
+
|
267 |
+
if self.model_var_type in [ModelVarType.LEARNED, ModelVarType.LEARNED_RANGE]:
|
268 |
+
assert model_output.shape == (B, C * 2, *x.shape[2:])
|
269 |
+
model_output, model_var_values = th.split(model_output, C, dim=1)
|
270 |
+
if self.model_var_type == ModelVarType.LEARNED:
|
271 |
+
model_log_variance = model_var_values
|
272 |
+
model_variance = th.exp(model_log_variance)
|
273 |
+
else:
|
274 |
+
min_log = _extract_into_tensor(
|
275 |
+
self.posterior_log_variance_clipped, t, x.shape
|
276 |
+
)
|
277 |
+
max_log = _extract_into_tensor(np.log(self.betas), t, x.shape)
|
278 |
+
# The model_var_values is [-1, 1] for [min_var, max_var].
|
279 |
+
frac = (model_var_values + 1) / 2
|
280 |
+
model_log_variance = frac * max_log + (1 - frac) * min_log
|
281 |
+
model_variance = th.exp(model_log_variance)
|
282 |
+
else:
|
283 |
+
model_variance, model_log_variance = {
|
284 |
+
# for fixedlarge, we set the initial (log-)variance like so
|
285 |
+
# to get a better decoder log likelihood.
|
286 |
+
ModelVarType.FIXED_LARGE: (
|
287 |
+
np.append(self.posterior_variance[1], self.betas[1:]),
|
288 |
+
np.log(np.append(self.posterior_variance[1], self.betas[1:])),
|
289 |
+
),
|
290 |
+
ModelVarType.FIXED_SMALL: (
|
291 |
+
self.posterior_variance,
|
292 |
+
self.posterior_log_variance_clipped,
|
293 |
+
),
|
294 |
+
}[self.model_var_type]
|
295 |
+
model_variance = _extract_into_tensor(model_variance, t, x.shape)
|
296 |
+
model_log_variance = _extract_into_tensor(model_log_variance, t, x.shape)
|
297 |
+
|
298 |
+
def process_xstart(x):
|
299 |
+
if denoised_fn is not None:
|
300 |
+
x = denoised_fn(x)
|
301 |
+
if clip_denoised:
|
302 |
+
return x.clamp(-1, 1)
|
303 |
+
return x
|
304 |
+
|
305 |
+
if self.model_mean_type == ModelMeanType.PREVIOUS_X:
|
306 |
+
pred_xstart = process_xstart(
|
307 |
+
self._predict_xstart_from_xprev(x_t=x, t=t, xprev=model_output)
|
308 |
+
)
|
309 |
+
model_mean = model_output
|
310 |
+
elif self.model_mean_type in [ModelMeanType.START_X, ModelMeanType.EPSILON]:
|
311 |
+
if self.model_mean_type == ModelMeanType.START_X:
|
312 |
+
pred_xstart = process_xstart(model_output)
|
313 |
+
else:
|
314 |
+
pred_xstart = process_xstart(
|
315 |
+
self._predict_xstart_from_eps(x_t=x, t=t, eps=model_output)
|
316 |
+
)
|
317 |
+
model_mean, _, _ = self.q_posterior_mean_variance(
|
318 |
+
x_start=pred_xstart, x_t=x, t=t
|
319 |
+
)
|
320 |
+
else:
|
321 |
+
raise NotImplementedError(self.model_mean_type)
|
322 |
+
|
323 |
+
assert (
|
324 |
+
model_mean.shape == model_log_variance.shape == pred_xstart.shape == x.shape
|
325 |
+
)
|
326 |
+
return {
|
327 |
+
"mean": model_mean,
|
328 |
+
"variance": model_variance,
|
329 |
+
"log_variance": model_log_variance,
|
330 |
+
"pred_xstart": pred_xstart,
|
331 |
+
}
|
332 |
+
|
333 |
+
def _predict_xstart_from_eps(self, x_t, t, eps):
|
334 |
+
assert x_t.shape == eps.shape
|
335 |
+
return (
|
336 |
+
_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t
|
337 |
+
- _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape) * eps
|
338 |
+
)
|
339 |
+
|
340 |
+
def _predict_xstart_from_xprev(self, x_t, t, xprev):
|
341 |
+
assert x_t.shape == xprev.shape
|
342 |
+
return ( # (xprev - coef2*x_t) / coef1
|
343 |
+
_extract_into_tensor(1.0 / self.posterior_mean_coef1, t, x_t.shape) * xprev
|
344 |
+
- _extract_into_tensor(
|
345 |
+
self.posterior_mean_coef2 / self.posterior_mean_coef1, t, x_t.shape
|
346 |
+
)
|
347 |
+
* x_t
|
348 |
+
)
|
349 |
+
|
350 |
+
def _predict_eps_from_xstart(self, x_t, t, pred_xstart):
|
351 |
+
return (
|
352 |
+
_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x_t.shape) * x_t
|
353 |
+
- pred_xstart
|
354 |
+
) / _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x_t.shape)
|
355 |
+
|
356 |
+
def _scale_timesteps(self, t):
|
357 |
+
if self.rescale_timesteps:
|
358 |
+
return t.float() * (1000.0 / self.num_timesteps)
|
359 |
+
return t
|
360 |
+
|
361 |
+
def condition_mean(self, cond_fn, p_mean_var, x, t, model_kwargs=None):
|
362 |
+
"""
|
363 |
+
Compute the mean for the previous step, given a function cond_fn that
|
364 |
+
computes the gradient of a conditional log probability with respect to
|
365 |
+
x. In particular, cond_fn computes grad(log(p(y|x))), and we want to
|
366 |
+
condition on y.
|
367 |
+
|
368 |
+
This uses the conditioning strategy from Sohl-Dickstein et al. (2015).
|
369 |
+
"""
|
370 |
+
gradient = cond_fn(x, self._scale_timesteps(t), **model_kwargs)
|
371 |
+
new_mean = (
|
372 |
+
p_mean_var["mean"].float() + p_mean_var["variance"] * gradient.float()
|
373 |
+
)
|
374 |
+
return new_mean
|
375 |
+
|
376 |
+
def condition_score(self, cond_fn, p_mean_var, x, t, model_kwargs=None):
|
377 |
+
"""
|
378 |
+
Compute what the p_mean_variance output would have been, should the
|
379 |
+
model's score function be conditioned by cond_fn.
|
380 |
+
|
381 |
+
See condition_mean() for details on cond_fn.
|
382 |
+
|
383 |
+
Unlike condition_mean(), this instead uses the conditioning strategy
|
384 |
+
from Song et al (2020).
|
385 |
+
"""
|
386 |
+
alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape)
|
387 |
+
|
388 |
+
eps = self._predict_eps_from_xstart(x, t, p_mean_var["pred_xstart"])
|
389 |
+
eps = eps - (1 - alpha_bar).sqrt() * cond_fn(
|
390 |
+
x, self._scale_timesteps(t), **model_kwargs
|
391 |
+
)
|
392 |
+
|
393 |
+
out = p_mean_var.copy()
|
394 |
+
out["pred_xstart"] = self._predict_xstart_from_eps(x, t, eps)
|
395 |
+
out["mean"], _, _ = self.q_posterior_mean_variance(
|
396 |
+
x_start=out["pred_xstart"], x_t=x, t=t
|
397 |
+
)
|
398 |
+
return out
|
399 |
+
|
400 |
+
def p_sample(
|
401 |
+
self,
|
402 |
+
model,
|
403 |
+
x,
|
404 |
+
t,
|
405 |
+
clip_denoised=True,
|
406 |
+
denoised_fn=None,
|
407 |
+
cond_fn=None,
|
408 |
+
model_kwargs=None,
|
409 |
+
):
|
410 |
+
"""
|
411 |
+
Sample x_{t-1} from the model at the given timestep.
|
412 |
+
|
413 |
+
:param model: the model to sample from.
|
414 |
+
:param x: the current tensor at x_{t-1}.
|
415 |
+
:param t: the value of t, starting at 0 for the first diffusion step.
|
416 |
+
:param clip_denoised: if True, clip the x_start prediction to [-1, 1].
|
417 |
+
:param denoised_fn: if not None, a function which applies to the
|
418 |
+
x_start prediction before it is used to sample.
|
419 |
+
:param cond_fn: if not None, this is a gradient function that acts
|
420 |
+
similarly to the model.
|
421 |
+
:param model_kwargs: if not None, a dict of extra keyword arguments to
|
422 |
+
pass to the model. This can be used for conditioning.
|
423 |
+
:return: a dict containing the following keys:
|
424 |
+
- 'sample': a random sample from the model.
|
425 |
+
- 'pred_xstart': a prediction of x_0.
|
426 |
+
"""
|
427 |
+
out = self.p_mean_variance(
|
428 |
+
model,
|
429 |
+
x,
|
430 |
+
t,
|
431 |
+
clip_denoised=clip_denoised,
|
432 |
+
denoised_fn=denoised_fn,
|
433 |
+
model_kwargs=model_kwargs,
|
434 |
+
)
|
435 |
+
noise = th.randn_like(x)
|
436 |
+
nonzero_mask = (
|
437 |
+
(t != 0).float().view(-1, *([1] * (len(x.shape) - 1)))
|
438 |
+
) # no noise when t == 0
|
439 |
+
if cond_fn is not None:
|
440 |
+
out["mean"] = self.condition_mean(
|
441 |
+
cond_fn, out, x, t, model_kwargs=model_kwargs
|
442 |
+
)
|
443 |
+
sample = out["mean"] + nonzero_mask * th.exp(0.5 * out["log_variance"]) * noise
|
444 |
+
return {"sample": sample, "pred_xstart": out["pred_xstart"]}
|
445 |
+
|
446 |
+
def p_sample_loop(
|
447 |
+
self,
|
448 |
+
model,
|
449 |
+
shape,
|
450 |
+
noise=None,
|
451 |
+
clip_denoised=True,
|
452 |
+
denoised_fn=None,
|
453 |
+
cond_fn=None,
|
454 |
+
model_kwargs=None,
|
455 |
+
device=None,
|
456 |
+
progress=False,
|
457 |
+
):
|
458 |
+
"""
|
459 |
+
Generate samples from the model.
|
460 |
+
|
461 |
+
:param model: the model module.
|
462 |
+
:param shape: the shape of the samples, (N, C, H, W).
|
463 |
+
:param noise: if specified, the noise from the encoder to sample.
|
464 |
+
Should be of the same shape as `shape`.
|
465 |
+
:param clip_denoised: if True, clip x_start predictions to [-1, 1].
|
466 |
+
:param denoised_fn: if not None, a function which applies to the
|
467 |
+
x_start prediction before it is used to sample.
|
468 |
+
:param cond_fn: if not None, this is a gradient function that acts
|
469 |
+
similarly to the model.
|
470 |
+
:param model_kwargs: if not None, a dict of extra keyword arguments to
|
471 |
+
pass to the model. This can be used for conditioning.
|
472 |
+
:param device: if specified, the device to create the samples on.
|
473 |
+
If not specified, use a model parameter's device.
|
474 |
+
:param progress: if True, show a tqdm progress bar.
|
475 |
+
:return: a non-differentiable batch of samples.
|
476 |
+
"""
|
477 |
+
final = None
|
478 |
+
for sample in self.p_sample_loop_progressive(
|
479 |
+
model,
|
480 |
+
shape,
|
481 |
+
noise=noise,
|
482 |
+
clip_denoised=clip_denoised,
|
483 |
+
denoised_fn=denoised_fn,
|
484 |
+
cond_fn=cond_fn,
|
485 |
+
model_kwargs=model_kwargs,
|
486 |
+
device=device,
|
487 |
+
progress=progress,
|
488 |
+
):
|
489 |
+
final = sample
|
490 |
+
return final["sample"]
|
491 |
+
|
492 |
+
def p_sample_loop_progressive(
|
493 |
+
self,
|
494 |
+
model,
|
495 |
+
shape,
|
496 |
+
noise=None,
|
497 |
+
clip_denoised=True,
|
498 |
+
denoised_fn=None,
|
499 |
+
cond_fn=None,
|
500 |
+
model_kwargs=None,
|
501 |
+
device=None,
|
502 |
+
progress=False,
|
503 |
+
):
|
504 |
+
"""
|
505 |
+
Generate samples from the model and yield intermediate samples from
|
506 |
+
each timestep of diffusion.
|
507 |
+
|
508 |
+
Arguments are the same as p_sample_loop().
|
509 |
+
Returns a generator over dicts, where each dict is the return value of
|
510 |
+
p_sample().
|
511 |
+
"""
|
512 |
+
if device is None:
|
513 |
+
device = next(model.parameters()).device
|
514 |
+
assert isinstance(shape, (tuple, list))
|
515 |
+
if noise is not None:
|
516 |
+
img = noise
|
517 |
+
else:
|
518 |
+
img = th.randn(*shape, device=device)
|
519 |
+
indices = list(range(self.num_timesteps))[::-1]
|
520 |
+
|
521 |
+
if progress:
|
522 |
+
# Lazy import so that we don't depend on tqdm.
|
523 |
+
from tqdm.auto import tqdm
|
524 |
+
|
525 |
+
indices = tqdm(indices)
|
526 |
+
|
527 |
+
for i in indices:
|
528 |
+
t = th.tensor([i] * shape[0], device=device)
|
529 |
+
with th.no_grad():
|
530 |
+
out = self.p_sample(
|
531 |
+
model,
|
532 |
+
img,
|
533 |
+
t,
|
534 |
+
clip_denoised=clip_denoised,
|
535 |
+
denoised_fn=denoised_fn,
|
536 |
+
cond_fn=cond_fn,
|
537 |
+
model_kwargs=model_kwargs,
|
538 |
+
)
|
539 |
+
yield out
|
540 |
+
img = out["sample"]
|
541 |
+
|
542 |
+
def ddim_sample(
|
543 |
+
self,
|
544 |
+
model,
|
545 |
+
x,
|
546 |
+
t,
|
547 |
+
clip_denoised=True,
|
548 |
+
denoised_fn=None,
|
549 |
+
cond_fn=None,
|
550 |
+
model_kwargs=None,
|
551 |
+
eta=0.0,
|
552 |
+
):
|
553 |
+
"""
|
554 |
+
Sample x_{t-1} from the model using DDIM.
|
555 |
+
|
556 |
+
Same usage as p_sample().
|
557 |
+
"""
|
558 |
+
out = self.p_mean_variance(
|
559 |
+
model,
|
560 |
+
x,
|
561 |
+
t,
|
562 |
+
clip_denoised=clip_denoised,
|
563 |
+
denoised_fn=denoised_fn,
|
564 |
+
model_kwargs=model_kwargs,
|
565 |
+
)
|
566 |
+
if cond_fn is not None:
|
567 |
+
out = self.condition_score(cond_fn, out, x, t, model_kwargs=model_kwargs)
|
568 |
+
|
569 |
+
# Usually our model outputs epsilon, but we re-derive it
|
570 |
+
# in case we used x_start or x_prev prediction.
|
571 |
+
eps = self._predict_eps_from_xstart(x, t, out["pred_xstart"])
|
572 |
+
|
573 |
+
alpha_bar = _extract_into_tensor(self.alphas_cumprod, t, x.shape)
|
574 |
+
alpha_bar_prev = _extract_into_tensor(self.alphas_cumprod_prev, t, x.shape)
|
575 |
+
sigma = (
|
576 |
+
eta
|
577 |
+
* th.sqrt((1 - alpha_bar_prev) / (1 - alpha_bar))
|
578 |
+
* th.sqrt(1 - alpha_bar / alpha_bar_prev)
|
579 |
+
)
|
580 |
+
# Equation 12.
|
581 |
+
noise = th.randn_like(x)
|
582 |
+
mean_pred = (
|
583 |
+
out["pred_xstart"] * th.sqrt(alpha_bar_prev)
|
584 |
+
+ th.sqrt(1 - alpha_bar_prev - sigma ** 2) * eps
|
585 |
+
)
|
586 |
+
nonzero_mask = (
|
587 |
+
(t != 0).float().view(-1, *([1] * (len(x.shape) - 1)))
|
588 |
+
) # no noise when t == 0
|
589 |
+
sample = mean_pred + nonzero_mask * sigma * noise
|
590 |
+
return {"sample": sample, "pred_xstart": out["pred_xstart"]}
|
591 |
+
|
592 |
+
def ddim_reverse_sample(
|
593 |
+
self,
|
594 |
+
model,
|
595 |
+
x,
|
596 |
+
t,
|
597 |
+
clip_denoised=True,
|
598 |
+
denoised_fn=None,
|
599 |
+
model_kwargs=None,
|
600 |
+
eta=0.0,
|
601 |
+
):
|
602 |
+
"""
|
603 |
+
Sample x_{t+1} from the model using DDIM reverse ODE.
|
604 |
+
"""
|
605 |
+
assert eta == 0.0, "Reverse ODE only for deterministic path"
|
606 |
+
out = self.p_mean_variance(
|
607 |
+
model,
|
608 |
+
x,
|
609 |
+
t,
|
610 |
+
clip_denoised=clip_denoised,
|
611 |
+
denoised_fn=denoised_fn,
|
612 |
+
model_kwargs=model_kwargs,
|
613 |
+
)
|
614 |
+
# Usually our model outputs epsilon, but we re-derive it
|
615 |
+
# in case we used x_start or x_prev prediction.
|
616 |
+
eps = (
|
617 |
+
_extract_into_tensor(self.sqrt_recip_alphas_cumprod, t, x.shape) * x
|
618 |
+
- out["pred_xstart"]
|
619 |
+
) / _extract_into_tensor(self.sqrt_recipm1_alphas_cumprod, t, x.shape)
|
620 |
+
alpha_bar_next = _extract_into_tensor(self.alphas_cumprod_next, t, x.shape)
|
621 |
+
|
622 |
+
# Equation 12. reversed
|
623 |
+
mean_pred = (
|
624 |
+
out["pred_xstart"] * th.sqrt(alpha_bar_next)
|
625 |
+
+ th.sqrt(1 - alpha_bar_next) * eps
|
626 |
+
)
|
627 |
+
|
628 |
+
return {"sample": mean_pred, "pred_xstart": out["pred_xstart"]}
|
629 |
+
|
630 |
+
def ddim_sample_loop(
|
631 |
+
self,
|
632 |
+
model,
|
633 |
+
shape,
|
634 |
+
noise=None,
|
635 |
+
clip_denoised=True,
|
636 |
+
denoised_fn=None,
|
637 |
+
cond_fn=None,
|
638 |
+
model_kwargs=None,
|
639 |
+
device=None,
|
640 |
+
progress=False,
|
641 |
+
eta=0.0,
|
642 |
+
):
|
643 |
+
"""
|
644 |
+
Generate samples from the model using DDIM.
|
645 |
+
|
646 |
+
Same usage as p_sample_loop().
|
647 |
+
"""
|
648 |
+
final = None
|
649 |
+
for sample in self.ddim_sample_loop_progressive(
|
650 |
+
model,
|
651 |
+
shape,
|
652 |
+
noise=noise,
|
653 |
+
clip_denoised=clip_denoised,
|
654 |
+
denoised_fn=denoised_fn,
|
655 |
+
cond_fn=cond_fn,
|
656 |
+
model_kwargs=model_kwargs,
|
657 |
+
device=device,
|
658 |
+
progress=progress,
|
659 |
+
eta=eta,
|
660 |
+
):
|
661 |
+
final = sample
|
662 |
+
return final["sample"]
|
663 |
+
|
664 |
+
def ddim_sample_loop_progressive(
|
665 |
+
self,
|
666 |
+
model,
|
667 |
+
shape,
|
668 |
+
noise=None,
|
669 |
+
clip_denoised=True,
|
670 |
+
denoised_fn=None,
|
671 |
+
cond_fn=None,
|
672 |
+
model_kwargs=None,
|
673 |
+
device=None,
|
674 |
+
progress=False,
|
675 |
+
eta=0.0,
|
676 |
+
):
|
677 |
+
"""
|
678 |
+
Use DDIM to sample from the model and yield intermediate samples from
|
679 |
+
each timestep of DDIM.
|
680 |
+
|
681 |
+
Same usage as p_sample_loop_progressive().
|
682 |
+
"""
|
683 |
+
if device is None:
|
684 |
+
device = next(model.parameters()).device
|
685 |
+
assert isinstance(shape, (tuple, list))
|
686 |
+
if noise is not None:
|
687 |
+
img = noise
|
688 |
+
else:
|
689 |
+
img = th.randn(*shape, device=device)
|
690 |
+
indices = list(range(self.num_timesteps))[::-1]
|
691 |
+
|
692 |
+
if progress:
|
693 |
+
# Lazy import so that we don't depend on tqdm.
|
694 |
+
from tqdm.auto import tqdm
|
695 |
+
|
696 |
+
indices = tqdm(indices)
|
697 |
+
|
698 |
+
for i in indices:
|
699 |
+
t = th.tensor([i] * shape[0], device=device)
|
700 |
+
with th.no_grad():
|
701 |
+
out = self.ddim_sample(
|
702 |
+
model,
|
703 |
+
img,
|
704 |
+
t,
|
705 |
+
clip_denoised=clip_denoised,
|
706 |
+
denoised_fn=denoised_fn,
|
707 |
+
cond_fn=cond_fn,
|
708 |
+
model_kwargs=model_kwargs,
|
709 |
+
eta=eta,
|
710 |
+
)
|
711 |
+
yield out
|
712 |
+
img = out["sample"]
|
713 |
+
|
714 |
+
def _vb_terms_bpd(
|
715 |
+
self, model, x_start, x_t, t, clip_denoised=True, model_kwargs=None
|
716 |
+
):
|
717 |
+
"""
|
718 |
+
Get a term for the variational lower-bound.
|
719 |
+
|
720 |
+
The resulting units are bits (rather than nats, as one might expect).
|
721 |
+
This allows for comparison to other papers.
|
722 |
+
|
723 |
+
:return: a dict with the following keys:
|
724 |
+
- 'output': a shape [N] tensor of NLLs or KLs.
|
725 |
+
- 'pred_xstart': the x_0 predictions.
|
726 |
+
"""
|
727 |
+
true_mean, _, true_log_variance_clipped = self.q_posterior_mean_variance(
|
728 |
+
x_start=x_start, x_t=x_t, t=t
|
729 |
+
)
|
730 |
+
out = self.p_mean_variance(
|
731 |
+
model, x_t, t, clip_denoised=clip_denoised, model_kwargs=model_kwargs
|
732 |
+
)
|
733 |
+
kl = normal_kl(
|
734 |
+
true_mean, true_log_variance_clipped, out["mean"], out["log_variance"]
|
735 |
+
)
|
736 |
+
if ("cond_img" in model_kwargs) and ("mask" in model_kwargs): #added by soumik
|
737 |
+
kl = kl*model_kwargs["mask"]
|
738 |
+
kl = mean_flat(kl) / np.log(2.0)
|
739 |
+
|
740 |
+
decoder_nll = -discretized_gaussian_log_likelihood(
|
741 |
+
x_start, means=out["mean"], log_scales=0.5 * out["log_variance"]
|
742 |
+
)
|
743 |
+
assert decoder_nll.shape == x_start.shape
|
744 |
+
if ("cond_img" in model_kwargs) and ("mask" in model_kwargs): #added by soumik
|
745 |
+
decoder_nll=decoder_nll*model_kwargs["mask"]
|
746 |
+
decoder_nll = mean_flat(decoder_nll) / np.log(2.0)
|
747 |
+
|
748 |
+
# At the first timestep return the decoder NLL,
|
749 |
+
# otherwise return KL(q(x_{t-1}|x_t,x_0) || p(x_{t-1}|x_t))
|
750 |
+
output = th.where((t == 0), decoder_nll, kl)
|
751 |
+
return {"output": output, "pred_xstart": out["pred_xstart"]}
|
752 |
+
|
753 |
+
|
754 |
+
def _prior_bpd(self, x_start):
|
755 |
+
"""
|
756 |
+
Get the prior KL term for the variational lower-bound, measured in
|
757 |
+
bits-per-dim.
|
758 |
+
|
759 |
+
This term can't be optimized, as it only depends on the encoder.
|
760 |
+
|
761 |
+
:param x_start: the [N x C x ...] tensor of inputs.
|
762 |
+
:return: a batch of [N] KL values (in bits), one per batch element.
|
763 |
+
"""
|
764 |
+
batch_size = x_start.shape[0]
|
765 |
+
t = th.tensor([self.num_timesteps - 1] * batch_size, device=x_start.device)
|
766 |
+
qt_mean, _, qt_log_variance = self.q_mean_variance(x_start, t)
|
767 |
+
kl_prior = normal_kl(
|
768 |
+
mean1=qt_mean, logvar1=qt_log_variance, mean2=0.0, logvar2=0.0
|
769 |
+
)
|
770 |
+
return mean_flat(kl_prior) / np.log(2.0)
|
771 |
+
|
772 |
+
def calc_bpd_loop(self, model, x_start, clip_denoised=True, model_kwargs=None):
|
773 |
+
"""
|
774 |
+
Compute the entire variational lower-bound, measured in bits-per-dim,
|
775 |
+
as well as other related quantities.
|
776 |
+
|
777 |
+
:param model: the model to evaluate loss on.
|
778 |
+
:param x_start: the [N x C x ...] tensor of inputs.
|
779 |
+
:param clip_denoised: if True, clip denoised samples.
|
780 |
+
:param model_kwargs: if not None, a dict of extra keyword arguments to
|
781 |
+
pass to the model. This can be used for conditioning.
|
782 |
+
|
783 |
+
:return: a dict containing the following keys:
|
784 |
+
- total_bpd: the total variational lower-bound, per batch element.
|
785 |
+
- prior_bpd: the prior term in the lower-bound.
|
786 |
+
- vb: an [N x T] tensor of terms in the lower-bound.
|
787 |
+
- xstart_mse: an [N x T] tensor of x_0 MSEs for each timestep.
|
788 |
+
- mse: an [N x T] tensor of epsilon MSEs for each timestep.
|
789 |
+
"""
|
790 |
+
device = x_start.device
|
791 |
+
batch_size = x_start.shape[0]
|
792 |
+
|
793 |
+
vb = []
|
794 |
+
xstart_mse = []
|
795 |
+
mse = []
|
796 |
+
for t in list(range(self.num_timesteps))[::-1]:
|
797 |
+
t_batch = th.tensor([t] * batch_size, device=device)
|
798 |
+
noise = th.randn_like(x_start)
|
799 |
+
x_t = self.q_sample(x_start=x_start, t=t_batch, noise=noise)
|
800 |
+
# Calculate VLB term at the current timestep
|
801 |
+
with th.no_grad():
|
802 |
+
out = self._vb_terms_bpd(
|
803 |
+
model,
|
804 |
+
x_start=x_start,
|
805 |
+
x_t=x_t,
|
806 |
+
t=t_batch,
|
807 |
+
clip_denoised=clip_denoised,
|
808 |
+
model_kwargs=model_kwargs,
|
809 |
+
)
|
810 |
+
vb.append(out["output"])
|
811 |
+
xstart_mse.append(mean_flat((out["pred_xstart"] - x_start) ** 2))
|
812 |
+
eps = self._predict_eps_from_xstart(x_t, t_batch, out["pred_xstart"])
|
813 |
+
mse.append(mean_flat((eps - noise) ** 2))
|
814 |
+
|
815 |
+
vb = th.stack(vb, dim=1)
|
816 |
+
xstart_mse = th.stack(xstart_mse, dim=1)
|
817 |
+
mse = th.stack(mse, dim=1)
|
818 |
+
|
819 |
+
prior_bpd = self._prior_bpd(x_start)
|
820 |
+
total_bpd = vb.sum(dim=1) + prior_bpd
|
821 |
+
return {
|
822 |
+
"total_bpd": total_bpd,
|
823 |
+
"prior_bpd": prior_bpd,
|
824 |
+
"vb": vb,
|
825 |
+
"xstart_mse": xstart_mse,
|
826 |
+
"mse": mse,
|
827 |
+
}
|
828 |
+
|
829 |
+
|
830 |
+
def _extract_into_tensor(arr, timesteps, broadcast_shape):
|
831 |
+
"""
|
832 |
+
Extract values from a 1-D numpy array for a batch of indices.
|
833 |
+
|
834 |
+
:param arr: the 1-D numpy array.
|
835 |
+
:param timesteps: a tensor of indices into the array to extract.
|
836 |
+
:param broadcast_shape: a larger shape of K dimensions with the batch
|
837 |
+
dimension equal to the length of timesteps.
|
838 |
+
:return: a tensor of shape [batch_size, 1, ...] where the shape has K dims.
|
839 |
+
"""
|
840 |
+
res = th.from_numpy(arr).to(device=timesteps.device)[timesteps].float()
|
841 |
+
while len(res.shape) < len(broadcast_shape):
|
842 |
+
res = res[..., None]
|
843 |
+
return res.expand(broadcast_shape)
|
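A minimal usage sketch of the class above (not part of the repository's files): the linear beta schedule values and the zero-output "epsilon model" below are illustrative stand-ins only; in this repo the real conditional model and schedule are constructed elsewhere.

# Minimal sketch of driving GaussianDiffusion (illustration only).
import numpy as np
import torch as th

from guided_diffusion.gaussian_diffusion import (
    GaussianDiffusion,
    LossType,
    ModelMeanType,
    ModelVarType,
)

betas = np.linspace(1e-4, 0.02, 1000)  # assumed linear schedule, T = 1000
diffusion = GaussianDiffusion(
    betas=betas,
    model_mean_type=ModelMeanType.EPSILON,    # the model predicts the noise
    model_var_type=ModelVarType.FIXED_SMALL,  # use the posterior variance
    loss_type=LossType.MSE,
)

def dummy_model(x, t, **kwargs):
    # Stand-in for the real network: an "epsilon" of the right shape.
    return th.zeros_like(x)

# Ancestral sampling; device is passed explicitly since dummy_model is not
# an nn.Module and has no parameters to infer a device from.
sample = diffusion.p_sample_loop(dummy_model, (1, 3, 64, 64), device="cpu")
# DDIM sampling uses the same interface (eta=0.0 gives the deterministic path):
sample_ddim = diffusion.ddim_sample_loop(
    dummy_model, (1, 3, 64, 64), device="cpu", eta=0.0
)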
guided-diffusion/guided_diffusion/image_datasets.py
ADDED
@@ -0,0 +1,167 @@
import math
import random

from PIL import Image
import blobfile as bf
from mpi4py import MPI
import numpy as np
from torch.utils.data import DataLoader, Dataset


def load_data(
    *,
    data_dir,
    batch_size,
    image_size,
    class_cond=False,
    deterministic=False,
    random_crop=False,
    random_flip=True,
):
    """
    For a dataset, create a generator over (images, kwargs) pairs.

    Each image is an NCHW float tensor, and the kwargs dict contains zero or
    more keys, each of which maps to a batched Tensor of its own.
    The kwargs dict can be used for class labels, in which case the key is "y"
    and the values are integer tensors of class labels.

    :param data_dir: a dataset directory.
    :param batch_size: the batch size of each returned pair.
    :param image_size: the size to which images are resized.
    :param class_cond: if True, include a "y" key in returned dicts for class
                       label. If classes are not available and this is true, an
                       exception will be raised.
    :param deterministic: if True, yield results in a deterministic order.
    :param random_crop: if True, randomly crop the images for augmentation.
    :param random_flip: if True, randomly flip the images for augmentation.
    """
    if not data_dir:
        raise ValueError("unspecified data directory")
    all_files = _list_image_files_recursively(data_dir)
    classes = None
    if class_cond:
        # Assume classes are the first part of the filename,
        # before an underscore.
        class_names = [bf.basename(path).split("_")[0] for path in all_files]
        sorted_classes = {x: i for i, x in enumerate(sorted(set(class_names)))}
        classes = [sorted_classes[x] for x in class_names]
    dataset = ImageDataset(
        image_size,
        all_files,
        classes=classes,
        shard=MPI.COMM_WORLD.Get_rank(),
        num_shards=MPI.COMM_WORLD.Get_size(),
        random_crop=random_crop,
        random_flip=random_flip,
    )
    if deterministic:
        loader = DataLoader(
            dataset, batch_size=batch_size, shuffle=False, num_workers=1, drop_last=True
        )
    else:
        loader = DataLoader(
            dataset, batch_size=batch_size, shuffle=True, num_workers=1, drop_last=True
        )
    while True:
        yield from loader


def _list_image_files_recursively(data_dir):
    results = []
    for entry in sorted(bf.listdir(data_dir)):
        full_path = bf.join(data_dir, entry)
        ext = entry.split(".")[-1]
        if "." in entry and ext.lower() in ["jpg", "jpeg", "png", "gif"]:
            results.append(full_path)
        elif bf.isdir(full_path):
            results.extend(_list_image_files_recursively(full_path))
    return results


class ImageDataset(Dataset):
    def __init__(
        self,
        resolution,
        image_paths,
        classes=None,
        shard=0,
        num_shards=1,
        random_crop=False,
        random_flip=True,
    ):
        super().__init__()
        self.resolution = resolution
        self.local_images = image_paths[shard:][::num_shards]
        self.local_classes = None if classes is None else classes[shard:][::num_shards]
        self.random_crop = random_crop
        self.random_flip = random_flip

    def __len__(self):
        return len(self.local_images)

    def __getitem__(self, idx):
        path = self.local_images[idx]
        with bf.BlobFile(path, "rb") as f:
            pil_image = Image.open(f)
            pil_image.load()
        pil_image = pil_image.convert("RGB")

        if self.random_crop:
            arr = random_crop_arr(pil_image, self.resolution)
        else:
            arr = center_crop_arr(pil_image, self.resolution)

        if self.random_flip and random.random() < 0.5:
            arr = arr[:, ::-1]

        arr = arr.astype(np.float32) / 127.5 - 1

        out_dict = {}
        if self.local_classes is not None:
            out_dict["y"] = np.array(self.local_classes[idx], dtype=np.int64)
        return np.transpose(arr, [2, 0, 1]), out_dict


def center_crop_arr(pil_image, image_size):
    # We are not on a new enough PIL to support the `reducing_gap`
    # argument, which uses BOX downsampling at powers of two first.
    # Thus, we do it by hand to improve downsample quality.
    while min(*pil_image.size) >= 2 * image_size:
        pil_image = pil_image.resize(
            tuple(x // 2 for x in pil_image.size), resample=Image.BOX
        )

    scale = image_size / min(*pil_image.size)
    pil_image = pil_image.resize(
        tuple(round(x * scale) for x in pil_image.size), resample=Image.BICUBIC
    )

    arr = np.array(pil_image)
    crop_y = (arr.shape[0] - image_size) // 2
    crop_x = (arr.shape[1] - image_size) // 2
    return arr[crop_y : crop_y + image_size, crop_x : crop_x + image_size]


def random_crop_arr(pil_image, image_size, min_crop_frac=0.8, max_crop_frac=1.0):
    min_smaller_dim_size = math.ceil(image_size / max_crop_frac)
    max_smaller_dim_size = math.ceil(image_size / min_crop_frac)
    smaller_dim_size = random.randrange(min_smaller_dim_size, max_smaller_dim_size + 1)

    # We are not on a new enough PIL to support the `reducing_gap`
    # argument, which uses BOX downsampling at powers of two first.
    # Thus, we do it by hand to improve downsample quality.
    while min(*pil_image.size) >= 2 * smaller_dim_size:
        pil_image = pil_image.resize(
            tuple(x // 2 for x in pil_image.size), resample=Image.BOX
        )

    scale = smaller_dim_size / min(*pil_image.size)
    pil_image = pil_image.resize(
        tuple(round(x * scale) for x in pil_image.size), resample=Image.BICUBIC
    )

    arr = np.array(pil_image)
    crop_y = random.randrange(arr.shape[0] - image_size + 1)
    crop_x = random.randrange(arr.shape[1] - image_size + 1)
    return arr[crop_y : crop_y + image_size, crop_x : crop_x + image_size]
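load_data never terminates: it wraps the DataLoader in an infinite generator, so training code pulls batches with next(). A minimal sketch (illustration only; the directory path is hypothetical, and the module requires mpi4py since sharding keys off the MPI rank):

from guided_diffusion.image_datasets import load_data

data = load_data(
    data_dir="/path/to/images",  # hypothetical folder of jpg/jpeg/png/gif files
    batch_size=8,
    image_size=256,
)
batch, cond = next(data)  # batch: float32 NCHW in [-1, 1], shape (8, 3, 256, 256)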
guided-diffusion/guided_diffusion/logger.py
ADDED
@@ -0,0 +1,491 @@
"""
Logger copied from OpenAI baselines to avoid extra RL-based dependencies:
https://github.com/openai/baselines/blob/ea25b9e8b234e6ee1bca43083f8f3cf974143998/baselines/logger.py
"""

import os
import sys
import shutil
import os.path as osp
import json
import time
import datetime
import tempfile
import warnings
from collections import defaultdict
from contextlib import contextmanager
from torch.utils.tensorboard import SummaryWriter

DEBUG = 10
INFO = 20
WARN = 30
ERROR = 40

DISABLED = 50


class KVWriter(object):
    def writekvs(self, kvs):
        raise NotImplementedError


class SeqWriter(object):
    def writeseq(self, seq):
        raise NotImplementedError


class HumanOutputFormat(KVWriter, SeqWriter):
    def __init__(self, filename_or_file):
        if isinstance(filename_or_file, str):
            self.file = open(filename_or_file, "wt")
            self.own_file = True
        else:
            assert hasattr(filename_or_file, "read"), (
                "expected file or str, got %s" % filename_or_file
            )
            self.file = filename_or_file
            self.own_file = False

    def writekvs(self, kvs):
        # Create strings for printing
        key2str = {}
        for (key, val) in sorted(kvs.items()):
            if hasattr(val, "__float__"):
                valstr = "%-8.3g" % val
            else:
                valstr = str(val)
            key2str[self._truncate(key)] = self._truncate(valstr)

        # Find max widths
        if len(key2str) == 0:
            print("WARNING: tried to write empty key-value dict")
            return
        else:
            keywidth = max(map(len, key2str.keys()))
            valwidth = max(map(len, key2str.values()))

        # Write out the data
        dashes = "-" * (keywidth + valwidth + 7)
        lines = [dashes]
        for (key, val) in sorted(key2str.items(), key=lambda kv: kv[0].lower()):
            lines.append(
                "| %s%s | %s%s |"
                % (key, " " * (keywidth - len(key)), val, " " * (valwidth - len(val)))
            )
        lines.append(dashes)
        self.file.write("\n".join(lines) + "\n")

        # Flush the output to the file
        self.file.flush()

    def _truncate(self, s):
        maxlen = 30
        return s[: maxlen - 3] + "..." if len(s) > maxlen else s

    def writeseq(self, seq):
        seq = list(seq)
        for (i, elem) in enumerate(seq):
            self.file.write(elem)
            if i < len(seq) - 1:  # add space unless this is the last one
                self.file.write(" ")
        self.file.write("\n")
        self.file.flush()

    def close(self):
        if self.own_file:
            self.file.close()


class JSONOutputFormat(KVWriter):
    def __init__(self, filename):
        self.file = open(filename, "wt")

    def writekvs(self, kvs):
        for k, v in sorted(kvs.items()):
            if hasattr(v, "dtype"):
                kvs[k] = float(v)
        self.file.write(json.dumps(kvs) + "\n")
        self.file.flush()

    def close(self):
        self.file.close()


class CSVOutputFormat(KVWriter):
    def __init__(self, filename):
        self.file = open(filename, "w+t")
        self.keys = []
        self.sep = ","

    def writekvs(self, kvs):
        # Add our current row to the history
        extra_keys = list(kvs.keys() - self.keys)
        extra_keys.sort()
        if extra_keys:
            self.keys.extend(extra_keys)
            self.file.seek(0)
            lines = self.file.readlines()
            self.file.seek(0)
            for (i, k) in enumerate(self.keys):
                if i > 0:
                    self.file.write(",")
                self.file.write(k)
            self.file.write("\n")
            for line in lines[1:]:
                self.file.write(line[:-1])
                self.file.write(self.sep * len(extra_keys))
                self.file.write("\n")
        for (i, k) in enumerate(self.keys):
            if i > 0:
                self.file.write(",")
            v = kvs.get(k)
            if v is not None:
                self.file.write(str(v))
        self.file.write("\n")
        self.file.flush()

    def close(self):
        self.file.close()


class TensorBoardOutputFormat(KVWriter):
    """
    Dumps key/value pairs into TensorBoard's numeric format.
    """

    def __init__(self, dir):
        os.makedirs(dir, exist_ok=True)
        self.dir = dir
        self.step = -1
        self.writer = SummaryWriter(self.dir)

    def writekvs(self, kvs):
        self.step = int(kvs["step"])
        for k, v in sorted(kvs.items()):
            self.writer.add_scalar(k, float(v), self.step)
        self.writer.flush()

    def writeimage(self, key, image_tensor):
        self.writer.add_image(key, image_tensor, self.step)
        self.writer.flush()

    def close(self):
        if self.writer:
            self.writer.close()
            self.writer = None


def make_output_format(format, ev_dir, log_suffix=""):
    os.makedirs(ev_dir, exist_ok=True)
    if format == "stdout":
        return HumanOutputFormat(sys.stdout)
    elif format == "log":
        return HumanOutputFormat(osp.join(ev_dir, "log%s.txt" % log_suffix))
    elif format == "json":
        return JSONOutputFormat(osp.join(ev_dir, "progress%s.json" % log_suffix))
    elif format == "csv":
        return CSVOutputFormat(osp.join(ev_dir, "progress%s.csv" % log_suffix))
    elif format == "tensorboard":
        return TensorBoardOutputFormat(osp.join(ev_dir, "tb%s" % log_suffix))
    else:
        raise ValueError("Unknown format specified: %s" % (format,))


# ================================================================
# API
# ================================================================

def logimage(key, image_tensor):
    """
    Log one image to tensorboard
    """
    for fmt in get_current().output_formats:
        if isinstance(fmt, TensorBoardOutputFormat):
            tb_logger = fmt
            tb_logger.writeimage(key, image_tensor)


def logkv(key, val):
    """
    Log a value of some diagnostic
    Call this once for each diagnostic quantity, each iteration
    If called many times, last value will be used.
    """
    get_current().logkv(key, val)


def logkv_mean(key, val):
    """
    The same as logkv(), but if called many times, values averaged.
    """
    get_current().logkv_mean(key, val)


def logkvs(d):
    """
    Log a dictionary of key-value pairs
    """
    for (k, v) in d.items():
        logkv(k, v)


def dumpkvs():
    """
    Write all of the diagnostics from the current iteration
    """
    return get_current().dumpkvs()


def getkvs():
    return get_current().name2val


def log(*args, level=INFO):
    """
    Write the sequence of args, with no separators, to the console and output files (if you've configured an output file).
    """
    get_current().log(*args, level=level)


def debug(*args):
    log(*args, level=DEBUG)


def info(*args):
    log(*args, level=INFO)


def warn(*args):
    log(*args, level=WARN)


def error(*args):
    log(*args, level=ERROR)


def set_level(level):
    """
    Set logging threshold on current logger.
    """
    get_current().set_level(level)


def set_comm(comm):
    get_current().set_comm(comm)


def get_dir():
    """
    Get directory that log files are being written to.
    will be None if there is no output directory (i.e., if you didn't call start)
    """
    return get_current().get_dir()


record_tabular = logkv
dump_tabular = dumpkvs


@contextmanager
def profile_kv(scopename):
    logkey = "wait_" + scopename
    tstart = time.time()
    try:
        yield
    finally:
        get_current().name2val[logkey] += time.time() - tstart


def profile(n):
    """
    Usage:
    @profile("my_func")
    def my_func(): code
    """

    def decorator_with_name(func):
        def func_wrapper(*args, **kwargs):
            with profile_kv(n):
                return func(*args, **kwargs)

        return func_wrapper

    return decorator_with_name


# ================================================================
# Backend
# ================================================================


def get_current():
    if Logger.CURRENT is None:
        _configure_default_logger()

    return Logger.CURRENT


class Logger(object):
    DEFAULT = None  # A logger with no output files. (See right below class definition)
    # So that you can still log to the terminal without setting up any output files
    CURRENT = None  # Current logger being used by the free functions above

    def __init__(self, dir, output_formats, comm=None):
        self.name2val = defaultdict(float)  # values this iteration
        self.name2cnt = defaultdict(int)
        self.level = INFO
        self.dir = dir
        self.output_formats = output_formats
        self.comm = comm

    # Logging API, forwarded
    # ----------------------------------------
    def logkv(self, key, val):
        self.name2val[key] = val

    def logkv_mean(self, key, val):
        oldval, cnt = self.name2val[key], self.name2cnt[key]
        self.name2val[key] = oldval * cnt / (cnt + 1) + val / (cnt + 1)
        self.name2cnt[key] = cnt + 1

    def dumpkvs(self):
        if self.comm is None:
            d = self.name2val
        else:
            d = mpi_weighted_mean(
                self.comm,
                {
                    name: (val, self.name2cnt.get(name, 1))
                    for (name, val) in self.name2val.items()
                },
            )
            if self.comm.rank != 0:
                d["dummy"] = 1  # so we don't get a warning about empty dict
        out = d.copy()  # Return the dict for unit testing purposes
        for fmt in self.output_formats:
            if isinstance(fmt, KVWriter):
                fmt.writekvs(d)
        self.name2val.clear()
        self.name2cnt.clear()
        return out

    def log(self, *args, level=INFO):
        if self.level <= level:
            self._do_log(args)

    # Configuration
    # ----------------------------------------
    def set_level(self, level):
        self.level = level

    def set_comm(self, comm):
        self.comm = comm

    def get_dir(self):
        return self.dir

    def close(self):
        for fmt in self.output_formats:
            fmt.close()

    # Misc
    # ----------------------------------------
    def _do_log(self, args):
        for fmt in self.output_formats:
            if isinstance(fmt, SeqWriter):
                fmt.writeseq(map(str, args))


def get_rank_without_mpi_import():
    # check environment variables here instead of importing mpi4py
    # to avoid calling MPI_Init() when this module is imported
    for varname in ["PMI_RANK", "OMPI_COMM_WORLD_RANK"]:
        if varname in os.environ:
            return int(os.environ[varname])
    return 0


def mpi_weighted_mean(comm, local_name2valcount):
    """
    Copied from: https://github.com/openai/baselines/blob/ea25b9e8b234e6ee1bca43083f8f3cf974143998/baselines/common/mpi_util.py#L110
    Perform a weighted average over dicts that are each on a different node
    Input: local_name2valcount: dict mapping key -> (value, count)
    Returns: key -> mean
    """
    all_name2valcount = comm.gather(local_name2valcount)
    if comm.rank == 0:
        name2sum = defaultdict(float)
        name2count = defaultdict(float)
        for n2vc in all_name2valcount:
            for (name, (val, count)) in n2vc.items():
                try:
                    val = float(val)
                except ValueError:
                    if comm.rank == 0:
                        warnings.warn(
                            "WARNING: tried to compute mean on non-float {}={}".format(
                                name, val
                            )
                        )
                else:
                    name2sum[name] += val * count
                    name2count[name] += count
        return {name: name2sum[name] / name2count[name] for name in name2sum}
    else:
        return {}


def configure(dir=None, format_strs=None, comm=None, log_suffix=""):
    """
    If comm is provided, average all numerical stats across that comm
    """
    if dir is None:
        dir = os.getenv("OPENAI_LOGDIR")
    if dir is None:
        dir = osp.join(
            tempfile.gettempdir(),
            datetime.datetime.now().strftime("openai-%Y-%m-%d-%H-%M-%S-%f"),
        )
    assert isinstance(dir, str)
    dir = os.path.expanduser(dir)
    os.makedirs(os.path.expanduser(dir), exist_ok=True)

    rank = get_rank_without_mpi_import()
    if rank > 0:
        log_suffix = log_suffix + "-rank%03i" % rank

    if format_strs is None:
        if rank == 0:
            format_strs = os.getenv("OPENAI_LOG_FORMAT", "stdout,log,csv,tensorboard").split(",")
        else:
            format_strs = os.getenv("OPENAI_LOG_FORMAT_MPI", "log").split(",")
    format_strs = filter(None, format_strs)
    output_formats = [make_output_format(f, dir, log_suffix) for f in format_strs]

    Logger.CURRENT = Logger(dir=dir, output_formats=output_formats, comm=comm)
    if output_formats:
        log("Logging to %s" % dir)


def _configure_default_logger():
    configure()
    Logger.DEFAULT = Logger.CURRENT


def reset():
    if Logger.CURRENT is not Logger.DEFAULT:
        Logger.CURRENT.close()
        Logger.CURRENT = Logger.DEFAULT
        log("Reset logger")


@contextmanager
def scoped_configure(dir=None, format_strs=None, comm=None):
    prevlogger = Logger.CURRENT
    configure(dir=dir, format_strs=format_strs, comm=comm)
    try:
        yield
    finally:
        Logger.CURRENT.close()
        Logger.CURRENT = prevlogger
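A minimal sketch of the logger's intended call pattern (illustration only; the output directory is hypothetical). Note that the default rank-0 formats include tensorboard, whose writer reads the "step" key from every dump, so a step value should be logged each iteration:

from guided_diffusion import logger

logger.configure(dir="logs/demo")  # hypothetical dir; falls back to $OPENAI_LOGDIR
logger.log("starting run")         # free-form text to stdout and log.txt
for step in range(3):
    logger.logkv("step", step)                   # last value wins
    logger.logkv_mean("loss", 1.0 / (step + 1))  # running mean within the iteration
    logger.dumpkvs()  # flush one row to every configured format, then clear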
guided-diffusion/guided_diffusion/losses.py
ADDED
@@ -0,0 +1,77 @@
"""
Helpers for various likelihood-based losses. These are ported from the original
Ho et al. diffusion models codebase:
https://github.com/hojonathanho/diffusion/blob/1e0dceb3b3495bbe19116a5e1b3596cd0706c543/diffusion_tf/utils.py
"""

import numpy as np

import torch as th


def normal_kl(mean1, logvar1, mean2, logvar2):
    """
    Compute the KL divergence between two gaussians.

    Shapes are automatically broadcasted, so batches can be compared to
    scalars, among other use cases.
    """
    tensor = None
    for obj in (mean1, logvar1, mean2, logvar2):
        if isinstance(obj, th.Tensor):
            tensor = obj
            break
    assert tensor is not None, "at least one argument must be a Tensor"

    # Force variances to be Tensors. Broadcasting helps convert scalars to
    # Tensors, but it does not work for th.exp().
    logvar1, logvar2 = [
        x if isinstance(x, th.Tensor) else th.tensor(x).to(tensor)
        for x in (logvar1, logvar2)
    ]

    return 0.5 * (
        -1.0
        + logvar2
        - logvar1
        + th.exp(logvar1 - logvar2)
        + ((mean1 - mean2) ** 2) * th.exp(-logvar2)
    )


def approx_standard_normal_cdf(x):
    """
    A fast approximation of the cumulative distribution function of the
    standard normal.
    """
    return 0.5 * (1.0 + th.tanh(np.sqrt(2.0 / np.pi) * (x + 0.044715 * th.pow(x, 3))))


def discretized_gaussian_log_likelihood(x, *, means, log_scales):
    """
    Compute the log-likelihood of a Gaussian distribution discretizing to a
    given image.

    :param x: the target images. It is assumed that this was uint8 values,
              rescaled to the range [-1, 1].
    :param means: the Gaussian mean Tensor.
    :param log_scales: the Gaussian log stddev Tensor.
    :return: a tensor like x of log probabilities (in nats).
    """
    assert x.shape == means.shape == log_scales.shape
    centered_x = x - means
    inv_stdv = th.exp(-log_scales)
    plus_in = inv_stdv * (centered_x + 1.0 / 255.0)
    cdf_plus = approx_standard_normal_cdf(plus_in)
    min_in = inv_stdv * (centered_x - 1.0 / 255.0)
    cdf_min = approx_standard_normal_cdf(min_in)
    log_cdf_plus = th.log(cdf_plus.clamp(min=1e-12))
    log_one_minus_cdf_min = th.log((1.0 - cdf_min).clamp(min=1e-12))
    cdf_delta = cdf_plus - cdf_min
    log_probs = th.where(
        x < -0.999,
        log_cdf_plus,
        th.where(x > 0.999, log_one_minus_cdf_min, th.log(cdf_delta.clamp(min=1e-12))),
    )
    assert log_probs.shape == x.shape
    return log_probs
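normal_kl works in log-variance form; in the usual parameters it computes KL(N(mu1, s1^2) || N(mu2, s2^2)) = 0.5 * (log(s2^2/s1^2) + (s1^2 + (mu1 - mu2)^2) / s2^2 - 1). A quick sanity check of that closed form (illustration only):

import torch as th
from guided_diffusion.losses import normal_kl

zero = th.zeros(4)
print(normal_kl(zero, zero, zero, zero))        # identical Gaussians -> all zeros
print(normal_kl(zero, zero, zero + 1.0, zero))  # unit mean shift -> all 0.5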
guided-diffusion/guided_diffusion/lpips.py
ADDED
@@ -0,0 +1,20 @@
from lpips_pytorch import LPIPS
import torch


class LPIPS1(LPIPS):
    r"""
    Overriding LPIPS to return the loss without reducing over the batch.
    Arguments:
        net_type (str): the network type to compare the features:
                        'alex' | 'squeeze' | 'vgg'. Default: 'alex'.
        version (str): the version of LPIPS. Default: 0.1.
    """
    def __init__(self, net_type: str = 'alex', version: str = '0.1'):
        # Pass the arguments through instead of hard-coding them.
        super(LPIPS1, self).__init__(net_type=net_type, version=version)

    def forward(self, x: torch.Tensor, y: torch.Tensor):
        feat_x, feat_y = self.net(x), self.net(y)
        diff = [(fx - fy) ** 2 for fx, fy in zip(feat_x, feat_y)]
        res = [l(d).mean((2, 3), True) for d, l in zip(diff, self.lin)]
        # return torch.sum(torch.cat(res, 0), 0, True)
        return torch.sum(torch.cat(res, 1), 1, True)
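A sketch of how the unreduced variant is used (illustration only; lpips_pytorch fetches its pretrained weights on first use). Where the stock LPIPS collapses the batch, LPIPS1 keeps one distance per image:

import torch
from guided_diffusion.lpips import LPIPS1

lpips = LPIPS1()                       # AlexNet features, LPIPS version 0.1
x = torch.rand(4, 3, 64, 64) * 2 - 1   # images scaled to [-1, 1]
y = torch.rand(4, 3, 64, 64) * 2 - 1
d = lpips(x, y)                        # shape (4, 1, 1, 1): one value per image
print(d.view(-1))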
guided-diffusion/guided_diffusion/nn.py
ADDED
@@ -0,0 +1,170 @@
"""
Various utilities for neural networks.
"""

import math

import torch as th
import torch.nn as nn


# PyTorch 1.7 has SiLU, but we support PyTorch 1.5.
class SiLU(nn.Module):
    def forward(self, x):
        return x * th.sigmoid(x)


class GroupNorm32(nn.GroupNorm):
    def forward(self, x):
        return super().forward(x.float()).type(x.dtype)


def conv_nd(dims, *args, **kwargs):
    """
    Create a 1D, 2D, or 3D convolution module.
    """
    if dims == 1:
        return nn.Conv1d(*args, **kwargs)
    elif dims == 2:
        return nn.Conv2d(*args, **kwargs)
    elif dims == 3:
        return nn.Conv3d(*args, **kwargs)
    raise ValueError(f"unsupported dimensions: {dims}")


def linear(*args, **kwargs):
    """
    Create a linear module.
    """
    return nn.Linear(*args, **kwargs)


def avg_pool_nd(dims, *args, **kwargs):
    """
    Create a 1D, 2D, or 3D average pooling module.
    """
    if dims == 1:
        return nn.AvgPool1d(*args, **kwargs)
    elif dims == 2:
        return nn.AvgPool2d(*args, **kwargs)
    elif dims == 3:
        return nn.AvgPool3d(*args, **kwargs)
    raise ValueError(f"unsupported dimensions: {dims}")


def update_ema(target_params, source_params, rate=0.99):
    """
    Update target parameters to be closer to those of source parameters using
    an exponential moving average.

    :param target_params: the target parameter sequence.
    :param source_params: the source parameter sequence.
    :param rate: the EMA rate (closer to 1 means slower).
    """
    for targ, src in zip(target_params, source_params):
        targ.detach().mul_(rate).add_(src, alpha=1 - rate)


def zero_module(module):
    """
    Zero out the parameters of a module and return it.
    """
    for p in module.parameters():
        p.detach().zero_()
    return module


def scale_module(module, scale):
    """
    Scale the parameters of a module and return it.
    """
    for p in module.parameters():
        p.detach().mul_(scale)
    return module


def mean_flat(tensor):
    """
    Take the mean over all non-batch dimensions.
    """
    return tensor.mean(dim=list(range(1, len(tensor.shape))))


def normalization(channels):
    """
    Make a standard normalization layer.

    :param channels: number of input channels.
    :return: an nn.Module for normalization.
    """
    return GroupNorm32(32, channels)


def timestep_embedding(timesteps, dim, max_period=10000):
    """
    Create sinusoidal timestep embeddings.

    :param timesteps: a 1-D Tensor of N indices, one per batch element.
                      These may be fractional.
    :param dim: the dimension of the output.
    :param max_period: controls the minimum frequency of the embeddings.
    :return: an [N x dim] Tensor of positional embeddings.
    """
    half = dim // 2
    freqs = th.exp(
        -math.log(max_period) * th.arange(start=0, end=half, dtype=th.float32) / half
    ).to(device=timesteps.device)
    args = timesteps[:, None].float() * freqs[None]
    embedding = th.cat([th.cos(args), th.sin(args)], dim=-1)
    if dim % 2:
        embedding = th.cat([embedding, th.zeros_like(embedding[:, :1])], dim=-1)
    return embedding


def checkpoint(func, inputs, params, flag):
    """
    Evaluate a function without caching intermediate activations, allowing for
    reduced memory at the expense of extra compute in the backward pass.

    :param func: the function to evaluate.
    :param inputs: the argument sequence to pass to `func`.
|
131 |
+
:param params: a sequence of parameters `func` depends on but does not
|
132 |
+
explicitly take as arguments.
|
133 |
+
:param flag: if False, disable gradient checkpointing.
|
134 |
+
"""
|
135 |
+
if flag:
|
136 |
+
args = tuple(inputs) + tuple(params)
|
137 |
+
return CheckpointFunction.apply(func, len(inputs), *args)
|
138 |
+
else:
|
139 |
+
return func(*inputs)
|
140 |
+
|
141 |
+
|
142 |
+
class CheckpointFunction(th.autograd.Function):
|
143 |
+
@staticmethod
|
144 |
+
def forward(ctx, run_function, length, *args):
|
145 |
+
ctx.run_function = run_function
|
146 |
+
ctx.input_tensors = list(args[:length])
|
147 |
+
ctx.input_params = list(args[length:])
|
148 |
+
with th.no_grad():
|
149 |
+
output_tensors = ctx.run_function(*ctx.input_tensors)
|
150 |
+
return output_tensors
|
151 |
+
|
152 |
+
@staticmethod
|
153 |
+
def backward(ctx, *output_grads):
|
154 |
+
ctx.input_tensors = [x.detach().requires_grad_(True) for x in ctx.input_tensors]
|
155 |
+
with th.enable_grad():
|
156 |
+
# Fixes a bug where the first op in run_function modifies the
|
157 |
+
# Tensor storage in place, which is not allowed for detach()'d
|
158 |
+
# Tensors.
|
159 |
+
shallow_copies = [x.view_as(x) for x in ctx.input_tensors]
|
160 |
+
output_tensors = ctx.run_function(*shallow_copies)
|
161 |
+
input_grads = th.autograd.grad(
|
162 |
+
output_tensors,
|
163 |
+
ctx.input_tensors + ctx.input_params,
|
164 |
+
output_grads,
|
165 |
+
allow_unused=True,
|
166 |
+
)
|
167 |
+
del ctx.input_tensors
|
168 |
+
del ctx.input_params
|
169 |
+
del output_tensors
|
170 |
+
return (None, None) + input_grads
|
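A short sketch exercising the main helpers above, assuming the module is importable as guided_diffusion.nn; the layer and shapes are illustrative:

    import torch as th
    from guided_diffusion.nn import timestep_embedding, checkpoint, update_ema

    t = th.randint(0, 1000, (8,))         # one diffusion step per batch element
    emb = timestep_embedding(t, dim=128)  # [8, 128] sinusoidal embeddings

    layer = th.nn.Linear(128, 128)
    # With flag=True, activations are recomputed during backward instead of cached.
    out = checkpoint(layer, (emb,), list(layer.parameters()), True)

    # Maintain an EMA copy of the weights, as diffusion training loops commonly do.
    ema_params = [p.clone().detach() for p in layer.parameters()]
    update_ema(ema_params, layer.parameters(), rate=0.999)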